FedLibSVMDataset#

class fl_sim.data_processing.FedLibSVMDataset(dataset_name: str, num_clients: int, iid: bool = True, criterion_name: str = 'svm', seed: int = 0)[source]#

Bases: FedDataset

property candidate_models: Dict[str, Module]#

A set of candidate models.

property criteria_mapping: Dict[str, Module]#

Mapping from criterion name to criterion.

property doi: List[str]#

DOI(s) related to the dataset.

download_if_needed() → None[source]#

Download data if needed.

evaluate(probs: Tensor, truths: Tensor) → Dict[str, float][source]#

Evaluation using predictions and ground truth.

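Parameters:
  • probs (Tensor) – Predicted probabilities (the model predictions).

  • truths (Tensor) – Ground-truth labels.

Returns: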

Evaluation results.

Return type:

Dict[str, float]

extra_repr_keys() → List[str][source]#

Extra keys for __repr__() and __str__().

get_dataloader(train_bs: int | None = None, test_bs: int | None = None, client_idx: int | None = None) → Tuple[DataLoader, DataLoader][source]#

Get the local dataloader for the client with index client_idx, or the global dataloader if client_idx is None.

Parameters:
  • train_bs (int, optional) – Batch size for training dataloader. If None, use default batch size.

  • test_bs (int, optional) – Batch size for testing dataloader. If None, use default batch size.

  • client_idx (int, optional) – Index of the client to get dataloader. If None, get the dataloader containing all data. Usually used for centralized training.

Returns:

A tuple of (training dataloader, testing dataloader).

Return type:

Tuple[DataLoader, DataLoader]

classmethod list_all_libsvm_datasets() → DataFrame[source]#

List all LibSVM datasets.

classmethod list_datasets() → List[str][source]#

List all available LibSVM datasets.

load_partition_data(batch_size: int | None = None) → tuple[source]#

Partition data into all local clients.

Parameters:

batch_size (int, optional) – Batch size for dataloader. If None, use default batch size.

Returns:

  • train_clients_num: int

    Number of training clients.

  • train_data_num: int

    Number of training data.

  • test_data_num: int

    Number of testing data.

  • train_data_global: torch.utils.data.DataLoader

    Global training dataloader.

  • test_data_global: torch.utils.data.DataLoader

    Global testing dataloader.

  • data_local_num_dict: dict

    Number of local training data for each client.

  • train_data_local_dict: dict

    Local training dataloader for each client.

  • test_data_local_dict: dict

    Local testing dataloader for each client.

  • n_class: int

    Number of classes.

Return type:

tuple

load_partition_data_distributed(process_id: int, batch_size: int | None = None) → tuple[source]#

Get the local dataloader for the client with index process_id, or the global dataloader.

Parameters:
  • process_id (int) – Index of the client to get dataloader. If None, get the dataloader containing all data, usually used for centralized training.

  • batch_size (int, optional) – Batch size for dataloader. If None, use default batch size.

Returns:

Return type:

tuple

property num_classes: int#

Number of classes.

property num_features: int#

Number of features.

reset_seed(seed: int) → None[source]#

Reset the seed and re-partition the data.

Parameters:

seed (int) – Random seed for data partitioning.

Return type:

None

property url: str#

URL for downloading the dataset.