Skip to content

ModelNet40 Data Loaders¤

Some utility factories are provided to easily instantiate dataloaders.

Core ModelNet40 utilities¤

download_modelnet40(dir: Path | str | None = None) -> None ¤

Download the full ModelNet40 dataset archive as two .h5 files (train & test) (~ 1 GB).

Parameters:

  • dir (Path | str | None, default: None ) –

    Where to store the downloaded files. Defaults to None.

Source code in src/polar/train/data/modelnet.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def download_modelnet40(dir: Path | str | None = None) -> None:
    """ Download the full ModelNet40 dataset archive as two `.h5` files (train & test) (~ 1 GB).

    The archive is fetched with `wget` into a scratch folder created inside `dir`, unpacked there, and the
    extracted `modelnet40_ply_hdf5_2048` folder is then moved into `dir`. Nothing is downloaded when that
    folder already exists.

    Args:
        dir (Path | str | None, optional): Where to store the downloaded files. If `None`, a `modelnet40`
            folder next to this module is used. Defaults to None.

    Raises:
        FileNotFoundError: If the `wget` executable cannot be found.
        subprocess.CalledProcessError: If `wget` exits with a non-zero status.
    """
    # Function-scope imports: these modules are only needed by this helper.
    import subprocess
    import tempfile
    import zipfile

    # Keep the URL as a plain string: wrapping it in `Path` collapses the `//` that follows the scheme.
    url = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
    archive_name = url.rsplit('/', 1)[-1]
    folder_name = Path(archive_name).stem

    if dir is None:
        dir = Path(__file__).resolve().parent / 'modelnet40'
    dir = Path(dir).resolve()
    dir.mkdir(parents=True, exist_ok=True)

    target = dir / folder_name
    if target.exists():
        return

    # Work in a scratch folder inside `dir` so that the final move is a plain rename on the same filesystem
    # and no partially extracted data is left behind under `target` on failure.
    with tempfile.TemporaryDirectory(dir=dir) as scratch:
        scratch_dir = Path(scratch)
        archive = scratch_dir / archive_name
        # Argument list (no shell): paths containing spaces or shell metacharacters are passed verbatim.
        subprocess.run(['wget', '--no-check-certificate', '-O', str(archive), url], check=True)
        with zipfile.ZipFile(archive) as zipped:
            zipped.extractall(scratch_dir)
        (scratch_dir / folder_name).rename(target)

ModelNet(rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None, exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False) ¤

Bases: Dataset

Parameters:

  • rootdir (str, default: 'modelnet' ) –

    Path to the directory containing the .h5 files. Defaults to 'modelnet'.

  • split (str, default: 'train' ) –

    'train' or 'test'. Defaults to 'train'.

  • classes (Sequence[str] | None, default: None ) –

    Shape categories to use. If None, load all categories. See ModelNet.all_classes. Defaults to None.

  • exclude_classes (Sequence[str] | None, default: None ) –

    Shape categories to exclude from the dataset. Defaults to None.

  • samples_per_class (int | None, default: None ) –

    Number of point clouds per category to load. Defaults to None.

  • return_labels (bool, default: False ) –

    If True, return the class index alongside the point cloud. Defaults to False.

Source code in src/polar/train/data/modelnet.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def __init__(
    self,
    rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None,
    exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False
) -> None:
    """ Load the point clouds and labels selected by the arguments and remember the split and label flag.

    Args:
        rootdir (str, optional): Path to the directory containing the `.h5` files. Defaults to 'modelnet'.
        split (str, optional): 'train' or 'test'. Defaults to 'train'.
        classes (Sequence[str] | None, optional):
            Shape categories to use. If `None`, load all categories. See `ModelNet.all_classes`. Defaults to `None`.
        exclude_classes (Sequence[str] | None, optional):
            Shape categories to exclude from the dataset. Defaults to `None`.
        samples_per_class (int | None, optional): Number of point clouds per category to load. Defaults to `None`.
        return_labels (bool, optional):
            If `True`, return the class index alongside the point cloud. Defaults to `False`.
    """
    super().__init__()
    # The selection itself is delegated to `load_and_select_samples`, which yields (points, labels).
    points, labels = load_and_select_samples(rootdir, split, classes, exclude_classes, samples_per_class)
    self.points = points
    self.labels = labels
    self.split = split
    self.return_labels = return_labels
all_classes: tuple[str, ...] property ¤

Tuple of 40 strings, one for each class.


Base Loaders¤

get_modelnet_dataloader(batch_size: int, num_workers: int, rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None, exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False) -> DataLoader[ModelNet] ¤

Instantiate a basic ModelNet Pytorch dataloader. Each batch is composed of fixed length point clouds (batch_size, num_points, 3).

Parameters:

  • batch_size (int) –

    Batch size.

  • num_workers (int) –

    Parallel loading with num_workers processes.

  • rootdir (str, default: 'modelnet' ) –

    Path to the directory containing the .h5 files. Defaults to 'modelnet'.

  • split (str, default: 'train' ) –

    'train' or 'test'. Defaults to 'train'.

  • classes (Sequence[str] | None, default: None ) –

    Shape categories to use. If None, load all categories. See ModelNet.all_classes. Defaults to None.

  • exclude_classes (Sequence[str] | None, default: None ) –

    Shape categories to exclude from the dataset. Defaults to None.

  • samples_per_class (int | None, default: None ) –

    Number of point clouds per category to load. Defaults to None.

  • return_labels (bool, default: False ) –

    If True, return the class index alongside the point cloud. Defaults to False.

Returns:

  • DataLoader[ModelNet]

    Standard Pytorch DataLoader.

Source code in src/polar/train/data/factory.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def get_modelnet_dataloader(
    batch_size: int, num_workers: int,
    rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None,
    exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False
) -> DataLoader[ModelNet]:
    """ Build a `ModelNet` dataset and wrap it in a PyTorch dataloader. Each batch is composed of fixed length
        point clouds `(batch_size, num_points, 3)`.

    The loader is always created with `pin_memory=True`, `shuffle=True` and `drop_last=False`.

    Args:
        batch_size (int): Batch size.
        num_workers (int): Parallel loading with `num_workers` processes.
        rootdir (str, optional): Path to the directory containing the `.h5` files. Defaults to 'modelnet'.
        split (str, optional): 'train' or 'test'. Defaults to 'train'.
        classes (Sequence[str] | None, optional):
            Shape categories to use. If `None`, load all categories. See `ModelNet.all_classes`. Defaults to `None`.
        exclude_classes (Sequence[str] | None, optional):
            Shape categories to exclude from the dataset. Defaults to `None`.
        samples_per_class (int | None, optional): Number of point clouds per category to load. Defaults to `None`.
        return_labels (bool, optional):
            If `True`, return the class index alongside the point cloud. Defaults to `False`.

    Returns:
        Standard PyTorch DataLoader.
    """
    modelnet = ModelNet(
        rootdir=rootdir, split=split, classes=classes, exclude_classes=exclude_classes,
        samples_per_class=samples_per_class, return_labels=return_labels,
    )
    return DataLoader(  # type: ignore
        modelnet, batch_size=batch_size, num_workers=num_workers, pin_memory=True, shuffle=True, drop_last=False,
    )

get_modelnet_dataloaders(batch_size: int, num_workers: int, rootdir: str = 'modelnet', classes: Sequence[str] | None = None, exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False) -> tuple[DataLoader[ModelNet], DataLoader[ModelNet]] ¤

Same as src.polar.train.data.factory.get_modelnet_dataloader, but returns a tuple of train and test dataloaders.

Returns:

  • tuple[DataLoader[ModelNet], DataLoader[ModelNet]]

    Train loader, Test loader.

Source code in src/polar/train/data/factory.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def get_modelnet_dataloaders(
    batch_size: int, num_workers: int,
    rootdir: str = 'modelnet', classes: Sequence[str] | None = None,
    exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False
) -> tuple[DataLoader[ModelNet], DataLoader[ModelNet]]:
    """ Same as [src.polar.train.data.factory.get_modelnet_dataloader][], but returns a tuple of train and test
        dataloaders.

    Both loaders are built from the same arguments; only the `split` ('train', then 'test') differs.

    Returns:
        Train loader, Test loader.
    """
    # Forward the arguments explicitly rather than through `locals()`, so that adding a local variable
    # to this function can never leak an unexpected keyword into `get_modelnet_dataloader`.
    shared = dict(
        batch_size=batch_size, num_workers=num_workers, rootdir=rootdir, classes=classes,
        exclude_classes=exclude_classes, samples_per_class=samples_per_class, return_labels=return_labels,
    )
    train_loader = get_modelnet_dataloader(split='train', **shared)
    test_loader = get_modelnet_dataloader(split='test', **shared)
    return train_loader, test_loader

get_modelnet_dataloader_from_args(args: Namespace) -> DataLoader[ModelNet] ¤

Same as src.polar.train.data.factory.get_modelnet_dataloader, but accepts an argparse.Namespace object instead of keyword arguments.

Returns:

  • DataLoader[ModelNet]

    Standard Pytorch DataLoader.

Source code in src/polar/train/data/factory.py
65
66
67
68
69
70
71
72
73
74
def get_modelnet_dataloader_from_args(args: Namespace) -> DataLoader[ModelNet]:
    """ Same as [src.polar.train.data.factory.get_modelnet_dataloader][], but accepts an `argparse.Namespace` object
        instead of keyword arguments.

    The dataset is built with `ModelNet.from_args(args)`; `args.num_workers` and `args.batch_size` configure the
    loader, which always uses `pin_memory=True`, `shuffle=True` and `drop_last=False`.

    Returns:
        Standard PyTorch DataLoader.
    """
    modelnet = ModelNet.from_args(args)
    return DataLoader(
        modelnet, num_workers=args.num_workers, pin_memory=True, shuffle=True, drop_last=False,
        batch_size=args.batch_size,
    )

get_modelnet_dataloaders_from_args(args: Namespace) -> tuple[DataLoader[ModelNet], DataLoader[ModelNet]] ¤

Same as src.polar.train.data.factory.get_modelnet_dataloader_from_args, but returns a tuple of train and test dataloaders.

Returns:

  • tuple[DataLoader[ModelNet], DataLoader[ModelNet]]

    Train loader, Test loader.

Source code in src/polar/train/data/factory.py
77
78
79
80
81
82
83
84
85
86
87
88
def get_modelnet_dataloaders_from_args(args: Namespace) -> tuple[DataLoader[ModelNet], DataLoader[ModelNet]]:
    """ Same as [src.polar.train.data.factory.get_modelnet_dataloader_from_args][], but returns a tuple of train and test
        dataloaders.

    Note that `args.split` is overwritten in place: it is set to 'train', then to 'test', and is left at 'test'
    when the function returns.

    Returns:
        Train loader, Test loader.
    """
    loaders = []
    for split_name in ('train', 'test'):
        # The split is communicated to the factory through the namespace itself.
        args.split = split_name
        loaders.append(get_modelnet_dataloader_from_args(args))
    train_loader, test_loader = loaders
    return train_loader, test_loader

Augmented Loaders¤

AugmentedDataLoader(dataloader: DataLoader[ModelNet], num_points: int = 1024, shuffle: bool = False, sigma: float = 0.0, min_scale: float = 1.0, max_angle: float = 180.0, max_trans: float = 0.0, keep_ratio: float = 1.0, p: float = 1, handle_device: bool = True) ¤

Apply the same motion to sources and targets. Degrade sources only. Intended to be used to train an autoencoder to reconstruct and restore point clouds.

Parameters:

  • dataloader (DataLoader[ModelNet]) –

    A ModelNet dataloader instance, typically from src.polar.train.data.factory.get_modelnet_dataloader.

  • num_points (int, default: 1024 ) –

    Number of points in each cloud. Defaults to 1024.

  • shuffle (bool, default: False ) –

    Shuffle the points of the dense point clouds (5000 points before sampling num_points). If True, sources and targets will be two distinct samplings of the same underlying surface. Defaults to False.

  • sigma (float, default: 0.0 ) –

    Isotropic noise standard deviation. Defaults to 0.

  • min_scale (float, default: 1.0 ) –

    If \(< 1\), will randomly scale each batch with a factor \(s \sim \mathcal{U}(\text{min_scale}, 1)\). Defaults to 1.

  • max_angle (float, default: 180.0 ) –

    For each point cloud, randomly sample a rotation whose relative angle with the identity is in \([0, \text{max_angle}]\). Defaults to 180.

  • max_trans (float, default: 0.0 ) –

    For each point cloud, randomly sample a translation whose norm is in \([0, \text{max_trans}]\). Defaults to 0.

  • keep_ratio (float, default: 1.0 ) –

    If \(< 1\), will randomly crop each batch with a factor \(k \sim \mathcal{U}(\text{keep_ratio}, 1)\). Defaults to 1.

  • p (float, default: 1 ) –

    Probability to apply the augmentation. Defaults to 1.

  • handle_device (bool, default: True ) –

    If True, will guess the device and move point clouds to it. Defaults to True.

Source code in src/polar/train/data/augmented_loader.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(
    self, dataloader: DataLoader[ModelNet], num_points: int = 1024, shuffle: bool = False,
    sigma: float = 0., min_scale: float = 1., max_angle: float = 180., max_trans: float = 0.,
    keep_ratio: float = 1., p: float = 1, handle_device: bool = True
) -> None:
    r""" Store the wrapped dataloader and the augmentation settings, pick a device, and set up the transforms
        through `init_transforms(p)`.

    Args:
        dataloader (DataLoader[ModelNet]):
            A ModelNet dataloader instance, typically from [src.polar.train.data.factory.get_modelnet_dataloader][].
        num_points (int, optional): Number of points in each cloud. Defaults to 1024.
        shuffle (bool, optional):
            Shuffle the points of the dense point clouds (5000 points before sampling `num_points`). If `True`,
            sources and targets will be two distinct samplings of the same underlying surface. Defaults to False.
        sigma (float, optional): Isotropic noise standard deviation. Defaults to `0`.
        min_scale (float, optional):
            If $< 1$, will randomly scale each batch with a factor $s \sim \mathcal{U}(\text{min_scale}, 1)$.
            Defaults to `1`.
        max_angle (float, optional):
            For each point cloud, randomly sample a rotation whose relative angle with the identity is in
            $[0, \text{max_angle}]$. Defaults to `180`.
        max_trans (float, optional):
            For each point cloud, randomly sample a translation whose norm is in $[0, \text{max_trans}]$.
            Defaults to `0`.
        keep_ratio (float, optional):
            If $< 1$, will randomly crop each batch with a factor $k \sim \mathcal{U}(\text{keep_ratio}, 1)$.
            Defaults to `1`.
        p (float, optional): Probability to apply the augmentation. Defaults to `1`.
        handle_device (bool, optional):
            If `True`, will guess the device and move point clouds to it. Defaults to `True`.
    """
    # Wrapped loader and sampling options.
    self.dataloader = dataloader
    self.num_points = num_points
    self.shuffle = shuffle

    # Degradation and motion settings.
    self.sigma = sigma
    self.min_scale = min_scale
    self.max_angle = max_angle
    self.max_trans = max_trans
    self.keep_ratio = keep_ratio

    # Device: CUDA when available, CPU otherwise.
    self.handle_device = handle_device
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_name)

    # Called last, once every setting above is stored on the instance.
    self.init_transforms(p)

get_augmented_dataloader(batch_size: int, num_workers: int, rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None, exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False, num_points: int = 1024, shuffle: bool = False, sigma: float = 0.0, min_scale: float = 1.0, max_angle: float = 180.0, max_trans: float = 0.0, keep_ratio: float = 1.0, p: float = 1, handle_device: bool = True) -> AugmentedDataLoader ¤

See src.polar.train.data.factory.AugmentedDataLoader for the arguments description.

Returns:

  • AugmentedDataLoader

    A ModelNet40 dataloader with random motions and degradations.

Source code in src/polar/train/data/factory.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def get_augmented_dataloader(
    batch_size: int, num_workers: int,
    rootdir: str = 'modelnet', split: str = 'train', classes: Sequence[str] | None = None,
    exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False,
    num_points: int = 1024, shuffle: bool = False, sigma: float = 0., min_scale: float = 1.,
    max_angle: float = 180., max_trans: float = 0., keep_ratio: float = 1., p: float = 1, handle_device: bool = True
) -> AugmentedDataLoader:
    """ Build a ModelNet dataloader with `get_modelnet_dataloader` and wrap it in an `AugmentedDataLoader`.

    See src.polar.train.data.factory.AugmentedDataLoader for the arguments description.

    Returns:
        A ModelNet40 dataloader with random motions and degradations.
    """
    # Dataset-related arguments go to the base loader factory...
    base_loader = get_modelnet_dataloader(
        batch_size, num_workers, rootdir=rootdir, split=split, classes=classes,
        exclude_classes=exclude_classes, samples_per_class=samples_per_class, return_labels=return_labels,
    )
    # ...and augmentation-related ones to the wrapper.
    return AugmentedDataLoader(
        base_loader, num_points=num_points, shuffle=shuffle, sigma=sigma, min_scale=min_scale,
        max_angle=max_angle, max_trans=max_trans, keep_ratio=keep_ratio, p=p, handle_device=handle_device,
    )

get_augmented_dataloaders(batch_size: int, num_workers: int, rootdir: str = 'modelnet', classes: Sequence[str] | None = None, exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False, num_points: int = 1024, shuffle: bool = False, sigma: float = 0.0, min_scale: float = 1.0, max_angle: float = 180.0, max_trans: float = 0.0, keep_ratio: float = 1.0, p: float = 1, handle_device: bool = True) -> tuple[AugmentedDataLoader, AugmentedDataLoader] ¤

See src.polar.train.data.factory.AugmentedDataLoader for the arguments description. Same as src.polar.train.data.factory.get_augmented_dataloader, but returns a tuple of train and test dataloaders.

Returns:

  • tuple[AugmentedDataLoader, AugmentedDataLoader]

    Train loader, Test loader.

Source code in src/polar/train/data/factory.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def get_augmented_dataloaders(
    batch_size: int, num_workers: int,
    rootdir: str = 'modelnet', classes: Sequence[str] | None = None,
    exclude_classes: Sequence[str] | None = None, samples_per_class: int | None = None, return_labels: bool = False,
    num_points: int = 1024, shuffle: bool = False, sigma: float = 0., min_scale: float = 1.,
    max_angle: float = 180., max_trans: float = 0., keep_ratio: float = 1., p: float = 1, handle_device: bool = True
) -> tuple[AugmentedDataLoader, AugmentedDataLoader]:
    """ See src.polar.train.data.factory.AugmentedDataLoader for the arguments description. Same as
        [src.polar.train.data.factory.get_augmented_dataloader][], but returns a tuple of train and test dataloaders.

    Both loaders are built from the same arguments; only the `split` ('train', then 'test') differs.

    Returns:
        Train loader, Test loader.
    """
    # Forward the arguments explicitly rather than through `locals()`, so that adding a local variable
    # to this function can never leak an unexpected keyword into `get_augmented_dataloader`.
    shared = dict(
        batch_size=batch_size, num_workers=num_workers, rootdir=rootdir, classes=classes,
        exclude_classes=exclude_classes, samples_per_class=samples_per_class, return_labels=return_labels,
        num_points=num_points, shuffle=shuffle, sigma=sigma, min_scale=min_scale, max_angle=max_angle,
        max_trans=max_trans, keep_ratio=keep_ratio, p=p, handle_device=handle_device,
    )
    train_loader = get_augmented_dataloader(split='train', **shared)
    test_loader = get_augmented_dataloader(split='test', **shared)
    return train_loader, test_loader

get_augmented_dataloader_from_args(args: Namespace) -> AugmentedDataLoader ¤

See src.polar.train.data.factory.AugmentedDataLoader for the arguments description. Same as src.polar.train.data.factory.get_augmented_dataloader, but accepts an argparse.Namespace object instead of keyword arguments.

Returns:

  • AugmentedDataLoader

    A ModelNet40 dataloader with random motions and degradations.

Source code in src/polar/train/data/factory.py
133
134
135
136
137
138
139
140
141
142
143
144
145
def get_augmented_dataloader_from_args(args: Namespace) -> AugmentedDataLoader:
    """ See src.polar.train.data.factory.AugmentedDataLoader for the arguments description. Same as
        [src.polar.train.data.factory.get_augmented_dataloader][], but accepts an `argparse.Namespace` object
        instead of keyword arguments.

    The base loader comes from `get_modelnet_dataloader_from_args(args)`; the augmentation settings are taken
    from `args` with `args_to_param_subset` and passed to `AugmentedDataLoader` as keyword arguments.

    Returns:
        A ModelNet40 dataloader with random motions and degradations.
    """
    dataloader = get_modelnet_dataloader_from_args(args)
    # Only names that `AugmentedDataLoader.__init__` accepts are forwarded; any other key reaching the
    # constructor through `**params` would raise a `TypeError`.
    keys: tuple[str, ...] = ('num_points', 'shuffle', 'sigma', 'min_scale', 'max_angle', 'max_trans', 'keep_ratio',
                             'p', 'handle_device')
    params = args_to_param_subset(args, keys)
    return AugmentedDataLoader(dataloader, **params)

get_augmented_dataloaders_from_args(args: Namespace) -> tuple[AugmentedDataLoader, AugmentedDataLoader] ¤

Same as src.polar.train.data.factory.get_augmented_dataloader_from_args, but returns a tuple of train and test dataloaders.

Returns:

  • tuple[AugmentedDataLoader, AugmentedDataLoader]

    Train loader, Test loader.

Source code in src/polar/train/data/factory.py
148
149
150
151
152
153
154
155
156
157
158
159
def get_augmented_dataloaders_from_args(args: Namespace) -> tuple[AugmentedDataLoader, AugmentedDataLoader]:
    """ Same as [src.polar.train.data.factory.get_augmented_dataloader_from_args][], but returns a tuple of train and test
        dataloaders.

    Note that `args.split` is overwritten in place: it is set to 'train', then to 'test', and is left at 'test'
    when the function returns.

    Returns:
        Train loader, Test loader.
    """
    loaders = []
    for split_name in ('train', 'test'):
        # The split is communicated to the factory through the namespace itself.
        args.split = split_name
        loaders.append(get_augmented_dataloader_from_args(args))
    train_loader, test_loader = loaders
    return train_loader, test_loader