65 lines
2.9 KiB
Python
65 lines
2.9 KiB
Python
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
# copy of this software and associated documentation files (the "Software"),
|
|
# to deal in the Software without restriction, including without limitation
|
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
# and/or sell copies of the Software, and to permit persons to whom the
|
|
# Software is furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
# DEALINGS IN THE SOFTWARE.
|
|
#
|
|
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
import torch.distributed as dist
|
|
from abc import ABC
|
|
from torch.utils.data import DataLoader, DistributedSampler, Dataset
|
|
|
|
from se3_transformer.runtime.utils import get_local_rank
|
|
|
|
|
|
def _get_dataloader(dataset: Dataset, shuffle: bool, **kwargs) -> DataLoader:
|
|
# Classic or distributed dataloader depending on the context
|
|
sampler = DistributedSampler(dataset, shuffle=shuffle) if dist.is_initialized() else None
|
|
return DataLoader(dataset, shuffle=(shuffle and sampler is None), sampler=sampler, **kwargs)
|
|
|
|
|
|
class DataModule(ABC):
    """ Base class for data modules.

    Subclasses are expected to assign self.ds_train, self.ds_val and self.ds_test;
    the *_dataloader() methods wrap those datasets into DataLoaders.
    """

    def __init__(self, **dataloader_kwargs):
        super().__init__()

        # Only the process with local rank 0 runs the preparation step
        if get_local_rank() == 0:
            self.prepare_data()

        # The other ranks wait here until the preparation (download, preprocessing, ...) is done
        if dist.is_initialized():
            dist.barrier(device_ids=[get_local_rank()])

        # Defaults come first so that any caller-supplied value overrides them
        defaults = {
            'pin_memory': True,
            'persistent_workers': dataloader_kwargs.get('num_workers', 0) > 0,
        }
        self.dataloader_kwargs = {**defaults, **dataloader_kwargs}

        # Dataset slots start empty; subclasses fill them in
        self.ds_train = self.ds_val = self.ds_test = None

    def prepare_data(self):
        """ Hook invoked from __init__ on local rank 0 only. Override it to download or preprocess data """
        pass

    def train_dataloader(self) -> DataLoader:
        """ DataLoader over self.ds_train, with shuffling requested """
        train_loader = _get_dataloader(self.ds_train, shuffle=True, **self.dataloader_kwargs)
        return train_loader

    def val_dataloader(self) -> DataLoader:
        """ DataLoader over self.ds_val, without shuffling """
        val_loader = _get_dataloader(self.ds_val, shuffle=False, **self.dataloader_kwargs)
        return val_loader

    def test_dataloader(self) -> DataLoader:
        """ DataLoader over self.ds_test, without shuffling """
        test_loader = _get_dataloader(self.ds_test, shuffle=False, **self.dataloader_kwargs)
        return test_loader
|