DeepLearningExamples/DGLPyTorch/DrugDiscovery/SE3Transformer/se3_transformer/data_loading/data_module.py
2021-11-02 15:06:21 +01:00

65 lines
2.9 KiB
Python

# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES
# SPDX-License-Identifier: MIT
import torch.distributed as dist
from abc import ABC
from torch.utils.data import DataLoader, DistributedSampler, Dataset
from se3_transformer.runtime.utils import get_local_rank
def _get_dataloader(dataset: Dataset, shuffle: bool, **kwargs) -> DataLoader:
    """ Build a classic or distributed DataLoader depending on the context.

    When a torch.distributed process group is initialized, the dataset is wrapped in a
    DistributedSampler which takes over the shuffling. Otherwise, a plain DataLoader is built.

    :param dataset: Dataset to load samples from
    :param shuffle: Whether the samples should be shuffled
    :param kwargs:  Extra keyword arguments forwarded as-is to the DataLoader constructor
    :return:        The configured DataLoader
    """
    # is_initialized() is not exposed by PyTorch builds without distributed support,
    # so is_available() has to be checked first
    is_distributed = dist.is_available() and dist.is_initialized()
    sampler = DistributedSampler(dataset, shuffle=shuffle) if is_distributed else None
    # DataLoader treats `shuffle` and `sampler` as mutually exclusive: shuffle only when there is no sampler
    return DataLoader(dataset, shuffle=(shuffle and sampler is None), sampler=sampler, **kwargs)
class DataModule(ABC):
    """ Abstract DataModule. Children must define self.ds_{train | val | test}. """

    def __init__(self, **dataloader_kwargs):
        """
        :param dataloader_kwargs: Keyword arguments forwarded to every DataLoader built by this module.
                                  They take precedence over the pin_memory / persistent_workers defaults.
        """
        super().__init__()
        if get_local_rank() == 0:
            self.prepare_data()

        # Wait until rank zero has prepared the data (download, preprocessing, ...).
        # is_initialized() is not exposed by PyTorch builds without distributed support,
        # so is_available() has to be checked first.
        if dist.is_available() and dist.is_initialized():
            dist.barrier(device_ids=[get_local_rank()])

        # persistent_workers defaults to True only when worker processes were requested
        defaults = {'pin_memory': True, 'persistent_workers': dataloader_kwargs.get('num_workers', 0) > 0}
        self.dataloader_kwargs = {**defaults, **dataloader_kwargs}
        self.ds_train, self.ds_val, self.ds_test = None, None, None

    def prepare_data(self):
        """ Method called only once per node. Put here any downloading or preprocessing """
        pass

    def train_dataloader(self) -> DataLoader:
        """ Build the DataLoader over self.ds_train, with shuffling enabled """
        return _get_dataloader(self.ds_train, shuffle=True, **self.dataloader_kwargs)

    def val_dataloader(self) -> DataLoader:
        """ Build the DataLoader over self.ds_val, without shuffling """
        return _get_dataloader(self.ds_val, shuffle=False, **self.dataloader_kwargs)

    def test_dataloader(self) -> DataLoader:
        """ Build the DataLoader over self.ds_test, without shuffling """
        return _get_dataloader(self.ds_test, shuffle=False, **self.dataloader_kwargs)