# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import collections import math import os import pathlib import re import pynvml pynvml.nvmlInit() def systemGetDriverVersion(): return pynvml.nvmlSystemGetDriverVersion() def deviceGetCount(): return pynvml.nvmlDeviceGetCount() class device: # assume nvml returns list of 64 bit ints _nvml_affinity_elements = math.ceil(os.cpu_count() / 64) def __init__(self, device_idx): super().__init__() self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx) def getName(self): return pynvml.nvmlDeviceGetName(self.handle) def getCpuAffinity(self): affinity_string = "" for j in pynvml.nvmlDeviceGetCpuAffinity( self.handle, device._nvml_affinity_elements ): # assume nvml returns list of 64 bit ints affinity_string = "{:064b}".format(j) + affinity_string affinity_list = [int(x) for x in affinity_string] affinity_list.reverse() # so core 0 is in 0th element of list ret = [i for i, e in enumerate(affinity_list) if e != 0] return ret def set_socket_affinity(gpu_id): dev = device(gpu_id) affinity = dev.getCpuAffinity() os.sched_setaffinity(0, affinity) def set_single_affinity(gpu_id): dev = device(gpu_id) affinity = dev.getCpuAffinity() os.sched_setaffinity(0, affinity[:1]) def set_single_unique_affinity(gpu_id, nproc_per_node): devices = [device(i) for i in range(nproc_per_node)] socket_affinities = [dev.getCpuAffinity() for dev in devices] siblings_list = get_thread_siblings_list() siblings_dict = dict(siblings_list) # remove siblings for idx, socket_affinity in enumerate(socket_affinities): socket_affinities[idx] = list( set(socket_affinity) - set(siblings_dict.values()) ) affinities = [] assigned = [] for socket_affinity in socket_affinities: for core in socket_affinity: if core not in assigned: affinities.append([core]) assigned.append(core) break os.sched_setaffinity(0, affinities[gpu_id]) def set_socket_unique_affinity(gpu_id, nproc_per_node, mode): device_ids = [device(i) for i in range(nproc_per_node)] socket_affinities = [dev.getCpuAffinity() for dev in device_ids] siblings_list = get_thread_siblings_list() siblings_dict = dict(siblings_list) # remove siblings for idx, socket_affinity in enumerate(socket_affinities): socket_affinities[idx] = list( set(socket_affinity) - set(siblings_dict.values()) ) socket_affinities_to_device_ids = collections.defaultdict(list) for idx, socket_affinity in enumerate(socket_affinities): socket_affinities_to_device_ids[tuple(socket_affinity)].append(idx) for socket_affinity, device_ids in socket_affinities_to_device_ids.items(): devices_per_group = len(device_ids) cores_per_device = len(socket_affinity) // devices_per_group for group_id, device_id in enumerate(device_ids): if device_id == gpu_id: if mode == "interleaved": affinity = list(socket_affinity[group_id::devices_per_group]) elif mode == "continuous": affinity = list( socket_affinity[group_id * cores_per_device:(group_id + 1) * cores_per_device] ) else: raise RuntimeError("Unknown set_socket_unique_affinity mode") # reintroduce siblings affinity += [ siblings_dict[aff] for aff in affinity if aff in siblings_dict ] os.sched_setaffinity(0, affinity) def get_thread_siblings_list(): path = "/sys/devices/system/cpu/cpu*/topology/thread_siblings_list" thread_siblings_list = [] pattern = re.compile(r"(\d+)\D(\d+)") for fname in pathlib.Path(path[0]).glob(path[1:]): with open(fname) as f: content = f.read().strip() res = pattern.findall(content) if res: pair = tuple(map(int, res[0])) thread_siblings_list.append(pair) return thread_siblings_list def set_affinity(gpu_id, nproc_per_node, mode="socket"): if mode == "socket": set_socket_affinity(gpu_id) elif mode == "single": set_single_affinity(gpu_id) elif mode == "single_unique": set_single_unique_affinity(gpu_id, nproc_per_node) elif mode == "socket_unique_interleaved": set_socket_unique_affinity(gpu_id, nproc_per_node, "interleaved") elif mode == "socket_unique_continuous": set_socket_unique_affinity(gpu_id, nproc_per_node, "continuous") else: raise RuntimeError("Unknown affinity mode") affinity = os.sched_getaffinity(0) return affinity