158 lines
5.3 KiB
Python
158 lines
5.3 KiB
Python
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import collections
|
|
import math
|
|
import os
|
|
import pathlib
|
|
import re
|
|
|
|
import pynvml
|
|
|
|
pynvml.nvmlInit()
|
|
|
|
|
|
def systemGetDriverVersion():
|
|
return pynvml.nvmlSystemGetDriverVersion()
|
|
|
|
|
|
def deviceGetCount():
|
|
return pynvml.nvmlDeviceGetCount()
|
|
|
|
|
|
class device:
|
|
# assume nvml returns list of 64 bit ints
|
|
_nvml_affinity_elements = math.ceil(os.cpu_count() / 64)
|
|
|
|
def __init__(self, device_idx):
|
|
super().__init__()
|
|
self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx)
|
|
|
|
def getName(self):
|
|
return pynvml.nvmlDeviceGetName(self.handle)
|
|
|
|
def getCpuAffinity(self):
|
|
affinity_string = ''
|
|
for j in pynvml.nvmlDeviceGetCpuAffinity(
|
|
self.handle, device._nvml_affinity_elements
|
|
):
|
|
# assume nvml returns list of 64 bit ints
|
|
affinity_string = '{:064b}'.format(j) + affinity_string
|
|
affinity_list = [int(x) for x in affinity_string]
|
|
affinity_list.reverse() # so core 0 is in 0th element of list
|
|
|
|
ret = [i for i, e in enumerate(affinity_list) if e != 0]
|
|
return ret
|
|
|
|
|
|
def set_socket_affinity(gpu_id):
|
|
dev = device(gpu_id)
|
|
affinity = dev.getCpuAffinity()
|
|
os.sched_setaffinity(0, affinity)
|
|
|
|
|
|
def set_single_affinity(gpu_id):
|
|
dev = device(gpu_id)
|
|
affinity = dev.getCpuAffinity()
|
|
os.sched_setaffinity(0, affinity[:1])
|
|
|
|
|
|
def set_single_unique_affinity(gpu_id, nproc_per_node):
|
|
devices = [device(i) for i in range(nproc_per_node)]
|
|
socket_affinities = [dev.getCpuAffinity() for dev in devices]
|
|
|
|
siblings_list = get_thread_siblings_list()
|
|
siblings_dict = dict(siblings_list)
|
|
|
|
# remove siblings
|
|
for idx, socket_affinity in enumerate(socket_affinities):
|
|
socket_affinities[idx] = list(set(socket_affinity) - set(siblings_dict.values()))
|
|
|
|
affinities = []
|
|
assigned = []
|
|
|
|
for socket_affinity in socket_affinities:
|
|
for core in socket_affinity:
|
|
if core not in assigned:
|
|
affinities.append([core])
|
|
assigned.append(core)
|
|
break
|
|
os.sched_setaffinity(0, affinities[gpu_id])
|
|
|
|
|
|
def set_socket_unique_affinity(gpu_id, nproc_per_node, mode):
|
|
device_ids = [device(i) for i in range(nproc_per_node)]
|
|
socket_affinities = [dev.getCpuAffinity() for dev in device_ids]
|
|
|
|
siblings_list = get_thread_siblings_list()
|
|
siblings_dict = dict(siblings_list)
|
|
|
|
# remove siblings
|
|
for idx, socket_affinity in enumerate(socket_affinities):
|
|
socket_affinities[idx] = list(set(socket_affinity) - set(siblings_dict.values()))
|
|
|
|
socket_affinities_to_device_ids = collections.defaultdict(list)
|
|
|
|
for idx, socket_affinity in enumerate(socket_affinities):
|
|
socket_affinities_to_device_ids[tuple(socket_affinity)].append(idx)
|
|
|
|
for socket_affinity, device_ids in socket_affinities_to_device_ids.items():
|
|
devices_per_group = len(device_ids)
|
|
cores_per_device = len(socket_affinity) // devices_per_group
|
|
for group_id, device_id in enumerate(device_ids):
|
|
if device_id == gpu_id:
|
|
if mode == 'interleaved':
|
|
affinity = list(socket_affinity[group_id::devices_per_group])
|
|
elif mode == 'continuous':
|
|
affinity = list(socket_affinity[group_id*cores_per_device:(group_id+1)*cores_per_device])
|
|
else:
|
|
raise RuntimeError('Unknown set_socket_unique_affinity mode')
|
|
|
|
# reintroduce siblings
|
|
affinity += [siblings_dict[aff] for aff in affinity if aff in siblings_dict]
|
|
os.sched_setaffinity(0, affinity)
|
|
|
|
|
|
def get_thread_siblings_list():
|
|
path = '/sys/devices/system/cpu/cpu*/topology/thread_siblings_list'
|
|
thread_siblings_list = []
|
|
pattern = re.compile(r'(\d+)\D(\d+)')
|
|
for fname in pathlib.Path(path[0]).glob(path[1:]):
|
|
with open(fname) as f:
|
|
content = f.read().strip()
|
|
res = pattern.findall(content)
|
|
if res:
|
|
pair = tuple(map(int, res[0]))
|
|
thread_siblings_list.append(pair)
|
|
return thread_siblings_list
|
|
|
|
|
|
def set_affinity(gpu_id, nproc_per_node, mode='socket'):
|
|
if mode == 'socket':
|
|
set_socket_affinity(gpu_id)
|
|
elif mode == 'single':
|
|
set_single_affinity(gpu_id)
|
|
elif mode == 'single_unique':
|
|
set_single_unique_affinity(gpu_id, nproc_per_node)
|
|
elif mode == 'socket_unique_interleaved':
|
|
set_socket_unique_affinity(gpu_id, nproc_per_node, 'interleaved')
|
|
elif mode == 'socket_unique_continuous':
|
|
set_socket_unique_affinity(gpu_id, nproc_per_node, 'continuous')
|
|
else:
|
|
raise RuntimeError('Unknown affinity mode')
|
|
|
|
affinity = os.sched_getaffinity(0)
|
|
return affinity
|
|
|