2024-12-12 01:18:26 -05:00
|
|
|
from data_stat import Cpu, Format
|
2024-12-05 12:20:05 -05:00
|
|
|
|
2024-12-02 23:32:33 -05:00
|
|
|
import argparse
|
|
|
|
import glob
|
|
|
|
import os
|
|
|
|
import subprocess
|
2024-12-03 08:53:39 -05:00
|
|
|
import random
|
2024-12-02 23:32:33 -05:00
|
|
|
|
|
|
|
# Command-line interface. Positional order:
#   cpu output_dir matrix_dir format base_iterations min_time_s
#   baseline_time_s baseline_delay_s
parser = argparse.ArgumentParser()
parser.add_argument('cpu', choices=[member.name.lower() for member in Cpu])
parser.add_argument('output_dir')
parser.add_argument('matrix_dir')
parser.add_argument(
    'format',
    type=str,
    choices=[member.name.lower() for member in Format],
)
# The remaining positionals are all plain integers.
for _int_positional in (
        'base_iterations',
        'min_time_s',
        'baseline_time_s',
        'baseline_delay_s',
):
    parser.add_argument(_int_positional, type=int)
# NOTE: a '--perf' flag used to be registered here; it is currently disabled.
# '--power' stores the literal flag text so it can be forwarded unchanged.
parser.add_argument('--power', action='store_const', const='--power')
parser.add_argument('--distribute', action='store_true')

args = parser.parse_args()
# Swap the lower-case names accepted on the command line for enum members.
args.cpu = Cpu[args.cpu.upper()]
args.format = Format[args.format.upper()]
|
2024-12-02 23:32:33 -05:00
|
|
|
|
2024-12-05 12:20:05 -05:00
|
|
|
def _tron_srun_args(qos, cpus, prefer):
    """Return the srun options shared by the 'nexus'/'tron' CPU entries."""
    return [
        '--account', 'nexus',
        '--partition', 'tron',
        '--qos', qos,
        '--cpus-per-task', cpus,
        '--ntasks-per-node', '1',
        '--time', '1-00:00:00',
        '--prefer', prefer,
    ]


# srun options to use for each supported CPU, as a flat argv fragment.
srun_args = {
    # Options tried earlier and currently disabled for this entry:
    # '--mem 28114', '--exclusive', '--output /dev/null', '--error /dev/null'.
    Cpu.ALTRA: [
        '--account', 'oasis',
        '--partition', 'oasis',
        '--qos', 'oasis-exempt',
        '--cpus-per-task', '160',
        '--mem', '16G',
        '--ntasks-per-node', '1',
        '--time', '1-00:00:00',
    ],
    Cpu.EPYC_7313P: _tron_srun_args('high', '16', 'EPYC-7313P'),
    Cpu.XEON_4216: _tron_srun_args('tron-exempt', '32', 'Xeon,4216'),
}
|
2024-12-09 10:57:15 -05:00
|
|
|
# Name of the Python interpreter executable to invoke for each CPU.
python = {
    Cpu.ALTRA: 'python3',
    Cpu.EPYC_7313P: 'python3.11',
    Cpu.XEON_4216: 'python3.11',
}
|
2024-12-02 23:32:33 -05:00
|
|
|
|
2024-12-12 15:24:15 -05:00
|
|
|
def run(run_args, matrix_file: str, srun_args_list: list = None) -> list:
    """Build, echo, and return the argv that runs run.py on one matrix.

    Args:
        run_args: Parsed arguments. The attributes read are ``cpu`` and
            ``format`` (enum members), ``base_iterations``, ``min_time_s``,
            ``baseline_time_s``, ``baseline_delay_s`` and ``power``
            (``None`` or the literal ``'--power'`` flag).
        matrix_file: Matrix path forwarded to run.py.
        srun_args_list: Options placed between ``srun`` and the interpreter.
            When ``None`` the command is not wrapped in ``srun`` at all.

    Returns:
        The command as a list of strings, suitable for ``subprocess``.
    """
    # Every value comes from the run_args parameter rather than the
    # module-level ``args``, so the function honours what it is given.
    run_args_list = [
        run_args.cpu.name.lower(),
        matrix_file,
        run_args.format.name.lower(),
        str(run_args.base_iterations),
        str(run_args.min_time_s),
        str(run_args.baseline_time_s),
        str(run_args.baseline_delay_s),
    ]
    # NOTE: forwarding of a '--perf' flag is currently disabled.
    if run_args.power is not None:
        run_args_list.append(run_args.power)

    command = [python[run_args.cpu], 'run.py'] + run_args_list
    if srun_args_list is not None:
        command = ['srun'] + srun_args_list + command

    # Echo the command so each launch is visible on the launcher's stdout.
    print(command)
    return command
|
2024-12-03 08:53:39 -05:00
|
|
|
|
|
|
|
def _wait_for_all(pending):
    """Block until every process in *pending* has exited."""
    for process in pending:
        process.wait()


# Jobs started with Popen (distributed mode) that have not been waited on.
processes = []

matrix_pattern = f'{args.matrix_dir.rstrip("/")}/*.mtx'
output_prefix = args.output_dir.rstrip("/")

# Run one benchmark for every *.mtx file found in matrix_dir.
for i, matrix in enumerate(glob.glob(matrix_pattern)):
    # NOTE: pinning jobs to specific nodes via '--nodelist' is currently
    # disabled; every job uses the srun options configured for its CPU.
    srun_args_temp = srun_args[args.cpu]

    matrix_stem = os.path.splitext(os.path.basename(matrix))[0]
    output_filename = '_'.join([
        args.cpu.name.lower(),
        str(args.min_time_s),
        str(args.baseline_time_s),
        str(args.baseline_delay_s),
        matrix_stem,
    ])
    json_filepath = f'{output_prefix}/{output_filename}.json'
    raw_filepath = f'{output_prefix}/{output_filename}.output'

    # The child's stdout goes to the .json file and its stderr to .output.
    with open(json_filepath, 'w') as json_file, \
            open(raw_filepath, 'w') as raw_file:
        print(json_filepath)
        print(raw_filepath)

        if not args.distribute:
            # Sequential mode: run directly (no srun) and wait for it here.
            subprocess.run(
                run(args, matrix),
                stdout=json_file,
                stderr=raw_file,
            )
            continue

        # Distributed mode: start the job through srun without waiting.
        processes.append(subprocess.Popen(
            run(args, matrix, srun_args_temp),
            stdout=json_file,
            stderr=raw_file,
        ))

    # Only reached in distributed mode.
    # Wait on every 10 jobs to avoid socket timeout.
    if i % 10 == 9:
        print("Waiting on 10 jobs")
        _wait_for_all(processes)
        processes = []

# Wait for whatever is left from the final, partial batch.
if args.distribute:
    _wait_for_all(processes)
|