"""Benchmark driver for the containerised ``spmv.py`` workload.

The script builds an ``apptainer run`` command line for the selected CPU,
re-runs the benchmark with a growing iteration count until the run time it
reports reaches ``min_time_s``, optionally collects power readings through
``./power.sh`` (``--power``), and prints the collected statistics as a single
JSON object on stdout.  Diagnostics go to stderr.
"""

import argparse
import json
import os
import signal
import subprocess
import sys
import time

import data_stat
from data_stat import Stat, Cpu, Format, MatrixType

parser = argparse.ArgumentParser()
parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
parser.add_argument('matrix_type', type=str,
                    choices=[t.name.lower() for t in MatrixType])
parser.add_argument('format', type=str,
                    choices=[fmt.name.lower() for fmt in Format])
parser.add_argument('base_iterations', type=int)
parser.add_argument('min_time_s', type=int)
parser.add_argument('baseline_time_s', type=int)
parser.add_argument('baseline_delay_s', type=int)
#parser.add_argument('--perf', action='store_true')
parser.add_argument('-m', '--matrix_file', type=str)
parser.add_argument('-ss', '--synthetic_size', type=int)
parser.add_argument('-sd', '--synthetic_density', type=float)
parser.add_argument('-c', '--cores', type=int)
parser.add_argument('--power', action='store_true')
parser.add_argument('-d', '--debug', action='store_true')

args = parser.parse_args()
# Replace the lower-case CLI strings with the corresponding enum members.
args.cpu = Cpu[args.cpu.upper()]
args.matrix_type = MatrixType[args.matrix_type.upper()]
args.format = Format[args.format.upper()]

# trapezoidal_rule() needs at least two samples, hence the lower bound.
# Reported through argparse instead of `assert` so it survives `python -O`.
if args.baseline_time_s < 2:
    parser.error('baseline_time_s must be at least 2')

# Without these checks the literal string "None" would be spliced into the
# benchmark command line.
if args.matrix_type == MatrixType.SUITESPARSE and args.matrix_file is None:
    parser.error('-m/--matrix_file is required for suitesparse matrices')
if args.matrix_type == MatrixType.SYNTHETIC and (
        args.synthetic_size is None or args.synthetic_density is None):
    parser.error('-ss/--synthetic_size and -sd/--synthetic_density are '
                 'required for synthetic matrices')

# NOTE(review): `python` is not referenced anywhere in the visible code.
python = {
    Cpu.ALTRA: 'python3',
    Cpu.EPYC_7313P: 'python3.11',
    Cpu.XEON_4216: 'python3.11'
}

# `perf` / `perf_args` are only referenced by the disabled --perf path at the
# bottom of this file.
perf = ['perf', 'stat']
perf_args = {
    Cpu.ALTRA: [
        ['-d', '-d'],
        ['-M', 'branch_misprediction_ratio'],
        ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'],
        ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'],
        ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']]
}

# Command prefix for the power sampling script, pinned to CPU 0 via taskset.
power = ['taskset', '-c', '0', './power.sh']


def program(
        cpu: Cpu,
        cores: int,
        matrix_type: MatrixType,
        fmt: Format,
        iterations: int,
        matrix_file: str,
        synthetic_size: int,
        synthetic_density: float
) -> list:
    """Build the argv list that runs ``spmv.py`` inside the CPU's container.

    Args:
        cpu: Target CPU; selects the ``.sif`` image and NUMA wrapping.
        cores: Core count, or ``None`` to leave OpenMP placement and the
            ``-c`` flag unset.
        matrix_type: Selects between ``-m`` and ``-ss``/``-sd`` arguments.
        fmt: Matrix format name forwarded to ``spmv.py``.
        iterations: Iteration count forwarded to ``spmv.py``.
        matrix_file: Matrix path (used for ``MatrixType.SUITESPARSE``).
        synthetic_size: Size (used for ``MatrixType.SYNTHETIC``).
        synthetic_density: Density (used for ``MatrixType.SYNTHETIC``).

    Returns:
        The command as a list of strings, starting with ``apptainer run``.

    Exits the process with a message when the matrix type or CPU is not one
    of the handled values.
    """
    apptainer = ['apptainer', 'run']
    if cores is not None:
        apptainer += [
            '--env', 'OMP_PROC_BIND=true',
            '--env', f'OMP_PLACES={{0:{cores}}}']

    spmv = f'python3 spmv.py {matrix_type.name.lower()} {fmt.name.lower()} '
    spmv += f'{iterations} '

    if matrix_type == MatrixType.SUITESPARSE:
        spmv += f'-m {matrix_file}'
    elif matrix_type == MatrixType.SYNTHETIC:
        spmv += f'-ss {synthetic_size} -sd {synthetic_density}'
    else:
        sys.exit("Unrecognized matrix type!")

    if cores is not None:
        spmv += f' -c {cores}'

    if cpu == Cpu.ALTRA:
        # The Altra image receives the whole command as one `-c` string.
        return apptainer + [
            'pytorch-altra.sif', '-c',
            'numactl --cpunodebind=0 --membind=0 ' + spmv]
    elif cpu == Cpu.EPYC_7313P:
        return apptainer + ['pytorch-epyc_7313p.sif'] + spmv.split(' ')
    elif cpu == Cpu.XEON_4216:
        return apptainer + [
            'pytorch-xeon_4216.sif',
            'numactl', '--cpunodebind=0', '--membind=0'] + spmv.split(' ')
    else:
        # Previously fell through and returned None, which only failed later
        # with an unrelated TypeError.
        sys.exit("Unrecognized CPU!")


def baseline_power(cpu: Cpu, baseline_time_s: int) -> list:
    """Run the power script for ``baseline_time_s`` and parse its readings.

    Args:
        cpu: Unused; kept for interface compatibility.
        baseline_time_s: Passed to the power script as its only argument.

    Returns:
        One float per non-empty stdout line of the power script.
    """
    power_process = subprocess.Popen(
        power + [str(baseline_time_s)],
        stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
    output = power_process.communicate()[0]
    return [float(x) for x in output.strip().split('\n') if x]


def run_program(program: list[str]) -> tuple[dict, str]:
    """Run ``program`` and return its parsed JSON stdout plus raw stderr.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
        json.JSONDecodeError: If stdout is not valid JSON.
    """
    if args.debug:
        print(program, file=sys.stderr)

    process = subprocess.run(
        program, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    process.check_returncode()

    if args.debug:
        print(process.stdout, file=sys.stderr)
        print(process.stderr, file=sys.stderr)

    return (json.loads(process.stdout), process.stderr)


def trapezoidal_rule(power: list[float], time_s: float) -> float:
    """Integrate the trailing ``int(time_s)`` power samples.

    Adjacent samples are combined with unit-width trapezoids, then the
    fractional part of ``time_s`` times the final sample is added.
    NOTE(review): unit width implies samples are one second apart - confirm
    against power.sh.

    Args:
        power: Power samples, oldest first.
        time_s: Duration to integrate over; must be at least 2.

    Returns:
        The accumulated value (stored by the caller as joules).
    """
    result = 0.0
    relevant_power = power[-int(time_s):]

    assert time_s >= 2
    assert len(relevant_power) >= 2
    assert len(power) >= time_s

    # Explicit accumulation (not sum()) so the float result does not depend
    # on the interpreter's summation algorithm.
    for left, right in zip(relevant_power, relevant_power[1:]):
        result += 0.5 * (left + right)
    result += (time_s % 1) * (power[-1])

    return result


def _benchmark_command(iterations: int) -> list:
    """Return the benchmark command for ``iterations`` using the CLI args."""
    return program(
        args.cpu, args.cores, args.matrix_type, args.format, iterations,
        args.matrix_file, args.synthetic_size, args.synthetic_density)


result = dict()
result[Stat.CPU.name] = args.cpu.value

# Core counts recorded when -c/--cores is not given.
default_cores = {
    Cpu.ALTRA: 80,
    Cpu.EPYC_7313P: 16,
    Cpu.XEON_4216: 16,
}
if args.cores is not None:
    result[Stat.CORES.name] = args.cores
elif args.cpu in default_cores:
    result[Stat.CORES.name] = default_cores[args.cpu]

iterations = args.base_iterations
program_result = run_program(_benchmark_command(iterations))
while program_result[0][Stat.TIME_S.name] < args.min_time_s:
    elapsed_s = program_result[0][Stat.TIME_S.name]
    if elapsed_s > 0:
        # Increase the number of iterations by the ratio between the desired
        # time and the time actually taken.
        scaled = iterations * (1 / (elapsed_s / args.min_time_s))
        # Add another 5% for safety.
        scaled += scaled * 0.05
        scaled = int(scaled)
    else:
        # A reported time of zero gives no ratio to scale by; just double.
        scaled = iterations * 2
    # int() truncation can leave small counts unchanged, which used to loop
    # forever; always make progress by at least one iteration.
    iterations = max(scaled, iterations + 1)
    program_result = run_program(_benchmark_command(iterations))

result[Stat.ITERATIONS.name] = iterations
result |= program_result[0]
print(program_result[1], file=sys.stderr)

result[Stat.TIME_S_1KI.name] = (
    (result[Stat.TIME_S.name] / result[Stat.ITERATIONS.name]) * 1000
)

if args.power:
    result[Stat.BASELINE_TIME_S.name] = args.baseline_time_s
    result[Stat.BASELINE_DELAY_S.name] = args.baseline_delay_s

    # Baseline (before the measured run)
    time.sleep(args.baseline_delay_s)
    baseline_list = baseline_power(args.cpu, args.baseline_time_s)
    if args.debug:
        print(baseline_list, file=sys.stderr)
    assert len(baseline_list) == args.baseline_time_s, \
        'unexpected number of baseline power samples'

    # Power Collection
    # NOTE(review): the meaning of the '-1' argument is defined by power.sh;
    # presumably "sample until the wrapped command exits" - confirm.
    power_process = subprocess.run(
        power + ['-1'] + _benchmark_command(result[Stat.ITERATIONS.name]),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    power_process.check_returncode()

    # stdout carries the power readings; blank lines are skipped, matching
    # baseline_power().  stderr is parsed as the benchmark's JSON result.
    power_list = [
        float(x) for x in power_process.stdout.strip().split('\n') if x]
    power_time_s = json.loads(power_process.stderr)[Stat.TIME_S.name]
    if args.debug:
        print(power_list, file=sys.stderr)
        print(power_time_s, file=sys.stderr)

    if args.cpu == Cpu.ALTRA:
        # Trapezoidal Rule across the last (s) power recordings.
        result[Stat.J.name] = trapezoidal_rule(power_list, power_time_s)
    elif args.cpu == Cpu.EPYC_7313P or args.cpu == Cpu.XEON_4216:
        # NOTE(review): treats the first reading as the power level for the
        # whole run - confirm against power.sh output on these machines.
        result[Stat.J.name] = power_list[0] * power_time_s

    result[Stat.W.name] = result[Stat.J.name] / power_time_s

    if args.debug:
        print(result, file=sys.stderr)
    #print(len(result['power']))
    #print(sum(result['power']) / len(result['power']))

    # Baseline (after the measured run)
    time.sleep(args.baseline_delay_s)
    baseline_list += baseline_power(args.cpu, args.baseline_time_s)
    if args.debug:
        print(baseline_list, file=sys.stderr)
    assert len(baseline_list) == 2 * args.baseline_time_s, \
        'unexpected number of baseline power samples'

    # Integrate the two baseline windows separately so no trapezoid spans
    # the gap between them.
    baseline_joules = (
        trapezoidal_rule(
            baseline_list[:args.baseline_time_s], args.baseline_time_s) +
        trapezoidal_rule(
            baseline_list[args.baseline_time_s:], args.baseline_time_s)
    )
    baseline_wattage = baseline_joules / (args.baseline_time_s * 2)

    if args.debug:
        print(baseline_joules, file=sys.stderr)
        print(baseline_wattage, file=sys.stderr)

    result[Stat.J_1KI.name] = (
        (result[Stat.J.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    result[Stat.W_1KI.name] = (
        (result[Stat.W.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    # "_D" values are relative to the idle baseline.
    result[Stat.W_D.name] = result[Stat.W.name] - baseline_wattage
    result[Stat.J_D.name] = result[Stat.W_D.name] * power_time_s
    result[Stat.W_D_1KI.name] = (
        (result[Stat.W_D.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    # Derived from J_D (not W_D_1KI, which would scale by iterations twice).
    result[Stat.J_D_1KI.name] = (
        (result[Stat.J_D.name] / result[Stat.ITERATIONS.name]) * 1000
    )

if args.debug:
    print(result, file=sys.stderr)

print(json.dumps(result))

# Disabled --perf path, kept for reference.
#if args.perf:
#    for perf_arg in perf_args[args.cpu]:
#        output = run_program(perf + perf_arg + program[args.cpu])[1]
#        print(output, file=sys.stderr)
#        result = result | data_stat.parse_output(output, args.cpu)
#        if args.debug:
#            print(result)
#
#    result = result | data_stat.derive_stats(result)
#
#    if args.debug:
#        print(result)

#arch = subprocess.run(['uname', '-m'], stdout=subprocess.PIPE, text=True).stdout.strip()
#baseline = subprocess.run(
#        ['./power.sh', args.baseline_time_s],
#        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
#print(baseline)
#for line in baseline.stdout.split('\n'):
#    print("line")
#    print(line)

#os.path.basename(args.matrix_file)
#subprocess.run(
#        ['apptainer', 'run', 'pytorch-altra.sif', '-c',
#         f'"numactl --cpunodebind=0 --membind=0 python spmv.py {args.matrix_file} {args.iterations}"'
#         ],
#        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)