122 lines
3.9 KiB
Python
Executable File
122 lines
3.9 KiB
Python
Executable File
#! /bin/python3
|
|
|
|
import perf_stat
|
|
|
|
import argparse
|
|
import os, sys
|
|
import subprocess, signal
|
|
import json
|
|
|
|
# Command-line interface.
#   Positionals (in order): arch, matrix_file, iterations, baseline_time_s,
#   baseline_delay_s -- the last three are parsed as integers.
#   Switches: --perf, --power and -d/--debug are plain boolean flags.
parser = argparse.ArgumentParser()

parser.add_argument('arch')
parser.add_argument('matrix_file')
for _positional in ('iterations', 'baseline_time_s', 'baseline_delay_s'):
    parser.add_argument(_positional, type=int)

for _flag in ('--perf', '--power'):
    parser.add_argument(_flag, action='store_true')
parser.add_argument('-d', '--debug', action='store_true')

args = parser.parse_args()
|
|
|
|
import shlex

# Benchmark command for the 'altra' architecture: spmv.py is run inside the
# pytorch-altra.sif container with CPU and memory bound to NUMA node 0.
# The last element is ONE command string following '-c' (presumably parsed by
# a shell inside the container -- confirm against the image's runscript), so
# the matrix path is quoted: a path containing spaces or shell metacharacters
# would otherwise be split or interpreted. shlex.quote() returns ordinary
# paths unchanged, so existing invocations produce the same command.
program_altra = [
    'apptainer', 'run', 'pytorch-altra.sif', '-c',
    'numactl --cpunodebind=0 --membind=0 '
    f'python spmv.py {shlex.quote(args.matrix_file)} {args.iterations}',
]

# Prefix that wraps a command in `perf stat`.
perf = ['perf', 'stat']

# One `perf stat` argument set per profiling pass; the benchmark is re-run
# once for each entry.
perf_altra = [
    ['-d', '-d'],
    ['-M', 'branch_misprediction_ratio'],
    ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'],
    ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'],
    ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio'],
]
|
|
|
|
def baseline_power(baseline_time_s: int) -> list:
    """Run ``./power.py -s <baseline_time_s>`` and return its readings.

    Args:
        baseline_time_s: Value forwarded to ``./power.py`` through ``-s``
            (presumably the sampling duration in seconds -- confirm against
            power.py).

    Returns:
        One float per non-blank line the sampler wrote to stdout, in output
        order. The list is empty when the sampler printed nothing.

    Raises:
        ValueError: If a non-blank output line is not a valid float.
    """
    completed = subprocess.run(
        ['./power.py', '-s', str(baseline_time_s)],
        stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
    # Blank lines are skipped: float('') raises, so empty output or a stray
    # empty line would otherwise abort the run with an unhelpful error.
    return [float(line) for line in completed.stdout.splitlines() if line.strip()]
|
|
|
|
def run_program(program: list[str]) -> tuple[dict, str]:
    """Run *program* and return its JSON stdout together with its stderr.

    Args:
        program: Command and arguments, executed directly (no shell).

    Returns:
        A ``(parsed_stdout, stderr_text)`` tuple: stdout decoded with
        ``json.loads`` and stderr as captured text.

    Raises:
        ValueError: If stdout is not valid JSON. The message includes the
            child's exit status and its stderr so the failure can be
            diagnosed; the original ``json.JSONDecodeError`` (itself a
            ``ValueError``) is chained as the cause.
    """
    process = subprocess.run(program,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        parsed = json.loads(process.stdout)
    except json.JSONDecodeError as err:
        # A crashed child usually leaves stdout empty; without this context
        # the only symptom would be a bare JSON parse error.
        raise ValueError(
            f'{program!r} exited with status {process.returncode} '
            f'without valid JSON on stdout; stderr was:\n{process.stderr}'
        ) from err
    return (parsed, process.stderr)
|
|
|
|
import time

# Reject unsupported architectures before anything is measured. Checking only
# after the baseline, the warmup and the sampler launch would waste that work
# and leave ./power.py running when the script exits.
if args.arch == 'x86':
    print("Arch not implemented yet!")
    sys.exit(1)
elif args.arch != 'altra':
    print("Unrecognized arch!")
    sys.exit(1)

# Record the run parameters in the output.
result = {
    'architecture': args.arch,
    'iterations': args.iterations,
    'baseline_time_s': args.baseline_time_s,
    'baseline_delay_s': args.baseline_delay_s,
}

# NOTE(review): --power only gates this pre-run baseline; the in-run samples
# and the post-run baseline below are always collected. Confirm that is
# intended.
if args.power:
    result['power_before'] = baseline_power(args.baseline_time_s)
    if args.debug:
        print(result)

run_program(program_altra)  # Warmup

# Sample power in the background while the measured run executes.
power_process = subprocess.Popen(['./power.py'],
                                 stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
try:
    result = result | run_program(program_altra)[0]
finally:
    # Stop the sampler even if the benchmark failed, so it is never left
    # running in the background.
    power_process.send_signal(signal.SIGINT)
if args.debug:
    print(result)

# Blank lines are skipped because float('') raises.
power_output = power_process.communicate()[0]
result['power'] = [float(line) for line in power_output.splitlines() if line.strip()]
if args.debug:
    print(result)

# Wait before taking the post-run baseline.
time.sleep(args.baseline_delay_s)
result['power_after'] = baseline_power(args.baseline_time_s)
if args.debug:
    print(result)

if args.perf:
    # The benchmark is re-run once per `perf stat` argument set; perf's
    # report arrives on stderr and is echoed to this script's stderr.
    for perf_args in perf_altra:
        output = run_program(perf + perf_args + program_altra)[1]
        print(output, file=sys.stderr)
        result = result | perf_stat.parse_output(output, perf_stat.CPU.ALTRA)
        if args.debug:
            print(result)

    # NOTE(review): assumed to belong under --perf (it comes from the
    # perf_stat module) -- confirm against the original layout.
    result = result | perf_stat.derive_stats(result)

    if args.debug:
        print(result)

# The final record is emitted as a single JSON document on stdout.
print(json.dumps(result))
|
|
|
|
#arch = subprocess.run(['uname', '-m'], stdout=subprocess.PIPE, text=True).stdout.strip()
|
|
#baseline = subprocess.run(
|
|
# ['./power.sh', args.baseline_time_s],
|
|
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
|
#print(baseline)
|
|
#for line in baseline.stdout.split('\n'):
|
|
# print("line")
|
|
# print(line)
|
|
|
|
#os.path.basename(args.matrix_file)
|
|
|
|
#subprocess.run(
|
|
# ['apptainer', 'run', 'pytorch-altra.sif', '-c',
|
|
# f'"numactl --cpunodebind=0 --membind=0 python spmv.py {args.matrix_file} {args.iterations}"'
|
|
# ],
|
|
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|