ampere_research/pytorch/run.py

import data_stat
from data_stat import Stat, Cpu, Format, MatrixType
import argparse
import os, sys
import subprocess, signal
import json
import time
parser = argparse.ArgumentParser()
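
# Positional arguments: target CPU, matrix source, sparse format, starting
# iteration count, minimum benchmark time, and the baseline power sampling
# window and delay (all times in seconds).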
parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
parser.add_argument('matrix_type', type=str,
                    choices=[t.name.lower() for t in MatrixType])
parser.add_argument('format', type=str,
                    choices=[fmt.name.lower() for fmt in Format])
parser.add_argument('base_iterations', type=int)
parser.add_argument('min_time_s', type=int)
parser.add_argument('baseline_time_s', type=int)
parser.add_argument('baseline_delay_s', type=int)
#parser.add_argument('--perf', action='store_true')
parser.add_argument('-m', '--matrix_file', type=str)
parser.add_argument('-ss', '--synthetic_size', type=int)
parser.add_argument('-sd', '--synthetic_density', type=float)
parser.add_argument('-c', '--cores', type=int)
parser.add_argument('--power', action='store_true')
parser.add_argument('-d', '--debug', action='store_true')
args = parser.parse_args()
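
# Map the lowercase CLI choices back to their enum members.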
args.cpu = Cpu[args.cpu.upper()]
args.matrix_type = MatrixType[args.matrix_type.upper()]
args.format = Format[args.format.upper()]
assert args.baseline_time_s >= 2
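
# Python interpreter for each machine (not referenced by the active code below).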
python = {
    Cpu.ALTRA: 'python3',
    Cpu.EPYC_7313P: 'python3.11',
    Cpu.XEON_4216: 'python3.11'
}
perf = ['perf', 'stat']
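# Per-CPU perf metric groups, used only by the commented-out --perf path at the
# end of this file.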
perf_args = {
    Cpu.ALTRA: [
        ['-d', '-d'],
        ['-M', 'branch_misprediction_ratio'],
        ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'],
        ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'],
        ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']]
}
power = ['taskset', '-c', '0', './power.sh']
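
# Build the apptainer command line that runs spmv.py on the requested CPU with
# the given matrix, format, and iteration count.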
def program(
    cpu: Cpu,
    cores: int,
    matrix_type: MatrixType,
    fmt: Format,
    iterations: int,
    matrix_file: str,
    synthetic_size: int,
    synthetic_density: float
) -> list[str]:
    apptainer = ['apptainer', 'run']
    if cores is not None:
        # Bind OpenMP threads to the first `cores` CPUs.
        apptainer += ['--env', 'OMP_PROC_BIND=true',
                      '--env', 'OMP_PLACES={0:' + f'{cores}' + '}']
    spmv = f'python3 spmv.py {matrix_type.name.lower()} {fmt.name.lower()} '
    spmv += f'{iterations} '
    if matrix_type == MatrixType.SUITESPARSE:
        spmv += f'-m {matrix_file}'
    elif matrix_type == MatrixType.SYNTHETIC:
        spmv += f'-ss {synthetic_size} -sd {synthetic_density}'
    else:
        sys.exit("Unrecognized matrix type!")

    if cores is not None:
        spmv += f' -c {cores}'
    if cpu == Cpu.ALTRA:
        return apptainer + ['pytorch-altra.sif', '-c',
                            'numactl --cpunodebind=0 --membind=0 ' + spmv]
    elif cpu == Cpu.EPYC_7313P:
        return apptainer + ['pytorch-epyc_7313p.sif'] + spmv.split(' ')
    elif cpu == Cpu.XEON_4216:
        return apptainer + ['pytorch-xeon_4216.sif',
                            'numactl', '--cpunodebind=0', '--membind=0'] + spmv.split(' ')

# Collect one power sample per second for baseline_time_s seconds; called before
# and after the workload to establish an idle baseline.
def baseline_power(cpu: Cpu, baseline_time_s: int) -> list[float]:
    power_process = subprocess.Popen(power + [str(baseline_time_s)],
                                     stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
    return [float(x) for x in power_process.communicate()[0].strip().split('\n') if len(x) != 0]

# Run a benchmark command and return (parsed JSON stats from stdout, raw stderr).
def run_program(program: list[str]) -> tuple[dict, str]:
    if args.debug:
        print(program, file=sys.stderr)
    process = subprocess.run(program,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    process.check_returncode()
    if args.debug:
        print(process.stdout, file=sys.stderr)
        print(process.stderr, file=sys.stderr)
    return (json.loads(process.stdout), process.stderr)

# Integrate per-second power samples (W) into energy (J) with the trapezoidal
# rule; the fractional final second is covered by the last sample.
def trapezoidal_rule(power: list[float], time_s: float) -> float:
    result = 0.0
    #relevant_power = power[-int(time_s):]
    relevant_power = power
    assert time_s >= 2
    assert len(relevant_power) >= 2
    #assert(len(power) >= time_s)
    for pair in zip(relevant_power, relevant_power[1:]):
        result += 0.5 * (pair[0] + pair[1])
    result += (time_s % 1) * power[-1]
    return result
result = dict()
result[Stat.CPU.name] = args.cpu.value
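
# Core count: the requested value, otherwise the full core count of each machine.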
if args.cores is not None:
    result[Stat.CORES.name] = args.cores
else:
    if args.cpu == Cpu.ALTRA:
        result[Stat.CORES.name] = 80
    elif args.cpu == Cpu.EPYC_7313P:
        result[Stat.CORES.name] = 16
    elif args.cpu == Cpu.XEON_4216:
        result[Stat.CORES.name] = 16
iterations = args.base_iterations
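
# Run once with the starting iteration count, then scale it up until a run
# takes at least min_time_s.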
program_result = run_program(program(
    args.cpu, args.cores, args.matrix_type, args.format, iterations,
    args.matrix_file, args.synthetic_size, args.synthetic_density))
while program_result[0][Stat.TIME_S.name] < args.min_time_s:
    # Scale the iteration count by the ratio of the desired time to the measured time.
    iterations *= 1 / (program_result[0][Stat.TIME_S.name] / args.min_time_s)
    # Add another 5% for safety.
    iterations += iterations * 0.05
    iterations = int(iterations)
    program_result = run_program(program(
        args.cpu, args.cores, args.matrix_type, args.format, iterations,
        args.matrix_file, args.synthetic_size, args.synthetic_density))

result[Stat.ITERATIONS.name] = iterations
result |= program_result[0]
print(program_result[1], file=sys.stderr)
# Seconds per 1000 iterations.
result[Stat.TIME_S_1KI.name] = (
    (result[Stat.TIME_S.name] / result[Stat.ITERATIONS.name]) * 1000
)

if args.power:
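    # Measure idle power before the run, run the workload under the power meter,
    # then measure idle power again afterwards.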
    result[Stat.BASELINE_TIME_S.name] = args.baseline_time_s
    result[Stat.BASELINE_DELAY_S.name] = args.baseline_delay_s
    # Baseline
    time.sleep(args.baseline_delay_s)
    baseline_list = baseline_power(args.cpu, args.baseline_time_s)
    if args.debug:
        print(baseline_list, file=sys.stderr)
    assert len(baseline_list) == args.baseline_time_s

    # Power Collection
    power_process = subprocess.run(
        power + ['-1'] + program(
            args.cpu, args.cores, args.matrix_type, args.format,
            result[Stat.ITERATIONS.name],
            args.matrix_file, args.synthetic_size, args.synthetic_density),
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    power_process.check_returncode()
    power_list = [float(x)
                  for x in power_process.stdout.strip().split('\n')]
    power_time_s = json.loads(power_process.stderr)[Stat.TIME_S.name]
    if args.debug:
        print(power_list, file=sys.stderr)
        print(power_time_s, file=sys.stderr)
    if args.cpu == Cpu.ALTRA:
        # Integrate the per-second power samples with the trapezoidal rule.
        result[Stat.J.name] = trapezoidal_rule(
            power_list, power_time_s)
    elif args.cpu == Cpu.EPYC_7313P or args.cpu == Cpu.XEON_4216:
        # A single wattage reading is used for the whole run on these machines.
        result[Stat.J.name] = power_list[0] * power_time_s
    result[Stat.W.name] = result[Stat.J.name] / power_time_s
    if args.debug:
        print(result, file=sys.stderr)
    #print(len(result['power']))
    #print(sum(result['power']) / len(result['power']))

    # Baseline
    time.sleep(args.baseline_delay_s)
    baseline_list += baseline_power(args.cpu, args.baseline_time_s)
    if args.debug:
        print(baseline_list, file=sys.stderr)
    assert len(baseline_list) / 2 == args.baseline_time_s
    # Idle energy over both baseline windows, used to derive the idle wattage.
    baseline_joules = (
        trapezoidal_rule(
            baseline_list[:args.baseline_time_s],
            args.baseline_time_s) +
        trapezoidal_rule(
            baseline_list[args.baseline_time_s:],
            args.baseline_time_s)
    )
    baseline_wattage = baseline_joules / (args.baseline_time_s * 2)
    if args.debug:
        print(baseline_joules, file=sys.stderr)
        print(baseline_wattage, file=sys.stderr)
    result[Stat.J_1KI.name] = (
        (result[Stat.J.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    result[Stat.W_1KI.name] = (
        (result[Stat.W.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    # Subtract the idle baseline to get the workload's marginal power and energy.
    result[Stat.W_D.name] = result[Stat.W.name] - baseline_wattage
    result[Stat.J_D.name] = result[Stat.W_D.name] * power_time_s
    result[Stat.W_D_1KI.name] = (
        (result[Stat.W_D.name] / result[Stat.ITERATIONS.name]) * 1000
    )
    result[Stat.J_D_1KI.name] = (
        (result[Stat.J_D.name] / result[Stat.ITERATIONS.name]) * 1000
    )
if args.debug:
print(result, file=sys.stderr)
print(json.dumps(result))
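
# Example invocation (illustrative; the sparse format and matrix path below are
# placeholders, not values from this repository):
#   python3 run.py altra suitesparse <format> 1000 10 5 10 -m <matrix.mtx> --power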
#if args.perf:
# for perf_arg in perf_args[args.cpu]:
# output = run_program(perf + perf_arg + program[args.cpu])[1]
# print(output, file=sys.stderr)
# result = result | data_stat.parse_output(output, args.cpu)
# if args.debug:
# print(result)
#
# result = result | data_stat.derive_stats(result)
#
# if args.debug:
# print(result)
#arch = subprocess.run(['uname', '-m'], stdout=subprocess.PIPE, text=True).stdout.strip()
#baseline = subprocess.run(
# ['./power.sh', args.baseline_time_s],
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
#print(baseline)
#for line in baseline.stdout.split('\n'):
# print("line")
# print(line)
#os.path.basename(args.matrix_file)
#subprocess.run(
# ['apptainer', 'run', 'pytorch-altra.sif', '-c',
# f'"numactl --cpunodebind=0 --membind=0 python spmv.py {args.matrix_file} {args.iterations}"'
# ],
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)