From cf86de6ea48cb5b81678047a3327b1fce0aa9c49 Mon Sep 17 00:00:00 2001 From: cephi Date: Wed, 11 Dec 2024 22:47:11 -0500 Subject: [PATCH] Improved power collection and added Xeon 4216 support --- analysis/data_stat.py | 25 ++-- pytorch/power.sh | 28 ++-- pytorch/pytorch-altra.Containerfile | 2 +- pytorch/pytorch-epyc_7313p.Containerfile | 2 +- pytorch/pytorch-xeon_4216.Containerfile | 7 + pytorch/run.py | 166 +++++++++++++++-------- pytorch/spmv.py | 4 +- pytorch/tmp.txt | 1 + 8 files changed, 158 insertions(+), 77 deletions(-) create mode 100644 pytorch/pytorch-xeon_4216.Containerfile create mode 100644 pytorch/tmp.txt diff --git a/analysis/data_stat.py b/analysis/data_stat.py index 83b5d50..a543b44 100644 --- a/analysis/data_stat.py +++ b/analysis/data_stat.py @@ -3,7 +3,7 @@ import re from enum import Enum class Stat(Enum): - PLATFORM = 'platform' + CPU = 'cpu' THREADS = 'threads' ITERATIONS = 'iterations' @@ -18,16 +18,20 @@ class Stat(Enum): MATRIX_NNZ = 'matrix nnz' MATRIX_DENSITY = 'matrix density %' - POWER_BEFORE = 'power before' - POWER = 'power' - POWER_AFTER = 'power after' - TIME_S_OVERALL = 'time (sec) overall' + #POWER_BEFORE = 'power before' + #POWER = 'power' + #POWER_AFTER = 'power after' TIME_S = 'time (sec)' - JOULES_OVERALL = 'joules overall' - JOULES = 'joules' - WATTS_OVERALL = 'wattage overall' - WATTS = 'wattage' - DELTA_WATT = 'Δ watt' + TIME_S_1KI = 'time (sec) per 1k iterations' + J = 'joules' + J_1KI = 'joules per 1k iterations' + J_D = 'Δ joules' + J_D_1KI = 'Δ joules per 1k iterations' + W = 'watts' + W_1KI = 'watts per 1k iterations' + W_D = 'Δ watts' + W_D_1KI = 'Δ watts per 1k iterations' + #DELTA_WATT = 'Δ watt' TASK_CLK = 'task clock (msec)' PAGE_FAULTS = 'page faults' @@ -66,6 +70,7 @@ class Cpu(Enum): #XEON = xeon_names ALTRA = 'Altra' EPYC_7313P = 'Epyc 7313P' + XEON_4216 = 'Xeon 4216' names = { Cpu.ALTRA: { diff --git a/pytorch/power.sh b/pytorch/power.sh index ac0e179..8749ef6 100755 --- a/pytorch/power.sh +++ b/pytorch/power.sh @@ -7,21 +7,33 @@ if [[ $arch = aarch64 ]]; then iter=0 function aarch64_power { sensors | awk '/CPU power:/ {print $3; exit}' - ((iter++)) - sleep 1s } - baseline_time_s=-1 - while [[ "$iter" -ne "$baseline_time_s" ]]; do - aarch64_power - done + if [[ "$baseline_time_s" -eq -1 ]]; then + #"${@:2}" 1>/dev/null 2>&1 & + "${@:2}" 1>&2 2>/dev/null & + pid=$! + + while kill -0 $pid 2> /dev/null; do + aarch64_power + sleep 1s + done + + wait + else + while [[ "$iter" -ne "$baseline_time_s" ]]; do + aarch64_power + ((iter++)) + sleep 1s + done + fi elif [[ $arch = x86_64 ]]; then if [[ "$baseline_time_s" -eq -1 ]]; then #turbostat -s PkgWatt -i 1 2>/dev/null | awk -F: '/PkgWatt/ {getline; print $0}' #turbostat -s PkgWatt -i 1 | awk '/PkgWatt/ {getline; print $0}' - turbostat -s PkgWatt ${@:2} | sed -n "/PkgWatt/{n;n;p}" + turbostat -s PkgWatt ${@:2} 3>&2 2>&1 1>&3 | sed -n "/PkgWatt/{n;n;p}" else #turbostat -s PkgWatt -n "$baseline_time_s" -i 1 2>/dev/null | awk -F: '/PkgWatt/ {getline; print $0}' - turbostat -s PkgWatt -n "$baseline_time_s" -i 1 | sed -n "/PkgWatt/{n;n;p}" + turbostat -s PkgWatt -n "$baseline_time_s" -i 1 2>/dev/null | sed -n "/PkgWatt/{n;n;p}" fi else echo "Unrecognized arch!" diff --git a/pytorch/pytorch-altra.Containerfile b/pytorch/pytorch-altra.Containerfile index f4b1bba..41d6c67 100644 --- a/pytorch/pytorch-altra.Containerfile +++ b/pytorch/pytorch-altra.Containerfile @@ -3,7 +3,7 @@ FROM docker.io/amperecomputingai/pytorch:latest RUN apt-get update -y \ - && apt-get install -y git vim wget \ + && apt-get install -y git vim wget htop \ && rm -rf /var/lib/apt/lists/* RUN pip install scipy diff --git a/pytorch/pytorch-epyc_7313p.Containerfile b/pytorch/pytorch-epyc_7313p.Containerfile index 9aa4f7b..60682e2 100644 --- a/pytorch/pytorch-epyc_7313p.Containerfile +++ b/pytorch/pytorch-epyc_7313p.Containerfile @@ -1,7 +1,7 @@ FROM --platform=linux/amd64 ubuntu:22.04 RUN apt-get update -y \ - && apt-get install -y python3 python3-pip git vim wget \ + && apt-get install -y python3 python3-pip git vim wget htop \ && rm -rf /var/lib/apt/lists/* RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu RUN pip install scipy diff --git a/pytorch/pytorch-xeon_4216.Containerfile b/pytorch/pytorch-xeon_4216.Containerfile new file mode 100644 index 0000000..d0a3ee4 --- /dev/null +++ b/pytorch/pytorch-xeon_4216.Containerfile @@ -0,0 +1,7 @@ +FROM --platform=linux/amd64 ubuntu:22.04 + +RUN apt-get update -y \ + && apt-get install -y python3 python3-pip git vim wget htop numactl \ + && rm -rf /var/lib/apt/lists/* +RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +RUN pip install scipy diff --git a/pytorch/run.py b/pytorch/run.py index 0b66f5c..1c5f04b 100644 --- a/pytorch/run.py +++ b/pytorch/run.py @@ -13,7 +13,7 @@ parser.add_argument('matrix_file') parser.add_argument('iterations', type=int) parser.add_argument('baseline_time_s', type=int) parser.add_argument('baseline_delay_s', type=int) -parser.add_argument('--perf', action='store_true') +#parser.add_argument('--perf', action='store_true') parser.add_argument('--power', action='store_true') parser.add_argument('-d', '--debug', action='store_true') args = parser.parse_args() @@ -21,17 +21,22 @@ args.cpu = Cpu[args.cpu.upper()] python = { Cpu.ALTRA: 'python3', - Cpu.EPYC_7313P: 'python3.11' -} -program = { - Cpu.ALTRA: [ - 'apptainer', 'run', 'pytorch-altra.sif', '-c', - 'numactl --cpunodebind=0 --membind=0 ' - + f'python spmv.py {args.matrix_file} {args.iterations}'], - Cpu.EPYC_7313P: [ - 'apptainer', 'run', 'pytorch-epyc_7313p.sif', - 'python3', 'spmv.py', f'{args.matrix_file}', f'{args.iterations}'] + Cpu.EPYC_7313P: 'python3.11', + Cpu.XEON_4216: 'python3.11' } +#program = { +# Cpu.ALTRA: [ +# 'apptainer', 'run', 'pytorch-altra.sif', '-c', +# 'numactl --cpunodebind=0 --membind=0 ' +# + f'python3 spmv.py {args.matrix_file} {args.iterations}'], +# Cpu.EPYC_7313P: [ +# 'apptainer', 'run', 'pytorch-epyc_7313p.sif', +# 'python3', 'spmv.py', f'{args.matrix_file}', f'{args.iterations}'], +# Cpu.XEON_4216: [ +# 'apptainer', 'run', 'pytorch-altra.sif', '-c', +# 'numactl --cpunodebind=0 --membind=0 ' +# + f'python3 spmv.py {args.matrix_file} {args.iterations}'] +#} perf = ['perf', 'stat'] perf_args = { Cpu.ALTRA: [ @@ -42,8 +47,24 @@ perf_args = { ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']] } +def program(cpu: Cpu, matrix_file: str, iterations: int) -> list: + if cpu == Cpu.ALTRA: + return [ + 'apptainer', 'run', 'pytorch-altra.sif', '-c', + 'numactl --cpunodebind=0 --membind=0 ' + + f'python3 spmv.py {matrix_file} {iterations}'] + elif cpu == Cpu.EPYC_7313P: + return [ + 'apptainer', 'run', 'pytorch-epyc_7313p.sif', + 'python3', 'spmv.py', f'{matrix_file}', f'{iterations}'] + elif cpu == Cpu.XEON_4216: + return [ + 'apptainer', 'run', 'pytorch-xeon_4216.sif', + 'numactl', '--cpunodebind=0', '--membind=0', + 'python3', 'spmv.py', f'{matrix_file}', f'{iterations}'] + def baseline_power(cpu: Cpu, baseline_time_s: int) -> list: - power_process = subprocess.Popen([python[args.cpu], 'power.py', '-s', str(baseline_time_s)], + power_process = subprocess.Popen(['./power.sh', str(baseline_time_s)], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) return [float(x) for x in power_process.communicate()[0].strip().split('\n') if len(x) != 0] @@ -52,18 +73,22 @@ def run_program(program: list[str]) -> tuple[dict, str]: print(program) process = subprocess.run(program, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + process.check_returncode() + if args.debug: print(process.stdout) print(process.stderr) return (json.loads(process.stdout), process.stderr) def trapezoidal_rule(power: list[float], time_s: float) -> float: - from math import ceil - from itertools import pairwise result = 0.0 relevant_power = power[-int(time_s):] - relevant_pairs = [pair for pair in zip(relevant_power, relevant_power[1:])] - for pair in relevant_pairs: + + assert(time_s >= 2) + assert(len(relevant_power) >= 2) + assert(len(power) >= time_s) + + for pair in zip(relevant_power, relevant_power[1:]): result += 0.5 * (pair[0] + pair[1]) result += (time_s % 1) * (power[-1]) return result @@ -72,74 +97,105 @@ result = dict() result[Stat.CPU.name] = args.cpu.name result[Stat.ITERATIONS.name] = args.iterations -program_result = run_program(program[args.cpu]) +program_result = run_program(program(args.cpu, args.matrix_file, args.iterations)) result |= program_result[0] print(program_result[1], file=sys.stderr) -result[Stat.TIME_S.name] = result[Stat.TIME_S_OVERALL.name] / result[Stat.ITERATIONS.name] +result[Stat.TIME_S_1KI.name] = ( + (result[Stat.TIME_S.name] / result[Stat.ITERATIONS.name]) * 1000 +) if args.power: result[Stat.BASELINE_TIME_S.name] = args.baseline_time_s result[Stat.BASELINE_DELAY_S.name] = args.baseline_delay_s + # Baseline time.sleep(args.baseline_delay_s) - result[Stat.POWER_BEFORE.name] = baseline_power(args.cpu, args.baseline_time_s) + #result[Stat.POWER_BEFORE.name] = baseline_power(args.cpu, args.baseline_time_s) + baseline_list = baseline_power(args.cpu, args.baseline_time_s) if args.debug: - print(result) + print(baseline_list) + assert(len(baseline_list) == args.baseline_time_s) - run_program(program[args.cpu]) # Warmup + # Power Collection + power_process = subprocess.run( + ['./power.sh', '-1'] + program(args.cpu, args.matrix_file, args.iterations), + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + power_process.check_returncode() + #result[Stat.POWER.name] = [float(x) for x in power_process.communicate()[0].strip().split('\n')] + power_list = [float(x) + #for x in power_process.communicate()[0].strip().split('\n')] + for x in power_process.stdout.strip().split('\n')] + power_process_time_s = json.loads(power_process.stderr)[Stat.TIME_S.name] + if args.debug: + print(power_list) + print(power_process_time_s) + if args.cpu == Cpu.ALTRA: - power_process = subprocess.Popen( - [python[args.cpu], 'power.py'], - stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) - - run_program(program[args.cpu])[0] - - power_process.send_signal(signal.SIGINT) - if args.debug: - print(result) - - result[Stat.POWER.name] = [float(x) for x in power_process.communicate()[0].strip().split('\n')] # Trapezoidal Rule across the last (s) power recordings. - #result[Stat.JOULES.name] = ( - # sum(result[Stat.POWER.name][-ceil(result[Stat.TIME_S.name]):]) - # + (result[Stat.POWER.name][-1] * (result[Stat.TIME_S.name] % 1))) - result[Stat.JOULES.name] = trapezoidal_rule( - result[Stat.POWER.name], - result[Stat.TIME_S.name]) + result[Stat.J.name] = trapezoidal_rule( + power_list, power_process_time_s) + elif args.cpu == Cpu.EPYC_7313P or args.cpu == Cpu.XEON_4216: + result[Stat.J.name] = power_list[0] * power_process_time_s - elif args.cpu == Cpu.EPYC_7313P: - power_process = subprocess.Popen( - [python[args.cpu], 'power.py'] + program[args.cpu], - stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) - result[Stat.POWER.name] = [float(x) for x in power_process.communicate()[0].strip().split('\n')] - result[Stat.JOULES.name] = result[Stat.POWER.name][0] * result[Stat.TIME_S_OVERALL.name] + result[Stat.W.name] = result[Stat.J.name] / power_process_time_s if args.debug: print(result) #print(len(result['power'])) #print(sum(result['power']) / len(result['power'])) + # Baseline time.sleep(args.baseline_delay_s) - result[Stat.POWER_AFTER.name] = baseline_power(args.cpu, args.baseline_time_s) + #result[Stat.POWER_AFTER.name] = baseline_power(args.cpu, args.baseline_time_s) + baseline_list += baseline_power(args.cpu, args.baseline_time_s) if args.debug: - print(result) + print(baseline_list) + assert(len(baseline_list) / 2 == args.baseline_time_s) -if args.perf: - for perf_arg in perf_args[args.cpu]: - output = run_program(perf + perf_arg + program[args.cpu])[1] - print(output, file=sys.stderr) - result = result | data_stat.parse_output(output, args.cpu) - if args.debug: - print(result) - - result = result | data_stat.derive_stats(result) + baseline_joules = ( + trapezoidal_rule( + baseline_list[:args.baseline_time_s], + args.baseline_time_s) + + trapezoidal_rule( + baseline_list[args.baseline_time_s:], + args.baseline_time_s) + ) + baseline_wattage = baseline_joules / (args.baseline_time_s * 2) + + result[Stat.J_1KI.name] = ( + (result[Stat.J.name] / result[Stat.ITERATIONS.name]) * 1000 + ) + result[Stat.W_1KI.name] = ( + (result[Stat.W.name] / result[Stat.ITERATIONS.name]) * 1000 + ) + result[Stat.W_D.name] = result[Stat.W.name] - baseline_wattage + result[Stat.J_D.name] = result[Stat.W_D.name] * power_process_time_s + result[Stat.W_D_1KI.name] = ( + (result[Stat.W_D.name] / result[Stat.ITERATIONS.name]) * 1000 + ) + result[Stat.J_D_1KI.name] = ( + (result[Stat.W_D_1KI.name] / result[Stat.ITERATIONS.name]) * 1000 + ) if args.debug: print(result) print(json.dumps(result)) +#if args.perf: +# for perf_arg in perf_args[args.cpu]: +# output = run_program(perf + perf_arg + program[args.cpu])[1] +# print(output, file=sys.stderr) +# result = result | data_stat.parse_output(output, args.cpu) +# if args.debug: +# print(result) +# +# result = result | data_stat.derive_stats(result) +# +# if args.debug: +# print(result) + #arch = subprocess.run(['uname', '-m'], stdout=subprocess.PIPE, text=True).stdout.strip() #baseline = subprocess.run( diff --git a/pytorch/spmv.py b/pytorch/spmv.py index 3f92543..d063b35 100644 --- a/pytorch/spmv.py +++ b/pytorch/spmv.py @@ -50,7 +50,7 @@ print(f"NNZ: {result[Stat.MATRIX_NNZ.name]}", file=sys.stderr) result[Stat.MATRIX_DENSITY.name] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1]) print(f"Density: {result[Stat.MATRIX_DENSITY.name]}", file=sys.stderr) -result[Stat.TIME_S_OVERALL.name] = end - start -print(f"Time: {result[Stat.TIME_S_OVERALL.name]} seconds", file=sys.stderr) +result[Stat.TIME_S.name] = end - start +print(f"Time: {result[Stat.TIME_S.name]} seconds", file=sys.stderr) print(json.dumps(result)) diff --git a/pytorch/tmp.txt b/pytorch/tmp.txt new file mode 100644 index 0000000..cb4117e --- /dev/null +++ b/pytorch/tmp.txt @@ -0,0 +1 @@ +{"MATRIX_FILE": "as-caida", "MATRIX_SHAPE": [31379, 31379], "MATRIX_SIZE": 984641641, "MATRIX_NNZ": 106762, "MATRIX_DENSITY": 0.00010842726485909405, "TIME_S_T": 2.6111819744110107}