From 10934046f7f3aa1d664c51eec9037ef78a3d2c48 Mon Sep 17 00:00:00 2001 From: cephi Date: Wed, 4 Dec 2024 22:47:16 -0500 Subject: [PATCH] improve data_stat --- analysis/{perf_stat.py => data_stat.py} | 115 +++++++++++++----------- pytorch/data_stat.py | 1 + pytorch/perf_stat.py | 1 - pytorch/run.py | 82 ++++++++--------- pytorch/spmv.py | 25 +++--- 5 files changed, 118 insertions(+), 106 deletions(-) rename analysis/{perf_stat.py => data_stat.py} (60%) create mode 120000 pytorch/data_stat.py delete mode 120000 pytorch/perf_stat.py diff --git a/analysis/perf_stat.py b/analysis/data_stat.py similarity index 60% rename from analysis/perf_stat.py rename to analysis/data_stat.py index b4394c4..0ee5c19 100644 --- a/analysis/perf_stat.py +++ b/analysis/data_stat.py @@ -5,13 +5,21 @@ from enum import Enum class Stat(Enum): CPU = 'cpu' + ITERATIONS = 'iterations' + BASELINE_TIME_S = 'baseline time (sec)' + BASELINE_DELAY_S = 'baseline delay (sec)' + SOLVER = 'solver' - LIN_ALG = 'linear algebra' - INPUT_FILE = 'input file' - MAXWELL_SIZE = 'maxwell size' - MATRIX_COLS = 'matrix columns' + + MATRIX_FILE = 'matrix file' + MATRIX_SHAPE = 'matrix shape' + MATRIX_SIZE = 'matrix size' + MATRIX_NNZ = 'matrix nnz' + MATRIX_DENSITY = 'matrix density %' + TIME_S = 'time (sec)' POWER_DELTA = 'Δ watt' + JOULES = 'joules' TASK_CLK = 'task clock (msec)' PAGE_FAULTS = 'page faults' @@ -45,54 +53,57 @@ class Stat(Enum): L2D_CACHE_MISS_RATE = 'L2D cache miss rate' LL_CACHE_MISS_RATE = 'LL cache miss rate' -altra_names = { - Stat.TASK_CLK: 'task-clock:u', - Stat.PAGE_FAULTS: 'page-faults:u', - Stat.CYCLES: 'cycles:u', - Stat.INSTS: 'instructions:u', +class Cpu(Enum): + #ALTRA = altra_names + #XEON = xeon_names + ALTRA = 'Altra' + EPYC_7313P = 'Epyc 7313P' - Stat.BR: 'BR_RETIRED:u', - Stat.BR_MISS: 'BR_MIS_PRED_RETIRED:u', - Stat.ITLB: 'L1I_TLB:u', - Stat.ITLB_MISS: 'ITLB_WALK:u', - Stat.DTLB: 'L1D_TLB:u', - Stat.DTLB_MISS: 'DTLB_WALK:u', - Stat.L2D_TLB: 'L2D_TLB:u', - Stat.L2D_TLB_MISS: 'L2D_TLB_REFILL:u', - Stat.L1I_CACHE: 'L1I_CACHE:u', - Stat.L1I_CACHE_MISS: 'L1I_CACHE_REFILL:u', - Stat.L1D_CACHE: 'L1D_CACHE:u', - Stat.L1D_CACHE_MISS: 'L1D_CACHE_REFILL:u', - Stat.L2D_CACHE: 'L2D_CACHE:u', - Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u', - Stat.LL_CACHE: 'LL_CACHE_RD:u', - Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u', +names = { + Cpu.ALTRA: { + Stat.TASK_CLK: 'task-clock:u', + Stat.PAGE_FAULTS: 'page-faults:u', + Stat.CYCLES: 'cycles:u', + Stat.INSTS: 'instructions:u', + + Stat.BR: 'BR_RETIRED:u', + Stat.BR_MISS: 'BR_MIS_PRED_RETIRED:u', + Stat.ITLB: 'L1I_TLB:u', + Stat.ITLB_MISS: 'ITLB_WALK:u', + Stat.DTLB: 'L1D_TLB:u', + Stat.DTLB_MISS: 'DTLB_WALK:u', + Stat.L2D_TLB: 'L2D_TLB:u', + Stat.L2D_TLB_MISS: 'L2D_TLB_REFILL:u', + Stat.L1I_CACHE: 'L1I_CACHE:u', + Stat.L1I_CACHE_MISS: 'L1I_CACHE_REFILL:u', + Stat.L1D_CACHE: 'L1D_CACHE:u', + Stat.L1D_CACHE_MISS: 'L1D_CACHE_REFILL:u', + Stat.L2D_CACHE: 'L2D_CACHE:u', + Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u', + Stat.LL_CACHE: 'LL_CACHE_RD:u', + Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u', + }, + Cpu.EPYC_7313P: { + Stat.TASK_CLK: 'task-clock:u', + Stat.PAGE_FAULTS: 'page-faults:u', + Stat.CYCLES: 'cycles:u', + Stat.INSTS: 'instructions:u', + + Stat.BR: 'branches:u', + Stat.BR_MISS: 'branch-misses:u', + Stat.ITLB: 'iTLB-loads:u', + Stat.ITLB_MISS: 'iTLB-load-misses:u', + Stat.DTLB: 'dTLB-loads:u', + Stat.DTLB_MISS: 'dTLB-load-misses:u', + Stat.L1I_CACHE: 'L1-icache-loads:u', + Stat.L1I_CACHE_MISS: 'L1-icache-load-misses:u', + Stat.L1D_CACHE: 'L1-dcache-loads:u', + Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u', + Stat.LL_CACHE: 'LLC-loads:u', + Stat.LL_CACHE_MISS: 'LLC-load-misses:u', + } } -xeon_names = { - Stat.TASK_CLK: 'task-clock:u', - Stat.PAGE_FAULTS: 'page-faults:u', - Stat.CYCLES: 'cycles:u', - Stat.INSTS: 'instructions:u', - - Stat.BR: 'branches:u', - Stat.BR_MISS: 'branch-misses:u', - Stat.ITLB: 'iTLB-loads:u', - Stat.ITLB_MISS: 'iTLB-load-misses:u', - Stat.DTLB: 'dTLB-loads:u', - Stat.DTLB_MISS: 'dTLB-load-misses:u', - Stat.L1I_CACHE: 'L1-icache-loads:u', - Stat.L1I_CACHE_MISS: 'L1-icache-load-misses:u', - Stat.L1D_CACHE: 'L1-dcache-loads:u', - Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u', - Stat.LL_CACHE: 'LLC-loads:u', - Stat.LL_CACHE_MISS: 'LLC-load-misses:u', -} - -class CPU(Enum): - ALTRA = altra_names - XEON = xeon_names - def parse_output_old(filename: str, data: dict[str, str]) -> dict: result: dict[str, int | float] = dict() cpu: CPU = CPU[data['cpu'].upper()] @@ -112,12 +123,12 @@ def parse_output_old(filename: str, data: dict[str, str]) -> dict: return result | parse_power(filename, cpu) -def parse_output(output: str, cpu: CPU) -> dict: +def parse_output(output: str, cpu: Cpu) -> dict: result = dict() for line in output.split('\n'): - for stat in [x for x in Stat if x in cpu.value]: - regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + cpu.value[stat] + for stat in [x for x in Stat if x in names[cpu]]: + regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + names[cpu][stat] value = re.search(regex, line) if value is None: diff --git a/pytorch/data_stat.py b/pytorch/data_stat.py new file mode 120000 index 0000000..ed3063f --- /dev/null +++ b/pytorch/data_stat.py @@ -0,0 +1 @@ +../analysis/data_stat.py \ No newline at end of file diff --git a/pytorch/perf_stat.py b/pytorch/perf_stat.py deleted file mode 120000 index 747fadc..0000000 --- a/pytorch/perf_stat.py +++ /dev/null @@ -1 +0,0 @@ -../analysis/perf_stat.py \ No newline at end of file diff --git a/pytorch/run.py b/pytorch/run.py index c94cc9a..34aa811 100755 --- a/pytorch/run.py +++ b/pytorch/run.py @@ -1,14 +1,16 @@ #! /bin/python3 -import perf_stat +import data_stat +from data_stat import Stat, Cpu import argparse import os, sys import subprocess, signal import json +import time parser = argparse.ArgumentParser() -parser.add_argument('arch') +parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu]) parser.add_argument('matrix_file') parser.add_argument('iterations', type=int) parser.add_argument('baseline_time_s', type=int) @@ -17,17 +19,23 @@ parser.add_argument('--perf', action='store_true') parser.add_argument('--power', action='store_true') parser.add_argument('-d', '--debug', action='store_true') args = parser.parse_args() +args.cpu = Cpu[args.cpu.upper()] -program_altra = [ - 'apptainer', 'run', 'pytorch-altra.sif', '-c', - 'numactl --cpunodebind=0 --membind=0 ' - + f'python spmv.py {args.matrix_file} {args.iterations}'] +program = { + Cpu.ALTRA: [ + 'apptainer', 'run', 'pytorch-altra.sif', '-c', + 'numactl --cpunodebind=0 --membind=0 ' + + f'python spmv.py {args.matrix_file} {args.iterations}'] +} perf = ['perf', 'stat'] -perf_altra = [['-d', '-d'], - ['-M', 'branch_misprediction_ratio'], - ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'], - ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'], - ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']] +perf_args = { + Cpu.ALTRA: [ + ['-d', '-d'], + ['-M', 'branch_misprediction_ratio'], + ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'], + ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'], + ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']] +} def baseline_power(baseline_time_s: int) -> list: power_process = subprocess.Popen(['./power.py', '-s', str(baseline_time_s)], @@ -37,33 +45,29 @@ def baseline_power(baseline_time_s: int) -> list: def run_program(program: list[str]) -> tuple[dict, str]: process = subprocess.run(program, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - #print(json.loads(process.stdout)) - #print(process.stderr) + if args.debug: + print(process.stdout) + print(process.stderr) return (json.loads(process.stdout), process.stderr) result = dict() -result['architecture'] = args.arch -result['iterations'] = args.iterations -result['baseline_time_s'] = args.baseline_time_s -result['baseline_delay_s'] = args.baseline_delay_s +result[Stat.CPU.value] = args.cpu.value +result[Stat.ITERATIONS.value] = args.iterations +result[Stat.BASELINE_TIME_S.value] = args.baseline_time_s +result[Stat.BASELINE_DELAY_S.value] = args.baseline_delay_s -if args.power is True: +if args.power: + time.sleep(args.baseline_delay_s) result['power_before'] = baseline_power(args.baseline_time_s) if args.debug: print(result) - run_program(program_altra) # Warmup + print(program[args.cpu]) + run_program(program[args.cpu]) # Warmup power_process = subprocess.Popen(['./power.py'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) - if args.arch == 'altra': - result = result | run_program(program_altra)[0] - elif args.arch == 'x86': - print("Arch not implemented yet!") - exit(1) - else: - print("Unrecognized arch!") - exit(1) + result = result | run_program(program[args.cpu])[0] power_process.send_signal(signal.SIGINT) if args.debug: @@ -75,28 +79,20 @@ if args.power is True: #print(len(result['power'])) #print(sum(result['power']) / len(result['power'])) - import time time.sleep(args.baseline_delay_s) result['power_after'] = baseline_power(args.baseline_time_s) if args.debug: print(result) -if args.perf is True: - if args.arch == 'altra': - for perf_args in perf_altra: - output = run_program(perf + perf_args + program_altra)[1] - print(output, file=sys.stderr) - result = result | perf_stat.parse_output(output, perf_stat.CPU.ALTRA) - if args.debug: - print(result) - elif args.arch == 'x86': - print("no implement") - exit(1) - else: - print("Unrecognized arch!") - exit(1) +if args.perf: + for perf_arg in perf_args[args.cpu]: + output = run_program(perf + perf_arg + program[args.cpu])[1] + print(output, file=sys.stderr) + result = result | data_stat.parse_output(output, args.cpu) + if args.debug: + print(result) - result = result | perf_stat.derive_stats(result) + result = result | data_stat.derive_stats(result) if args.debug: print(result) diff --git a/pytorch/spmv.py b/pytorch/spmv.py index 7203257..c47055a 100644 --- a/pytorch/spmv.py +++ b/pytorch/spmv.py @@ -1,3 +1,5 @@ +from data_stat import Stat + import torch, scipy import numpy as np import argparse @@ -32,19 +34,22 @@ end = time.time() result = dict() -result['matrix'] = os.path.splitext(os.path.basename(args.matrix_file))[0] -print(f"Matrix: {result['matrix']}", file=sys.stderr) +result[Stat.MATRIX_FILE.value] = os.path.splitext(os.path.basename(args.matrix_file))[0] +print(f"Matrix: {result[Stat.MATRIX_FILE.value]}", file=sys.stderr) -result['shape'] = matrix.shape -print(f"Shape: {result['shape']}", file=sys.stderr) +result[Stat.MATRIX_SHAPE.value] = matrix.shape +print(f"Shape: {result[Stat.MATRIX_SHAPE.value]}", file=sys.stderr) -result['nnz'] = matrix.values().shape[0] -print(f"NNZ: {result['nnz']}", file=sys.stderr) +result[Stat.MATRIX_SIZE.value] = matrix.shape[0] * matrix.shape[1] +print(f"Size: {result[Stat.MATRIX_SIZE.value]}", file=sys.stderr) -result['% density'] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1]) -print(f"Density: {result['% density']}", file=sys.stderr) +result[Stat.MATRIX_NNZ.value] = matrix.values().shape[0] +print(f"NNZ: {result[Stat.MATRIX_NNZ.value]}", file=sys.stderr) -result['time_s'] = end - start -print(f"Time: {result['time_s']} seconds", file=sys.stderr) +result[Stat.MATRIX_DENSITY.value] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1]) +print(f"Density: {result[Stat.MATRIX_DENSITY.value]}", file=sys.stderr) + +result[Stat.TIME_S.value] = end - start +print(f"Time: {result[Stat.TIME_S.value]} seconds", file=sys.stderr) print(json.dumps(result))