Compare commits

...

2 Commits

Author SHA1 Message Date
ee01fadf14 update 2024-12-05 12:20:05 -05:00
10934046f7 improve data_stat 2024-12-04 22:47:16 -05:00
6 changed files with 179 additions and 146 deletions

View File

@ -5,13 +5,25 @@ from enum import Enum
class Stat(Enum): class Stat(Enum):
CPU = 'cpu' CPU = 'cpu'
SOLVER = 'solver' THREADS = 'threads'
LIN_ALG = 'linear algebra' ITERATIONS = 'iterations'
INPUT_FILE = 'input file' BASELINE_TIME_S = 'baseline time (sec)'
MAXWELL_SIZE = 'maxwell size' BASELINE_DELAY_S = 'baseline delay (sec)'
MATRIX_COLS = 'matrix columns'
POWER_DELTA = 'Δ watt' SOLVER = 'solver'
MATRIX_FILE = 'matrix file'
MATRIX_SHAPE = 'matrix shape'
MATRIX_SIZE = 'matrix size'
MATRIX_NNZ = 'matrix nnz'
MATRIX_DENSITY = 'matrix density %'
POWER_BEFORE = 'power before'
POWER = 'power'
POWER_AFTER = 'power after'
TIME_S = 'time (sec)'
DELTA_WATT = 'Δ watt'
JOULES = 'joules'
TASK_CLK = 'task clock (msec)' TASK_CLK = 'task clock (msec)'
PAGE_FAULTS = 'page faults' PAGE_FAULTS = 'page faults'
@ -45,7 +57,14 @@ class Stat(Enum):
L2D_CACHE_MISS_RATE = 'L2D cache miss rate' L2D_CACHE_MISS_RATE = 'L2D cache miss rate'
LL_CACHE_MISS_RATE = 'LL cache miss rate' LL_CACHE_MISS_RATE = 'LL cache miss rate'
altra_names = { class Cpu(Enum):
#ALTRA = altra_names
#XEON = xeon_names
ALTRA = 'Altra'
EPYC_7313P = 'Epyc 7313P'
names = {
Cpu.ALTRA: {
Stat.TASK_CLK: 'task-clock:u', Stat.TASK_CLK: 'task-clock:u',
Stat.PAGE_FAULTS: 'page-faults:u', Stat.PAGE_FAULTS: 'page-faults:u',
Stat.CYCLES: 'cycles:u', Stat.CYCLES: 'cycles:u',
@ -67,9 +86,8 @@ altra_names = {
Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u', Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u',
Stat.LL_CACHE: 'LL_CACHE_RD:u', Stat.LL_CACHE: 'LL_CACHE_RD:u',
Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u', Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u',
} },
Cpu.EPYC_7313P: {
xeon_names = {
Stat.TASK_CLK: 'task-clock:u', Stat.TASK_CLK: 'task-clock:u',
Stat.PAGE_FAULTS: 'page-faults:u', Stat.PAGE_FAULTS: 'page-faults:u',
Stat.CYCLES: 'cycles:u', Stat.CYCLES: 'cycles:u',
@ -87,12 +105,9 @@ xeon_names = {
Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u', Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u',
Stat.LL_CACHE: 'LLC-loads:u', Stat.LL_CACHE: 'LLC-loads:u',
Stat.LL_CACHE_MISS: 'LLC-load-misses:u', Stat.LL_CACHE_MISS: 'LLC-load-misses:u',
}
} }
class CPU(Enum):
ALTRA = altra_names
XEON = xeon_names
def parse_output_old(filename: str, data: dict[str, str]) -> dict: def parse_output_old(filename: str, data: dict[str, str]) -> dict:
result: dict[str, int | float] = dict() result: dict[str, int | float] = dict()
cpu: CPU = CPU[data['cpu'].upper()] cpu: CPU = CPU[data['cpu'].upper()]
@ -112,12 +127,12 @@ def parse_output_old(filename: str, data: dict[str, str]) -> dict:
return result | parse_power(filename, cpu) return result | parse_power(filename, cpu)
def parse_output(output: str, cpu: CPU) -> dict: def parse_output(output: str, cpu: Cpu) -> dict:
result = dict() result = dict()
for line in output.split('\n'): for line in output.split('\n'):
for stat in [x for x in Stat if x in cpu.value]: for stat in [x for x in Stat if x in names[cpu]]:
regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + cpu.value[stat] regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + names[cpu][stat]
value = re.search(regex, line) value = re.search(regex, line)
if value is None: if value is None:

View File

@ -1,5 +1,7 @@
#! /bin/python3 #! /bin/python3
from data_stat import Cpu
import argparse import argparse
import glob import glob
import os import os
@ -7,7 +9,7 @@ import subprocess
import random import random
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('arch') parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
parser.add_argument('output_dir') parser.add_argument('output_dir')
parser.add_argument('matrix_dir') parser.add_argument('matrix_dir')
parser.add_argument('iterations', type=int) parser.add_argument('iterations', type=int)
@ -17,8 +19,10 @@ parser.add_argument('--perf', action='store_const', const='--perf')
parser.add_argument('--power', action='store_const', const='--power') parser.add_argument('--power', action='store_const', const='--power')
parser.add_argument('--distribute', action='store_true') parser.add_argument('--distribute', action='store_true')
args = parser.parse_args() args = parser.parse_args()
args.cpu = Cpu[args.cpu.upper()]
srun_args_altra = [ srun_args = {
Cpu.ALTRA: [
'--account', 'oasis', '--account', 'oasis',
'--partition', 'oasis', '--partition', 'oasis',
'--qos', 'oasis-exempt', '--qos', 'oasis-exempt',
@ -31,10 +35,11 @@ srun_args_altra = [
#'--output', '/dev/null', #'--output', '/dev/null',
#'--error', '/dev/null' #'--error', '/dev/null'
] ]
}
def srun(srun_args_list: list, run_args, matrix_file: str) -> list: def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
run_args_list = [ run_args_list = [
args.arch, args.cpu.name.lower(),
matrix_file, matrix_file,
str(args.iterations), str(args.iterations),
str(args.baseline_time_s), str(args.baseline_time_s),
@ -48,15 +53,17 @@ def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
processes = list() processes = list()
for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')): for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
if args.arch == 'altra':
if args.distribute: if args.distribute:
if args.cpu == Cpu.ALTRA:
i = i % 40 i = i % 40
srun_args = srun_args_altra + ['--nodelist', f'oasis{i:02}'] srun_args_temp = srun_args[args.cpu] + ['--nodelist', f'oasis{i:02}']
elif args.cpu == Cpu.EPYC_7313P:
srun_args_temp = srun_args[args.cpu]
else: else:
srun_args = srun_args_altra srun_args_temp = srun_args[args.cpu]
output_filename = '_'.join([ output_filename = '_'.join([
args.arch, args.cpu.name.lower(),
str(args.baseline_time_s), str(args.baseline_time_s),
str(args.baseline_delay_s), str(args.baseline_delay_s),
os.path.splitext(os.path.basename(matrix))[0], os.path.splitext(os.path.basename(matrix))[0],
@ -65,14 +72,22 @@ for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
json_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.json' json_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.json'
raw_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.output' raw_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.output'
with open(json_filepath, 'w') as json_file, open(raw_filepath, 'w') as raw_file: with open(json_filepath, 'w') as json_file, open(raw_filepath, 'w') as raw_file:
print(srun(srun_args, args, matrix)) print(srun(srun_args_temp, args, matrix))
print(json_filepath) print(json_filepath)
print(raw_filepath) print(raw_filepath)
processes.append(subprocess.Popen( processes.append(subprocess.Popen(
srun(srun_args_altra, args, matrix), srun(srun_args_temp, args, matrix),
stdout=json_file, stdout=json_file,
stderr=raw_file)) stderr=raw_file))
# Wait on every 10 jobs to avoid socket timeout.
if i % 10 == 9:
print("Waiting on 10 jobs")
for process in processes:
process.wait()
processes = list()
for process in processes: for process in processes:
process.wait() process.wait()

1
pytorch/data_stat.py Symbolic link
View File

@ -0,0 +1 @@
../analysis/data_stat.py

View File

@ -1 +0,0 @@
../analysis/perf_stat.py

View File

@ -1,14 +1,16 @@
#! /bin/python3 #! /bin/python3
import perf_stat import data_stat
from data_stat import Stat, Cpu
import argparse import argparse
import os, sys import os, sys
import subprocess, signal import subprocess, signal
import json import json
import time
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('arch') parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
parser.add_argument('matrix_file') parser.add_argument('matrix_file')
parser.add_argument('iterations', type=int) parser.add_argument('iterations', type=int)
parser.add_argument('baseline_time_s', type=int) parser.add_argument('baseline_time_s', type=int)
@ -17,17 +19,23 @@ parser.add_argument('--perf', action='store_true')
parser.add_argument('--power', action='store_true') parser.add_argument('--power', action='store_true')
parser.add_argument('-d', '--debug', action='store_true') parser.add_argument('-d', '--debug', action='store_true')
args = parser.parse_args() args = parser.parse_args()
args.cpu = Cpu[args.cpu.upper()]
program_altra = [ program = {
Cpu.ALTRA: [
'apptainer', 'run', 'pytorch-altra.sif', '-c', 'apptainer', 'run', 'pytorch-altra.sif', '-c',
'numactl --cpunodebind=0 --membind=0 ' 'numactl --cpunodebind=0 --membind=0 '
+ f'python spmv.py {args.matrix_file} {args.iterations}'] + f'python spmv.py {args.matrix_file} {args.iterations}']
}
perf = ['perf', 'stat'] perf = ['perf', 'stat']
perf_altra = [['-d', '-d'], perf_args = {
Cpu.ALTRA: [
['-d', '-d'],
['-M', 'branch_misprediction_ratio'], ['-M', 'branch_misprediction_ratio'],
['-M', 'dtlb_walk_ratio,itlb_walk_ratio'], ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'],
['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'], ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'],
['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']] ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']]
}
def baseline_power(baseline_time_s: int) -> list: def baseline_power(baseline_time_s: int) -> list:
power_process = subprocess.Popen(['./power.py', '-s', str(baseline_time_s)], power_process = subprocess.Popen(['./power.py', '-s', str(baseline_time_s)],
@ -37,66 +45,56 @@ def baseline_power(baseline_time_s: int) -> list:
def run_program(program: list[str]) -> tuple[dict, str]: def run_program(program: list[str]) -> tuple[dict, str]:
process = subprocess.run(program, process = subprocess.run(program,
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
#print(json.loads(process.stdout)) if args.debug:
#print(process.stderr) print(process.stdout)
print(process.stderr)
return (json.loads(process.stdout), process.stderr) return (json.loads(process.stdout), process.stderr)
result = dict() result = dict()
result['architecture'] = args.arch result[Stat.CPU.value] = args.cpu.value
result['iterations'] = args.iterations result[Stat.ITERATIONS.value] = args.iterations
result['baseline_time_s'] = args.baseline_time_s result[Stat.BASELINE_TIME_S.value] = args.baseline_time_s
result['baseline_delay_s'] = args.baseline_delay_s result[Stat.BASELINE_DELAY_S.value] = args.baseline_delay_s
if args.power is True: if args.power:
result['power_before'] = baseline_power(args.baseline_time_s) time.sleep(args.baseline_delay_s)
result[Stat.POWER_BEFORE.value] = baseline_power(args.baseline_time_s)
if args.debug: if args.debug:
print(result) print(result)
run_program(program_altra) # Warmup run_program(program[args.cpu]) # Warmup
power_process = subprocess.Popen(['./power.py'], power_process = subprocess.Popen(['./power.py'],
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
if args.arch == 'altra': result = result | run_program(program[args.cpu])[0]
result = result | run_program(program_altra)[0]
elif args.arch == 'x86':
print("Arch not implemented yet!")
exit(1)
else:
print("Unrecognized arch!")
exit(1)
power_process.send_signal(signal.SIGINT) power_process.send_signal(signal.SIGINT)
if args.debug: if args.debug:
print(result) print(result)
result['power'] = [float(x) for x in power_process.communicate()[0].strip().split('\n')] result[Stat.POWER.value] = [float(x) for x in power_process.communicate()[0].strip().split('\n')]
# Riemann Sum
from math import ceil
result[Stat.JOULES.value] = sum(result[Stat.POWER.value][-ceil(result[Stat.TIME_S.value]):-1]) + (result[Stat.POWER.value][-1] * (result[Stat.TIME_S.value] % 1))
if args.debug: if args.debug:
print(result) print(result)
#print(len(result['power'])) #print(len(result['power']))
#print(sum(result['power']) / len(result['power'])) #print(sum(result['power']) / len(result['power']))
import time
time.sleep(args.baseline_delay_s) time.sleep(args.baseline_delay_s)
result['power_after'] = baseline_power(args.baseline_time_s) result[Stat.POWER_AFTER.value] = baseline_power(args.baseline_time_s)
if args.debug: if args.debug:
print(result) print(result)
if args.perf is True: if args.perf:
if args.arch == 'altra': for perf_arg in perf_args[args.cpu]:
for perf_args in perf_altra: output = run_program(perf + perf_arg + program[args.cpu])[1]
output = run_program(perf + perf_args + program_altra)[1]
print(output, file=sys.stderr) print(output, file=sys.stderr)
result = result | perf_stat.parse_output(output, perf_stat.CPU.ALTRA) result = result | data_stat.parse_output(output, args.cpu)
if args.debug: if args.debug:
print(result) print(result)
elif args.arch == 'x86':
print("no implement")
exit(1)
else:
print("Unrecognized arch!")
exit(1)
result = result | perf_stat.derive_stats(result) result = result | data_stat.derive_stats(result)
if args.debug: if args.debug:
print(result) print(result)

View File

@ -1,3 +1,5 @@
from data_stat import Stat
import torch, scipy import torch, scipy
import numpy as np import numpy as np
import argparse import argparse
@ -32,19 +34,22 @@ end = time.time()
result = dict() result = dict()
result['matrix'] = os.path.splitext(os.path.basename(args.matrix_file))[0] result[Stat.MATRIX_FILE.value] = os.path.splitext(os.path.basename(args.matrix_file))[0]
print(f"Matrix: {result['matrix']}", file=sys.stderr) print(f"Matrix: {result[Stat.MATRIX_FILE.value]}", file=sys.stderr)
result['shape'] = matrix.shape result[Stat.MATRIX_SHAPE.value] = matrix.shape
print(f"Shape: {result['shape']}", file=sys.stderr) print(f"Shape: {result[Stat.MATRIX_SHAPE.value]}", file=sys.stderr)
result['nnz'] = matrix.values().shape[0] result[Stat.MATRIX_SIZE.value] = matrix.shape[0] * matrix.shape[1]
print(f"NNZ: {result['nnz']}", file=sys.stderr) print(f"Size: {result[Stat.MATRIX_SIZE.value]}", file=sys.stderr)
result['% density'] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1]) result[Stat.MATRIX_NNZ.value] = matrix.values().shape[0]
print(f"Density: {result['% density']}", file=sys.stderr) print(f"NNZ: {result[Stat.MATRIX_NNZ.value]}", file=sys.stderr)
result['time_s'] = end - start result[Stat.MATRIX_DENSITY.value] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1])
print(f"Time: {result['time_s']} seconds", file=sys.stderr) print(f"Density: {result[Stat.MATRIX_DENSITY.value]}", file=sys.stderr)
result[Stat.TIME_S.value] = end - start
print(f"Time: {result[Stat.TIME_S.value]} seconds", file=sys.stderr)
print(json.dumps(result)) print(json.dumps(result))