Compare commits
No commits in common. "ee01fadf14e00f5e81fdf7ab24684c41df11da7c" and "ce2016bdc6fd52e708c4a1419c5d157dc5fb2cdb" have entirely different histories.
ee01fadf14 ... ce2016bdc6
@@ -5,25 +5,13 @@ from enum import Enum
 class Stat(Enum):
 CPU = 'cpu'
-
-THREADS = 'threads'
-ITERATIONS = 'iterations'
-BASELINE_TIME_S = 'baseline time (sec)'
-BASELINE_DELAY_S = 'baseline delay (sec)'

 SOLVER = 'solver'
+LIN_ALG = 'linear algebra'
+INPUT_FILE = 'input file'
+MAXWELL_SIZE = 'maxwell size'
+MATRIX_COLS = 'matrix columns'

-MATRIX_FILE = 'matrix file'
-MATRIX_SHAPE = 'matrix shape'
-MATRIX_SIZE = 'matrix size'
-MATRIX_NNZ = 'matrix nnz'
-MATRIX_DENSITY = 'matrix density %'
-
-POWER_BEFORE = 'power before'
-POWER = 'power'
-POWER_AFTER = 'power after'
-TIME_S = 'time (sec)'
-DELTA_WATT = 'Δ watt'
-JOULES = 'joules'
+POWER_DELTA = 'Δ watt'

 TASK_CLK = 'task clock (msec)'
 PAGE_FAULTS = 'page faults'
@@ -57,14 +45,7 @@ class Stat(Enum):
 L2D_CACHE_MISS_RATE = 'L2D cache miss rate'
 LL_CACHE_MISS_RATE = 'LL cache miss rate'

-class Cpu(Enum):
-#ALTRA = altra_names
-#XEON = xeon_names
-ALTRA = 'Altra'
-EPYC_7313P = 'Epyc 7313P'
-
-names = {
-Cpu.ALTRA: {
+altra_names = {
 Stat.TASK_CLK: 'task-clock:u',
 Stat.PAGE_FAULTS: 'page-faults:u',
 Stat.CYCLES: 'cycles:u',
@@ -86,8 +67,9 @@ names = {
 Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u',
 Stat.LL_CACHE: 'LL_CACHE_RD:u',
 Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u',
-},
-Cpu.EPYC_7313P: {
+}
+
+xeon_names = {
 Stat.TASK_CLK: 'task-clock:u',
 Stat.PAGE_FAULTS: 'page-faults:u',
 Stat.CYCLES: 'cycles:u',
@@ -105,9 +87,12 @@ names = {
 Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u',
 Stat.LL_CACHE: 'LLC-loads:u',
 Stat.LL_CACHE_MISS: 'LLC-load-misses:u',
-}
 }

+class CPU(Enum):
+ALTRA = altra_names
+XEON = xeon_names
+
 def parse_output_old(filename: str, data: dict[str, str]) -> dict:
 result: dict[str, int | float] = dict()
 cpu: CPU = CPU[data['cpu'].upper()]
@@ -127,12 +112,12 @@ def parse_output_old(filename: str, data: dict[str, str]) -> dict:

 return result | parse_power(filename, cpu)

-def parse_output(output: str, cpu: Cpu) -> dict:
+def parse_output(output: str, cpu: CPU) -> dict:
 result = dict()

 for line in output.split('\n'):
-for stat in [x for x in Stat if x in names[cpu]]:
-regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + names[cpu][stat]
+for stat in [x for x in Stat if x in cpu.value]:
+regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + cpu.value[stat]
 value = re.search(regex, line)

 if value is None:
@@ -1,7 +1,5 @@
 #! /bin/python3

-from data_stat import Cpu
-
 import argparse
 import glob
 import os
@@ -9,7 +7,7 @@ import subprocess
 import random

 parser = argparse.ArgumentParser()
-parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
+parser.add_argument('arch')
 parser.add_argument('output_dir')
 parser.add_argument('matrix_dir')
 parser.add_argument('iterations', type=int)
@@ -19,10 +17,8 @@ parser.add_argument('--perf', action='store_const', const='--perf')
 parser.add_argument('--power', action='store_const', const='--power')
 parser.add_argument('--distribute', action='store_true')
 args = parser.parse_args()
-args.cpu = Cpu[args.cpu.upper()]

-srun_args = {
-Cpu.ALTRA: [
+srun_args_altra = [
 '--account', 'oasis',
 '--partition', 'oasis',
 '--qos', 'oasis-exempt',
@@ -35,11 +31,10 @@ srun_args = {
 #'--output', '/dev/null',
 #'--error', '/dev/null'
 ]
-}

 def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
 run_args_list = [
-args.cpu.name.lower(),
+args.arch,
 matrix_file,
 str(args.iterations),
 str(args.baseline_time_s),
@@ -53,17 +48,15 @@ def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
 processes = list()

 for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
+if args.arch == 'altra':
 if args.distribute:
-if args.cpu == Cpu.ALTRA:
 i = i % 40
-srun_args_temp = srun_args[args.cpu] + ['--nodelist', f'oasis{i:02}']
-elif args.cpu == Cpu.EPYC_7313P:
-srun_args_temp = srun_args[args.cpu]
+srun_args = srun_args_altra + ['--nodelist', f'oasis{i:02}']
 else:
-srun_args_temp = srun_args[args.cpu]
+srun_args = srun_args_altra

 output_filename = '_'.join([
-args.cpu.name.lower(),
+args.arch,
 str(args.baseline_time_s),
 str(args.baseline_delay_s),
 os.path.splitext(os.path.basename(matrix))[0],
@@ -72,22 +65,14 @@ for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
 json_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.json'
 raw_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.output'
 with open(json_filepath, 'w') as json_file, open(raw_filepath, 'w') as raw_file:
-print(srun(srun_args_temp, args, matrix))
+print(srun(srun_args, args, matrix))
 print(json_filepath)
 print(raw_filepath)

 processes.append(subprocess.Popen(
-srun(srun_args_temp, args, matrix),
+srun(srun_args_altra, args, matrix),
 stdout=json_file,
 stderr=raw_file))

-# Wait on every 10 jobs to avoid socket timeout.
-if i % 10 == 9:
-print("Waiting on 10 jobs")
-for process in processes:
-process.wait()
-
-processes = list()
-
 for process in processes:
 process.wait()
@@ -1 +0,0 @@
-../analysis/data_stat.py

pytorch/perf_stat.py (symbolic link)
@@ -0,0 +1 @@
+../analysis/perf_stat.py
@@ -1,16 +1,14 @@
 #! /bin/python3

-import data_stat
-from data_stat import Stat, Cpu
+import perf_stat

 import argparse
 import os, sys
 import subprocess, signal
 import json
-import time

 parser = argparse.ArgumentParser()
-parser.add_argument('cpu', choices=[x.name.lower() for x in Cpu])
+parser.add_argument('arch')
 parser.add_argument('matrix_file')
 parser.add_argument('iterations', type=int)
 parser.add_argument('baseline_time_s', type=int)
@@ -19,23 +17,17 @@ parser.add_argument('--perf', action='store_true')
 parser.add_argument('--power', action='store_true')
 parser.add_argument('-d', '--debug', action='store_true')
 args = parser.parse_args()
-args.cpu = Cpu[args.cpu.upper()]

-program = {
-Cpu.ALTRA: [
+program_altra = [
 'apptainer', 'run', 'pytorch-altra.sif', '-c',
 'numactl --cpunodebind=0 --membind=0 '
 + f'python spmv.py {args.matrix_file} {args.iterations}']
-}
 perf = ['perf', 'stat']
-perf_args = {
-Cpu.ALTRA: [
-['-d', '-d'],
+perf_altra = [['-d', '-d'],
 ['-M', 'branch_misprediction_ratio'],
 ['-M', 'dtlb_walk_ratio,itlb_walk_ratio'],
 ['-M', 'l1d_cache_miss_ratio,l1i_cache_miss_ratio'],
 ['-M', 'l2_cache_miss_ratio,l2_tlb_miss_ratio,ll_cache_read_miss_ratio']]
-}

 def baseline_power(baseline_time_s: int) -> list:
 power_process = subprocess.Popen(['./power.py', '-s', str(baseline_time_s)],
@@ -45,56 +37,66 @@ def baseline_power(baseline_time_s: int) -> list:
 def run_program(program: list[str]) -> tuple[dict, str]:
 process = subprocess.run(program,
 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-if args.debug:
-print(process.stdout)
-print(process.stderr)
+#print(json.loads(process.stdout))
+#print(process.stderr)
 return (json.loads(process.stdout), process.stderr)

 result = dict()
-result[Stat.CPU.value] = args.cpu.value
-result[Stat.ITERATIONS.value] = args.iterations
-result[Stat.BASELINE_TIME_S.value] = args.baseline_time_s
-result[Stat.BASELINE_DELAY_S.value] = args.baseline_delay_s
+result['architecture'] = args.arch
+result['iterations'] = args.iterations
+result['baseline_time_s'] = args.baseline_time_s
+result['baseline_delay_s'] = args.baseline_delay_s

-if args.power:
-time.sleep(args.baseline_delay_s)
-result[Stat.POWER_BEFORE.value] = baseline_power(args.baseline_time_s)
+if args.power is True:
+result['power_before'] = baseline_power(args.baseline_time_s)
 if args.debug:
 print(result)

-run_program(program[args.cpu]) # Warmup
+run_program(program_altra) # Warmup
 power_process = subprocess.Popen(['./power.py'],
 stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)

-result = result | run_program(program[args.cpu])[0]
+if args.arch == 'altra':
+result = result | run_program(program_altra)[0]
+elif args.arch == 'x86':
+print("Arch not implemented yet!")
+exit(1)
+else:
+print("Unrecognized arch!")
+exit(1)

 power_process.send_signal(signal.SIGINT)
 if args.debug:
 print(result)

-result[Stat.POWER.value] = [float(x) for x in power_process.communicate()[0].strip().split('\n')]
-# Riemann Sum
-from math import ceil
-result[Stat.JOULES.value] = sum(result[Stat.POWER.value][-ceil(result[Stat.TIME_S.value]):-1]) + (result[Stat.POWER.value][-1] * (result[Stat.TIME_S.value] % 1))
+result['power'] = [float(x) for x in power_process.communicate()[0].strip().split('\n')]
 if args.debug:
 print(result)
 #print(len(result['power']))
 #print(sum(result['power']) / len(result['power']))

+import time
 time.sleep(args.baseline_delay_s)
-result[Stat.POWER_AFTER.value] = baseline_power(args.baseline_time_s)
+result['power_after'] = baseline_power(args.baseline_time_s)
 if args.debug:
 print(result)

-if args.perf:
-for perf_arg in perf_args[args.cpu]:
-output = run_program(perf + perf_arg + program[args.cpu])[1]
+if args.perf is True:
+if args.arch == 'altra':
+for perf_args in perf_altra:
+output = run_program(perf + perf_args + program_altra)[1]
 print(output, file=sys.stderr)
-result = result | data_stat.parse_output(output, args.cpu)
+result = result | perf_stat.parse_output(output, perf_stat.CPU.ALTRA)
 if args.debug:
 print(result)
+elif args.arch == 'x86':
+print("no implement")
+exit(1)
+else:
+print("Unrecognized arch!")
+exit(1)

-result = result | data_stat.derive_stats(result)
+result = result | perf_stat.derive_stats(result)

 if args.debug:
 print(result)
@@ -1,5 +1,3 @@
-from data_stat import Stat
-
 import torch, scipy
 import numpy as np
 import argparse
@@ -34,22 +32,19 @@ end = time.time()

 result = dict()

-result[Stat.MATRIX_FILE.value] = os.path.splitext(os.path.basename(args.matrix_file))[0]
-print(f"Matrix: {result[Stat.MATRIX_FILE.value]}", file=sys.stderr)
+result['matrix'] = os.path.splitext(os.path.basename(args.matrix_file))[0]
+print(f"Matrix: {result['matrix']}", file=sys.stderr)

-result[Stat.MATRIX_SHAPE.value] = matrix.shape
-print(f"Shape: {result[Stat.MATRIX_SHAPE.value]}", file=sys.stderr)
+result['shape'] = matrix.shape
+print(f"Shape: {result['shape']}", file=sys.stderr)

-result[Stat.MATRIX_SIZE.value] = matrix.shape[0] * matrix.shape[1]
-print(f"Size: {result[Stat.MATRIX_SIZE.value]}", file=sys.stderr)
+result['nnz'] = matrix.values().shape[0]
+print(f"NNZ: {result['nnz']}", file=sys.stderr)

-result[Stat.MATRIX_NNZ.value] = matrix.values().shape[0]
-print(f"NNZ: {result[Stat.MATRIX_NNZ.value]}", file=sys.stderr)
+result['% density'] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1])
+print(f"Density: {result['% density']}", file=sys.stderr)

-result[Stat.MATRIX_DENSITY.value] = matrix.values().shape[0] / (matrix.shape[0] * matrix.shape[1])
-print(f"Density: {result[Stat.MATRIX_DENSITY.value]}", file=sys.stderr)
+result['time_s'] = end - start
+print(f"Time: {result['time_s']} seconds", file=sys.stderr)

-result[Stat.TIME_S.value] = end - start
-print(f"Time: {result[Stat.TIME_S.value]} seconds", file=sys.stderr)
-
 print(json.dumps(result))