ampere_research/analysis/analyze.py.bak
2024-12-02 11:34:38 -05:00

303 lines
13 KiB
Python

#! /bin/python3
import os
import re
from dataclasses import dataclass
import json
import matplotlib.pyplot as plt
import numpy
from scipy.io import mmread
import math
import itertools
import argparse
# Names of the perf counters searched for in each log line. The trailing ":u"
# is stripped when the counter is stored in the result dictionary.
perf_stats = [
    "task-clock:u", "page-faults:u", "cycles:u", "instructions:u",
    "branches:u", "branch-misses:u", "L1-dcache-loads:u",
    "L1-dcache-load-misses:u", "LLC-loads:u", "LLC-load-misses:u",
    "L1-icache-loads:u", "L1-icache-load-misses:u", "dTLB-loads:u",
    "dTLB-load-misses:u", "iTLB-loads:u", "iTLB-load-misses:u",
]


def _safe_ratio(stats, numerator_key, denominator_key):
    """Return stats[numerator_key] / stats[denominator_key], or None if the denominator is 0."""
    denominator = stats[denominator_key]
    if denominator == 0:
        return None
    return stats[numerator_key] / denominator


def read_stats(filename):
    """Gather run parameters, matrix properties and perf counters for one log.

    ``filename`` must be a bare file name of the form
    ``<solver>_<linalg>_<file>_<time_steps>_<cores>_<cpu>.<ext>`` located in
    the current working directory: the whole string is split on underscores,
    and both the log and the matrix directory are opened relative to the
    working directory. The matrix is read from
    ``<filename without extension>/SchurComplement.mm``.

    Returns a dictionary holding, in this order: the six filename fields, the
    matrix statistics ("cols", "nonzero", "entries", "trace", "% nonzero entry",
    "trace over cols"), every perf counter found in the log (keyed by counter
    name without the ":u" suffix; "task-clock" is stored as a float under
    "task-clock (msec)", all others as int), and the derived rates. A derived
    rate is None when its denominator counter is 0.

    Raises:
        SystemExit: if an "L1-icache-loads:u" counter value is found in the log.
        KeyError: if a counter required for a derived rate is absent.
    """
    # Split filename into each input parameter.
    run_name = os.path.splitext(filename)[0]
    filename_split = run_name.split('_')
    s = {
        "solver": filename_split[0],
        "linalg": filename_split[1],
        "file": filename_split[2],
        "time_steps": int(filename_split[3]),
        # Only the leading digits of the cores field are used.
        "cores": int(re.match(r'\d+', filename_split[4]).group(0)),
        "cpu": filename_split[5],
    }

    # Take filename and read SchurComplement matrix from corresponding dir.
    print(run_name)
    m = mmread(run_name + "/SchurComplement.mm")
    num_rows, num_cols = m.shape
    # NOTE(review): "cols" is taken from the first dimension, as before;
    # this only equals the column count for a square matrix.
    s["cols"] = num_rows
    s["nonzero"] = m.count_nonzero()
    s["entries"] = num_rows * num_cols
    s["trace"] = m.trace()

    # Compute ratio.
    s["% nonzero entry"] = s["nonzero"] / s["entries"]
    s["trace over cols"] = s["trace"] / s["cols"]

    # Compile one pattern per counter up front instead of rebuilding the
    # regex for every (line, counter) pair.
    stat_patterns = [
        (perf_stat,
         re.compile(r'^\W*([\d+(,|\.)?]+)\W*.*' + re.escape(perf_stat)))
        for perf_stat in perf_stats
    ]

    # Obtain stats from perf in each log.
    with open(filename) as file:
        for line in file:
            for perf_stat, pattern in stat_patterns:
                data = pattern.search(line)
                if data is None:
                    continue
                if perf_stat == "L1-icache-loads:u":
                    print("l1_icache_loads not expected! Exiting...")
                    raise SystemExit(1)
                value = data.group(1).replace(',', '')
                key = perf_stat[:-2]
                if perf_stat == "task-clock:u":
                    # task-clock is the only fractional counter.
                    s[key + " (msec)"] = float(value)
                else:
                    s[key] = int(value)

    # Derive stats; every rate is None when its denominator is zero.
    s["insn per cycle"] = _safe_ratio(s, "instructions", "cycles")
    s["branch-miss-rate"] = _safe_ratio(s, "branch-misses", "branches")
    s["L1-dcache-miss-rate"] = _safe_ratio(s, "L1-dcache-load-misses", "L1-dcache-loads")
    s["LLC-miss-rate"] = _safe_ratio(s, "LLC-load-misses", "LLC-loads")
    s["dTLB-miss-rate"] = _safe_ratio(s, "dTLB-load-misses", "dTLB-loads")
    s["iTLB-miss-rate"] = _safe_ratio(s, "iTLB-load-misses", "iTLB-loads")
    print(s)
    return s
def box_plot(ax, stat_list, x_name, y_name):
    """Draw one box per distinct ``x_name`` value from its ``y_name`` samples.

    Nothing is drawn (a message is printed instead) when any group's first
    sample is a string.
    """
    grouped = accumulate_to_dict(stat_list, x_name, y_name)

    # String-valued measurements cannot be drawn as boxes; bail out early.
    if any(type(samples[0]) is str for samples in grouped.values()):
        print(f"{y_name} values are strings... skipping box plot...")
        return

    print("Plotted data: " + str(grouped))
    boxes = [numpy.array(samples) for samples in grouped.values()]
    ax.boxplot(boxes)
    ax.set_xticklabels(grouped.keys())
    ax.set_ylabel(y_name)
def scatter_plot(ax, stat_list, x_name, y_name, color_name, size_name = None):
    """Scatter ``y_name`` against ``x_name``, one series per ``color_name`` value.

    When ``size_name`` is given its grouped values are only printed; they do
    not influence the drawn markers. No legend or x-axis label is set here.
    """
    xs_by_group = accumulate_to_dict(stat_list, color_name, x_name)
    ys_by_group = accumulate_to_dict(stat_list, color_name, y_name)
    sizes_by_group = (accumulate_to_dict(stat_list, color_name, size_name)
                      if size_name is not None else None)

    print(xs_by_group)
    print(ys_by_group)
    if sizes_by_group is not None:
        print(sizes_by_group)

    for group, xs in xs_by_group.items():
        ys = ys_by_group[group]
        print(f"Plotted x data for {group}: " + str(xs))
        print(f"Plotted y data for {group}: " + str(ys))
        ax.scatter(xs, ys, label = group, alpha = 0.3)

    ax.set_ylabel(y_name)
    ax.grid(True)
def line_plot(ax, stat_list, x_name, y_name, color_name = None, marker_name = None):
    """Plot ``y_name`` against ``x_name`` as one line per group.

    Groups are formed from ``color_name`` (and ``marker_name`` when given).
    Within each group the points are ordered by x, then y, before plotting.
    """
    xs_by_group = accumulate_to_dict(stat_list, color_name, x_name, marker_name)
    ys_by_group = accumulate_to_dict(stat_list, color_name, y_name, marker_name)

    for group, xs in xs_by_group.items():
        # Sort the (x, y) pairs together so each line runs left to right.
        points = sorted(zip(xs, ys_by_group[group]))
        ordered_xs = [point[0] for point in points]
        ordered_ys = [point[1] for point in points]
        print(f"Plotted x data for {group}: " + str(ordered_xs))
        print(f"Plotted y data for {group}: " + str(ordered_ys))
        ax.plot(ordered_xs, ordered_ys, label = group)

    ax.set_ylabel(y_name)
    ax.grid(True)
def basic_plot(ax, stat_list, x_name, y_name):
    """Plot every stat as an unconnected 'o' marker, ``y_name`` against ``x_name``."""
    x_values = accumulate_to_list(stat_list, x_name)
    y_values = accumulate_to_list(stat_list, y_name)
    for axis_label, values in (("x", x_values), ("y", y_values)):
        print(f"Plotted {axis_label} data: " + str(values))

    ax.plot(x_values, y_values, 'o')
    ax.legend()
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    ax.grid(True)
def accumulate_to_dict(stat_list, key_name, value_name, key2_name = None):
    """Group the ``value_name`` entries of ``stat_list`` by key.

    With only ``key_name``, returns ``{key: [values...]}`` with keys in order
    of first appearance. With ``key2_name`` as well, the result is keyed by
    ``(key, key2)`` tuples covering every combination of the distinct key and
    key2 values, so combinations that never occur map to an empty list.
    """
    keys = [stat[key_name] for stat in stat_list]
    values = [stat[value_name] for stat in stat_list]

    if key2_name is None:
        grouped = {}
        for key, value in zip(keys, values):
            grouped.setdefault(key, []).append(value)
        return grouped

    keys2 = [stat[key2_name] for stat in stat_list]
    # dict.fromkeys de-duplicates while keeping first-appearance order.
    all_pairs = itertools.product(dict.fromkeys(keys), dict.fromkeys(keys2))
    grouped = {pair: [] for pair in all_pairs}
    for key, key2, value in zip(keys, keys2, values):
        grouped[(key, key2)].append(value)
    return grouped
def accumulate_to_list(stat_list, name):
    """Return the ``name`` entry of every stat dictionary, in input order."""
    return [entry[name] for entry in stat_list]
def show_visualizations(rows, plot, size_multiplier, stat_list, x_name, y_name = None,
                        color_name = None, marker_name = None, filter_list = None):
    """Render, save and display box or line plots of the collected stats.

    Args:
        rows: Number of subplot rows when plotting all y values. Values below
            1 select a near-square grid automatically.
        plot: "box" or "line".
        size_multiplier: Scales the 16x9 figure used when plotting all y values.
        stat_list: List of stat dictionaries (as produced by read_stats).
        x_name: Stat key used for the x axis.
        y_name: Stat key for the y axis; None or "all" draws one subplot for
            every remaining key of the first stat.
        color_name, marker_name: Grouping keys forwarded to line_plot.
        filter_list: Values and key names to exclude. A stat is dropped when
            any of its values is in this list; a key is dropped from the
            "all" view when its name is in this list. Defaults to no filter.

    The figure is saved as "<title>.png" in the working directory and then
    shown. Exits with status 1 on an invalid plot type or when filtering
    leaves nothing to draw in the "all" view.
    """
    # A None default avoids sharing one mutable list between calls.
    if filter_list is None:
        filter_list = []

    # Validate before any figure is created.
    if plot not in ("box", "line"):
        print("Invalid plot type! Exiting...")
        raise SystemExit(1)

    # Filter stats (removing values such as a specific CPU type)
    stat_list = [stat for stat in stat_list
                 if not any(value in filter_list for value in stat.values())]

    def draw(ax, y):
        if plot == "box":
            box_plot(ax, stat_list, x_name, y)
        else:
            line_plot(ax, stat_list, x_name, y, color_name, marker_name)

    if y_name is None or y_name == "all":
        if not stat_list:
            print("No stats left after filtering! Exiting...")
            raise SystemExit(1)
        # Filter y names (removing measurements such as cycles)
        y_names = [name for name in stat_list[0]
                   if name != x_name
                   and name != color_name
                   and name != marker_name
                   and name not in filter_list]
        if not y_names:
            print("No y values left to plot! Exiting...")
            raise SystemExit(1)
        if rows < 1:
            # No usable row count given: fall back to a near-square grid.
            rows = int(math.ceil(math.sqrt(len(y_names))))
        cols = int(math.ceil(len(y_names) / rows))
        # squeeze=False keeps axes two-dimensional even for one row/column,
        # so the [row][col] indexing below is always valid.
        fig, axes = plt.subplots(rows, cols, squeeze = False,
                                 figsize = (16 * size_multiplier, 9 * size_multiplier))
        # Subplots are filled column by column.
        for i, y in enumerate(y_names):
            last_ax = axes[i % rows][i // rows]
            draw(last_ax, y)
        # The figure legend is taken from the last subplot drawn.
        handles, labels = last_ax.get_legend_handles_labels()
        y_name = "all"
        fontsize = 'xx-large'
    else:
        fig, ax = plt.subplots()
        draw(ax, y_name)
        handles, labels = ax.get_legend_handles_labels()
        fontsize = 'x-small'

    title = f"{plot}_plot_of_{y_name}_vs_{x_name}_by_{color_name}_and_{marker_name}_excluding_{filter_list}"
    fig.suptitle(title, fontsize = fontsize)
    fig.legend(handles, labels, fontsize = fontsize)
    fig.supxlabel(x_name, fontsize = fontsize)
    plt.savefig(title + ".png", dpi = 100)
    plt.show()
# Cache of collected stats: main() loads it when the file exists, otherwise
# parses the logs and writes the result here.
save_file = "stats.json"
# Directory (relative to the working directory) scanned by main() for ".log" files.
samples_dir = "Samples/"
# Alternative cache/sample pairs; uncomment one pair to analyse another data set.
# save_file = "stats_avg_10.json"
# samples_dir = "Samples_Avg_10/"
# save_file = "tmp.json"
# samples_dir = "Old_Samples_Avg_10"
def main():
    """Load or collect the stats, parse the command line and draw the plot.

    Stats are read from ``save_file`` when it exists. Otherwise every ".log"
    file in ``samples_dir`` is parsed with read_stats and the result is
    written to ``save_file`` for later runs. The command line is parsed only
    afterwards, because the help epilog lists the stat keys that were found.

    Example arguments: ``-r 4 -p line -x cols -g1 solver -g2 cpu``.
    """
    stat_list = list()

    # Get stats and accumulate into stat_list
    if os.path.isfile(save_file):
        with open(save_file, 'r') as file:
            stat_list = json.load(file)
    else:
        # read_stats expects bare file names relative to the working
        # directory, so step into the samples directory while parsing and
        # always step back out, even if parsing fails.
        original_dir = os.getcwd()
        os.chdir('./' + samples_dir)
        try:
            # Sorted so the stat order (and thus plot order) is reproducible.
            for filename in sorted(os.listdir(os.getcwd())):
                if os.path.splitext(filename)[1] == ".log":
                    stat_list.append(read_stats(filename))
            print(*stat_list, sep='\n')
        finally:
            os.chdir(original_dir)
        # Do not cache an empty result; it would hide logs added later.
        if stat_list:
            with open(save_file, 'w') as file:
                json.dump(stat_list, file, indent = 2)

    if not stat_list:
        print("No stats found! Exiting...")
        raise SystemExit(1)

    possible_args = list(stat_list[0])

    parser = argparse.ArgumentParser(epilog = 'Possible arguments are: ' + str(possible_args))
    parser.add_argument('-r', '--rows', type = int, default = 4, help = 'Number of rows to display. Will split graphs into columns to evenly fill rows. Does not do anything if -y is not "all". Default is 4.')
    parser.add_argument('-p', '--plot', required = True, choices = ['box', 'line'], help = 'The type of plot. Only "box" and "line" are supported.')
    parser.add_argument('-x', required = True, help = 'Name of the x axis. See below for possible arguments.')
    parser.add_argument('-y', help = 'Name of the y axis. Can be "all" or omitted entirely to display all possible y values. See below for other possible arguments.')
    parser.add_argument('-g1', '--group1', help = 'Name of the first group to group by. Does not need to be specified.')
    parser.add_argument('-g2', '--group2', help = 'Name of the second group to group by. Does not need to be specified (and does not matter for box plots).')
    parser.add_argument('-f', '--filter', nargs = '+', help = 'Names and string values to filter out of the visualization. Can be multiple space-separated arguments. Solver filters are "MueLu", "CG", "GMRES", and "ML". CPU filters are "Xeon,4216" and "Xeon,E5-2683".', default = [])
    parser.add_argument('-s', '--size', type = int, default = 4, help = 'Multiplier for the resultant figure size if the visualization is too difficult to view. Default is 4.')

    args = parser.parse_args()
    print(args)

    if args.plot == "line" and args.group1 is None:
        print("Line plot requires at least one group! Exiting...")
        raise SystemExit(1)

    # Plot
    show_visualizations(args.rows, args.plot, args.size, stat_list, args.x, args.y,
                        args.group1, args.group2, args.filter)


if __name__ == '__main__':
    main()