#! /bin/python3 import argparse import os import re import json from enum import Enum import math import numpy as np import matplotlib.pyplot as plt import itertools class Stat(Enum): CPU = 'cpu' SOLVER = 'solver' LIN_ALG = 'linear algebra' INPUT_FILE = 'input file' MAXWELL_SIZE = 'maxwell size' MATRIX_COLS = 'matrix columns' POWER_DELTA = 'Δ watt' TASK_CLK = 'task clock (msec)' PAGE_FAULTS = 'page faults' CYCLES = 'cycles' INST = 'instructions' BR = 'branches' BR_MISS = 'branch mispredictions' ITLB = 'ITLB accesses' ITLB_MISS = 'ITLB misses' DTLB = 'DTLB accesses' DTLB_MISS = 'DTLB misses' L2D_TLB = 'L2D TLB accesses' L2D_TLB_MISS = 'L2D TLB misses' L1I_CACHE = 'L1I cache accesses' L1I_CACHE_MISS = 'L1I cache misses' L1D_CACHE = 'L1D cache accesses' L1D_CACHE_MISS = 'L1D cache misses' L2D_CACHE = 'L2D cache accesses' L2D_CACHE_MISS = 'L2D cache misses' LL_CACHE = 'LL cache accesses' LL_CACHE_MISS = 'LL cache misses' BRANCH_MISS_RATE = 'branch miss rate' ITLB_MISS_RATE = 'ITLB miss rate' DTLB_MISS_RATE = 'DTLB miss rate' L1I_CACHE_MISS_RATE = 'L1I cache miss rate' L1D_CACHE_MISS_RATE = 'L1D cache miss rate' L2D_CACHE_MISS_RATE = 'L2D cache miss rate' LL_CACHE_MISS_RATE = 'LL cache miss rate' altra_names = { Stat.TASK_CLK: 'task-clock:u', Stat.PAGE_FAULTS: 'page-faults:u', Stat.CYCLES: 'cycles:u', Stat.INST: 'instructions:u', Stat.BR: 'BR_RETIRED:u', Stat.BR_MISS: 'BR_MIS_PRED_RETIRED:u', Stat.ITLB: 'L1I_TLB:u', Stat.ITLB_MISS: 'ITLB_WALK:u', Stat.DTLB: 'L1D_TLB:u', Stat.DTLB_MISS: 'DTLB_WALK:u', Stat.L2D_TLB: 'L2D_TLB:u', Stat.L2D_TLB_MISS: 'L2D_TLB_REFILL:u', Stat.L1I_CACHE: 'L1I_CACHE:u', Stat.L1I_CACHE_MISS: 'L1I_CACHE_REFILL:u', Stat.L1D_CACHE: 'L1D_CACHE:u', Stat.L1D_CACHE_MISS: 'L1D_CACHE_REFILL:u', Stat.L2D_CACHE: 'L2D_CACHE:u', Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u', Stat.LL_CACHE: 'LL_CACHE_RD:u', Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u', } xeon_names = { Stat.TASK_CLK: 'task-clock:u', Stat.PAGE_FAULTS: 'page-faults:u', Stat.CYCLES: 'cycles:u', Stat.INST: 'instructions:u', Stat.BR: 'branches:u', Stat.BR_MISS: 'branch-misses:u', Stat.ITLB: 'iTLB-loads:u', Stat.ITLB_MISS: 'iTLB-load-misses:u', Stat.DTLB: 'dTLB-loads:u', Stat.DTLB_MISS: 'dTLB-load-misses:u', Stat.L1I_CACHE: 'L1-icache-loads:u', Stat.L1I_CACHE_MISS: 'L1-icache-load-misses:u', Stat.L1D_CACHE: 'L1-dcache-loads:u', Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u', Stat.LL_CACHE: 'LLC-loads:u', Stat.LL_CACHE_MISS: 'LLC-load-misses:u', } class Workload(Enum): MINI_EM = 'mini_em' class CPU(Enum): ALTRA = altra_names XEON = xeon_names class Plot(Enum): BOX = 'box' LINE = 'line' def parse_input(filename: str, workload: Workload) -> dict[str, str | int]: # Split filename into each input parameter. filename_split = os.path.splitext(filename)[0].split('_') i = 0 data = {Stat.CPU.value: filename_split[i]} i += 1 if workload == Workload.MINI_EM: data[Stat.SOLVER.value] = filename_split[i] i += 1 data[Stat.LIN_ALG.value] = filename_split[i] i += 1 data[Stat.INPUT_FILE.value] = filename_split[i] regex = r'^maxwell(\d+)' data[Stat.MAXWELL_SIZE.value] = int(re.search(regex, filename_split[i]).group(1)) x = data[Stat.MAXWELL_SIZE.value] data[Stat.MATRIX_COLS.value] = 3 * x ** 3 + 6 * x ** 2 + 3 * x i += 1 return data def parse_output(filename: str, data: dict[str, str]) -> dict[str, str | int | float]: result: dict[str, int | float] = dict() cpu: CPU = CPU[data['cpu'].upper()] with open(filename, 'r') as file: for line in file: for stat in [x for x in Stat if x in cpu.value]: regex = r'^\W*([\d+(,|\.)?]+)\W*.*' + cpu.value[stat] value = re.search(regex, line) if value is None: continue elif stat == Stat.TASK_CLK: result[stat.value] = float(value.group(1).replace(',', '')) else: result[stat.value] = int(value.group(1).replace(',', '')) return result | parse_power(filename, cpu) # TODO CHANGE THIS def parse_power(filename: str, cpu: CPU) -> dict[str, int]: match cpu: case CPU.ALTRA: class Socket(Enum): SOCKET1 = 'Socket1' SOCKET2 = 'Socket2' data: dict[str, int] = {socket: list() for socket in Socket} baseline_data: dict[str, int] = dict() filename = os.path.splitext(filename)[0] + "_power" + os.path.splitext(filename)[1] with open(filename, 'r') as file: for line in file: regex = r'Start' value = re.search(regex, line) if value is not None: for socket in Socket: baseline_data[socket] = np.average(np.array(data[socket])) data[socket] = list() continue for socket in Socket: regex = r'^' + socket.value + r' (\d+\.\d+)' value = re.search(regex, line) if value is not None: data[socket].append(float(value.group(1))) break power_deltas: dict[str, int] = { socket: np.max(np.array(data[socket])) - baseline_data[socket] for socket in Socket } return {Stat.POWER_DELTA.value: max(power_deltas.values())} case CPU.XEON: return {} def derive_stats(data: dict[str, str | int | float]) -> dict[str, int | float]: result: dict[str, int | float] = dict() result[Stat.BRANCH_MISS_RATE.value] = data[Stat.BR_MISS.value] / data[Stat.BR.value] result[Stat.ITLB_MISS_RATE.value] = data[Stat.ITLB_MISS.value] / data[Stat.ITLB.value] result[Stat.DTLB_MISS_RATE.value] = data[Stat.DTLB_MISS.value] / data[Stat.DTLB.value] result[Stat.L1I_CACHE_MISS_RATE.value] = ( data[Stat.L1I_CACHE_MISS.value] / data[Stat.L1I_CACHE.value] if Stat.L1I_CACHE_MISS.value in data and Stat.L1I_CACHE.value in data else None) result[Stat.L1D_CACHE_MISS_RATE.value] = ( data[Stat.L1D_CACHE_MISS.value] / data[Stat.L1D_CACHE.value]) result[Stat.L2D_CACHE_MISS_RATE.value] = ( data[Stat.L2D_CACHE_MISS.value] / data[Stat.L2D_CACHE.value] if Stat.L2D_CACHE_MISS.value in data and Stat.L2D_CACHE.value in data else None) result[Stat.LL_CACHE_MISS_RATE.value] = ( data[Stat.LL_CACHE_MISS.value] / data[Stat.LL_CACHE.value]) return result def accumulate(stats_list: list[dict[str, str | int | float]], category: Stat, value: Stat): category_list = np.array([stats[category.value] for stats in stats_list if value.value in stats]) value_list = np.array([stats[value.value] for stats in stats_list if value.value in stats]) result: dict[np.ndarray] = dict() for category in np.sort(np.unique(category_list)): result[category] = value_list[category_list == category] return result def box_plot(ax, stats_list: list[dict[str, str | int | float]], x: Stat, y: Stat): data: dict[str, np.ndarray] = accumulate(stats_list, x, y) print("Plotted data: " + str(data)) ax.boxplot(data.values(), tick_labels=data.keys()) ax.set_ylabel(y.value) def line_plot( ax, stats_list: list[dict[str, str | int | float]], x: Stat, y: Stat, color: Stat ): x_data: dict[str, np.ndarray] = accumulate(stats_list, color, x) y_data: dict[str, np.ndarray] = accumulate(stats_list, color, y) for category in x_data.keys(): sorted_indices = np.argsort(x_data[category]) x_data[category] = x_data[category][sorted_indices] y_data[category] = y_data[category][sorted_indices] ax.plot(x_data[category], y_data[category], label=category) print("Plotted x data: " + str(x_data[category])) print("Plotted y data: " + str(y_data[category])) ax.set_ylabel(y.value) ax.grid(True) def visualize( stats_list: list[dict[str, str | int | float]], plot: Plot, rows: int, size_multiplier: int, font_size: int, x: Stat, y: Stat, color: Stat, filter_list: list[str] = [] ): # Remove stats entries containing undesired values (like a specific CPU). stats_list = [stats for stats in stats_list if len([stats[key] for key in stats.keys() if stats[key] in filter_list]) == 0] #x = Stat.MAXWELL_SIZE #y = Stat.DTLB_MISS_RATE #color = Stat.SOLVER if y is None: ys = [stat for stat in Stat if stat.value in stats_list[0].keys() and stat is not x #and y != color #and y != marker and stat.value not in filter_list] fig, axes = plt.subplots(rows, int(math.ceil(len(ys) / rows)), figsize = (16 * size_multiplier, 9 * size_multiplier)) match plot: case Plot.BOX: for i, y in enumerate(ys): box_plot(axes[i % rows][int(i / rows)], stats_list, x, y) case Plot.LINE: for i, y in enumerate(ys): line_plot(axes[i % rows][int(i / rows)], stats_list, x, y, color) handles, labels = axes[i % rows][int(i / rows)].get_legend_handles_labels() else: fig, ax = plt.subplots() match plot: case Plot.BOX: box_plot(ax, stats_list, x, y) case Plot.LINE: line_plot(ax, stats_list, x, y, color) handles, labels = ax.get_legend_handles_labels() #box_plot(ax, stats, x, y) #line_plot(ax, stats, x, y, color) match plot: case Plot.BOX: title = f"{plot.value}_plot_of_{y.value.replace(' ', '_')}_vs_{x.value.replace(' ', '_')}_excluding_{filter_list}" case Plot.LINE: title = f"{plot.value}_plot_of_{y.value.replace(' ', '_')}_vs_{x.value.replace(' ', '_')}_by_{color.value.replace(' ', '_')}_excluding_{filter_list}" fig.suptitle(title, fontsize = font_size) fig.legend(handles, labels, fontsize = font_size) fig.supxlabel(x.value, fontsize = font_size) plt.savefig(title + ".png", dpi = 100) plt.show() def main(): class Command(Enum): PARSE = 'parse' VISUALIZE = 'visualize' parser = argparse.ArgumentParser() parser.add_argument('command', choices=[x.value for x in Command]) parser.add_argument('workload', choices=[x.name.lower() for x in Workload], help='the workload to analyze') parser.add_argument('filepath', help='the output for the ' + Command.PARSE.value + ' command or the input for the ' + Command.VISUALIZE.value + ' command') parser.add_argument('-i', '--input', help='the input directory for the parse command') parser.add_argument('-p', '--plot', choices=[x.name.lower() for x in Plot], help = 'the type of plot') parser.add_argument('-r', '--rows', type=int, help = 'the number of rows to display when -y is not specified', default = 5) parser.add_argument('-s', '--size', type=int, help = 'figure size multiplier', default = 4) parser.add_argument('-fs', '--font_size', type=int, help = 'font size', default = 40) parser.add_argument('-x', choices=[x.name.lower() for x in Stat], help = 'the name of the x axis') parser.add_argument('-y', choices=[x.name.lower() for x in Stat], help = 'the name of the y axis') parser.add_argument('-c', '--color', choices=[x.name.lower() for x in Stat], help = 'the name of the color') parser.add_argument('-f', '--filter', nargs = '+', help = 'a comma-separated string of names and values to filter out.', default = []) args = parser.parse_args() stats_list: list[dict] = list() if args.command == Command.PARSE.value: if (args.input) is None: print("An input directory is required with -i") exit(-1) original_dir = os.getcwd() os.chdir(args.input) for filename in os.listdir(os.getcwd()): if "output" not in filename: continue if "power" in filename: continue stats = parse_input(filename, Workload[args.workload.upper()]) stats = stats | parse_output(filename, stats) stats = stats | derive_stats(stats) stats_list.append(stats) print(filename + " parsed.") os.chdir(original_dir) with open(args.filepath, 'w') as file: json.dump(stats_list, file, indent = 2) elif args.command == Command.VISUALIZE.value: with open(args.filepath, 'r') as file: stats_list = json.load(file) x = Stat[args.x.upper()] if args.x is not None else None y = Stat[args.y.upper()] if args.y is not None else None color = Stat[args.color.upper()] if args.color is not None else None visualize(stats_list, Plot[args.plot.upper()], args.rows, args.size, args.font_size, x, y, color, args.filter) if __name__ == '__main__': main()