Added analysis directory

2024-12-02 11:33:49 -05:00 · 2024-12-02 11:33:49 -05:00 · e6668c4546
commit e6668c4546
parent 063be52ed8
5 changed files with 5284 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,2 @@
 *.img
+*.png
--- a/analysis/ampere_mini-em_avg.json
+++ b/analysis/ampere_mini-em_avg.json
--- a/analysis/analyze.py
+++ b/analysis/analyze.py
@ -0,0 +1,384 @@
+#! /bin/python3
+
+import argparse
+import os
+import re
+import json
+
+from enum import Enum
+
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import itertools
+
+class Stat(Enum):
+    """Every statistic this script parses, derives or plots.
+
+    A member's value is the human-readable label that is used as the key in
+    each stats record (and therefore in the JSON file) and as the axis label
+    in plots.
+    """
+
+    # Run parameters recovered from the output file's name (see parse_input).
+    CPU = 'cpu'
+
+    SOLVER = 'solver'
+    LIN_ALG = 'linear algebra'
+    INPUT_FILE = 'input file'
+    MAXWELL_SIZE = 'maxwell size'
+    MATRIX_COLS = 'matrix columns'
+
+    # Produced by parse_power from the companion "_power" file.
+    POWER_DELTA = 'Δ watt'
+
+    # Raw counters read from the output file (see parse_output); which ones
+    # are available depends on the per-CPU name tables below.
+    TASK_CLK = 'task clock (msec)'
+    PAGE_FAULTS = 'page faults'
+    CYCLES = 'cycles'
+    INST = 'instructions'
+
+    BR = 'branches'
+    BR_MISS = 'branch mispredictions'
+    ITLB = 'ITLB accesses'
+    ITLB_MISS = 'ITLB misses'
+    DTLB = 'DTLB accesses'
+    DTLB_MISS = 'DTLB misses'
+    L2D_TLB = 'L2D TLB accesses'
+    L2D_TLB_MISS = 'L2D TLB misses'
+    L1I_CACHE = 'L1I cache accesses'
+    L1I_CACHE_MISS = 'L1I cache misses'
+    L1D_CACHE = 'L1D cache accesses'
+    L1D_CACHE_MISS = 'L1D cache misses'
+    L2D_CACHE = 'L2D cache accesses'
+    L2D_CACHE_MISS = 'L2D cache misses'
+    LL_CACHE = 'LL cache accesses'
+    LL_CACHE_MISS = 'LL cache misses'
+
+    # Ratios computed by derive_stats (misses / accesses). Note there is no
+    # derived rate for the L2D TLB counters.
+    BRANCH_MISS_RATE = 'branch miss rate'
+    ITLB_MISS_RATE = 'ITLB miss rate'
+    DTLB_MISS_RATE = 'DTLB miss rate'
+    L1I_CACHE_MISS_RATE = 'L1I cache miss rate'
+    L1D_CACHE_MISS_RATE = 'L1D cache miss rate'
+    L2D_CACHE_MISS_RATE = 'L2D cache miss rate'
+    LL_CACHE_MISS_RATE = 'LL cache miss rate'
+
+# Maps each Stat that can be read on the Altra machine to the event name that
+# parse_output searches for in an output line (presumably `perf stat` event
+# names -- confirm against the collection script). Used as the value of
+# CPU.ALTRA.
+altra_names = {
+    Stat.TASK_CLK: 'task-clock:u',
+    Stat.PAGE_FAULTS: 'page-faults:u',
+    Stat.CYCLES: 'cycles:u',
+    Stat.INST: 'instructions:u',
+
+    Stat.BR: 'BR_RETIRED:u',
+    Stat.BR_MISS: 'BR_MIS_PRED_RETIRED:u',
+    Stat.ITLB: 'L1I_TLB:u',
+    Stat.ITLB_MISS: 'ITLB_WALK:u',
+    Stat.DTLB: 'L1D_TLB:u',
+    Stat.DTLB_MISS: 'DTLB_WALK:u',
+    Stat.L2D_TLB: 'L2D_TLB:u',
+    Stat.L2D_TLB_MISS: 'L2D_TLB_REFILL:u',
+    Stat.L1I_CACHE: 'L1I_CACHE:u',
+    Stat.L1I_CACHE_MISS: 'L1I_CACHE_REFILL:u',
+    Stat.L1D_CACHE: 'L1D_CACHE:u',
+    Stat.L1D_CACHE_MISS: 'L1D_CACHE_REFILL:u',
+    Stat.L2D_CACHE: 'L2D_CACHE:u',
+    Stat.L2D_CACHE_MISS: 'L2D_CACHE_REFILL:u',
+    Stat.LL_CACHE: 'LL_CACHE_RD:u',
+    Stat.LL_CACHE_MISS: 'LL_CACHE_MISS_RD:u',
+}
+
+# Maps each Stat that can be read on the Xeon machine to the event name that
+# parse_output searches for in an output line. Used as the value of CPU.XEON.
+# There are no L2D TLB or L2D cache entries here, so those stats are never
+# parsed for Xeon runs and derive_stats reports the L2D cache miss rate as
+# None for them.
+xeon_names = {
+    Stat.TASK_CLK: 'task-clock:u',
+    Stat.PAGE_FAULTS: 'page-faults:u',
+    Stat.CYCLES: 'cycles:u',
+    Stat.INST: 'instructions:u',
+
+    Stat.BR: 'branches:u',
+    Stat.BR_MISS: 'branch-misses:u',
+    Stat.ITLB: 'iTLB-loads:u',
+    Stat.ITLB_MISS: 'iTLB-load-misses:u',
+    Stat.DTLB: 'dTLB-loads:u',
+    Stat.DTLB_MISS: 'dTLB-load-misses:u',
+    Stat.L1I_CACHE: 'L1-icache-loads:u',
+    Stat.L1I_CACHE_MISS: 'L1-icache-load-misses:u',
+    Stat.L1D_CACHE: 'L1-dcache-loads:u',
+    Stat.L1D_CACHE_MISS: 'L1-dcache-load-misses:u',
+    Stat.LL_CACHE: 'LLC-loads:u',
+    Stat.LL_CACHE_MISS: 'LLC-load-misses:u',
+}
+
+class Workload(Enum):
+    """Workloads whose file-name layout parse_input knows how to decode.
+
+    The lower-cased member names are the accepted values of the command
+    line's ``workload`` argument.
+    """
+    MINI_EM = 'mini_em'
+
+class CPU(Enum):
+    """Supported CPUs; each member's value is its Stat -> event-name table.
+
+    parse_output selects a member by upper-casing the 'cpu' field parsed
+    from the file name, so that field must be 'altra' or 'xeon' in any case.
+    """
+    ALTRA = altra_names
+    XEON = xeon_names
+
+class Plot(Enum):
+    """Plot types offered by visualize.
+
+    The lower-cased member names are the accepted values of ``-p/--plot``;
+    the member values appear in the generated figure title and file name.
+    """
+    BOX = 'box'
+    LINE = 'line'
+
+def parse_input(filename: str, workload: Workload) -> dict[str, str | int]:
+    """Recover the run parameters encoded in an output file's name.
+
+    The extension is dropped and the rest of the name is split on '_'. The
+    first field is always the CPU. For ``Workload.MINI_EM`` the next three
+    fields are the solver, the linear algebra field and the input file, and
+    the input file field must start with ``maxwell<digits>``.
+
+    Args:
+        filename: bare file name (no directory part, since any '_' in a
+            directory name would shift the fields).
+        workload: selects which fields are expected after the CPU.
+
+    Returns:
+        A dict keyed by ``Stat`` labels: 'cpu', and for mini_em also
+        'solver', 'linear algebra', 'input file', 'maxwell size' and
+        'matrix columns'.
+
+    Raises:
+        ValueError: if the name has too few fields or the input file field
+            does not start with ``maxwell<digits>``.
+    """
+    fields = os.path.splitext(filename)[0].split('_')
+
+    data: dict[str, str | int] = {Stat.CPU.value: fields[0]}
+    if workload != Workload.MINI_EM:
+        return data
+
+    if len(fields) < 4:
+        raise ValueError(
+            f"{filename!r}: expected at least 4 '_'-separated fields "
+            f"(cpu, solver, linear algebra, input file), found {len(fields)}")
+    solver, lin_alg, input_file = fields[1:4]
+
+    size_match = re.match(r'maxwell(\d+)', input_file)
+    if size_match is None:
+        raise ValueError(
+            f"{filename!r}: input file field {input_file!r} does not start "
+            "with 'maxwell<digits>'")
+    size = int(size_match.group(1))
+
+    data[Stat.SOLVER.value] = solver
+    data[Stat.LIN_ALG.value] = lin_alg
+    data[Stat.INPUT_FILE.value] = input_file
+    data[Stat.MAXWELL_SIZE.value] = size
+    # Column count as a function of the maxwell size x: 3x^3 + 6x^2 + 3x.
+    data[Stat.MATRIX_COLS.value] = 3 * size ** 3 + 6 * size ** 2 + 3 * size
+
+    return data
+
+def parse_output(filename: str, data: dict[str, str]) -> dict[str, str | int | float]:
+    """Read the raw counters for one run from its output file.
+
+    The CPU is taken from ``data`` (as produced by parse_input) and selects
+    the event-name table. Every line of the file is searched for every
+    event name in that table; a line that contains an event name and starts
+    (after any non-word characters) with a number contributes that number,
+    with thousands separators removed. The task clock is stored as a float,
+    every other counter as an int.
+
+    Args:
+        filename: path of the output file to read.
+        data: the record returned by parse_input for this file.
+
+    Returns:
+        The parsed counters keyed by ``Stat`` labels, merged with whatever
+        parse_power returns for the same file.
+    """
+    cpu: CPU = CPU[data[Stat.CPU.value].upper()]
+
+    # Build each pattern once instead of once per line. The event name is
+    # escaped so it can only match literally, and the captured number is
+    # restricted to digits, thousands separators and an optional fraction.
+    patterns = {
+        stat: re.compile(r'^\W*(\d[\d,]*(?:\.\d+)?)\W*.*' + re.escape(cpu.value[stat]))
+        for stat in Stat if stat in cpu.value
+    }
+
+    result: dict[str, int | float] = {}
+    with open(filename, 'r') as file:
+        for line in file:
+            for stat, pattern in patterns.items():
+                found = pattern.search(line)
+                if found is None:
+                    continue
+                number = found.group(1).replace(',', '')
+                # Only the task clock is reported with a fractional part.
+                result[stat.value] = float(number) if stat == Stat.TASK_CLK else int(number)
+
+    return result | parse_power(filename, cpu)
+
+# TODO CHANGE THIS
+def parse_power(filename: str, cpu: CPU) -> dict[str, float]:
+    """Compute the power increase of a run from its companion power log.
+
+    For ``CPU.ALTRA`` the log is ``<name>_power<ext>`` next to ``filename``.
+    Lines of the form ``Socket1 <float>`` / ``Socket2 <float>`` are power
+    samples. A line containing ``Start`` ends the baseline phase: the
+    samples seen so far are averaged per socket and sampling restarts. The
+    result is the largest, over both sockets, of (maximum sample after the
+    last ``Start``) minus (baseline average).
+
+    For ``CPU.XEON`` no power data is read.
+
+    Args:
+        filename: path of the run's output file (not of the power log).
+        cpu: the CPU the run was executed on.
+
+    Returns:
+        ``{'Δ watt': delta}`` for Altra, ``{}`` for Xeon.
+
+    Raises:
+        ValueError: if the power log has no ``Start`` line, or a socket has
+            no samples before or after it.
+    """
+    match cpu:
+        case CPU.ALTRA:
+            class Socket(Enum):
+                SOCKET1 = 'Socket1'
+                SOCKET2 = 'Socket2'
+
+            samples: dict[Socket, list[float]] = {socket: [] for socket in Socket}
+            baseline: dict[Socket, float] = {}
+            patterns = {
+                socket: re.compile(r'^' + socket.value + r' (\d+\.\d+)')
+                for socket in Socket
+            }
+
+            root, ext = os.path.splitext(filename)
+            power_filename = root + "_power" + ext
+            with open(power_filename, 'r') as file:
+                for line in file:
+                    if 'Start' in line:
+                        # Everything sampled so far is the idle baseline.
+                        for socket in Socket:
+                            if not samples[socket]:
+                                raise ValueError(
+                                    f"{power_filename}: no {socket.value} samples "
+                                    "before 'Start'")
+                            baseline[socket] = float(np.average(samples[socket]))
+                            samples[socket] = []
+                        continue
+
+                    for socket in Socket:
+                        found = patterns[socket].search(line)
+                        if found is not None:
+                            samples[socket].append(float(found.group(1)))
+                            break
+
+            if not baseline:
+                raise ValueError(f"{power_filename}: no 'Start' line found")
+
+            deltas: list[float] = []
+            for socket in Socket:
+                if not samples[socket]:
+                    raise ValueError(
+                        f"{power_filename}: no {socket.value} samples after 'Start'")
+                deltas.append(float(np.max(samples[socket])) - baseline[socket])
+
+            return {Stat.POWER_DELTA.value: max(deltas)}
+        case CPU.XEON:
+            return {}
+
+def derive_stats(data: dict[str, str | int | float]) -> dict[str, int | float]:
+    """Compute miss rates (misses / accesses) from the raw counters.
+
+    All seven rates are always present in the result. A rate is ``None``
+    when either of its counters is missing from ``data`` or when the access
+    count is zero, so an incomplete record no longer aborts the whole parse.
+
+    Args:
+        data: one stats record holding raw counters keyed by ``Stat`` labels.
+
+    Returns:
+        A dict mapping each miss-rate label to a float or ``None``.
+    """
+    # (derived rate, miss counter, access counter), in output order.
+    rate_inputs = (
+        (Stat.BRANCH_MISS_RATE, Stat.BR_MISS, Stat.BR),
+        (Stat.ITLB_MISS_RATE, Stat.ITLB_MISS, Stat.ITLB),
+        (Stat.DTLB_MISS_RATE, Stat.DTLB_MISS, Stat.DTLB),
+        (Stat.L1I_CACHE_MISS_RATE, Stat.L1I_CACHE_MISS, Stat.L1I_CACHE),
+        (Stat.L1D_CACHE_MISS_RATE, Stat.L1D_CACHE_MISS, Stat.L1D_CACHE),
+        (Stat.L2D_CACHE_MISS_RATE, Stat.L2D_CACHE_MISS, Stat.L2D_CACHE),
+        (Stat.LL_CACHE_MISS_RATE, Stat.LL_CACHE_MISS, Stat.LL_CACHE),
+    )
+
+    result: dict[str, int | float] = {}
+    for rate, misses, accesses in rate_inputs:
+        miss_count = data.get(misses.value)
+        access_count = data.get(accesses.value)
+        if miss_count is None or not access_count:
+            result[rate.value] = None
+        else:
+            result[rate.value] = miss_count / access_count
+
+    return result
+
+def accumulate(
+    stats_list: list[dict[str, str | int | float]], category: Stat, value: Stat
+) -> dict[str, np.ndarray]:
+    """Group one statistic's values by the distinct values of another.
+
+    Records that do not contain ``value`` are ignored.
+
+    Args:
+        stats_list: the stats records to group.
+        category: the statistic whose distinct values become the keys.
+        value: the statistic whose values are collected under each key.
+
+    Returns:
+        A dict whose keys are the distinct ``category`` values in sorted
+        order (as NumPy scalars) and whose values are arrays holding the
+        ``value`` entries of the matching records, in input order.
+    """
+    usable = [stats for stats in stats_list if value.value in stats]
+    category_list = np.array([stats[category.value] for stats in usable])
+    value_list = np.array([stats[value.value] for stats in usable])
+
+    # np.unique already returns its result sorted.
+    return {
+        group: value_list[category_list == group]
+        for group in np.unique(category_list)
+    }
+
+def box_plot(ax, stats_list: list[dict[str, str | int | float]], x: Stat, y: Stat):
+    """Draw one box per distinct ``x`` value showing the spread of ``y``.
+
+    The grouped data is echoed to stdout before plotting; the y axis of
+    ``ax`` is labelled with ``y``'s label.
+    """
+    grouped: dict[str, np.ndarray] = accumulate(stats_list, x, y)
+    print(f"Plotted data: {grouped}")
+
+    samples = grouped.values()
+    labels = grouped.keys()
+    ax.boxplot(samples, tick_labels=labels)
+    ax.set_ylabel(y.value)
+
+def line_plot(
+    ax, stats_list: list[dict[str, str | int | float]],
+    x: Stat, y: Stat, color: Stat
+):
+    """Plot ``y`` against ``x`` as one line per distinct ``color`` value.
+
+    Only records that contain both ``x`` and ``y`` are plotted. Filtering
+    them together keeps every x value paired with the y value of the same
+    record. Points of each line are ordered by ascending x, and the plotted
+    data is echoed to stdout.
+
+    Args:
+        ax: the matplotlib axes to draw on.
+        stats_list: the stats records to plot.
+        x: statistic for the horizontal axis.
+        y: statistic for the vertical axis (also used as the y label).
+        color: statistic whose distinct values each get their own line.
+    """
+    paired = [stats for stats in stats_list
+              if x.value in stats and y.value in stats]
+    x_data: dict[str, np.ndarray] = accumulate(paired, color, x)
+    y_data: dict[str, np.ndarray] = accumulate(paired, color, y)
+
+    for category, xs in x_data.items():
+        order = np.argsort(xs)
+        sorted_xs = xs[order]
+        sorted_ys = y_data[category][order]
+        ax.plot(sorted_xs, sorted_ys, label=category)
+        print("Plotted x data: " + str(sorted_xs))
+        print("Plotted y data: " + str(sorted_ys))
+
+    ax.set_ylabel(y.value)
+    ax.grid(True)
+
+def visualize(
+    stats_list: list[dict[str, str | int | float]],
+    plot: Plot,
+    rows: int,
+    size_multiplier: int,
+    font_size: int,
+    x: Stat,
+    y: Stat,
+    color: Stat,
+    filter_list: list[str] | None = None
+):
+    """Plot the parsed stats, save the figure as a PNG and show it.
+
+    With a specific ``y`` a single plot is drawn. With ``y`` set to ``None``
+    a grid with ``rows`` rows is drawn, one subplot for every statistic that
+    is present in the first record, is not ``x`` and is not named in
+    ``filter_list``. The figure title doubles as the PNG file name; for the
+    grid it uses ``all`` in place of a single statistic name.
+
+    Args:
+        stats_list: the stats records to plot.
+        plot: box or line plot.
+        rows: number of subplot rows for the all-statistics grid.
+        size_multiplier: scales the 16x9 grid figure size.
+        font_size: font size of the title, legend and x label.
+        x: statistic for the horizontal axis (or the box categories).
+        y: statistic for the vertical axis, or ``None`` for all of them.
+        color: statistic that separates the lines of a line plot.
+        filter_list: records containing any of these values are dropped,
+            and statistics with these labels are left out of the grid.
+
+    Raises:
+        ValueError: if ``x`` is missing, a line plot has no ``color``,
+            ``rows`` is not positive, or nothing is left to plot.
+    """
+    filter_list = [] if filter_list is None else filter_list
+
+    if x is None:
+        raise ValueError("an x statistic is required")
+    if plot == Plot.LINE and color is None:
+        raise ValueError("a line plot requires a color statistic")
+
+    # Remove stats entries containing undesired values (like a specific CPU).
+    stats_list = [stats for stats in stats_list
+                  if not any(entry in filter_list for entry in stats.values())]
+    if not stats_list:
+        raise ValueError("no stats left to plot after filtering")
+
+    if y is None:
+        if rows < 1:
+            raise ValueError("rows must be at least 1")
+        ys = [stat for stat in Stat
+              if stat.value in stats_list[0]
+              and stat is not x
+              and stat.value not in filter_list]
+        if not ys:
+            raise ValueError("no statistics left to plot after filtering")
+
+        # squeeze=False keeps axes two-dimensional even for one row/column.
+        fig, axes = plt.subplots(rows, math.ceil(len(ys) / rows), squeeze=False,
+                                 figsize = (16 * size_multiplier, 9 * size_multiplier))
+        # Subplots are filled column by column.
+        for i, stat in enumerate(ys):
+            ax = axes[i % rows][i // rows]
+            match plot:
+                case Plot.BOX:
+                    box_plot(ax, stats_list, x, stat)
+                case Plot.LINE:
+                    line_plot(ax, stats_list, x, stat, color)
+
+        # Every subplot has the same lines, so the last one's legend serves all.
+        handles, labels = ax.get_legend_handles_labels()
+        y_label = 'all'
+    else:
+        fig, ax = plt.subplots()
+
+        match plot:
+            case Plot.BOX:
+                box_plot(ax, stats_list, x, y)
+            case Plot.LINE:
+                line_plot(ax, stats_list, x, y, color)
+
+        handles, labels = ax.get_legend_handles_labels()
+        y_label = y.value.replace(' ', '_')
+
+    x_label = x.value.replace(' ', '_')
+    match plot:
+        case Plot.BOX:
+            title = f"{plot.value}_plot_of_{y_label}_vs_{x_label}_excluding_{filter_list}"
+        case Plot.LINE:
+            title = f"{plot.value}_plot_of_{y_label}_vs_{x_label}_by_{color.value.replace(' ', '_')}_excluding_{filter_list}"
+    fig.suptitle(title, fontsize = font_size)
+    fig.legend(handles, labels, fontsize = font_size)
+    fig.supxlabel(x.value, fontsize = font_size)
+
+    plt.savefig(title + ".png", dpi = 100)
+    plt.show()
+
+def main():
+    """Command-line entry point.
+
+    ``parse`` reads every file in the ``-i`` directory whose name contains
+    "output" but not "power", builds one stats record per file (file-name
+    parameters, raw counters, derived rates) and writes the list as JSON to
+    ``filepath``. Files are processed in sorted name order so the JSON is
+    reproducible.
+
+    ``visualize`` loads such a JSON file from ``filepath`` and plots it.
+
+    Invalid argument combinations are reported through argparse, which
+    prints the usage text and exits with status 2.
+    """
+    class Command(Enum):
+        PARSE = 'parse'
+        VISUALIZE = 'visualize'
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('command', choices=[x.value for x in Command])
+    parser.add_argument('workload',
+                        choices=[x.name.lower() for x in Workload],
+                        help='the workload to analyze')
+    parser.add_argument('filepath',
+                        help='the output for the ' + Command.PARSE.value + ' command or the input for the ' + Command.VISUALIZE.value + ' command')
+    parser.add_argument('-i', '--input',
+                        help='the input directory for the parse command')
+    parser.add_argument('-p', '--plot',
+                        choices=[x.name.lower() for x in Plot],
+                        help = 'the type of plot')
+    parser.add_argument('-r', '--rows', type=int,
+                        help = 'the number of rows to display when -y is not specified',
+                        default = 5)
+    parser.add_argument('-s', '--size', type=int,
+                        help = 'figure size multiplier',
+                        default = 4)
+    parser.add_argument('-fs', '--font_size', type=int,
+                        help = 'font size',
+                        default = 40)
+    parser.add_argument('-x',
+                        choices=[x.name.lower() for x in Stat],
+                        help = 'the name of the x axis')
+    parser.add_argument('-y',
+                        choices=[x.name.lower() for x in Stat],
+                        help = 'the name of the y axis')
+    parser.add_argument('-c', '--color',
+                        choices=[x.name.lower() for x in Stat],
+                        help = 'the name of the color')
+    parser.add_argument('-f', '--filter', nargs = '+',
+                        help = 'one or more space-separated names and values to filter out.',
+                        default = [])
+
+    args = parser.parse_args()
+
+    if args.command == Command.PARSE.value:
+        if args.input is None:
+            parser.error("An input directory is required with -i")
+
+        workload = Workload[args.workload.upper()]
+        stats_list: list[dict] = []
+        for filename in sorted(os.listdir(args.input)):
+            if "output" not in filename or "power" in filename:
+                continue
+            # The bare name carries the run parameters; the joined path is
+            # what gets opened, so no change of working directory is needed.
+            stats = parse_input(filename, workload)
+            stats = stats | parse_output(os.path.join(args.input, filename), stats)
+            stats = stats | derive_stats(stats)
+            stats_list.append(stats)
+            print(filename + " parsed.")
+
+        with open(args.filepath, 'w') as file:
+            json.dump(stats_list, file, indent = 2)
+
+    elif args.command == Command.VISUALIZE.value:
+        if args.plot is None:
+            parser.error("A plot type is required with -p")
+        if args.x is None:
+            parser.error("An x axis is required with -x")
+        plot = Plot[args.plot.upper()]
+        if plot == Plot.LINE and args.color is None:
+            parser.error("A line plot requires a color with -c")
+
+        with open(args.filepath, 'r') as file:
+            stats_list = json.load(file)
+
+        x = Stat[args.x.upper()]
+        y = Stat[args.y.upper()] if args.y is not None else None
+        color = Stat[args.color.upper()] if args.color is not None else None
+        visualize(stats_list, plot, args.rows, args.size, args.font_size, x, y, color, args.filter)
+
+if __name__ == '__main__':
+    main()
--- a/analysis/analyze.py.bak
+++ b/analysis/analyze.py.bak
@ -0,0 +1,302 @@
+#! /bin/python3
+
+import os
+import re
+from dataclasses import dataclass
+import json
+import matplotlib.pyplot as plt
+import numpy
+from scipy.io import mmread
+import math
+import itertools
+import argparse
+
+# Event names that read_stats searches for in every line of a log file. The
+# trailing ":u" is stripped to form the key under which the value is stored
+# ("task-clock" additionally gets a " (msec)" suffix).
+perf_stats = ["task-clock:u", "page-faults:u", "cycles:u", "instructions:u",
+              "branches:u", "branch-misses:u", "L1-dcache-loads:u",
+              "L1-dcache-load-misses:u", "LLC-loads:u", "LLC-load-misses:u",
+              "L1-icache-loads:u", "L1-icache-load-misses:u", "dTLB-loads:u",
+              "dTLB-load-misses:u", "iTLB-loads:u", "iTLB-load-misses:u"]
+
+def read_stats(filename):
+    """Build the stats record for one log file.
+
+    The record combines three sources:
+
+    * the run parameters encoded in the file name, which has the form
+      ``solver_linalg_file_timesteps_<cores>..._cpu.<ext>``;
+    * properties of the matrix stored in ``<name>/SchurComplement.mm``
+      (``<name>`` being the file name without extension);
+    * the counters listed in ``perf_stats`` that appear in the log, plus
+      ratios derived from them.
+
+    A derived ratio is ``None`` when one of its counters is missing or its
+    denominator is zero. Finding "L1-icache-loads:u" in the log is treated
+    as fatal and terminates the program with status 1.
+
+    Args:
+        filename: name of the log file, relative to the working directory.
+
+    Returns:
+        The stats record as a dict.
+    """
+    base = os.path.splitext(filename)[0]
+
+    # Split filename into each input parameter.
+    fields = base.split('_')
+    s = {
+        "solver": fields[0],
+        "linalg": fields[1],
+        "file": fields[2],
+        "time_steps": int(fields[3]),
+        "cores": int(re.match(r'\d+', fields[4]).group(0)),
+        "cpu": fields[5] }
+
+    # Take filename and read SchurComplement matrix from corresponding dir.
+    print(base)
+    m = mmread(base + "/SchurComplement.mm")
+    shape = m.get_shape()
+    # NOTE(review): "cols" is taken from shape[0]; equivalent only if the
+    # matrix is square -- confirm.
+    s["cols"] = shape[0]
+    s["nonzero"] = m.count_nonzero()
+    s["entries"] = shape[0] * shape[1]
+    s["trace"] = m.trace()
+
+    # Compute ratio.
+    s["% nonzero entry"] = s["nonzero"] /  s["entries"]
+    s["trace over cols"] = s["trace"] / s["cols"]
+
+    # Obtain stats from perf in each log. Patterns are built once up front
+    # instead of once per line.
+    patterns = {
+        perf_stat: re.compile(r'^\W*([\d+(,|\.)?]+)\W*.*' + perf_stat)
+        for perf_stat in perf_stats
+    }
+    with open(filename) as file:
+        for line in file:
+            for perf_stat, pattern in patterns.items():
+                data = pattern.search(line)
+                if data is None:
+                    continue
+                if perf_stat == "L1-icache-loads:u":
+                    print("l1_icache_loads not expected! Exiting...")
+                    raise SystemExit(1)
+
+                key = perf_stat[:-2]
+                number = data.group(1).replace(',', '')
+                # Only the task clock is fractional; all others are counts.
+                if perf_stat == "task-clock:u":
+                    s[key + " (msec)"] = float(number)
+                else:
+                    s[key] = int(number)
+
+    def _ratio(numerator, denominator):
+        """Return s[numerator] / s[denominator], or None if not computable."""
+        top = s.get(numerator)
+        bottom = s.get(denominator)
+        if top is None or not bottom:
+            return None
+        return top / bottom
+
+    # Derive stats.
+    s["insn per cycle"] = _ratio("instructions", "cycles")
+    s["branch-miss-rate"] = _ratio("branch-misses", "branches")
+    s["L1-dcache-miss-rate"] = _ratio("L1-dcache-load-misses", "L1-dcache-loads")
+    s["LLC-miss-rate"] = _ratio("LLC-load-misses", "LLC-loads")
+    s["dTLB-miss-rate"] = _ratio("dTLB-load-misses", "dTLB-loads")
+    s["iTLB-miss-rate"] = _ratio("iTLB-load-misses", "iTLB-loads")
+
+    print(s)
+    return s
+
+def box_plot(ax, stat_list, x_name, y_name):
+    """Draw one box per distinct ``x_name`` value showing ``y_name``.
+
+    Nothing is drawn (a notice is printed instead) when a group's first
+    value is a string. Otherwise the grouped data is echoed to stdout, the
+    boxes are drawn, and the ticks and y axis are labelled.
+    """
+    grouped = accumulate_to_dict(stat_list, x_name, y_name)
+
+    # Box plots need numbers; bail out as soon as a string group is seen.
+    if any(type(samples[0]) is str for samples in grouped.values()):
+        print(f"{y_name} values are strings... skipping box plot...")
+        return
+
+    print("Plotted data: " + str(grouped))
+
+    boxes = list(map(numpy.array, grouped.values()))
+    ax.boxplot(boxes)
+    ax.set_xticklabels(grouped.keys())
+    ax.set_ylabel(y_name)
+
+def scatter_plot(ax, stat_list, x_name, y_name, color_name, size_name = None):
+    """Scatter ``y_name`` against ``x_name``, one series per ``color_name`` value.
+
+    The grouped data is echoed to stdout. ``size_name`` data, when given,
+    is only grouped and printed; it does not affect the drawing. No legend
+    or x label is set here; only the y label and the grid are.
+    """
+    x_data = accumulate_to_dict(stat_list, color_name, x_name)
+    y_data = accumulate_to_dict(stat_list, color_name, y_name)
+    size_data = (accumulate_to_dict(stat_list, color_name, size_name)
+                 if size_name is not None else None)
+
+    print(x_data)
+    print(y_data)
+    if size_data is not None:
+        print(size_data)
+
+    for group, xs in x_data.items():
+        ys = y_data[group]
+        print(f"Plotted x data for {group}: " + str(xs))
+        print(f"Plotted y data for {group}: " + str(ys))
+        ax.scatter(xs, ys, label = group, alpha = 0.3)
+
+    ax.set_ylabel(y_name)
+    ax.grid(True)
+
+def line_plot(ax, stat_list, x_name, y_name, color_name = None, marker_name = None):
+    """Plot ``y_name`` against ``x_name`` as one line per group.
+
+    Groups are the distinct ``color_name`` values, or (color, marker) pairs
+    when ``marker_name`` is given. Each line's points are ordered by
+    ascending (x, y) and echoed to stdout before being drawn.
+    """
+    x_data = accumulate_to_dict(stat_list, color_name, x_name, marker_name)
+    y_data = accumulate_to_dict(stat_list, color_name, y_name, marker_name)
+
+    for group, xs in x_data.items():
+        points = sorted(zip(xs, y_data[group]))
+        sorted_xs = [point[0] for point in points]
+        sorted_ys = [point[1] for point in points]
+        print(f"Plotted x data for {group}: " + str(sorted_xs))
+        print(f"Plotted y data for {group}: " + str(sorted_ys))
+        ax.plot(sorted_xs, sorted_ys, label = group)
+
+    ax.set_ylabel(y_name)
+    ax.grid(True)
+
+def basic_plot(ax, stat_list, x_name, y_name):
+    """Plot every record as a single 'o' marker of ``y_name`` against ``x_name``.
+
+    The plotted values are echoed to stdout; the axes get a legend, both
+    axis labels and a grid.
+    """
+    x_values = accumulate_to_list(stat_list, x_name)
+    y_values = accumulate_to_list(stat_list, y_name)
+    print("Plotted x data: " + str(x_values))
+    print("Plotted y data: " + str(y_values))
+
+    ax.plot(x_values, y_values, 'o')
+
+    ax.legend()
+    ax.set_xlabel(x_name)
+    ax.set_ylabel(y_name)
+    ax.grid(True)
+
+def accumulate_to_dict(stat_list, key_name, value_name, key2_name = None):
+    """Group the ``value_name`` entries of the records by one or two keys.
+
+    With only ``key_name`` the result maps each distinct key value, in
+    first-seen order, to the list of values of the matching records. With
+    ``key2_name`` the keys are (key, key2) tuples covering every combination
+    of the distinct values of both, so combinations that never occur map
+    to an empty list.
+    """
+    firsts = [stat[key_name] for stat in stat_list]
+    values = [stat[value_name] for stat in stat_list]
+
+    if key2_name is None:
+        grouped = {}
+        for first in dict.fromkeys(firsts):
+            grouped[first] = list()
+        for first, value in zip(firsts, values):
+            grouped[first].append(value)
+        return grouped
+
+    seconds = [stat[key2_name] for stat in stat_list]
+    distinct_seconds = list(dict.fromkeys(seconds))
+    grouped = {}
+    # Pre-create every combination, first key varying slowest.
+    for first in dict.fromkeys(firsts):
+        for second in distinct_seconds:
+            grouped[(first, second)] = list()
+    for first, second, value in zip(firsts, seconds, values):
+        grouped[(first, second)].append(value)
+
+    return grouped
+
+def accumulate_to_list(stat_list, name):
+    """Return the ``name`` entry of every record, in record order."""
+    collected = []
+    for stat in stat_list:
+        collected.append(stat[name])
+    return collected
+
+def show_visualizations(rows, plot, size_multiplier, stat_list, x_name, y_name = None,
+                        color_name = None, marker_name = None, filter_list = None):
+    """Plot the stats, save the figure as a PNG and show it.
+
+    With a specific ``y_name`` a single plot is drawn. With ``y_name``
+    omitted or "all", a grid with ``rows`` rows is drawn, one subplot per
+    key of the first record that is not the x, color or marker name and is
+    not listed in ``filter_list``. The figure title doubles as the PNG
+    file name.
+
+    Args:
+        rows: number of subplot rows for the grid.
+        plot: "box" or "line"; anything else prints a message and exits
+            with status 1.
+        size_multiplier: scales the 16x9 grid figure size.
+        stat_list: the stats records to plot.
+        x_name: key used for the horizontal axis.
+        y_name: key used for the vertical axis, or None / "all".
+        color_name: first grouping key for line plots.
+        marker_name: optional second grouping key for line plots.
+        filter_list: records containing any of these values are dropped,
+            and keys listed here are left out of the grid.
+
+    Raises:
+        ValueError: if ``rows`` is not positive in grid mode or nothing is
+            left to plot after filtering.
+    """
+    if filter_list is None:
+        filter_list = []
+
+    if plot not in ("box", "line"):
+        print("Invalid plot type! Exiting...")
+        exit(1)
+
+    def draw(ax, y):
+        """Draw the requested plot type for one y key on one axes."""
+        if plot == "box":
+            box_plot(ax, stat_list, x_name, y)
+        else:
+            line_plot(ax, stat_list, x_name, y, color_name, marker_name)
+
+    # Filter stats (removing values such as a specific CPU type)
+    stat_list = [stat for stat in stat_list
+                 if not any(entry in filter_list for entry in stat.values())]
+    if not stat_list:
+        raise ValueError("no stats left to plot after filtering")
+
+    if y_name is None or y_name == "all":
+        if rows < 1:
+            raise ValueError("rows must be at least 1")
+        # Filter y names (removing measurements such as cycles)
+        y_names = [name for name in stat_list[0].keys()
+                   if name != x_name
+                   and name != color_name
+                   and name != marker_name
+                   and name not in filter_list]
+        if not y_names:
+            raise ValueError("no y names left to plot after filtering")
+
+        # squeeze=False keeps axes two-dimensional even for one row/column.
+        fig, axes = plt.subplots(rows, math.ceil(len(y_names) / rows), squeeze=False,
+                                 figsize = (16 * size_multiplier, 9 * size_multiplier))
+        # Subplots are filled column by column.
+        for i, y in enumerate(y_names):
+            ax = axes[i % rows][i // rows]
+            draw(ax, y)
+
+        handles, labels = ax.get_legend_handles_labels()
+        y_name = "all"
+        fontsize = 'xx-large'
+    else:
+        fig, ax = plt.subplots()
+        draw(ax, y_name)
+
+        handles, labels = ax.get_legend_handles_labels()
+        fontsize = 'x-small'
+
+    title = f"{plot}_plot_of_{y_name}_vs_{x_name}_by_{color_name}_and_{marker_name}_excluding_{filter_list}"
+
+    fig.suptitle(title, fontsize = fontsize)
+    fig.legend(handles, labels, fontsize = fontsize)
+    fig.supxlabel(x_name, fontsize = fontsize)
+
+    plt.savefig(title + ".png", dpi = 100)
+    plt.show()
+           
+# save_file is the JSON cache of parsed stats: main() loads it when it exists
+# and otherwise parses every ".log" file in samples_dir and writes it.
+# The commented-out pairs below are alternative data sets.
+save_file = "stats.json"
+samples_dir = "Samples/"
+# save_file = "stats_avg_10.json"
+# samples_dir = "Samples_Avg_10/"
+# save_file = "tmp.json"
+# samples_dir = "Old_Samples_Avg_10"
+
+def main():
+    """Load (or build and cache) the stats, then plot them as requested.
+
+    If ``save_file`` exists it is loaded. Otherwise every ".log" file in
+    ``samples_dir`` is parsed with read_stats and the result is written to
+    ``save_file``. The working directory is changed to ``samples_dir``
+    while parsing because read_stats takes bare file names, and it is
+    always restored afterwards.
+
+    The command line is parsed only after the stats are available, since
+    the help text lists the keys of the first record.
+    """
+    stat_list = list()
+
+    # Get stats and accumulate into stat_list
+    if os.path.isfile(save_file):
+        with open(save_file, 'r') as file:
+            stat_list = json.load(file)
+    else:
+        original_dir = os.getcwd()
+        os.chdir('./' + samples_dir)
+        try:
+            for filename in os.listdir(os.getcwd()):
+                if os.path.splitext(filename)[1] == ".log":
+                    stat_list.append(read_stats(filename))
+        finally:
+            os.chdir(original_dir)
+        print(*stat_list, sep='\n')
+        with open(save_file, 'w') as file:
+            json.dump(stat_list, file, indent = 2)
+
+    if not stat_list:
+        print("No stats found! Exiting...")
+        exit(1)
+
+    possible_args = list(stat_list[0].keys())
+    parser = argparse.ArgumentParser(epilog = 'Possible arguments are: ' + str(possible_args))
+    # A default of 0 rows made the all-stats grid divide by zero.
+    parser.add_argument('-r', '--rows', type = int, help = 'Number of rows to display. Will split graphs into columns to evenly fill rows. Does not do anything if -y is not "all". Default is 4.', default = 4)
+    parser.add_argument('-p', '--plot', help = 'The type of plot. Only "box" and "line" are supported.')
+    parser.add_argument('-x', help = 'Name of the x axis. See below for possible arguments.')
+    parser.add_argument('-y', help = 'Name of the y axis. Can be "all" or omitted entirely to display all possible y values. See below for other possible arguments.')
+    parser.add_argument('-g1', '--group1', help = 'Name of the first group to group by. Does not need to be specified.')
+    parser.add_argument('-g2', '--group2', help = 'Name of the second group to group by. Does not need to be specified (and does not matter for box plots).')
+    parser.add_argument('-f', '--filter', nargs = '+', help = 'Names and string values to filter out of the visualization. Can be multiple space-separated arguments. Solver filters are "MueLu", "CG", "GMRES", and "ML". CPU filters are "Xeon,4216" and "Xeon,E5-2683".', default = [])
+    parser.add_argument('-s', '--size', type = int, help = 'Multiplier for the resultant figure size if the visualization is too difficult to view. Default is 4.', default = 4)
+
+    args = parser.parse_args()
+
+    print(args)
+
+    if args.x is None:
+        parser.error("An x axis is required with -x")
+    if args.rows < 1:
+        parser.error("The number of rows must be at least 1")
+    if args.plot == "line" and args.group1 is None:
+        print("Line plot requires at least one group! Exiting...")
+        exit(1)
+
+    # Plot
+    show_visualizations(args.rows, args.plot, args.size, stat_list, args.x, args.y, args.group1, args.group2, args.filter)
+
+if __name__ == '__main__':
+    main()
--- a/analysis/xeon_mini-em.json
+++ b/analysis/xeon_mini-em.json