From 89d3365313a133cc04c046074cb66d8e4f8024f6 Mon Sep 17 00:00:00 2001 From: cephi Date: Thu, 12 Dec 2024 18:01:49 -0500 Subject: [PATCH] More synthetic --- .../altra_10_2_10_100000_0.0001.json | 2 +- .../altra_10_2_10_100000_0.0001.output | 18 +- .../altra_10_2_10_100000_1e-05.json | 2 +- .../altra_10_2_10_100000_1e-05.output | 12 +- .../altra_10_2_10_100000_5e-05.json | 1 + .../altra_10_2_10_100000_5e-05.output | 17 + .../altra_10_2_10_10000_0.0001.json | 1 + .../altra_10_2_10_10000_0.0001.output | 16 + .../altra_10_2_10_10000_1e-05.json | 1 + .../altra_10_2_10_10000_1e-05.output | 375 ++++++++++++++++++ .../altra_10_2_10_10000_5e-05.json | 1 + .../altra_10_2_10_10000_5e-05.output | 16 + .../altra_10_2_10_20000_0.0001.json | 1 + .../altra_10_2_10_20000_0.0001.output | 16 + .../altra_10_2_10_20000_1e-05.json | 1 + .../altra_10_2_10_20000_1e-05.output | 16 + .../altra_10_2_10_20000_5e-05.json | 1 + .../altra_10_2_10_20000_5e-05.output | 16 + .../altra_10_2_10_50000_0.0001.json | 1 + .../altra_10_2_10_50000_0.0001.output | 17 + .../altra_10_2_10_50000_1e-05.json | 1 + .../altra_10_2_10_50000_1e-05.output | 16 + .../altra_10_2_10_50000_5e-05.json | 1 + .../altra_10_2_10_50000_5e-05.output | 17 + .../epyc_7313p_10_2_10_100000_0.0001.json | 2 +- .../epyc_7313p_10_2_10_100000_0.0001.output | 18 +- .../epyc_7313p_10_2_10_100000_1e-05.json | 2 +- .../epyc_7313p_10_2_10_100000_1e-05.output | 19 +- .../epyc_7313p_10_2_10_100000_5e-05.json | 1 + .../epyc_7313p_10_2_10_100000_5e-05.output | 17 + .../epyc_7313p_10_2_10_10000_0.0001.json | 1 + .../epyc_7313p_10_2_10_10000_0.0001.output | 16 + .../epyc_7313p_10_2_10_10000_1e-05.json | 1 + .../epyc_7313p_10_2_10_10000_1e-05.output | 375 ++++++++++++++++++ .../epyc_7313p_10_2_10_10000_5e-05.json | 1 + .../epyc_7313p_10_2_10_10000_5e-05.output | 15 + .../epyc_7313p_10_2_10_20000_0.0001.json | 1 + .../epyc_7313p_10_2_10_20000_0.0001.output | 16 + .../epyc_7313p_10_2_10_20000_1e-05.json | 1 + .../epyc_7313p_10_2_10_20000_1e-05.output | 16 + .../epyc_7313p_10_2_10_20000_5e-05.json | 1 + .../epyc_7313p_10_2_10_20000_5e-05.output | 16 + .../epyc_7313p_10_2_10_50000_0.0001.json | 1 + .../epyc_7313p_10_2_10_50000_0.0001.output | 17 + .../epyc_7313p_10_2_10_50000_1e-05.json | 1 + .../epyc_7313p_10_2_10_50000_1e-05.output | 16 + .../epyc_7313p_10_2_10_50000_5e-05.json | 1 + .../epyc_7313p_10_2_10_50000_5e-05.output | 17 + .../xeon_4216_10_2_10_100000_0.0001.json | 2 +- .../xeon_4216_10_2_10_100000_0.0001.output | 18 +- .../xeon_4216_10_2_10_100000_1e-05.json | 2 +- .../xeon_4216_10_2_10_100000_1e-05.output | 12 +- .../xeon_4216_10_2_10_100000_5e-05.json | 1 + .../xeon_4216_10_2_10_100000_5e-05.output | 17 + .../xeon_4216_10_2_10_10000_0.0001.json | 1 + .../xeon_4216_10_2_10_10000_0.0001.output | 16 + .../xeon_4216_10_2_10_10000_1e-05.json | 1 + .../xeon_4216_10_2_10_10000_1e-05.output | 375 ++++++++++++++++++ .../xeon_4216_10_2_10_10000_5e-05.json | 1 + .../xeon_4216_10_2_10_10000_5e-05.output | 16 + .../xeon_4216_10_2_10_20000_0.0001.json | 1 + .../xeon_4216_10_2_10_20000_0.0001.output | 16 + .../xeon_4216_10_2_10_20000_1e-05.json | 1 + .../xeon_4216_10_2_10_20000_1e-05.output | 16 + .../xeon_4216_10_2_10_20000_5e-05.json | 1 + .../xeon_4216_10_2_10_20000_5e-05.output | 16 + .../xeon_4216_10_2_10_50000_0.0001.json | 1 + .../xeon_4216_10_2_10_50000_0.0001.output | 17 + .../xeon_4216_10_2_10_50000_1e-05.json | 1 + .../xeon_4216_10_2_10_50000_1e-05.output | 17 + .../xeon_4216_10_2_10_50000_5e-05.json | 1 + .../xeon_4216_10_2_10_50000_5e-05.output | 17 + 72 files changed, 1650 insertions(+), 55 deletions(-) create mode 100644 pytorch/output_test2/altra_10_2_10_100000_5e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_100000_5e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_10000_0.0001.json create mode 100644 pytorch/output_test2/altra_10_2_10_10000_0.0001.output create mode 100644 pytorch/output_test2/altra_10_2_10_10000_1e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_10000_1e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_10000_5e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_10000_5e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_20000_0.0001.json create mode 100644 pytorch/output_test2/altra_10_2_10_20000_0.0001.output create mode 100644 pytorch/output_test2/altra_10_2_10_20000_1e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_20000_1e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_20000_5e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_20000_5e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_50000_0.0001.json create mode 100644 pytorch/output_test2/altra_10_2_10_50000_0.0001.output create mode 100644 pytorch/output_test2/altra_10_2_10_50000_1e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_50000_1e-05.output create mode 100644 pytorch/output_test2/altra_10_2_10_50000_5e-05.json create mode 100644 pytorch/output_test2/altra_10_2_10_50000_5e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.output create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.json create mode 100644 pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.output create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.json create mode 100644 pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.output diff --git a/pytorch/output_test2/altra_10_2_10_100000_0.0001.json b/pytorch/output_test2/altra_10_2_10_100000_0.0001.json index b6820a1..a5e80b9 100644 --- a/pytorch/output_test2/altra_10_2_10_100000_0.0001.json +++ b/pytorch/output_test2/altra_10_2_10_100000_0.0001.json @@ -1 +1 @@ -{"CPU": "Altra", "ITERATIONS": 82249, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999948, "MATRIX_DENSITY": 9.99948e-05, "TIME_S": 12.20802617073059, "TIME_S_1KI": 0.14842765469161437, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1033.954319496155, "W": 84.9319356344559, "J_1KI": 12.57102602458577, "W_1KI": 1.032619674822258, "W_D": 74.4169356344559, "J_D": 905.9455841684344, "W_D_1KI": 0.9047761752052413, "J_D_1KI": 0.011000451983674468} +{"CPU": "Altra", "ITERATIONS": 68726, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999952, "MATRIX_DENSITY": 9.99952e-05, "TIME_S": 14.76737093925476, "TIME_S_1KI": 0.21487313301013825, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1048.773065109253, "W": 84.94573931101591, "J_1KI": 15.260208146978625, "W_1KI": 1.2360058683906516, "W_D": 74.56573931101592, "J_D": 920.6175566148759, "W_D_1KI": 1.0849713254229245, "J_D_1KI": 0.015786912164579992} diff --git a/pytorch/output_test2/altra_10_2_10_100000_0.0001.output b/pytorch/output_test2/altra_10_2_10_100000_0.0001.output index d50cf42..647603a 100644 --- a/pytorch/output_test2/altra_10_2_10_100000_0.0001.output +++ b/pytorch/output_test2/altra_10_2_10_100000_0.0001.output @@ -1,17 +1,17 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 12, 20, ..., 999931, 999940, - 999948]), - col_indices=tensor([20217, 25552, 38877, ..., 63581, 75717, 96314]), - values=tensor([-1.5899, -0.7194, -0.7547, ..., 0.5402, -0.1912, - -0.1167]), size=(100000, 100000), nnz=999948, +tensor(crow_indices=tensor([ 0, 9, 14, ..., 999938, 999944, + 999952]), + col_indices=tensor([ 7267, 12169, 14263, ..., 65124, 80624, 88608]), + values=tensor([-0.9381, -0.7021, 0.3838, ..., 0.4652, -2.0655, + 0.0178]), size=(100000, 100000), nnz=999952, layout=torch.sparse_csr) -tensor([0.7377, 0.7528, 0.7695, ..., 0.6702, 0.9924, 0.8686]) +tensor([0.6322, 0.2482, 0.5736, ..., 0.5609, 0.3437, 0.0062]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 -NNZ: 999948 -Density: 9.99948e-05 -Time: 12.20802617073059 seconds +NNZ: 999952 +Density: 9.99952e-05 +Time: 14.76737093925476 seconds diff --git a/pytorch/output_test2/altra_10_2_10_100000_1e-05.json b/pytorch/output_test2/altra_10_2_10_100000_1e-05.json index 0a850b2..0054aa9 100644 --- a/pytorch/output_test2/altra_10_2_10_100000_1e-05.json +++ b/pytorch/output_test2/altra_10_2_10_100000_1e-05.json @@ -1 +1 @@ -{"CPU": "Altra", "ITERATIONS": 104724, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 99999, "MATRIX_DENSITY": 9.9999e-06, "TIME_S": 10.740116596221924, "TIME_S_1KI": 0.10255640155286203, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 720.857314195633, "W": 76.49734010579499, "J_1KI": 6.883401266143702, "W_1KI": 0.7304661787727262, "W_D": 65.89234010579499, "J_D": 620.9232275140286, "W_D_1KI": 0.6291999933710991, "J_D_1KI": 0.0060081738032456665} +{"CPU": "Altra", "ITERATIONS": 109027, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 99999, "MATRIX_DENSITY": 9.9999e-06, "TIME_S": 11.089369058609009, "TIME_S_1KI": 0.10171213606362652, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1338.7191013622285, "W": 84.31423687535087, "J_1KI": 12.278785084082186, "W_1KI": 0.7733335492616588, "W_D": 73.66423687535087, "J_D": 1169.6212246823309, "W_D_1KI": 0.6756513237578844, "J_D_1KI": 0.006197100936078993} diff --git a/pytorch/output_test2/altra_10_2_10_100000_1e-05.output b/pytorch/output_test2/altra_10_2_10_100000_1e-05.output index 78b1d0c..a82b2a3 100644 --- a/pytorch/output_test2/altra_10_2_10_100000_1e-05.output +++ b/pytorch/output_test2/altra_10_2_10_100000_1e-05.output @@ -1,16 +1,16 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 0, 1, ..., 99990, 99994, 99999]), - col_indices=tensor([77500, 30298, 91629, ..., 67143, 70964, 98118]), - values=tensor([ 0.0300, -1.0927, 1.5365, ..., -1.2655, 1.0213, - 0.2378]), size=(100000, 100000), nnz=99999, +tensor(crow_indices=tensor([ 0, 1, 3, ..., 99996, 99998, 99999]), + col_indices=tensor([51912, 73273, 76981, ..., 56282, 97323, 82299]), + values=tensor([-1.0009, -1.0395, 1.0694, ..., -0.2809, -0.4591, + -1.3247]), size=(100000, 100000), nnz=99999, layout=torch.sparse_csr) -tensor([0.9139, 0.4903, 0.5737, ..., 0.7094, 0.3230, 0.9275]) +tensor([0.9591, 0.5528, 0.0037, ..., 0.1141, 0.8131, 0.2616]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 NNZ: 99999 Density: 9.9999e-06 -Time: 10.740116596221924 seconds +Time: 11.089369058609009 seconds diff --git a/pytorch/output_test2/altra_10_2_10_100000_5e-05.json b/pytorch/output_test2/altra_10_2_10_100000_5e-05.json new file mode 100644 index 0000000..159037e --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_100000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 86604, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 499984, "MATRIX_DENSITY": 4.99984e-05, "TIME_S": 11.186031818389893, "TIME_S_1KI": 0.129162992683824, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 703.3605092048646, "W": 72.85834958159226, "J_1KI": 8.121570703487883, "W_1KI": 0.8412815756961833, "W_D": 62.373349581592265, "J_D": 602.1403335988522, "W_D_1KI": 0.720213264763663, "J_D_1KI": 0.008316166282892973} diff --git a/pytorch/output_test2/altra_10_2_10_100000_5e-05.output b/pytorch/output_test2/altra_10_2_10_100000_5e-05.output new file mode 100644 index 0000000..ab9e15a --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_100000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 4, 10, ..., 499969, 499975, + 499984]), + col_indices=tensor([50217, 56812, 62796, ..., 67972, 79752, 87971]), + values=tensor([ 2.8176, 1.0569, 0.3735, ..., -0.3011, 0.4006, + 0.5158]), size=(100000, 100000), nnz=499984, + layout=torch.sparse_csr) +tensor([0.2516, 0.6501, 0.8377, ..., 0.5649, 0.1553, 0.1858]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([100000, 100000]) +Size: 10000000000 +NNZ: 499984 +Density: 4.99984e-05 +Time: 11.186031818389893 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_10000_0.0001.json b/pytorch/output_test2/altra_10_2_10_10000_0.0001.json new file mode 100644 index 0000000..d509025 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 124752, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 10000, "MATRIX_DENSITY": 0.0001, "TIME_S": 11.144775629043579, "TIME_S_1KI": 0.08933544655832033, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 657.3852064323426, "W": 65.4524638588437, "J_1KI": 5.269536411699552, "W_1KI": 0.5246606375756997, "W_D": 54.9824638588437, "J_D": 552.227620215416, "W_D_1KI": 0.4407341273794705, "J_D_1KI": 0.003532882257434514} diff --git a/pytorch/output_test2/altra_10_2_10_10000_0.0001.output b/pytorch/output_test2/altra_10_2_10_10000_0.0001.output new file mode 100644 index 0000000..35de69c --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 3, ..., 9998, 10000, 10000]), + col_indices=tensor([6543, 1100, 5224, ..., 5370, 1002, 7590]), + values=tensor([-0.6024, 0.2491, 0.9340, ..., 0.1715, -0.8476, + 1.0921]), size=(10000, 10000), nnz=10000, + layout=torch.sparse_csr) +tensor([0.4076, 0.0059, 0.6456, ..., 0.1126, 0.9287, 0.3305]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 10000 +Density: 0.0001 +Time: 11.144775629043579 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_10000_1e-05.json b/pytorch/output_test2/altra_10_2_10_10000_1e-05.json new file mode 100644 index 0000000..45abb56 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 143288, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 1000, "MATRIX_DENSITY": 1e-05, "TIME_S": 14.683725357055664, "TIME_S_1KI": 0.10247700684673988, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1078.6493862438201, "W": 74.05715122890027, "J_1KI": 7.527841733039892, "W_1KI": 0.5168412653460183, "W_D": 63.45215122890026, "J_D": 924.1865619075296, "W_D_1KI": 0.44282948487591606, "J_D_1KI": 0.0030904854898938924} diff --git a/pytorch/output_test2/altra_10_2_10_10000_1e-05.output b/pytorch/output_test2/altra_10_2_10_10000_1e-05.output new file mode 100644 index 0000000..53c9ea7 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_1e-05.output @@ -0,0 +1,375 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 999, 999, 1000]), + col_indices=tensor([6479, 3115, 4717, 5855, 3796, 1057, 7556, 8831, 1163, + 9840, 4878, 3472, 6097, 5442, 4400, 9297, 7898, 1121, + 889, 2944, 3048, 7993, 6232, 7988, 589, 8010, 681, + 4719, 9348, 2112, 1701, 6864, 1026, 8052, 3786, 2896, + 1871, 5133, 3657, 8896, 3648, 89, 890, 5156, 9876, + 39, 6216, 6737, 5018, 1232, 4613, 9011, 5276, 8382, + 3025, 522, 814, 6937, 4310, 4645, 2896, 9323, 9106, + 2616, 7838, 7885, 9266, 7297, 7483, 7872, 1670, 6795, + 6030, 7036, 6238, 4721, 5031, 4228, 222, 9994, 1938, + 8758, 9348, 6185, 9814, 4057, 1293, 670, 7617, 2314, + 9332, 3275, 5628, 6754, 9297, 1394, 7513, 9421, 2955, + 8213, 3049, 675, 9708, 6103, 7119, 8224, 8482, 6105, + 1938, 57, 8619, 5744, 3545, 5822, 3241, 6496, 7484, + 8472, 8242, 1625, 4128, 5361, 7669, 183, 6679, 8543, + 4173, 4213, 8417, 7507, 8597, 8926, 3833, 5984, 5410, + 4207, 4752, 4378, 9652, 1551, 5699, 1545, 5029, 7031, + 3400, 9893, 6065, 8113, 9253, 1026, 1820, 848, 2188, + 4432, 2912, 9706, 109, 9541, 7950, 4202, 8921, 7013, + 323, 8324, 6781, 1055, 4480, 4534, 585, 8177, 4113, + 6524, 7513, 9467, 1797, 4903, 5038, 2752, 4695, 1113, + 9978, 994, 3560, 5800, 5712, 6106, 3911, 7992, 7737, + 4765, 4629, 2485, 6310, 4251, 1871, 4843, 7475, 3171, + 4420, 2399, 195, 9772, 7896, 4517, 6364, 3804, 4048, + 5517, 280, 7555, 7705, 229, 8337, 2779, 4926, 8103, + 9746, 7514, 5867, 9627, 9666, 285, 988, 5807, 4321, + 3623, 5705, 1233, 5337, 4750, 6099, 5698, 1341, 4488, + 547, 4798, 3752, 351, 4087, 3459, 8976, 2421, 7544, + 5242, 7727, 1414, 5290, 7533, 7924, 3026, 8963, 4646, + 9338, 5685, 1715, 5002, 1035, 7582, 888, 1738, 4863, + 9977, 8685, 4683, 3323, 8590, 1574, 205, 3260, 7246, + 7770, 8001, 9827, 6157, 789, 1020, 6235, 2157, 4538, + 1345, 7724, 5995, 5646, 5025, 7799, 5175, 1868, 2795, + 6872, 6475, 7933, 1454, 2575, 8613, 1263, 3481, 6136, + 7610, 875, 2813, 1942, 9694, 6276, 1110, 46, 233, + 9884, 2112, 3704, 7554, 2404, 1502, 171, 5516, 5604, + 1230, 3991, 4025, 7148, 6686, 5326, 3812, 3427, 3691, + 2654, 4039, 5288, 6778, 5283, 7063, 2136, 4977, 9614, + 8839, 7529, 7288, 3392, 1463, 9474, 3526, 7478, 3936, + 8768, 8945, 2068, 5001, 2670, 4044, 2459, 9032, 509, + 7244, 4512, 2162, 3920, 518, 8135, 7684, 1625, 899, + 8337, 1106, 6754, 8794, 5415, 771, 2058, 2102, 6707, + 3721, 9308, 7660, 1802, 7789, 3515, 3744, 1302, 9625, + 8810, 8550, 4507, 2667, 2094, 3172, 9480, 536, 6733, + 3755, 2930, 9522, 2835, 1407, 8272, 5823, 6624, 7775, + 6586, 7914, 7454, 3548, 4363, 1575, 5973, 289, 5066, + 1594, 2286, 6130, 8476, 1039, 1899, 1888, 2094, 8870, + 5985, 7784, 7661, 6996, 6959, 8794, 5413, 3142, 1621, + 5070, 8852, 9606, 7928, 9121, 5056, 8274, 6156, 2163, + 8883, 5212, 6099, 5880, 8248, 7634, 2389, 9434, 3957, + 6900, 9933, 1067, 6341, 7465, 9654, 1306, 8016, 8324, + 7382, 1734, 2662, 2384, 401, 4424, 1713, 4836, 8858, + 6721, 1997, 5366, 3488, 5379, 9281, 8856, 5941, 7579, + 5646, 5270, 7482, 8429, 757, 2107, 2595, 1041, 6537, + 6393, 830, 2670, 2336, 7405, 5341, 6712, 7382, 3795, + 5831, 1537, 5453, 5790, 9458, 4549, 1053, 5087, 667, + 8609, 1660, 5027, 8449, 6001, 1967, 5091, 9429, 9040, + 8250, 7449, 9250, 6616, 472, 8694, 345, 6015, 5900, + 3284, 3956, 7132, 3967, 2765, 3857, 3071, 5575, 1502, + 7543, 9804, 2007, 7147, 7106, 6521, 1337, 7959, 3501, + 5699, 2823, 7126, 6702, 6789, 2287, 9307, 4457, 7040, + 7174, 2289, 8553, 9427, 2419, 3637, 1324, 6394, 5687, + 8009, 4797, 1821, 5036, 2474, 5070, 2792, 6874, 5519, + 3250, 3658, 5279, 6385, 9080, 2819, 2427, 4478, 9364, + 9176, 5998, 8605, 8483, 7179, 6753, 7754, 6280, 1939, + 5500, 755, 8458, 3769, 9523, 1450, 8564, 8985, 8733, + 3928, 6234, 8906, 1325, 2021, 2659, 640, 9454, 424, + 9676, 2363, 334, 71, 1000, 148, 2587, 695, 8747, + 7229, 1728, 8250, 1357, 5682, 6102, 4164, 7437, 9737, + 7218, 6401, 4303, 7215, 1589, 4290, 1909, 1295, 6421, + 2863, 9742, 92, 3126, 6257, 907, 3426, 2959, 7765, + 8200, 3672, 898, 1000, 988, 6557, 8521, 7471, 1157, + 1706, 7797, 7518, 5521, 1579, 6063, 9334, 1605, 7891, + 3070, 9925, 941, 7096, 4237, 2762, 2654, 8958, 6977, + 1547, 8508, 1040, 6512, 9536, 5583, 2664, 5585, 7596, + 7495, 3230, 2469, 6410, 6120, 1264, 1836, 6650, 4584, + 4640, 7089, 8787, 5709, 4226, 5693, 238, 8440, 9055, + 5366, 1690, 7085, 6059, 1452, 4265, 936, 624, 6300, + 7687, 2059, 6396, 5860, 9619, 794, 9594, 4366, 2421, + 2697, 3030, 9637, 140, 1128, 8140, 8705, 9521, 8838, + 9983, 547, 348, 4506, 9396, 169, 141, 4555, 7107, + 7397, 7694, 8564, 7152, 9961, 8054, 4514, 5201, 4730, + 3059, 3946, 3163, 1482, 7438, 3662, 6359, 3937, 865, + 7014, 1984, 7400, 6677, 905, 1488, 9493, 8268, 5615, + 4851, 5245, 8723, 3897, 5907, 2752, 5838, 2471, 445, + 3465, 2900, 6608, 6233, 7080, 6448, 8827, 3467, 6532, + 4413, 9497, 5236, 6956, 4191, 9905, 4772, 7447, 1078, + 9387, 5792, 9925, 3766, 2706, 7798, 2504, 8639, 5011, + 5454, 547, 8636, 1809, 8870, 211, 601, 9812, 655, + 742, 1651, 4354, 2033, 6990, 5578, 7880, 1926, 8127, + 7562, 6114, 9866, 5639, 5955, 9260, 923, 9272, 7977, + 5738, 9701, 5583, 8763, 5050, 3403, 1688, 7314, 1983, + 1988, 5822, 3325, 157, 769, 4037, 218, 186, 3512, + 1872, 7753, 7587, 6176, 5552, 2572, 4543, 9259, 6603, + 2214, 3225, 6280, 9020, 1252, 4965, 1022, 2240, 9386, + 2382, 6120, 4618, 4838, 1633, 7155, 7616, 6691, 665, + 1457, 3409, 9706, 8970, 1878, 2257, 7914, 872, 4755, + 6946, 3734, 3635, 6124, 2445, 2675, 7152, 9976, 3260, + 9901, 7939, 4090, 1247, 9770, 3890, 2031, 6049, 8111, + 1679, 1075, 289, 2499, 7825, 6661, 4230, 5533, 3887, + 4746, 8094, 5553, 6416, 6536, 9941, 4540, 886, 4348, + 2837, 7877, 875, 489, 8816, 8353, 2403, 3760, 8893, + 9209, 4416, 9370, 6304, 5420, 8991, 6899, 6841, 7814, + 6292, 2119, 2377, 4335, 5967, 7689, 6273, 4528, 5047, + 1727, 6945, 3018, 437, 2433, 5483, 4409, 5809, 9746, + 8283, 4812, 2834, 2478, 2661, 1093, 5995, 2, 4420, + 4097, 2108, 2123, 4587, 4745, 8528, 2328, 878, 1420, + 1364, 2861, 6002, 8255, 8269, 1056, 3081, 3672, 7049, + 3619, 9812, 3388, 5119, 152, 2963, 6489, 8225, 9280, + 2204, 9990, 6659, 3245, 9097, 7166, 7749, 465, 2116, + 4012, 3215, 6162, 273, 9071, 613, 9841, 566, 9006, + 9137, 3195, 2782, 8978, 6630, 4696, 6279, 7662, 1354, + 4269]), + values=tensor([-7.2998e-01, -7.4456e-01, 3.4477e-01, -1.0187e+00, + -1.0044e+00, -1.4693e+00, -4.3524e-01, -3.3832e-01, + 1.4596e+00, 9.4389e-02, -4.8143e-01, 8.6704e-01, + 5.8689e-01, -1.5651e+00, -3.9737e-01, 1.9204e+00, + -8.7053e-01, -1.1434e+00, -1.5080e-01, -1.2403e+00, + 1.0841e+00, -7.5341e-01, 4.4580e-01, -4.1078e-01, + 5.1427e-01, 5.5780e-01, 2.0935e+00, 1.6707e-01, + 3.5903e-02, -1.7397e-01, -1.7942e+00, 2.9346e-01, + 1.3710e+00, -1.6456e+00, 1.0911e+00, 3.8715e-01, + -7.7361e-01, 9.7373e-01, 2.7513e-01, 1.5095e-01, + -1.7172e+00, 4.7350e-01, -2.1276e+00, -7.0403e-01, + -1.4326e+00, 5.4913e-01, 9.3244e-01, -1.2736e+00, + 1.7894e-01, 5.0785e-01, -1.9120e+00, -6.7372e-01, + 1.0990e+00, 7.9786e-01, -3.5433e-01, 1.4433e+00, + -1.2325e-01, -2.7658e-01, 5.4439e-01, 2.6426e-02, + -1.0208e+00, -5.3690e-01, -8.2000e-02, -2.1889e-01, + -1.5900e+00, 1.9102e+00, -1.0639e+00, 1.0062e+00, + 9.4828e-01, 7.6437e-01, 1.7356e+00, -1.9411e-01, + -1.0442e+00, -4.2639e-01, -5.5511e-01, 4.5370e-01, + -1.8633e-01, -7.6753e-01, 1.7345e+00, 1.8726e+00, + 7.6290e-01, 1.5324e+00, -5.1148e-01, 1.3375e-01, + -7.0048e-02, 5.2116e-01, 1.8010e+00, -4.5363e-01, + -3.0493e-01, 1.5984e+00, 2.4389e-01, 6.3357e-01, + 1.9627e+00, 9.2926e-01, 9.4726e-01, 1.4193e+00, + 1.3151e+00, -2.9556e-02, 2.5993e-01, -1.2392e+00, + -6.2775e-01, -5.1552e-01, 2.8292e-01, -1.4247e+00, + 4.0375e-01, -4.2962e-01, 5.5634e-01, 8.9631e-01, + 1.4419e+00, -1.4131e+00, 7.7520e-01, 3.7470e-01, + 1.0635e+00, -1.0983e+00, -5.2634e-01, -1.1688e+00, + 6.1775e-01, 1.4562e+00, 9.8589e-01, -8.2176e-01, + 1.3035e+00, -5.8891e-01, -1.0283e+00, 2.9444e-01, + -2.8273e-01, 1.1188e+00, 1.8346e-01, 1.0485e+00, + -1.3999e+00, 1.9224e-01, -3.4045e-01, 1.0224e+00, + 1.8074e+00, 1.5770e+00, -1.5899e+00, -6.3306e-01, + 3.0346e-02, 2.7839e-01, -8.6080e-01, -5.6682e-01, + -1.8426e+00, 2.9815e-01, 2.6710e-02, 9.7297e-01, + -5.0825e-01, -6.6015e-01, -9.1973e-01, 3.2248e-01, + 3.0468e-02, -1.0088e+00, -1.0745e+00, 1.5058e+00, + 3.2921e-02, -1.5350e+00, 4.2802e-01, -1.3097e+00, + -1.2399e+00, 3.7438e-01, -1.1187e-01, -1.9535e+00, + 4.9469e-01, -1.2504e+00, 6.6612e-01, 2.1916e-01, + 1.8320e-01, 3.6086e-01, 5.0287e-01, -1.8645e+00, + -2.1213e+00, 1.0252e+00, 1.4347e-01, -1.0769e+00, + -3.6245e-01, 1.2715e-01, 1.3664e-01, -4.6585e-01, + 1.6159e+00, 1.0699e+00, 5.2361e-02, -1.3191e+00, + 2.5929e-01, 2.0209e+00, 6.4900e-02, 1.9648e+00, + -1.7187e-01, -6.2694e-01, -3.8953e-01, 7.1935e-01, + 3.0865e-01, 1.4352e+00, -3.8383e-01, 4.5598e-01, + -2.6955e-01, 9.5453e-01, -3.2503e-01, -2.5612e-01, + 1.7842e+00, -1.8518e-01, 5.9814e-01, 4.2184e-01, + -4.2226e-01, 1.5432e+00, 6.5723e-01, 8.7977e-01, + 1.5705e+00, -5.0474e-01, -8.7012e-01, 6.9947e-01, + 2.6916e-01, -1.7358e+00, -8.5223e-01, 1.2391e+00, + -3.5854e-01, -2.0812e+00, -1.5762e-01, -2.4264e-01, + -1.1387e+00, 4.4071e-01, -1.1569e+00, 5.9616e-03, + 1.6277e+00, 1.7105e+00, -1.6196e+00, 4.1804e-01, + -1.2078e+00, -7.2068e-01, -1.2853e+00, 1.5126e-01, + 2.9485e-01, 4.9357e-01, -4.7104e-01, -6.2904e-01, + 3.9228e-01, 7.1090e-01, -1.3956e+00, 3.4913e-01, + 1.6433e+00, 1.0227e+00, -1.0921e+00, 1.1231e+00, + -1.0697e+00, -1.9333e-01, 4.4284e-02, 2.1280e-01, + 7.0283e-01, 5.7306e-01, 1.2751e+00, 3.9377e-01, + -2.0720e-01, -1.4800e+00, 6.3344e-01, 4.2445e-01, + 1.4313e+00, -1.4253e+00, 6.2623e-01, 3.8520e-01, + -8.8217e-01, 1.3559e-01, 9.8086e-01, -9.2960e-01, + 1.1514e+00, 7.7487e-01, -1.0084e+00, -1.0475e+00, + -5.2888e-01, -8.3291e-01, -1.8248e+00, -3.1423e+00, + -1.0521e+00, -9.9453e-01, -1.5773e+00, 1.3359e+00, + 1.1314e+00, -3.5125e-01, 1.6508e-01, 1.5815e+00, + -8.7103e-01, 9.6517e-01, -4.1519e-01, -1.1196e+00, + -5.0030e-01, 1.8968e+00, 5.5156e-01, -5.7922e-01, + -3.0602e-01, 8.7648e-01, -2.8517e-01, -2.9744e-01, + -7.4999e-01, 2.8937e-01, -7.9673e-01, -8.7117e-01, + -1.0940e+00, 4.2568e-01, 3.3066e-01, 1.1591e-01, + -9.5260e-01, 5.2968e-01, -1.6354e+00, 3.5492e-01, + 1.6254e+00, 7.1252e-01, 7.8410e-01, 1.4605e+00, + 1.5979e-01, 1.2091e+00, 1.9792e+00, 7.2778e-01, + 9.2618e-01, -1.2020e+00, -7.5116e-01, -3.1533e-01, + 3.5765e-01, 1.3068e+00, 1.7627e-01, 5.8602e-01, + -1.2538e+00, 6.3901e-01, -1.6345e+00, -4.4813e-01, + -7.5592e-01, -1.3613e+00, -2.3428e-02, -9.3267e-01, + -1.0531e+00, -7.8175e-01, -2.4052e-01, 5.5371e-01, + 1.3127e+00, -1.1702e+00, 3.7622e-01, 3.1787e-02, + 8.5983e-03, -6.9158e-01, 6.2972e-01, -6.0011e-01, + -5.0325e-02, -9.5664e-01, -1.2695e+00, 3.9009e-01, + -5.8587e-01, 1.4509e+00, -4.1096e-02, 3.3566e-01, + -8.1094e-01, -1.7999e+00, 7.8758e-01, -1.4106e-01, + -5.9981e-01, 1.0734e-01, 2.1687e+00, -7.7576e-01, + -1.7830e+00, 3.4887e-01, -2.4571e-01, 9.1632e-01, + -6.5365e-01, -2.8534e-01, -1.3526e+00, -1.3233e+00, + -3.2920e-01, -5.9307e-01, 8.6740e-01, -1.2758e+00, + -1.0818e+00, 3.7876e-02, 7.3231e-01, 9.4827e-01, + -1.1367e+00, -1.1491e+00, 7.5078e-01, -1.5669e+00, + -2.3817e-02, 1.8235e+00, -1.6757e+00, 2.3020e-03, + -1.6120e+00, -5.6508e-01, 1.0216e+00, 1.1109e+00, + 4.4064e-03, -3.0990e-01, -1.0602e+00, 6.7224e-01, + -1.5558e+00, 7.1329e-01, -1.8696e-01, -3.4279e-01, + 2.6478e-01, 8.9681e-02, -1.8134e-01, 1.7732e+00, + 8.0057e-01, 5.6668e-01, -1.0491e+00, -4.5977e-01, + 1.8484e+00, -3.2277e-01, 2.5545e-01, 1.6333e-01, + -6.5691e-01, 3.6047e-01, -1.6018e+00, 2.6492e-01, + -3.6704e-01, -8.6275e-01, -3.3381e-01, -2.5938e-01, + -4.4064e-01, 6.2224e-01, 3.5504e-01, -1.3863e+00, + -9.4222e-01, -1.0396e+00, 2.3028e+00, 1.5774e+00, + 4.6592e-01, 1.8896e+00, 1.4314e+00, 7.9368e-01, + 1.7264e+00, -8.7838e-01, 6.5233e-01, 2.4539e-01, + 1.7227e+00, 2.9940e-01, 1.3372e-01, 1.6940e-01, + -1.6284e-01, 4.6076e-02, -1.4535e+00, -3.1876e-01, + 2.6235e-01, -2.7923e-01, -1.1042e+00, -6.3335e-01, + 2.2331e-01, -3.3434e-01, 1.1257e+00, -7.5085e-01, + 5.8664e-01, -7.1117e-01, 9.5397e-01, 8.3881e-01, + 7.5974e-01, -2.2942e-01, 7.4273e-01, -8.9993e-01, + 9.0466e-01, 4.1832e-01, 1.7091e+00, 7.4549e-02, + 1.6276e+00, 3.8153e-01, 3.6910e-01, -6.7370e-01, + 7.3541e-01, -2.6342e-02, 7.0989e-01, 3.4771e-01, + 1.3447e+00, -1.1100e+00, 1.0608e+00, -9.0350e-01, + 5.7913e-02, 2.5658e-02, 1.2074e+00, 4.6002e-01, + -2.0185e-01, -6.9327e-01, -1.0991e+00, -6.8715e-01, + -4.7089e-01, -1.6164e+00, 6.8740e-01, -1.1291e+00, + -3.1846e-01, -1.8473e+00, -3.0798e-01, 1.7178e+00, + -1.3688e+00, 2.1002e+00, -4.1005e-02, 6.5597e-01, + 1.9940e+00, 4.8965e-01, -3.2856e-01, -4.5221e-01, + -3.8701e-01, 1.0978e+00, 2.0471e+00, 1.1246e-01, + -1.4968e+00, -1.5937e+00, -2.2886e+00, -5.2454e-01, + 2.2714e-02, 1.1280e+00, 8.5152e-02, 1.1737e+00, + 1.0344e+00, -7.9111e-02, 6.6589e-01, 1.8608e-01, + -2.0561e+00, -1.4294e+00, 2.2180e+00, -7.4942e-01, + 1.6159e+00, 1.1977e+00, 3.2921e-01, 9.0332e-01, + 1.1855e-01, 2.4343e-01, 1.0998e+00, -2.8499e-01, + -1.0149e+00, 3.9209e-01, -1.2792e-01, -5.3702e-01, + -1.3198e+00, -9.3403e-01, 5.2020e-01, -4.4716e-01, + -1.5790e+00, 4.0554e-01, -3.7547e-01, -8.7263e-01, + 9.9404e-01, 1.2314e+00, -6.2519e-01, 9.9526e-01, + -1.2041e+00, 1.0053e+00, 1.9907e+00, 1.4260e-01, + -3.4284e-02, -2.5151e-01, 5.4337e-01, -9.5155e-01, + -7.9959e-01, 1.9833e-01, -2.1154e-01, 1.1495e-01, + -1.1370e-01, 1.4304e+00, 9.3396e-01, -7.4156e-01, + -2.5913e-01, -1.1952e+00, -1.4044e+00, -9.5951e-01, + 5.8783e-01, 5.6352e-01, -8.8568e-01, -1.5047e+00, + -1.2746e+00, -2.1674e-01, 1.5398e+00, -1.1717e+00, + -8.7999e-01, 4.3623e-01, 2.5355e+00, 1.2578e+00, + -1.1059e+00, 1.1124e+00, 7.8657e-01, -7.7026e-01, + -3.4921e-01, -6.7918e-02, -3.3681e-01, -5.5700e-02, + -1.5625e+00, 3.6662e-01, 1.2499e-01, -9.7597e-02, + -4.2032e-01, -4.4446e-01, -4.9773e-01, 1.2602e+00, + 7.1671e-01, -1.7042e+00, 6.7912e-01, -1.1853e+00, + 7.8122e-01, -1.3862e+00, 1.5876e+00, -2.0916e-01, + -6.9287e-01, -1.2172e+00, -7.6964e-01, -6.5200e-02, + 7.9736e-01, 1.1498e+00, -1.4432e+00, 1.7212e+00, + -2.2876e-01, -1.4215e+00, 1.0146e-01, -3.1597e-01, + 6.1613e-02, -5.1080e-01, 8.3908e-01, -9.5323e-01, + -4.1348e-01, 4.8230e-01, -1.7117e+00, 1.1920e+00, + 1.9271e-01, -1.5754e+00, 1.5758e+00, 1.6242e+00, + -1.4219e+00, -1.8540e+00, -8.2778e-01, 2.1010e-01, + -2.4406e-01, -1.9005e+00, 1.0068e+00, -6.6636e-01, + -1.0553e+00, -1.7132e+00, -1.7807e+00, 2.8786e-01, + 2.4391e-01, 7.7426e-01, 2.7373e-01, 1.0370e+00, + 2.6797e-02, 2.4975e+00, -9.0851e-01, 7.1709e-01, + 5.5147e-01, -3.2873e-01, 1.7774e+00, 6.2712e-01, + -2.5910e-01, -1.3377e+00, 6.4783e-01, -6.6019e-01, + -7.8775e-01, -6.2118e-01, 8.7092e-02, -4.8095e-01, + 1.8732e+00, 2.9550e-01, -5.9324e-02, 4.3735e-01, + 1.0869e+00, -9.7961e-01, -2.8692e-01, -1.7983e+00, + 1.4130e+00, 1.4342e+00, -2.8091e-01, -9.3072e-01, + 1.8908e-01, 1.4547e+00, 7.6457e-01, 2.8210e-01, + -3.8370e-02, 1.9881e+00, -3.2404e-01, 1.3790e+00, + 8.8769e-01, 2.5962e+00, -4.4460e-01, 3.0404e-01, + 4.2994e-01, -7.4070e-01, 7.5822e-01, -7.3455e-03, + -2.8524e+00, -2.1554e-02, 8.9089e-01, -1.8337e-01, + -4.9342e-01, 8.6433e-01, -8.4646e-01, -4.3106e-02, + -3.8655e-01, -3.4969e-01, -4.7278e-01, 2.7075e+00, + -2.0911e-01, 2.1016e-01, -6.6274e-01, -1.9051e+00, + -9.9419e-01, 2.1293e+00, 1.8203e+00, -3.3297e-01, + 2.2172e+00, -8.0419e-01, 1.1549e+00, 1.4993e-01, + 1.7666e+00, 7.9516e-01, -2.6300e+00, 9.7719e-02, + -2.0529e-01, 2.4598e-01, 6.6070e-02, 3.9417e-01, + 6.8219e-01, 8.8059e-01, -1.4649e+00, 5.4011e-01, + -4.1180e-02, 1.7078e-01, -9.4379e-01, 2.9841e-03, + -4.7754e-02, 9.8577e-02, -4.4666e-01, -1.1371e+00, + -1.4925e-01, -2.2278e+00, 6.5378e-02, -1.5335e+00, + -1.8529e+00, 1.0216e+00, -1.8265e-01, 5.7717e-01, + 3.0942e-03, -9.2210e-01, -1.6033e-01, -3.6544e-01, + -1.8180e+00, 3.3216e-02, 1.0062e+00, 6.3406e-02, + -5.0263e-01, 7.5054e-01, -1.1869e+00, -2.1323e+00, + 6.3238e-01, -5.0871e-01, -1.2799e-02, 4.4926e-01, + 1.1932e+00, 4.3280e-01, 4.4488e-01, -9.7017e-01, + -7.9599e-01, -1.5263e+00, 3.1828e-01, -1.0308e+00, + -1.0291e+00, -7.9305e-01, -9.3113e-01, 3.0383e-01, + -4.4581e-01, -7.8833e-01, -9.3751e-01, -4.5619e-01, + -2.8221e-01, -9.6035e-01, -2.7023e-01, 5.3940e-01, + -1.8338e+00, -2.0746e+00, -1.9253e+00, -1.2650e+00, + -1.5108e-01, -1.4237e+00, 8.1445e-02, -1.1240e+00, + 1.5612e+00, -1.2562e+00, 3.1824e-01, 5.6413e-01, + -1.3930e+00, -3.3761e-01, 1.1518e+00, 8.8995e-01, + 6.2780e-01, -1.1175e+00, -8.4073e-02, 7.3407e-01, + -7.3282e-01, -4.1945e-01, -6.3481e-01, -2.5250e+00, + -1.9411e-01, 5.2893e-01, 1.3592e-01, -4.1929e-02, + 1.9824e+00, -5.4967e-01, -1.8487e-01, -1.3102e-01, + -3.0684e+00, 8.9770e-02, -2.4619e+00, 1.5200e+00, + 1.5291e+00, -1.6923e+00, -6.7511e-01, -4.0813e-02, + 1.8419e+00, 8.8464e-01, 3.4792e-01, 3.8462e-01, + -9.3891e-01, -1.4633e-01, 1.0408e-01, -6.0281e-01, + 6.7417e-01, 4.6902e-01, -4.7878e-02, -2.4994e-03, + -5.3854e-01, -3.6643e-02, 5.8389e-01, -5.5696e-01, + 1.3861e+00, 3.2715e-01, -1.0697e+00, 1.0265e-01, + -1.9004e+00, 1.7313e-01, 1.9422e+00, -9.7055e-01, + 5.0661e-01, 1.6237e-02, 1.3856e+00, -1.5653e+00, + -1.2222e+00, 1.5164e-01, -2.3167e+00, -1.0366e+00, + 9.3753e-01, 2.4919e-01, -1.5544e-01, -1.2676e+00, + 5.2621e-01, -1.3731e+00, 1.4062e-01, -6.1994e-01, + -1.1799e+00, -6.1786e-01, -1.3474e+00, 9.5969e-01, + -6.2614e-01, -1.4824e+00, -4.8794e-01, -1.2150e+00, + 7.6974e-01, 1.0569e+00, -6.9266e-02, 8.7721e-02, + 6.9163e-01, -1.0692e+00, 8.9054e-02, -4.1989e-01, + 1.0683e+00, -5.4554e-01, 1.6999e-01, 1.2482e+00, + -1.5045e+00, -7.6083e-01, -1.1662e+00, 5.1516e-01, + 2.1758e-01, 6.2458e-01, -7.7595e-01, 1.8924e+00, + -5.9510e-01, -1.4791e+00, 1.5966e+00, -2.1018e+00, + -9.2216e-01, -2.1002e-01, 4.5121e-01, 1.4576e+00, + -7.6810e-01, -3.9639e-01, -9.4050e-01, -1.2569e+00, + -9.0495e-01, 1.4025e+00, -1.7575e-01, 2.9283e-01, + -1.2332e+00, 3.2072e-01, 5.2668e-01, 6.8229e-01, + -8.5882e-01, -2.1604e-01, -2.0212e+00, -6.0747e-01, + -1.3662e-01, 4.3108e-01, 1.6482e+00, 7.8054e-01, + 4.2441e-01, -9.9305e-02, 2.5850e-01, -1.1061e+00, + -1.4998e-01, -5.2035e-01, 7.0228e-01, -2.3761e-01, + -8.2698e-01, 2.6248e+00, -1.0233e+00, 2.5725e-01, + 4.2821e-01, 9.3467e-01, -2.1869e+00, -4.9542e-01, + 8.0347e-01, 2.6591e-01, -7.5103e-01, -3.0667e-03, + 7.1254e-01, 7.1686e-01, 3.7952e-01, 1.3113e+00, + 2.1049e-01, 1.4192e+00, 2.6454e-01, 2.0789e+00, + 1.8061e+00, 3.0976e-01, -8.3733e-02, -1.4670e+00, + -7.2831e-01, 5.1600e-01, 1.1845e-01, 5.1758e-01, + -9.8339e-01, 3.4486e-01, -1.4062e+00, -9.7044e-01, + -5.2972e-03, 9.1333e-01, 1.0024e+00, 9.3248e-01, + -1.4035e-01, 8.8494e-01, 7.4568e-01, -4.6183e-01, + -3.7966e-01, 6.5651e-02, 1.2309e+00, 1.1560e+00, + 2.0723e+00, 2.3916e+00, 1.3140e+00, 2.1369e-01, + 2.0539e-01, -1.9116e-01, -1.2841e+00, 1.0293e-01, + -1.1464e+00, -1.1779e+00, 1.3296e+00, -3.9741e-01, + 1.0123e+00, 3.8692e-01, 9.6798e-01, -5.0365e-01, + 6.9614e-01, 6.5680e-01, 6.3927e-01, 1.2289e-01, + -7.1378e-01, 1.4048e+00, -2.9717e-01, 4.0848e-01, + -1.7374e+00, 8.1578e-01, 8.9790e-01, 7.8613e-01, + -9.3854e-01, -1.2153e+00, 1.7158e+00, -2.4091e-01, + 1.8530e-01, -9.8432e-01, -7.6705e-02, 1.1269e+00, + 1.4949e+00, -3.2681e-01, 8.3171e-01, 8.4933e-01, + -4.8143e-01, 7.3843e-01, 1.2397e-01, 4.5028e-01, + -5.2425e-01, -1.6772e+00, 2.3094e+00, -5.5873e-01, + -7.8440e-01, 1.9962e-01, 8.1310e-01, -2.1801e-01]), + size=(10000, 10000), nnz=1000, layout=torch.sparse_csr) +tensor([0.7505, 0.4073, 0.0835, ..., 0.6948, 0.5731, 0.2916]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 1000 +Density: 1e-05 +Time: 14.683725357055664 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_10000_5e-05.json b/pytorch/output_test2/altra_10_2_10_10000_5e-05.json new file mode 100644 index 0000000..849032d --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 120060, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 5000, "MATRIX_DENSITY": 5e-05, "TIME_S": 12.660146236419678, "TIME_S_1KI": 0.10544849438963584, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 870.1460137557983, "W": 72.72296358512563, "J_1KI": 7.247592984805916, "W_1KI": 0.6057218356249011, "W_D": 61.982963585125624, "J_D": 741.6395870780945, "W_D_1KI": 0.516266563261083, "J_D_1KI": 0.004300071324846602} diff --git a/pytorch/output_test2/altra_10_2_10_10000_5e-05.output b/pytorch/output_test2/altra_10_2_10_10000_5e-05.output new file mode 100644 index 0000000..d6ee707 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_10000_5e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 1, ..., 5000, 5000, 5000]), + col_indices=tensor([8592, 9144, 2169, ..., 9134, 8894, 359]), + values=tensor([-0.0060, -0.0063, -0.4176, ..., 1.0189, 0.1882, + 1.7586]), size=(10000, 10000), nnz=5000, + layout=torch.sparse_csr) +tensor([0.3551, 0.3905, 0.3120, ..., 0.2483, 0.7672, 0.2531]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 5000 +Density: 5e-05 +Time: 12.660146236419678 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_20000_0.0001.json b/pytorch/output_test2/altra_10_2_10_20000_0.0001.json new file mode 100644 index 0000000..4b43599 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 133794, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 39999, "MATRIX_DENSITY": 9.99975e-05, "TIME_S": 16.90324330329895, "TIME_S_1KI": 0.12633782758045167, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 805.8708676052095, "W": 75.0263270228942, "J_1KI": 6.023221277525221, "W_1KI": 0.5607600267791844, "W_D": 64.4763270228942, "J_D": 692.551477057934, "W_D_1KI": 0.48190746238915194, "J_D_1KI": 0.003601861536310686} diff --git a/pytorch/output_test2/altra_10_2_10_20000_0.0001.output b/pytorch/output_test2/altra_10_2_10_20000_0.0001.output new file mode 100644 index 0000000..7168738 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 3, ..., 39995, 39997, 39999]), + col_indices=tensor([17006, 4621, 18341, ..., 19319, 4727, 18723]), + values=tensor([ 0.2379, -0.9389, 0.6425, ..., 0.1001, 1.7488, + -0.7276]), size=(20000, 20000), nnz=39999, + layout=torch.sparse_csr) +tensor([0.8179, 0.8239, 0.9268, ..., 0.9263, 0.5883, 0.5053]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 39999 +Density: 9.99975e-05 +Time: 16.90324330329895 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_20000_1e-05.json b/pytorch/output_test2/altra_10_2_10_20000_1e-05.json new file mode 100644 index 0000000..7b7fbbf --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 111302, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 4000, "MATRIX_DENSITY": 1e-05, "TIME_S": 10.277668476104736, "TIME_S_1KI": 0.09234037551979961, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 837.560459136963, "W": 71.64058174307, "J_1KI": 7.525115982973918, "W_1KI": 0.6436594287889705, "W_D": 60.82558174307, "J_D": 711.1207214188578, "W_D_1KI": 0.5464913635250939, "J_D_1KI": 0.004909986914207237} diff --git a/pytorch/output_test2/altra_10_2_10_20000_1e-05.output b/pytorch/output_test2/altra_10_2_10_20000_1e-05.output new file mode 100644 index 0000000..e963e8d --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_1e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 1, ..., 3999, 3999, 4000]), + col_indices=tensor([10253, 7068, 18490, ..., 5661, 16756, 17692]), + values=tensor([ 1.4469, 0.6569, -0.3333, ..., -0.9449, -0.0864, + -1.4279]), size=(20000, 20000), nnz=4000, + layout=torch.sparse_csr) +tensor([0.2516, 0.4285, 0.8673, ..., 0.0632, 0.3777, 0.0594]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 4000 +Density: 1e-05 +Time: 10.277668476104736 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_20000_5e-05.json b/pytorch/output_test2/altra_10_2_10_20000_5e-05.json new file mode 100644 index 0000000..f6b13ff --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 125814, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 19999, "MATRIX_DENSITY": 4.99975e-05, "TIME_S": 10.677078485488892, "TIME_S_1KI": 0.08486399355786232, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 799.5182011795044, "W": 75.37915658808171, "J_1KI": 6.3547633902388005, "W_1KI": 0.5991317070284842, "W_D": 64.78415658808171, "J_D": 687.1410438203812, "W_D_1KI": 0.5149200930586557, "J_D_1KI": 0.004092709023309454} diff --git a/pytorch/output_test2/altra_10_2_10_20000_5e-05.output b/pytorch/output_test2/altra_10_2_10_20000_5e-05.output new file mode 100644 index 0000000..81037f8 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_20000_5e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 2, ..., 19996, 19998, 19999]), + col_indices=tensor([ 2667, 4661, 5883, ..., 13481, 17827, 19838]), + values=tensor([-1.5771, -0.6983, -0.6117, ..., -1.0094, -0.7733, + 0.5763]), size=(20000, 20000), nnz=19999, + layout=torch.sparse_csr) +tensor([0.3110, 0.3755, 0.4763, ..., 0.0556, 0.0235, 0.3706]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 19999 +Density: 4.99975e-05 +Time: 10.677078485488892 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_50000_0.0001.json b/pytorch/output_test2/altra_10_2_10_50000_0.0001.json new file mode 100644 index 0000000..ce83201 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 118605, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 249986, "MATRIX_DENSITY": 9.99944e-05, "TIME_S": 11.267115592956543, "TIME_S_1KI": 0.09499696971423248, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 878.7403219223023, "W": 76.94861601833053, "J_1KI": 7.408965236898127, "W_1KI": 0.6487805406039419, "W_D": 66.37861601833052, "J_D": 758.0326902151107, "W_D_1KI": 0.5596611948765273, "J_D_1KI": 0.004718698156709476} diff --git a/pytorch/output_test2/altra_10_2_10_50000_0.0001.output b/pytorch/output_test2/altra_10_2_10_50000_0.0001.output new file mode 100644 index 0000000..d13068d --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_0.0001.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 5, 15, ..., 249976, 249982, + 249986]), + col_indices=tensor([17442, 19726, 35075, ..., 45436, 48088, 48654]), + values=tensor([-0.4134, -0.5604, 2.5859, ..., 0.1425, -0.4736, + -0.0671]), size=(50000, 50000), nnz=249986, + layout=torch.sparse_csr) +tensor([0.7503, 0.4656, 0.2507, ..., 0.7766, 0.1600, 0.0359]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 249986 +Density: 9.99944e-05 +Time: 11.267115592956543 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_50000_1e-05.json b/pytorch/output_test2/altra_10_2_10_50000_1e-05.json new file mode 100644 index 0000000..c690809 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 115338, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 25000, "MATRIX_DENSITY": 1e-05, "TIME_S": 13.82823133468628, "TIME_S_1KI": 0.11989310838306784, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 894.4880589389801, "W": 76.29270117988719, "J_1KI": 7.755363010794188, "W_1KI": 0.6614706443660129, "W_D": 65.81270117988718, "J_D": 771.6160841274261, "W_D_1KI": 0.570607268895656, "J_D_1KI": 0.004947261690818777} diff --git a/pytorch/output_test2/altra_10_2_10_50000_1e-05.output b/pytorch/output_test2/altra_10_2_10_50000_1e-05.output new file mode 100644 index 0000000..57a0249 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_1e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 25000, 25000, 25000]), + col_indices=tensor([13862, 4916, 38787, ..., 5088, 30567, 42215]), + values=tensor([-0.2260, 1.4145, 1.1308, ..., 0.9407, 0.6943, + -0.2440]), size=(50000, 50000), nnz=25000, + layout=torch.sparse_csr) +tensor([0.8792, 0.8161, 0.1487, ..., 0.5739, 0.7719, 0.6015]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 25000 +Density: 1e-05 +Time: 13.82823133468628 seconds + diff --git a/pytorch/output_test2/altra_10_2_10_50000_5e-05.json b/pytorch/output_test2/altra_10_2_10_50000_5e-05.json new file mode 100644 index 0000000..2472b81 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Altra", "ITERATIONS": 132458, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 124998, "MATRIX_DENSITY": 4.99992e-05, "TIME_S": 12.935593843460083, "TIME_S_1KI": 0.09765807911534286, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 887.2351137542726, "W": 76.34686817805957, "J_1KI": 6.698237280906193, "W_1KI": 0.5763854820249404, "W_D": 65.79186817805956, "J_D": 764.5743308150768, "W_D_1KI": 0.49669984582327653, "J_D_1KI": 0.003749866718682726} diff --git a/pytorch/output_test2/altra_10_2_10_50000_5e-05.output b/pytorch/output_test2/altra_10_2_10_50000_5e-05.output new file mode 100644 index 0000000..dbca100 --- /dev/null +++ b/pytorch/output_test2/altra_10_2_10_50000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 2, 4, ..., 124992, 124995, + 124998]), + col_indices=tensor([19401, 47685, 26750, ..., 932, 14818, 47901]), + values=tensor([ 1.5329, 2.1967, 0.7519, ..., 1.6488, -0.2402, + -1.2661]), size=(50000, 50000), nnz=124998, + layout=torch.sparse_csr) +tensor([0.3628, 0.0633, 0.4692, ..., 0.2124, 0.4450, 0.4631]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 124998 +Density: 4.99992e-05 +Time: 12.935593843460083 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.json b/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.json index 24db3a4..3bf5948 100644 --- a/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.json +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.json @@ -1 +1 @@ -{"CPU": "Epyc 7313P", "ITERATIONS": 101034, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999955, "MATRIX_DENSITY": 9.99955e-05, "TIME_S": 10.361092805862427, "TIME_S_1KI": 0.10255055531665011, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1568.5116010475158, "W": 147.64, "J_1KI": 15.524591731966623, "W_1KI": 1.4612902587247856, "W_D": 127.57249999999999, "J_D": 1355.316623033285, "W_D_1KI": 1.2626690025140053, "J_D_1KI": 0.012497466224379963} +{"CPU": "Epyc 7313P", "ITERATIONS": 98312, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999949, "MATRIX_DENSITY": 9.99949e-05, "TIME_S": 10.41502332687378, "TIME_S_1KI": 0.10593847472204593, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1474.0722945690156, "W": 147.65, "J_1KI": 14.993818603720966, "W_1KI": 1.5018512490845473, "W_D": 128.1775, "J_D": 1279.6674672341348, "W_D_1KI": 1.3037828545854016, "J_D_1KI": 0.013261685802195068} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.output b/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.output index 35074ce..00201fb 100644 --- a/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.output +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_0.0001.output @@ -1,17 +1,17 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 12, 24, ..., 999933, 999940, - 999955]), - col_indices=tensor([ 5967, 15636, 19622, ..., 82825, 87847, 97213]), - values=tensor([-1.5657, 1.3165, 0.1051, ..., -0.5017, 0.1827, - -1.1977]), size=(100000, 100000), nnz=999955, +tensor(crow_indices=tensor([ 0, 11, 23, ..., 999932, 999944, + 999949]), + col_indices=tensor([ 9003, 14694, 14961, ..., 25088, 62580, 64370]), + values=tensor([-0.5626, -0.3545, 0.8913, ..., -1.4062, -1.3465, + 0.0257]), size=(100000, 100000), nnz=999949, layout=torch.sparse_csr) -tensor([0.4289, 0.2254, 0.8435, ..., 0.1753, 0.8896, 0.3058]) +tensor([0.5947, 0.3012, 0.0547, ..., 0.1233, 0.4957, 0.0854]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 -NNZ: 999955 -Density: 9.99955e-05 -Time: 10.361092805862427 seconds +NNZ: 999949 +Density: 9.99949e-05 +Time: 10.41502332687378 seconds diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.json index 99ead94..15f4213 100644 --- a/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.json +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.json @@ -1 +1 @@ -{"CPU": "Epyc 7313P", "ITERATIONS": 150582, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 100000, "MATRIX_DENSITY": 1e-05, "TIME_S": 10.027292013168335, "TIME_S_1KI": 0.06659024327720667, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1239.2374660491944, "W": 116.83, "J_1KI": 8.229652057013418, "W_1KI": 0.7758563440517459, "W_D": 97.04249999999999, "J_D": 1029.3477856636046, "W_D_1KI": 0.6444495358010917, "J_D_1KI": 0.0042797249060385146} +{"CPU": "Epyc 7313P", "ITERATIONS": 156417, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 99999, "MATRIX_DENSITY": 9.9999e-06, "TIME_S": 11.098344087600708, "TIME_S_1KI": 0.07095356698824748, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1239.9076641368865, "W": 118.41, "J_1KI": 7.92693674048784, "W_1KI": 0.7570149024722377, "W_D": 97.90375, "J_D": 1025.1803899395466, "W_D_1KI": 0.6259150220244603, "J_D_1KI": 0.004001579253050885} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.output index 4d0c1da..721a35f 100644 --- a/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.output +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_1e-05.output @@ -1,17 +1,16 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 1, 2, ..., 99996, 99997, - 100000]), - col_indices=tensor([98366, 86469, 784, ..., 24883, 35225, 74645]), - values=tensor([ 0.5652, 0.5870, -0.9667, ..., -0.8134, 0.3649, - -0.5054]), size=(100000, 100000), nnz=100000, - layout=torch.sparse_csr) -tensor([0.7832, 0.0968, 0.2513, ..., 0.3975, 0.2140, 0.9668]) +tensor(crow_indices=tensor([ 0, 0, 2, ..., 99996, 99998, 99999]), + col_indices=tensor([18767, 44455, 31476, ..., 37701, 96003, 93517]), + values=tensor([ 1.1909e+00, -1.8994e-01, 2.6436e-04, ..., + -4.3065e-01, -4.1931e-01, -1.6576e+00]), + size=(100000, 100000), nnz=99999, layout=torch.sparse_csr) +tensor([0.3502, 0.7895, 0.7161, ..., 0.4208, 0.4096, 0.7887]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 -NNZ: 100000 -Density: 1e-05 -Time: 10.027292013168335 seconds +NNZ: 99999 +Density: 9.9999e-06 +Time: 11.098344087600708 seconds diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.json new file mode 100644 index 0000000..932f1e8 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 135631, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 499989, "MATRIX_DENSITY": 4.99989e-05, "TIME_S": 10.57732343673706, "TIME_S_1KI": 0.0779860314879125, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1524.0668136215209, "W": 146.54, "J_1KI": 11.236861879817452, "W_1KI": 1.0804314647831248, "W_D": 126.76624999999999, "J_D": 1318.4129569554327, "W_D_1KI": 0.9346406794906768, "J_D_1KI": 0.006891054991046861} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.output new file mode 100644 index 0000000..e90837e --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_100000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 5, 7, ..., 499973, 499982, + 499989]), + col_indices=tensor([ 6400, 26862, 36191, ..., 26915, 49846, 61682]), + values=tensor([-0.6810, -0.8696, 1.1051, ..., -0.5859, 0.5431, + -0.8420]), size=(100000, 100000), nnz=499989, + layout=torch.sparse_csr) +tensor([0.2037, 0.9965, 0.5140, ..., 0.7576, 0.1956, 0.2524]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([100000, 100000]) +Size: 10000000000 +NNZ: 499989 +Density: 4.99989e-05 +Time: 10.57732343673706 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.json b/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.json new file mode 100644 index 0000000..b30fbe3 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 402279, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 9999, "MATRIX_DENSITY": 9.999e-05, "TIME_S": 11.058919668197632, "TIME_S_1KI": 0.02749067107206101, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1068.2311151790618, "W": 98.58, "J_1KI": 2.65544837085471, "W_1KI": 0.24505380594065315, "W_D": 78.6075, "J_D": 851.8054106962682, "W_D_1KI": 0.19540542757638357, "J_D_1KI": 0.0004857460309297368} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.output b/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.output new file mode 100644 index 0000000..33b1947 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 1, ..., 9996, 9999, 9999]), + col_indices=tensor([6722, 8655, 672, ..., 2263, 3918, 5766]), + values=tensor([ 0.4549, 0.3188, 1.2914, ..., 0.2944, -0.2287, + 0.9296]), size=(10000, 10000), nnz=9999, + layout=torch.sparse_csr) +tensor([0.6875, 0.3376, 0.2808, ..., 0.9144, 0.8469, 0.4162]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 9999 +Density: 9.999e-05 +Time: 11.058919668197632 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.json new file mode 100644 index 0000000..2e26395 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 523085, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 1000, "MATRIX_DENSITY": 1e-05, "TIME_S": 10.953115224838257, "TIME_S_1KI": 0.020939455776476587, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1012.5659449434281, "W": 96.26, "J_1KI": 1.9357579455412182, "W_1KI": 0.18402362904690442, "W_D": 76.33250000000001, "J_D": 802.9471222978832, "W_D_1KI": 0.14592752611908202, "J_D_1KI": 0.00027897478635227933} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.output new file mode 100644 index 0000000..4162868 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_1e-05.output @@ -0,0 +1,375 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 1, ..., 999, 999, 1000]), + col_indices=tensor([4647, 1295, 7952, 7931, 1961, 548, 322, 7213, 2539, + 8115, 5638, 9728, 5785, 423, 9827, 8156, 944, 6469, + 8854, 397, 1078, 2935, 918, 7788, 9843, 816, 6498, + 1456, 3379, 5491, 2551, 9880, 2273, 946, 1501, 4929, + 5621, 1002, 6227, 9897, 9029, 2257, 2854, 8941, 546, + 9015, 3126, 8811, 7534, 4255, 7850, 7066, 1278, 4449, + 2066, 8336, 9337, 600, 3851, 1193, 1630, 1450, 7323, + 979, 3315, 4176, 7132, 3696, 6013, 2174, 1011, 8918, + 1264, 3774, 6660, 5516, 996, 27, 1726, 5222, 7460, + 1718, 7503, 4073, 3590, 7782, 758, 9751, 1985, 2127, + 1679, 3304, 7498, 826, 8988, 7827, 5252, 3613, 9874, + 575, 4981, 2402, 1488, 4129, 8185, 1132, 1146, 123, + 8122, 8254, 5602, 5974, 8948, 1037, 7017, 5828, 4605, + 5556, 9136, 5669, 1180, 5960, 8636, 3432, 763, 4504, + 3327, 5439, 6343, 2724, 3552, 4766, 4687, 4033, 6170, + 4561, 2414, 6770, 2877, 9463, 3571, 4217, 494, 5689, + 7034, 8993, 779, 6697, 1415, 5467, 1900, 1658, 8011, + 3698, 7082, 2078, 4079, 9597, 6229, 8413, 5985, 558, + 7315, 8864, 4835, 3071, 4177, 9515, 3212, 4847, 176, + 4926, 3620, 4006, 5454, 5899, 550, 8122, 7484, 8971, + 2623, 260, 9992, 6507, 307, 5053, 8555, 4210, 3708, + 4460, 9607, 4247, 4420, 6646, 8088, 7959, 6273, 9367, + 8422, 6182, 6194, 478, 8740, 2177, 153, 6584, 9519, + 2594, 772, 2163, 3212, 1455, 6045, 6422, 532, 1294, + 4768, 9688, 6160, 6023, 2698, 5648, 232, 9315, 5328, + 2172, 8911, 5449, 8596, 61, 5282, 3013, 8410, 5673, + 3902, 7120, 7539, 727, 1645, 9026, 80, 5125, 5721, + 7397, 9736, 6197, 7809, 409, 5622, 9876, 5268, 3386, + 1991, 6188, 8351, 8692, 438, 6994, 4536, 8846, 7696, + 8174, 5224, 5673, 2329, 4514, 4738, 1559, 1984, 3186, + 6089, 7186, 7322, 7501, 1680, 7379, 2383, 5805, 4316, + 7474, 2081, 4569, 8303, 6010, 4336, 1911, 7462, 6367, + 169, 6218, 1045, 3993, 5177, 7437, 2957, 3160, 3382, + 3172, 1354, 6384, 2532, 6337, 8733, 9434, 6056, 6493, + 8280, 1950, 3130, 4138, 3538, 2798, 3711, 7910, 4728, + 2569, 3073, 9729, 9515, 8853, 1210, 7109, 3973, 1740, + 4387, 2262, 5464, 2587, 1889, 2445, 464, 7267, 2305, + 9962, 9539, 4794, 6861, 6315, 7588, 2222, 7442, 3034, + 3322, 4326, 5973, 6628, 9944, 8167, 1687, 9011, 3002, + 4425, 2565, 1143, 9683, 5783, 6921, 3963, 4032, 5258, + 7995, 7184, 4272, 6542, 2310, 9544, 7353, 7003, 8025, + 3256, 1688, 9274, 5816, 9134, 3282, 564, 3273, 183, + 7479, 188, 3659, 8858, 8789, 5173, 201, 6606, 698, + 7384, 6996, 2540, 7914, 4836, 5896, 2544, 2322, 8202, + 1612, 7162, 6753, 1788, 9220, 9061, 8939, 82, 3603, + 1175, 1933, 1979, 5573, 5391, 205, 7759, 6856, 9928, + 5403, 1924, 6080, 6570, 4149, 168, 4561, 2852, 7450, + 8360, 4675, 5201, 9261, 6889, 726, 9835, 8073, 5064, + 2150, 7098, 445, 7523, 1927, 8473, 4737, 6511, 127, + 4589, 7110, 7575, 5342, 1429, 9880, 3129, 638, 1241, + 1595, 2471, 7140, 6080, 8921, 204, 5093, 884, 8709, + 5660, 1792, 1883, 209, 2285, 8447, 3340, 902, 1811, + 808, 2659, 3983, 4081, 913, 9463, 9578, 350, 2027, + 3710, 4746, 7487, 7890, 8493, 6353, 3044, 1969, 2667, + 1590, 5601, 4613, 5328, 2259, 1282, 4770, 5509, 980, + 8506, 8020, 3073, 665, 1021, 2008, 8436, 2562, 554, + 7469, 6672, 1747, 9623, 4781, 5984, 340, 1280, 7437, + 5352, 5362, 9157, 6561, 1035, 3559, 7683, 4567, 6388, + 1500, 8259, 4090, 2393, 2998, 3331, 7052, 2801, 1066, + 8234, 5995, 6429, 3386, 1773, 4500, 6771, 4410, 127, + 7373, 3380, 6257, 4780, 3178, 4170, 7708, 3378, 6000, + 5622, 6508, 86, 6970, 939, 6696, 8543, 3846, 9682, + 5352, 6706, 941, 3333, 6367, 2268, 8741, 6897, 3352, + 9044, 8765, 4331, 3255, 7467, 7418, 2731, 6414, 7693, + 5901, 4114, 746, 3271, 2519, 9460, 7459, 2716, 9129, + 248, 7688, 1655, 6295, 131, 4923, 944, 1128, 9961, + 1684, 7507, 4888, 963, 6370, 4160, 8830, 988, 7408, + 9975, 6795, 4194, 9618, 7008, 8618, 5275, 8847, 9413, + 9734, 9320, 9180, 930, 8532, 3729, 9706, 7435, 9959, + 582, 7671, 8584, 5079, 8693, 9033, 1298, 3253, 5474, + 3741, 5719, 4740, 6627, 4502, 2746, 747, 8472, 3317, + 4517, 2775, 3800, 1746, 7279, 6227, 5416, 3121, 1956, + 768, 5229, 7435, 3121, 1018, 9295, 9899, 5505, 8373, + 1667, 7633, 1460, 4738, 6295, 6159, 2217, 5721, 7961, + 9181, 5719, 9942, 8964, 9175, 2894, 2566, 5845, 6036, + 9563, 3906, 8312, 1083, 8364, 1807, 5244, 6429, 6154, + 1011, 1160, 6915, 1850, 1724, 936, 8332, 118, 5850, + 8071, 6720, 8568, 9280, 7244, 6526, 5699, 1219, 1331, + 2824, 4727, 6743, 1781, 2519, 7232, 8823, 99, 1947, + 2413, 827, 5384, 1372, 6475, 2559, 9989, 7171, 3457, + 2443, 7032, 7762, 1630, 6106, 8342, 3460, 8316, 9134, + 4123, 3580, 6186, 409, 3778, 237, 4702, 69, 146, + 6113, 7605, 3516, 4401, 4335, 2712, 7359, 1517, 704, + 8721, 3067, 6417, 376, 6191, 6695, 3647, 7647, 775, + 2670, 7399, 3454, 4477, 9474, 987, 8428, 7294, 8143, + 3110, 2501, 7265, 8215, 7276, 7951, 7443, 1367, 2930, + 8288, 7313, 3654, 806, 1977, 1108, 5774, 7919, 5625, + 2316, 4305, 3631, 412, 8233, 282, 53, 5600, 193, + 773, 1936, 9374, 9886, 5250, 8441, 8959, 1612, 1976, + 902, 4139, 6581, 349, 2706, 7877, 5884, 1476, 531, + 3549, 3744, 3349, 4257, 5785, 6020, 4856, 6638, 8326, + 3300, 1951, 4994, 6317, 437, 7398, 6572, 3989, 1925, + 6801, 6209, 2683, 9692, 9841, 6309, 3372, 7816, 2380, + 3458, 5252, 4446, 9391, 375, 4194, 8475, 2247, 9996, + 9841, 1292, 6644, 4904, 4994, 1988, 6230, 4668, 6334, + 3663, 7614, 2888, 2751, 4586, 5493, 1433, 2910, 6489, + 2558, 888, 3089, 6889, 1553, 1658, 6061, 5528, 3780, + 7712, 9172, 6084, 4531, 4953, 6365, 2628, 2300, 3726, + 5312, 6216, 3435, 2232, 4255, 5460, 313, 1130, 6103, + 6636, 8616, 7495, 6495, 7071, 8362, 8026, 557, 4601, + 2566, 3919, 2911, 1263, 6889, 478, 3484, 3989, 8492, + 2978, 6290, 1597, 5865, 5490, 5283, 7556, 7983, 8790, + 4372, 7921, 2811, 9711, 5833, 8309, 5194, 8587, 3146, + 6696, 3123, 8229, 3533, 7634, 3924, 8736, 5634, 1967, + 2944, 8872, 8631, 952, 6129, 5370, 9025, 4383, 3814, + 5398, 3357, 2505, 2595, 7730, 3431, 3115, 8611, 9241, + 2593, 3131, 9494, 2934, 4893, 8004, 5800, 9231, 9045, + 1076, 539, 5794, 4121, 7902, 8040, 6989, 5265, 8964, + 2651, 9828, 1991, 8908, 9179, 1906, 6413, 2530, 5999, + 4668, 7976, 5875, 4636, 5512, 1553, 4032, 8335, 4016, + 7903, 1788, 8021, 504, 6250, 1625, 309, 9742, 8816, + 6767]), + values=tensor([ 1.4774e+00, -1.9034e+00, -8.2230e-01, 1.0186e+00, + 5.1585e-01, 8.7567e-01, -2.9277e-01, -1.9882e-01, + -3.1053e-01, 6.3713e-01, 2.1544e+00, 1.8021e+00, + 5.4942e-01, 2.2987e-01, 5.5233e-01, 1.7304e-01, + -2.1377e+00, -5.0276e-01, 6.8051e-01, 2.4130e-01, + 1.0573e-01, 1.9507e-01, 1.6470e+00, -6.0090e-01, + -1.3917e+00, -1.3560e+00, 4.9608e-02, -2.2779e-01, + -1.5553e+00, -6.6643e-01, 9.4623e-02, -4.1692e-01, + -9.2234e-01, 8.9114e-01, 2.9580e-01, -7.2335e-01, + 1.6835e+00, 1.1049e+00, 1.0875e+00, -4.4526e-01, + -1.3659e+00, 3.7386e-01, -4.5881e-02, -1.0150e+00, + 1.2738e+00, 1.2610e+00, -1.1803e+00, -3.7665e-01, + 2.2471e-01, 1.4791e+00, 9.2070e-01, 7.0025e-01, + -1.0079e+00, 5.7982e-01, -1.5695e+00, 2.9406e-01, + 9.8637e-01, 5.2604e-01, 5.2968e-02, 2.1938e-01, + 7.5714e-01, -2.0968e+00, 1.8278e+00, 6.1690e-01, + -5.7480e-01, 7.2753e-01, -5.6282e-01, -5.9638e-01, + 1.9641e-01, 4.9142e-01, -1.4958e+00, 6.8649e-01, + -8.9458e-02, 5.3001e-03, -1.3790e+00, 3.2407e-01, + 8.0328e-01, 3.5465e-01, -1.6067e+00, -2.4955e+00, + -5.0680e-01, -7.8152e-01, 6.2687e-01, 7.0234e-01, + -9.8273e-01, -8.8614e-01, -1.4896e+00, 1.3437e+00, + 3.6498e-01, -1.0000e-01, 1.3578e+00, 1.2920e+00, + 1.2163e+00, 1.2852e-01, -6.2648e-02, -4.3623e-01, + 1.0197e-01, -1.9350e-01, 9.2390e-01, 2.0579e+00, + -1.2477e+00, 2.3546e+00, 9.6638e-01, -1.2217e+00, + 1.2095e+00, -6.7570e-01, 1.1593e-01, 1.8368e-03, + 1.1921e-01, -1.1243e+00, -2.3158e+00, 8.4472e-01, + -9.1001e-02, 8.6444e-01, 5.9517e-01, 1.7739e-01, + -2.3286e+00, 3.2360e-01, -4.3629e-01, -1.7693e+00, + -6.6408e-01, 3.4119e-01, -9.5183e-01, 1.8893e-01, + 7.7407e-01, -6.4112e-01, -1.5160e-01, -3.6138e-01, + -1.5229e+00, 1.5788e-01, -2.0770e+00, -4.4577e-01, + 3.7312e-01, -1.7092e+00, 5.5901e-01, 2.1351e+00, + 1.7962e-01, 9.3331e-03, -3.6358e-01, -7.0235e-01, + -2.0697e-01, -3.8959e-01, 8.8226e-01, 3.6739e-01, + 1.5750e+00, -1.5797e-01, 1.1664e-01, 2.2296e-01, + 1.1107e+00, -3.6718e-01, -1.1099e+00, 1.5072e+00, + -5.5474e-02, -2.0976e-01, 4.1536e-01, 7.6963e-01, + 5.1936e-01, -1.1578e+00, -1.1344e+00, 1.0286e+00, + 9.8558e-01, -3.0517e-01, 1.6606e+00, 5.5288e-02, + 7.2935e-01, 3.6626e-01, 1.0661e+00, -8.5631e-01, + 4.8161e-03, 1.9160e+00, -5.8392e-01, -1.0421e+00, + -1.1306e+00, 7.9325e-02, -3.8846e-01, 1.0591e+00, + -1.0159e+00, 9.2227e-01, 1.5886e-01, -9.3713e-01, + 1.1494e+00, -1.1400e-01, 3.8183e-01, -1.3387e+00, + -2.3070e+00, 6.3951e-01, 2.8067e-01, 1.2126e+00, + -5.8674e-02, -6.2345e-01, -1.8086e-01, -4.3900e-01, + -8.8748e-01, -2.3683e-01, 1.2554e+00, -1.7986e+00, + -2.1087e-01, 1.2202e+00, 2.4920e-01, -1.6691e+00, + 2.5550e+00, 3.4194e-01, 1.1023e+00, -3.8545e-02, + -5.9368e-02, -2.5914e-01, 9.9329e-01, -2.6167e-01, + 6.6743e-01, 1.5576e+00, -1.0044e-01, -3.3954e-01, + -3.6835e-01, -8.5184e-01, -2.7121e-01, -1.0550e+00, + 1.2444e+00, 5.4080e-01, -6.5235e-01, -7.1625e-02, + 1.2853e+00, -5.4941e-01, -1.0605e+00, -1.4626e+00, + 1.2877e+00, -6.3918e-01, 3.7117e-01, 1.3761e+00, + -3.3136e-01, -1.4807e+00, -9.4804e-01, -8.9754e-01, + -1.9493e+00, 9.9428e-01, -1.6656e+00, -9.2101e-01, + -3.2562e-01, -7.2095e-01, 1.4112e-01, -1.0885e-01, + 5.8341e-01, -1.4903e-01, 9.6966e-01, 8.4062e-01, + -1.4059e+00, -1.3398e-02, 3.8838e-02, -1.2159e-01, + -6.9554e-01, 8.7279e-01, -5.5869e-01, 1.0446e+00, + -4.8532e-01, 6.1932e-01, 3.9699e-01, 1.1671e+00, + 9.5693e-02, 9.3210e-02, 1.0799e+00, 2.0201e-02, + 6.0990e-01, 4.8067e-01, -1.3500e+00, -1.3848e-03, + 6.7272e-01, -8.0327e-01, 1.6446e-01, -1.2311e+00, + 4.3927e-01, -1.7767e-01, 1.1837e+00, -1.3280e-01, + 2.6880e-01, -1.2931e+00, -4.2672e-02, 5.4013e-01, + -1.4976e+00, -1.5350e+00, 4.2091e-01, -7.6595e-01, + -9.4768e-01, 1.0620e+00, 8.7924e-01, -1.3656e+00, + 7.7697e-01, 4.3042e-01, 1.0872e+00, -1.8971e+00, + -9.2905e-01, 5.4788e-01, 4.1512e-01, 1.8564e-02, + 2.5627e-01, 1.5844e-01, -1.1948e+00, 4.8507e-02, + 6.0071e-01, 8.5215e-01, 8.7713e-01, -4.6370e-01, + -5.0806e-01, -2.5515e-01, -9.1428e-01, 1.1916e-01, + -1.7128e+00, -9.6324e-01, -2.3281e+00, -1.7253e-01, + 8.5910e-01, 4.5060e-01, -1.2805e+00, -1.4226e+00, + -1.3586e-01, -7.8466e-01, -1.6343e+00, -3.8296e-01, + 3.5768e-01, 2.9541e+00, -1.8078e+00, -1.4735e+00, + -1.7233e+00, -4.5483e-01, -8.8884e-02, -1.8152e-01, + 1.0560e+00, -3.8076e-01, 8.4391e-01, -5.9993e-01, + 8.0498e-01, -2.9542e-01, 5.5183e-01, 4.7850e-01, + 5.3558e-01, -1.1390e-02, -1.5187e+00, 7.8137e-01, + 1.0152e+00, 4.5427e-01, 1.6245e+00, -4.5355e-01, + -1.1284e-01, 6.6219e-02, -2.3243e+00, -1.8291e+00, + -4.5931e-01, 1.9089e-01, -4.0537e-01, -9.5941e-02, + -3.3064e-01, 1.0927e+00, 2.3745e-01, -1.3086e+00, + -8.7807e-01, 5.5230e-01, -2.7218e-01, -8.5828e-01, + -7.2146e-01, -2.1333e+00, 3.2772e-01, -3.5118e-01, + -6.8939e-01, -7.6587e-01, -2.5551e-01, 1.2934e+00, + 8.6855e-01, -4.4933e-01, 8.8039e-01, -1.9645e-01, + 6.4996e-02, 3.4184e-01, -1.6909e+00, -8.4311e-01, + 1.3937e+00, -7.3523e-01, -8.1308e-01, -1.0950e+00, + 2.9976e-01, 1.9687e-01, -1.4858e-02, 1.5501e+00, + -1.0730e+00, 1.3726e+00, 7.1194e-01, 4.2179e-01, + -2.2949e-01, -1.4817e+00, 9.8377e-01, -4.6259e-01, + 1.5189e+00, 1.8922e-01, -1.5433e+00, 6.4250e-01, + -9.2730e-01, -1.6384e+00, 2.7592e-01, -7.8516e-01, + -1.8050e+00, -9.9316e-01, -3.0863e-01, 1.5999e+00, + 8.3383e-01, 2.2719e-01, 1.2329e+00, 2.6758e-01, + 8.4876e-01, 1.6294e+00, -1.6685e+00, -8.4900e-01, + -1.3590e+00, 4.7903e-01, 1.9328e-01, -1.0739e+00, + -1.1280e+00, 5.0037e-01, 1.9288e-01, -3.6745e-01, + 1.4357e+00, 4.9803e-01, -9.0778e-01, -1.5903e+00, + 1.2057e+00, 8.5606e-02, 1.9850e+00, 3.9562e-01, + -8.2714e-01, 3.9565e-01, -5.3300e-01, -1.1163e+00, + -4.2531e-01, -2.3260e+00, 8.1330e-01, 7.0376e-01, + 1.0117e+00, 4.4273e-01, 4.6496e-01, 1.7566e+00, + 9.1152e-01, 8.4584e-01, -8.1503e-02, 1.0063e-01, + -1.4846e+00, -9.8863e-01, -6.0240e-01, -5.1249e-01, + -9.0139e-01, -1.1627e+00, -7.6422e-01, 4.4281e-01, + -1.5166e-01, 7.3228e-01, 1.4279e+00, -2.0918e-01, + 1.2849e+00, -4.5600e-01, 9.8661e-02, 8.2443e-02, + 1.0623e+00, 6.3365e-01, 8.0188e-02, -2.0101e-01, + -1.1472e+00, -3.8804e-01, -1.9462e+00, 2.6471e-01, + -7.4000e-01, -1.0588e+00, 6.4899e-03, -1.4338e-01, + 1.0538e+00, -1.2824e+00, -2.5085e+00, -1.7331e-02, + -1.2868e+00, 9.2576e-01, 1.0816e+00, 1.3831e-01, + -8.5682e-01, -9.5847e-01, 2.7750e-01, 4.3030e-01, + -3.3784e-01, 4.0159e-01, -1.3604e-02, 4.0193e-01, + -4.5598e-01, 1.0023e+00, 6.2504e-02, 8.0422e-01, + 6.4891e-01, 2.6172e-01, 3.8444e-01, -1.8258e-01, + -1.0682e+00, 1.6767e+00, -5.9099e-01, -1.1465e+00, + -2.4719e+00, 1.0795e+00, 1.9153e-01, 2.3596e-01, + -6.3782e-01, 1.3409e+00, 7.3850e-01, -1.8026e-02, + -1.4949e+00, 1.0021e+00, 4.1492e-01, -2.2544e+00, + -8.3357e-01, -1.4024e+00, 1.1193e+00, 9.4477e-01, + 1.9160e+00, -4.2618e-01, -1.1531e+00, -7.6533e-02, + -1.3039e+00, -7.3835e-01, 1.3188e+00, 4.3461e-01, + 1.0613e+00, -1.0092e+00, -4.4870e-01, -1.0435e+00, + 2.5450e-01, 1.3864e+00, 4.1661e-01, -7.2851e-01, + -4.5282e-01, 3.3290e-02, 1.3471e+00, -4.6221e-01, + -1.4970e+00, 6.6208e-02, -2.4024e-02, -5.2706e-01, + -6.4908e-01, 1.0242e+00, 3.0981e-01, 9.9542e-01, + -4.7328e-01, -9.0162e-01, -5.0963e-01, -2.5595e-01, + -1.5820e-02, 4.0786e-01, -2.0804e+00, -3.0692e-01, + 3.2806e-01, 1.6641e+00, 1.4886e+00, 1.2551e-01, + 2.2856e+00, -1.1111e+00, -1.3408e+00, -1.1433e+00, + 5.5891e-01, 6.2348e-01, -3.9987e-01, 1.8547e+00, + -8.8490e-02, 6.8572e-01, -3.0707e-01, -1.7110e+00, + -1.5514e+00, -1.5731e+00, -1.1459e+00, -1.8522e+00, + -8.8170e-01, -1.6604e+00, 3.1056e-01, -2.7506e-01, + -5.8439e-01, -1.0790e+00, -1.3537e-01, 4.9679e-01, + 2.0699e-01, 1.1270e+00, -3.4786e-01, 7.4295e-01, + 1.5392e+00, 4.4749e-01, 1.0631e+00, -1.2118e+00, + -6.2016e-01, -8.0634e-01, -3.0503e-01, -1.4727e-01, + 1.8130e+00, 1.2632e+00, 1.8286e+00, -2.3866e+00, + 1.6357e+00, -5.5224e-01, -6.1724e-01, 2.9560e-01, + -4.3014e-01, 7.0331e-01, -1.6486e-01, 1.5988e+00, + 3.9326e-01, 8.1806e-01, 1.1004e+00, 2.4719e-01, + -9.1543e-01, -9.4896e-01, 4.1227e-01, 1.3896e-01, + 1.2136e+00, 6.2576e-01, 5.9636e-01, 5.1880e-01, + 6.5617e-01, 1.0917e+00, 6.2311e-01, -8.5467e-01, + 5.3468e-01, 2.5315e+00, 8.9457e-01, -1.3365e+00, + -8.8855e-01, 1.0990e-01, 3.4835e-01, 2.1834e-01, + -1.8697e+00, 5.4834e-02, 1.0645e+00, -5.6060e-01, + 1.2602e+00, 1.5836e+00, -5.3802e-01, 2.0736e+00, + -1.2956e+00, 1.1569e+00, 2.4883e+00, 5.5114e-01, + 1.0230e+00, 1.7359e+00, -1.7189e+00, -2.4630e-01, + 2.5550e-01, -8.1469e-01, 2.0109e+00, 8.3171e-02, + 1.1638e+00, 1.5765e+00, -1.4145e+00, 2.2278e+00, + -7.4152e-01, 3.7439e-01, 1.8619e-02, -2.5400e-01, + 6.8341e-01, 1.1718e-01, -1.0690e+00, 1.4741e-02, + 2.5142e-01, 2.6004e+00, 1.1302e+00, -1.7900e+00, + 3.5523e-01, -1.5607e-03, 1.5313e-01, 6.1991e-01, + -1.8253e+00, -9.4847e-01, -3.3350e-01, 2.1402e+00, + 2.3421e+00, 1.6823e-01, -7.0590e-01, -3.7156e-01, + 3.7793e-02, -6.7847e-01, -1.0251e+00, 1.2298e+00, + 2.1682e-01, -2.6518e-01, 6.3263e-02, -6.3446e-01, + 5.8485e-01, 2.4167e-01, -5.6682e-01, 4.8558e-01, + -8.6387e-01, 6.4603e-02, 2.0638e-01, 1.0381e+00, + 1.1751e+00, -1.8290e-01, 1.3803e+00, 2.0003e+00, + 6.3373e-01, 1.5940e-01, 1.0758e+00, -4.2354e-01, + -4.0828e-02, -1.8293e+00, 3.3506e-01, 4.2480e-01, + 1.7636e-01, 1.0873e-01, -4.8361e-01, -1.0026e-01, + -7.5282e-01, -1.6940e+00, 1.6567e-01, 1.7524e+00, + 8.7328e-01, 7.1980e-01, -7.3903e-02, -1.5992e+00, + 3.5314e-01, -1.0527e+00, -1.4238e+00, -2.6858e+00, + -1.0809e+00, 2.2724e-01, 2.6745e-01, 5.4161e-01, + -4.9346e-01, -5.0384e-01, -9.1497e-01, 1.6168e+00, + -4.5604e-01, -2.0449e+00, -3.2436e-01, 1.9041e+00, + 3.8490e-01, 5.8397e-01, 2.4258e-01, 4.6215e-01, + -6.3383e-01, 2.2416e-01, -1.0570e-03, 1.2739e+00, + 1.0544e+00, -4.5959e-01, -1.9574e-01, -4.8141e-01, + -9.0162e-01, -7.5849e-01, -8.5144e-01, -1.2785e+00, + 3.0691e-01, 1.4021e-01, 6.6012e-01, 1.2105e+00, + -9.4146e-01, -2.9055e-01, 6.3757e-01, -1.0330e+00, + -2.1625e+00, 4.7504e-01, -2.0946e-01, -5.0273e-01, + 2.6705e-01, 6.0660e-01, -1.3649e+00, -1.9798e+00, + -9.8631e-01, 4.7616e-01, 1.1786e+00, 3.2594e-01, + 5.7676e-01, 1.0104e+00, -9.3109e-01, 3.6480e-01, + 1.0100e+00, -1.3095e+00, 7.5254e-01, -8.6249e-02, + -4.5556e-01, -3.2329e-01, -8.7189e-01, -1.4503e+00, + -1.2672e+00, 1.4716e+00, -1.4726e+00, 5.1861e-01, + 1.6589e-01, 1.5992e+00, -1.0192e+00, -1.2210e+00, + 1.0045e+00, -6.4953e-01, 5.2774e-02, 1.8174e-01, + -6.2736e-01, 4.6841e-01, 1.4066e+00, -1.0460e+00, + -5.2439e-01, 5.6814e-01, 2.6570e-02, 1.7824e+00, + -5.2446e-01, 2.1179e+00, -6.9675e-01, 8.3444e-02, + 1.4590e+00, -5.6098e-01, 1.0774e-01, -2.0498e+00, + -4.9974e-01, 1.3325e-01, -5.7896e-01, -1.9690e+00, + 2.1932e-02, 1.9330e+00, -4.5281e-01, 2.8954e-01, + 9.6404e-02, 7.4822e-01, 6.2616e-01, 2.3362e+00, + 8.4433e-02, 1.6629e+00, 8.6470e-02, -4.7773e-01, + 6.0162e-01, -2.6995e-01, -1.7166e+00, -2.1468e+00, + -8.8638e-01, -6.6910e-01, -8.5639e-01, 1.9590e-01, + -1.0267e+00, -2.4710e-01, 8.8860e-01, 1.3216e+00, + -8.1887e-01, 4.2267e-01, 6.8528e-01, 2.5021e-02, + 1.3074e-01, 4.7596e-01, -6.1381e-01, -3.9693e-01, + 9.1585e-01, 2.4342e-01, 9.3114e-02, 1.1635e+00, + -2.8888e-01, -1.2860e+00, -6.5543e-01, -1.0001e+00, + -2.6336e+00, 1.1523e+00, 7.6031e-02, -1.5559e+00, + -1.7696e+00, -1.1676e+00, 2.5711e-01, -6.1194e-02, + -2.5604e+00, -3.5082e-04, 1.1333e+00, -6.3115e-01, + 3.8487e-01, -9.9423e-01, -4.6050e-01, -4.4407e-01, + 1.1573e+00, 1.9616e+00, 2.3081e-01, -1.7214e+00, + -5.9683e-01, 7.2177e-01, 5.5150e-01, -2.6976e-02, + 4.9376e-01, 2.2903e+00, 3.3082e-01, -5.9719e-02, + 8.6577e-02, 3.9437e-01, 5.0027e-01, 1.4199e+00, + 6.7964e-01, 2.2392e-01, -4.1305e-01, 4.8636e-01, + -7.0156e-01, -1.8982e-01, 2.9470e-01, -1.4112e+00, + 1.2496e+00, -1.8674e-01, -4.2865e-01, 2.0369e+00, + 2.7128e+00, 2.5072e-01, 1.2118e-01, 5.2286e-01, + 1.0655e+00, 5.5021e-01, 8.6323e-01, -3.5954e-01, + 5.2638e-01, -8.6807e-02, 1.5776e+00, 2.2448e-01, + -9.5654e-01, -2.2316e-01, 4.1198e-01, 1.5892e+00, + 1.5577e+00, 1.2127e+00, 9.1719e-01, -9.5950e-01, + -8.6558e-01, 4.7157e-01, -2.8923e-02, -3.6520e-01, + 2.3698e+00, -1.8278e-01, -1.1968e+00, -5.0584e-01, + -3.0307e-01, -7.0528e-01, 1.0867e+00, -1.1913e-01, + -7.6922e-01, -1.7579e-01, 1.6677e+00, 8.2541e-01, + 1.2866e+00, -1.8878e-01, -4.6991e-01, 7.8482e-01, + -2.2554e+00, 7.4173e-01, 2.1759e-01, -1.2295e+00, + -4.3719e-01, 1.6515e+00, -3.3248e+00, -1.1890e-01, + -5.4792e-01, -7.0226e-01, 1.2593e+00, -1.0807e+00, + -4.3653e-01, -1.4557e+00, 3.7223e-01, -7.0067e-01, + 1.8311e-01, -1.2096e+00, -7.0326e-01, 7.6123e-01, + 2.2030e-01, -1.4887e-01, -3.4302e-01, 8.7138e-01, + -9.7662e-02, 5.8218e-02, -3.6415e-01, -1.6065e-01, + 1.8607e+00, -1.5571e+00, 7.2682e-01, -3.6674e-01, + -7.0598e-02, -6.8534e-01, -1.8873e+00, -4.1586e-01, + 1.0436e+00, -1.1082e+00, -1.6328e+00, -9.9428e-01, + -1.6107e+00, 2.7890e-01, 5.1244e-01, -5.0264e-01, + 9.5351e-01, 7.3103e-01, 8.8388e-01, 7.4883e-01, + -5.4148e-01, 1.0652e+00, 9.6621e-02, 1.1595e+00, + -9.0660e-01, 4.4410e-02, -1.8294e+00, -6.6544e-02, + -7.8287e-01, 1.4151e+00, -1.1213e+00, -6.6041e-01, + 1.5469e+00, 1.6831e+00, -1.4223e+00, -1.6415e+00, + -7.0729e-01, -8.8041e-01, 1.0207e+00, -5.4149e-01]), + size=(10000, 10000), nnz=1000, layout=torch.sparse_csr) +tensor([0.3467, 0.4957, 0.9411, ..., 0.4421, 0.9080, 0.4314]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 1000 +Density: 1e-05 +Time: 10.953115224838257 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.json new file mode 100644 index 0000000..02b6ebc --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 405688, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 4999, "MATRIX_DENSITY": 4.999e-05, "TIME_S": 10.101872205734253, "TIME_S_1KI": 0.024900594066707058, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 901.5193470287323, "W": 97.06, "J_1KI": 2.2221987020289786, "W_1KI": 0.23924789493403797, "W_D": 77.18, "J_D": 716.8685679340363, "W_D_1KI": 0.19024472008045593, "J_D_1KI": 0.00046894342470187907} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.output new file mode 100644 index 0000000..14a4ebe --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_10000_5e-05.output @@ -0,0 +1,15 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 4997, 4997, 4999]), + col_indices=tensor([7534, 4703, 7615, ..., 690, 3311, 9850]), + values=tensor([0.9319, 1.5848, 0.2665, ..., 1.2538, 0.8654, 0.6652]), + size=(10000, 10000), nnz=4999, layout=torch.sparse_csr) +tensor([0.4609, 0.8821, 0.6641, ..., 0.4010, 0.9569, 0.0599]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 4999 +Density: 4.999e-05 +Time: 10.101872205734253 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.json b/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.json new file mode 100644 index 0000000..e284a48 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 224845, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 40000, "MATRIX_DENSITY": 0.0001, "TIME_S": 10.829553365707397, "TIME_S_1KI": 0.048164528300417606, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1060.466723241806, "W": 102.78, "J_1KI": 4.716434535977255, "W_1KI": 0.45711490137650385, "W_D": 82.84125, "J_D": 854.74206009686, "W_D_1KI": 0.36843714558918367, "J_D_1KI": 0.0016386272569511604} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.output b/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.output new file mode 100644 index 0000000..1dbdf47 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 2, 3, ..., 39996, 39998, 40000]), + col_indices=tensor([ 3017, 12065, 12288, ..., 14488, 300, 17624]), + values=tensor([ 0.9413, 1.2109, -0.1435, ..., -0.9306, 1.4038, + -1.7362]), size=(20000, 20000), nnz=40000, + layout=torch.sparse_csr) +tensor([0.4349, 0.6098, 0.4010, ..., 0.6451, 0.4277, 0.0573]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 40000 +Density: 0.0001 +Time: 10.829553365707397 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.json new file mode 100644 index 0000000..5f036f5 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 376559, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 4000, "MATRIX_DENSITY": 1e-05, "TIME_S": 11.13594126701355, "TIME_S_1KI": 0.029572898980009903, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1044.873656282425, "W": 99.08, "J_1KI": 2.774794006470234, "W_1KI": 0.26311945803977593, "W_D": 79.34625, "J_D": 836.7663135829567, "W_D_1KI": 0.21071399169851207, "J_D_1KI": 0.0005595776271407988} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.output new file mode 100644 index 0000000..1a84764 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_1e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 3999, 3999, 4000]), + col_indices=tensor([ 8934, 212, 12203, ..., 17644, 4637, 9395]), + values=tensor([ 1.2371, 0.0694, 2.3960, ..., -1.0433, -0.2651, + 0.6109]), size=(20000, 20000), nnz=4000, + layout=torch.sparse_csr) +tensor([0.2961, 0.5849, 0.9840, ..., 0.2712, 0.9792, 0.9610]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 4000 +Density: 1e-05 +Time: 11.13594126701355 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.json new file mode 100644 index 0000000..0d6b7a5 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 247171, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 19999, "MATRIX_DENSITY": 4.99975e-05, "TIME_S": 10.491997718811035, "TIME_S_1KI": 0.0424483362482291, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1038.4885951280594, "W": 99.49, "J_1KI": 4.201498537967882, "W_1KI": 0.4025148581346517, "W_D": 79.61874999999999, "J_D": 831.070095822215, "W_D_1KI": 0.32212011117809125, "J_D_1KI": 0.0013032277701594897} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.output new file mode 100644 index 0000000..944fe92 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_20000_5e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 1, ..., 19995, 19995, 19999]), + col_indices=tensor([ 6664, 8664, 844, ..., 13486, 13952, 14311]), + values=tensor([ 0.0765, 0.5519, 0.0090, ..., 1.4320, -0.8858, + -0.6986]), size=(20000, 20000), nnz=19999, + layout=torch.sparse_csr) +tensor([0.3197, 0.2721, 0.5326, ..., 0.8808, 0.5799, 0.4532]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 19999 +Density: 4.99975e-05 +Time: 10.491997718811035 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.json b/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.json new file mode 100644 index 0000000..bceeacc --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 126709, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 249991, "MATRIX_DENSITY": 9.99964e-05, "TIME_S": 10.611387491226196, "TIME_S_1KI": 0.08374612293701471, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1311.9159492492677, "W": 120.65, "J_1KI": 10.35377083908221, "W_1KI": 0.952181770829223, "W_D": 100.78750000000001, "J_D": 1095.9364171981813, "W_D_1KI": 0.7954249500824725, "J_D_1KI": 0.006277572627693948} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.output b/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.output new file mode 100644 index 0000000..4eb0d9f --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_0.0001.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 3, 5, ..., 249979, 249987, + 249991]), + col_indices=tensor([ 6923, 14624, 14826, ..., 16653, 24983, 26510]), + values=tensor([-0.3339, -0.1256, 1.3717, ..., 0.6008, -2.5611, + -0.3793]), size=(50000, 50000), nnz=249991, + layout=torch.sparse_csr) +tensor([0.2532, 0.9095, 0.3546, ..., 0.7075, 0.6638, 0.8293]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 249991 +Density: 9.99964e-05 +Time: 10.611387491226196 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.json new file mode 100644 index 0000000..89ed1d5 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 210368, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 25000, "MATRIX_DENSITY": 1e-05, "TIME_S": 11.059137105941772, "TIME_S_1KI": 0.05257043421975668, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1123.8110211277008, "W": 103.99, "J_1KI": 5.342119624314062, "W_1KI": 0.49432423182233043, "W_D": 83.88749999999999, "J_D": 906.5650258183479, "W_D_1KI": 0.3987654966534834, "J_D_1KI": 0.001895561571405743} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.output new file mode 100644 index 0000000..cab4f93 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_1e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 1, ..., 24999, 24999, 25000]), + col_indices=tensor([17991, 40249, 32851, ..., 16381, 40475, 35032]), + values=tensor([-2.1931, -0.1057, -0.3336, ..., 1.6608, 0.0622, + -1.3985]), size=(50000, 50000), nnz=25000, + layout=torch.sparse_csr) +tensor([0.4030, 0.6455, 0.3588, ..., 0.8411, 0.5744, 0.8801]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 25000 +Density: 1e-05 +Time: 11.059137105941772 seconds + diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.json b/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.json new file mode 100644 index 0000000..c7d2a96 --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Epyc 7313P", "ITERATIONS": 161422, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 124998, "MATRIX_DENSITY": 4.99992e-05, "TIME_S": 10.579585790634155, "TIME_S_1KI": 0.06553992510707435, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 1212.8924506664275, "W": 114.44, "J_1KI": 7.513798928686471, "W_1KI": 0.7089492138618032, "W_D": 94.58875, "J_D": 1002.4989583447576, "W_D_1KI": 0.5859718625713967, "J_D_1KI": 0.0036300619653541442} diff --git a/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.output b/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.output new file mode 100644 index 0000000..664f5bd --- /dev/null +++ b/pytorch/output_test2/epyc_7313p_10_2_10_50000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 2, 5, ..., 124996, 124997, + 124998]), + col_indices=tensor([11989, 44778, 46590, ..., 12645, 23142, 31661]), + values=tensor([-0.0866, 1.8120, -0.0219, ..., 1.2549, -1.2066, + -0.6973]), size=(50000, 50000), nnz=124998, + layout=torch.sparse_csr) +tensor([0.3195, 0.2357, 0.1472, ..., 0.2809, 0.6862, 0.5275]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 124998 +Density: 4.99992e-05 +Time: 10.579585790634155 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.json b/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.json index 71fa75a..f86760c 100644 --- a/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.json +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.json @@ -1 +1 @@ -{"CPU": "Xeon 4216", "ITERATIONS": 41245, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999957, "MATRIX_DENSITY": 9.99957e-05, "TIME_S": 10.48258900642395, "TIME_S_1KI": 0.2541541764195406, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 907.7721267700196, "W": 86.4, "J_1KI": 22.00926480227954, "W_1KI": 2.0947993696205605, "W_D": 77.29, "J_D": 812.0568018293382, "W_D_1KI": 1.8739241120135777, "J_D_1KI": 0.045433970469476975} +{"CPU": "Xeon 4216", "ITERATIONS": 41417, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 999936, "MATRIX_DENSITY": 9.99936e-05, "TIME_S": 10.603114604949951, "TIME_S_1KI": 0.2560087549786308, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 911.4264350938797, "W": 86.86, "J_1KI": 22.006094963273046, "W_1KI": 2.097206461115001, "W_D": 77.6825, "J_D": 815.1264568752051, "W_D_1KI": 1.8756187072941064, "J_D_1KI": 0.045286203908880565} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.output b/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.output index ac2290e..557df2e 100644 --- a/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.output +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_0.0001.output @@ -1,17 +1,17 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 10, 20, ..., 999933, 999947, - 999957]), - col_indices=tensor([10614, 12000, 12630, ..., 76477, 82289, 92989]), - values=tensor([ 0.5650, 0.5553, -0.5300, ..., 0.3637, -1.1395, - 0.7341]), size=(100000, 100000), nnz=999957, +tensor(crow_indices=tensor([ 0, 15, 22, ..., 999917, 999929, + 999936]), + col_indices=tensor([ 6790, 16501, 25658, ..., 50375, 68639, 74275]), + values=tensor([ 0.5666, 1.0499, -2.0495, ..., 1.5720, 1.2770, + -0.7894]), size=(100000, 100000), nnz=999936, layout=torch.sparse_csr) -tensor([0.5083, 0.7251, 0.1206, ..., 0.9177, 0.3147, 0.5521]) +tensor([0.9755, 0.4770, 0.7031, ..., 0.5604, 0.7471, 0.7018]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 -NNZ: 999957 -Density: 9.99957e-05 -Time: 10.48258900642395 seconds +NNZ: 999936 +Density: 9.99936e-05 +Time: 10.603114604949951 seconds diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.json index 4eb3a2c..ebc670b 100644 --- a/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.json +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.json @@ -1 +1 @@ -{"CPU": "Xeon 4216", "ITERATIONS": 118541, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 99998, "MATRIX_DENSITY": 9.9998e-06, "TIME_S": 10.91425085067749, "TIME_S_1KI": 0.09207152673486381, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 868.9650503587723, "W": 82.07, "J_1KI": 7.3305021077835715, "W_1KI": 0.6923342978378788, "W_D": 72.5, "J_D": 767.636970281601, "W_D_1KI": 0.6116027366059, "J_D_1KI": 0.005159419412742426} +{"CPU": "Xeon 4216", "ITERATIONS": 117123, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 99998, "MATRIX_DENSITY": 9.9998e-06, "TIME_S": 10.340282917022705, "TIME_S_1KI": 0.08828567332652601, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 875.3140355348587, "W": 81.91, "J_1KI": 7.473459828853929, "W_1KI": 0.6993502557140784, "W_D": 72.77, "J_D": 777.6413425207138, "W_D_1KI": 0.6213126371421497, "J_D_1KI": 0.005304787592037001} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.output index 7fa6eba..23046c4 100644 --- a/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.output +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_1e-05.output @@ -1,16 +1,16 @@ /nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) matrix = matrix.to_sparse_csr().type(torch.float32) -tensor(crow_indices=tensor([ 0, 2, 3, ..., 99997, 99997, 99998]), - col_indices=tensor([90305, 96230, 86891, ..., 66888, 39495, 21203]), - values=tensor([-0.5290, 1.7137, 0.7615, ..., 1.2465, -0.3855, - -0.4542]), size=(100000, 100000), nnz=99998, +tensor(crow_indices=tensor([ 0, 0, 0, ..., 99997, 99997, 99998]), + col_indices=tensor([58363, 95270, 20035, ..., 59508, 69423, 51805]), + values=tensor([ 1.0401, 0.8766, 1.7073, ..., -0.0072, 1.9232, + 0.0142]), size=(100000, 100000), nnz=99998, layout=torch.sparse_csr) -tensor([0.2048, 0.5046, 0.8421, ..., 0.4453, 0.3792, 0.7036]) +tensor([0.2867, 0.2295, 0.4526, ..., 0.7869, 0.6646, 0.3034]) Matrix: synthetic Matrix: csr Shape: torch.Size([100000, 100000]) Size: 10000000000 NNZ: 99998 Density: 9.9998e-06 -Time: 10.91425085067749 seconds +Time: 10.340282917022705 seconds diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.json new file mode 100644 index 0000000..188b10a --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 69686, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [100000, 100000], "MATRIX_SIZE": 10000000000, "MATRIX_NNZ": 499986, "MATRIX_DENSITY": 4.99986e-05, "TIME_S": 10.165817260742188, "TIME_S_1KI": 0.14588033838564687, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 899.6590386629105, "W": 88.05, "J_1KI": 12.910183374894677, "W_1KI": 1.2635249547972334, "W_D": 78.845, "J_D": 805.6060977101325, "W_D_1KI": 1.1314324254513102, "J_D_1KI": 0.016236151098517785} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.output new file mode 100644 index 0000000..93d5792 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_100000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 4, 9, ..., 499980, 499983, + 499986]), + col_indices=tensor([58761, 63539, 72385, ..., 33504, 41124, 68298]), + values=tensor([-0.1835, 1.2708, -2.4180, ..., -0.2638, 0.8943, + 0.8332]), size=(100000, 100000), nnz=499986, + layout=torch.sparse_csr) +tensor([0.6028, 0.3331, 0.7633, ..., 0.8876, 0.4663, 0.2198]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([100000, 100000]) +Size: 10000000000 +NNZ: 499986 +Density: 4.99986e-05 +Time: 10.165817260742188 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.json b/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.json new file mode 100644 index 0000000..d491488 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 395356, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 9999, "MATRIX_DENSITY": 9.999e-05, "TIME_S": 10.566214323043823, "TIME_S_1KI": 0.026725822608089478, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 788.1841578483582, "W": 74.55, "J_1KI": 1.9936061621636147, "W_1KI": 0.18856423071864345, "W_D": 65.39375, "J_D": 691.3791787028313, "W_D_1KI": 0.16540472384382682, "J_D_1KI": 0.0004183690745652698} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.output b/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.output new file mode 100644 index 0000000..d4b012a --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 2, ..., 9997, 9997, 9999]), + col_indices=tensor([4106, 8016, 7128, ..., 5609, 1233, 8666]), + values=tensor([ 0.6536, -2.5242, 0.4276, ..., 0.4750, -1.7889, + 1.5433]), size=(10000, 10000), nnz=9999, + layout=torch.sparse_csr) +tensor([0.4100, 0.4025, 0.7291, ..., 0.6304, 0.5931, 0.7594]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 9999 +Density: 9.999e-05 +Time: 10.566214323043823 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.json new file mode 100644 index 0000000..68e363e --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 476678, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 1000, "MATRIX_DENSITY": 1e-05, "TIME_S": 11.187696933746338, "TIME_S_1KI": 0.023470134836821373, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 768.1570157098771, "W": 73.37, "J_1KI": 1.6114798998692559, "W_1KI": 0.15391941730056768, "W_D": 64.16125000000001, "J_D": 671.7447774869205, "W_D_1KI": 0.13460082067978807, "J_D_1KI": 0.0002823726303286245} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.output new file mode 100644 index 0000000..c42649a --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_1e-05.output @@ -0,0 +1,375 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 1000, 1000, 1000]), + col_indices=tensor([1654, 1104, 4675, 5854, 9970, 6100, 7133, 1211, 5315, + 478, 5135, 2953, 9948, 210, 8574, 5480, 706, 6269, + 5530, 5031, 109, 9703, 1681, 7878, 4767, 73, 2990, + 1980, 7102, 7980, 8316, 5886, 6974, 5917, 9033, 2097, + 3756, 1865, 5092, 506, 4936, 8829, 4251, 7526, 735, + 119, 6076, 5655, 3122, 634, 2728, 1562, 3695, 1648, + 778, 8113, 2206, 8081, 4091, 8173, 782, 4321, 1750, + 1852, 2090, 8555, 5712, 8459, 1945, 1827, 5789, 9433, + 575, 1337, 9782, 4287, 940, 3740, 2213, 3489, 2407, + 4565, 4982, 616, 3681, 9974, 8636, 4712, 274, 5769, + 9310, 5852, 948, 7110, 3189, 7817, 3034, 5348, 1310, + 1600, 1331, 8145, 6789, 6506, 2524, 7361, 9546, 6104, + 4114, 2605, 5194, 9689, 3746, 1794, 6787, 3992, 6930, + 8095, 1632, 1600, 6024, 8301, 4591, 8401, 2342, 6134, + 2532, 7185, 343, 5241, 4909, 4214, 2599, 8582, 8301, + 1989, 9709, 4969, 6232, 9576, 4713, 2595, 5689, 161, + 7726, 543, 3218, 1587, 8396, 271, 1909, 5298, 5883, + 1242, 7207, 1778, 9276, 7577, 3639, 7286, 2567, 6898, + 9404, 32, 1616, 5683, 350, 6436, 5159, 1744, 9102, + 2158, 4825, 8204, 8910, 9960, 3041, 283, 9413, 9477, + 8492, 3018, 8915, 4982, 983, 9219, 7688, 123, 9926, + 4268, 404, 3138, 7807, 663, 489, 4950, 2286, 1795, + 3195, 1042, 6016, 9527, 9570, 3938, 8025, 7256, 2908, + 8415, 8604, 9071, 4084, 1721, 6702, 781, 9824, 1781, + 5723, 4705, 8757, 1063, 6186, 9430, 7501, 1361, 7845, + 9308, 6371, 8126, 8436, 8552, 3454, 6226, 9563, 4512, + 947, 2377, 9834, 817, 2442, 60, 4590, 9068, 8827, + 1779, 4747, 7755, 9009, 5013, 2920, 6690, 3530, 1469, + 1786, 5255, 5087, 1344, 3441, 2155, 6034, 9164, 4806, + 8033, 3663, 9646, 9496, 1065, 451, 7356, 2369, 8975, + 7246, 6556, 7119, 611, 5262, 3363, 5016, 4780, 6989, + 1462, 2927, 2941, 7524, 8585, 620, 4381, 4611, 2390, + 6351, 6392, 2716, 2901, 8119, 9351, 3233, 1380, 5192, + 1139, 2019, 6187, 9128, 4966, 6818, 9032, 3326, 4479, + 5157, 9058, 2265, 2437, 5342, 5987, 1916, 286, 9442, + 4735, 1772, 9348, 1035, 6605, 2261, 8048, 2906, 6099, + 9775, 884, 7642, 7696, 4461, 2945, 8476, 5571, 1022, + 1449, 9452, 3052, 5422, 5656, 4443, 386, 7561, 4488, + 2861, 9084, 7435, 7231, 1364, 7623, 3712, 5975, 5287, + 9, 3152, 5939, 2231, 446, 5195, 8003, 2221, 1963, + 5660, 4977, 946, 8593, 6567, 1112, 7843, 9457, 8283, + 6659, 9300, 5806, 9576, 4404, 4647, 2372, 8462, 512, + 389, 7579, 8476, 7323, 7396, 9342, 5766, 8724, 6802, + 4825, 145, 5849, 7164, 5359, 5821, 6456, 7718, 1161, + 4775, 2785, 6318, 1280, 874, 137, 2308, 7477, 2233, + 5392, 1347, 7767, 5497, 3254, 7639, 9300, 50, 9443, + 1222, 8620, 5538, 2442, 3533, 2, 3868, 1635, 6564, + 8631, 2848, 8701, 1508, 8966, 7644, 2394, 8227, 6499, + 9274, 4592, 1732, 2020, 2449, 5474, 7600, 2330, 105, + 864, 2737, 8522, 4697, 6522, 7153, 8488, 6769, 1988, + 1294, 5686, 7332, 5716, 6014, 716, 7322, 6756, 1027, + 1196, 7938, 3792, 1467, 4744, 7997, 3255, 5290, 967, + 2723, 4319, 4002, 4644, 1487, 3415, 3210, 135, 7605, + 6768, 9289, 2430, 1487, 3156, 3983, 8702, 8664, 3791, + 3973, 1612, 7864, 5303, 6120, 7378, 4092, 1038, 9811, + 8236, 2401, 6604, 4421, 8073, 2574, 1741, 4901, 9453, + 5484, 6366, 6958, 2412, 5274, 6091, 4825, 2256, 1076, + 1899, 7714, 7093, 8167, 8616, 3991, 5037, 7182, 7837, + 5918, 7847, 4329, 9551, 4718, 8473, 5573, 8536, 1135, + 9150, 2750, 5406, 437, 1230, 7421, 1415, 251, 5820, + 9293, 3073, 473, 3504, 164, 2957, 3042, 737, 920, + 7450, 6677, 515, 1435, 4231, 4947, 4772, 1289, 8550, + 4581, 5021, 898, 6210, 8611, 634, 7559, 3396, 5023, + 6204, 4616, 3591, 601, 6042, 5419, 2270, 5798, 6090, + 2349, 1330, 9428, 6924, 3697, 7874, 7463, 4272, 8679, + 334, 3596, 6747, 4908, 6553, 9066, 2921, 5449, 2589, + 2187, 940, 3181, 4064, 2838, 3543, 3597, 1321, 3230, + 6851, 2292, 878, 9854, 8495, 8140, 7914, 932, 5459, + 8636, 8484, 5254, 3172, 1890, 9445, 467, 6702, 7840, + 1431, 8378, 1872, 2127, 7316, 2148, 6684, 50, 8302, + 3376, 5991, 6653, 8969, 1407, 4484, 7530, 7326, 6381, + 8418, 4407, 5343, 9695, 2035, 2781, 9162, 5437, 97, + 228, 6968, 3221, 3603, 5790, 253, 9745, 8510, 4866, + 6191, 2710, 7638, 7528, 9759, 8018, 346, 2013, 9723, + 1254, 4985, 8351, 9290, 7108, 6930, 281, 7865, 3380, + 343, 5120, 3674, 2017, 7980, 2399, 8096, 6857, 2682, + 2534, 273, 9230, 366, 5628, 8518, 3578, 4993, 6939, + 2546, 3258, 275, 504, 3054, 5659, 985, 5845, 5202, + 5231, 2157, 4130, 1080, 9288, 6318, 1324, 9985, 2260, + 8010, 7291, 2236, 8429, 1992, 9163, 573, 7719, 1548, + 4736, 3825, 3930, 6223, 477, 3313, 7580, 6101, 6637, + 5413, 676, 3838, 8740, 7377, 6753, 9060, 3515, 8820, + 1599, 5075, 3296, 1726, 7572, 327, 2194, 7904, 7118, + 584, 292, 4770, 6148, 4539, 7795, 5951, 4563, 6369, + 9465, 3910, 8549, 7636, 9479, 9175, 2853, 9853, 5218, + 7597, 9992, 7454, 2752, 3037, 8707, 357, 4313, 5123, + 3870, 9577, 9378, 6766, 3189, 3315, 756, 3086, 1524, + 4813, 9018, 2222, 2306, 2772, 5886, 9549, 9759, 5260, + 9662, 4309, 5956, 7114, 3870, 4528, 6998, 1063, 3733, + 2742, 6418, 5844, 3231, 6651, 3343, 4031, 4869, 1426, + 3762, 5230, 3544, 7997, 5099, 4957, 6171, 6793, 5618, + 8568, 7043, 4998, 3481, 8334, 6152, 5168, 9404, 6224, + 937, 2023, 550, 423, 7177, 4955, 9995, 3147, 7879, + 923, 5351, 8253, 2764, 4092, 7617, 3910, 5693, 9336, + 6987, 8912, 7856, 3148, 8435, 111, 1674, 6187, 2541, + 7845, 6106, 725, 1101, 7812, 7687, 3388, 1404, 5509, + 7005, 6562, 9841, 6680, 5073, 6936, 8647, 1368, 780, + 7886, 4972, 8129, 4057, 7748, 905, 4296, 7029, 7428, + 3388, 4419, 1198, 4605, 5087, 5257, 8172, 7183, 8803, + 7925, 1630, 789, 90, 3632, 7740, 5780, 349, 3088, + 3861, 9468, 7697, 8502, 9512, 1220, 9103, 4292, 792, + 146, 5294, 8069, 3770, 9191, 1933, 9801, 3276, 3838, + 7524, 1556, 525, 2612, 2458, 4895, 5271, 9286, 7467, + 3478, 6081, 4405, 9527, 1083, 9889, 5402, 4040, 4739, + 8412, 444, 4296, 4837, 9634, 5614, 3247, 1927, 8790, + 2690, 6187, 4711, 7943, 8606, 4605, 7479, 9169, 9739, + 8593, 2518, 3008, 4814, 7616, 817, 4815, 8631, 4743, + 4989, 7048, 2659, 4210, 722, 349, 3291, 5587, 6076, + 5858, 5081, 6292, 6315, 5299, 1392, 7851, 8121, 2173, + 2809, 2899, 2234, 514, 4841, 7070, 2431, 2952, 6928, + 4500, 4100, 9357, 3, 7960, 2209, 9461, 2811, 410, + 2094]), + values=tensor([-4.8176e-01, 1.3833e+00, -1.8209e-01, -8.2137e-01, + -5.7257e-01, 1.5526e+00, -5.5037e-01, 8.0032e-01, + 3.2901e-01, -3.5345e-01, 2.6346e-01, 5.4653e-01, + 6.4941e-01, -1.3241e+00, 2.7635e-01, -1.3716e+00, + -7.6230e-01, 1.4233e-01, -7.7668e-01, -8.1970e-02, + 1.3957e+00, -9.2301e-01, -4.9746e-02, 1.0034e+00, + -8.2411e-01, -7.2606e-02, 8.8880e-02, -2.9893e-01, + -1.7740e+00, 3.0888e-01, 1.6410e+00, 1.1671e-01, + -2.4674e+00, -4.6490e-01, -1.3220e+00, -7.1927e-01, + 1.9118e+00, 7.4122e-01, 8.9791e-01, -3.9476e-01, + -5.5494e-01, 1.0323e+00, -8.2545e-01, -4.1676e-01, + 5.5162e-01, 6.0593e-01, -4.7204e-01, -3.6296e-01, + -2.9680e-01, 5.9518e-01, -3.6637e-01, -1.0319e+00, + -3.7294e-01, -3.6091e-01, 6.5648e-01, -1.5579e+00, + -5.9853e-01, 1.1665e+00, 9.7701e-01, -7.2069e-01, + 5.3282e-01, -7.3723e-01, -8.4875e-01, 7.4306e-03, + 1.4955e+00, -1.6505e+00, -2.5035e+00, -8.9697e-01, + -1.3400e+00, 1.0964e+00, 1.0408e+00, 3.5874e-02, + -1.7858e+00, 4.0748e-01, -9.2050e-01, 2.0273e+00, + -1.4051e-01, -1.3370e+00, 1.2009e+00, -6.2723e-02, + -2.4750e-01, -7.2609e-01, 2.0098e-01, -1.7501e+00, + 8.0190e-02, 1.5007e-01, 7.4738e-01, -1.5900e+00, + -2.5245e-01, 2.4192e-01, -2.9848e-01, -1.4833e+00, + -1.4152e+00, 7.6090e-01, -7.4684e-01, 3.2277e-01, + 6.4209e-01, 4.5986e-01, 2.6006e-01, -1.3322e-01, + -5.3277e-01, 9.3977e-01, -6.0385e-01, -5.9643e-01, + -8.7923e-01, -1.3271e-01, 6.7074e-02, 1.3653e+00, + -7.1640e-01, 2.7959e-02, -1.9413e+00, -5.5779e-01, + 4.4798e-01, -9.1343e-01, -1.2413e+00, 9.6573e-01, + 4.6101e-01, 3.4667e-01, -1.2377e+00, 8.6892e-01, + 9.7082e-01, -8.2838e-01, -1.3242e-01, -4.7113e-01, + 1.0736e+00, -6.3903e-01, 2.0796e-01, 7.7331e-01, + -3.0117e-01, 2.6725e-01, 3.8078e-01, -1.3118e+00, + 8.4570e-02, -7.4116e-01, -1.0122e+00, 9.1681e-01, + 8.9492e-02, -4.4761e-01, 1.0865e+00, -1.2483e-01, + 1.4130e+00, -1.8280e-01, 9.1984e-01, -1.5652e+00, + 1.2926e+00, 9.2758e-01, -1.5914e-01, -7.3516e-01, + 1.2751e+00, -8.9539e-02, 1.4552e+00, -5.3861e-01, + -1.8197e+00, 1.0036e+00, 2.7176e+00, -1.3472e+00, + 5.6195e-01, 4.9765e-01, 2.0106e-01, -1.8617e+00, + -4.7750e-01, 1.4508e+00, -8.1787e-01, 2.1117e-01, + 1.0078e+00, -5.7631e-01, 3.5483e-01, 2.1961e-02, + -1.3475e+00, 2.1110e-01, 3.8120e-03, 8.2768e-02, + -9.1796e-01, -6.4778e-01, 3.8173e-01, 1.4168e+00, + 1.4013e-01, -1.9595e+00, 7.9639e-01, -3.4552e-01, + 1.1968e+00, 2.6613e-01, -4.1305e-01, -5.4668e-03, + 2.1271e+00, 3.3008e-01, -9.9718e-02, 9.9591e-01, + 2.8449e-01, -7.3716e-01, 5.9116e-01, -1.0669e-01, + 7.2910e-01, -1.2956e+00, 2.9254e-01, -2.1768e+00, + -8.7578e-01, 4.3425e-01, 1.6423e+00, 1.7066e-01, + -1.4572e+00, -3.1115e-01, -1.2769e+00, -9.4374e-01, + -9.3026e-01, -7.8455e-01, 1.3593e-01, 6.7372e-01, + -1.0246e+00, 3.3980e-01, 7.3473e-01, 8.7399e-01, + 6.7824e-01, -1.4826e+00, 1.6237e-01, 1.4513e-01, + -1.6972e+00, 2.2642e-02, -1.3871e+00, -1.7861e+00, + 7.5440e-03, -1.5089e+00, -1.8918e+00, 4.3819e-01, + -2.5954e-01, 1.5283e+00, 1.3483e-01, -6.3885e-01, + -6.8770e-01, 1.8963e+00, -9.0393e-01, -5.9254e-01, + 1.4358e+00, 7.4929e-01, -7.9435e-01, -6.3637e-01, + 2.3565e-01, -1.3680e+00, -1.1607e+00, -4.3228e-01, + 2.2752e-01, -1.5701e+00, -7.2385e-01, 2.1715e+00, + 5.0176e-01, -9.1547e-01, -1.1477e+00, 4.8477e-01, + -6.1993e-01, -9.6034e-01, 2.1312e-01, 6.8167e-01, + 3.9546e-01, -5.0155e-01, -2.3010e+00, 8.1677e-02, + -7.6593e-02, 6.0044e-01, -5.6240e-01, -7.2354e-02, + -5.3549e-02, -1.6841e-01, -2.7525e+00, 2.4114e-01, + -4.4123e-01, -8.3691e-01, -2.0250e+00, 2.5560e-01, + -4.6854e-01, 1.0440e+00, -1.3026e-02, 8.6522e-01, + 3.2798e-02, -3.5249e-01, 1.1208e+00, -1.8801e+00, + -1.4191e+00, -5.3744e-01, -5.9773e-02, 2.3213e+00, + -3.4030e-01, -1.0599e+00, -9.2847e-01, -1.1462e+00, + -9.3283e-01, 1.5488e+00, -9.8844e-01, 4.8718e-01, + -8.2729e-01, 1.0692e+00, 1.9620e-01, -4.5411e-01, + 7.7838e-02, 9.7217e-02, -1.8327e+00, 6.1161e-01, + 3.2046e-01, -9.4473e-01, 1.9492e+00, -1.0999e+00, + -7.9930e-01, -2.4908e+00, 8.4131e-01, -4.9478e-01, + 4.8337e-01, -1.9781e+00, 5.0777e-01, -3.4391e-01, + -6.6262e-01, 1.3652e+00, -1.0856e+00, -1.3016e-01, + -7.2410e-01, -1.5331e+00, -7.5705e-01, -1.8505e+00, + 2.0425e-01, 2.4103e-01, 6.2314e-01, -9.0279e-01, + 6.3619e-01, 1.4427e+00, 1.0064e+00, 6.4928e-02, + -3.9091e-01, -1.4298e-01, -1.0291e+00, 3.3745e-01, + 3.7974e-01, -3.5223e-01, 9.6685e-01, 2.1980e-01, + -1.0115e+00, -4.7249e-01, 4.6540e-02, 1.2089e+00, + 6.9314e-01, -8.6273e-01, 1.3022e+00, -4.5418e-02, + -1.0972e+00, -7.0189e-01, 6.2567e-01, 1.1986e+00, + 1.0303e+00, -4.7801e-02, 5.6203e-01, -6.4360e-01, + -4.1837e-01, -3.5917e-01, -2.3727e+00, -1.3125e+00, + 7.5596e-01, -6.2826e-01, -1.2298e+00, -1.8300e-01, + -5.4747e-01, -2.3532e-01, -7.3902e-01, 4.1469e-01, + 2.2335e+00, -8.7657e-01, 8.5888e-01, -1.1770e+00, + 2.0493e+00, -6.8118e-01, 1.5013e+00, -3.3960e-01, + 1.1355e+00, 2.3526e-01, 1.8034e-01, -1.0958e+00, + -1.2261e+00, 4.0897e-01, -4.2872e-01, -1.8961e+00, + 3.9162e-01, -3.5232e-02, 1.7274e+00, -7.8502e-02, + 1.1604e+00, 4.7280e-01, -1.3066e+00, 2.3080e-01, + -1.2216e-01, -5.5845e-01, -1.1204e-01, -1.9166e+00, + -9.2895e-02, -1.9216e-01, -2.9641e-01, 8.7811e-02, + -1.6684e+00, -5.5148e-01, 1.6150e-01, -2.4129e-01, + -1.3496e-01, 1.0254e+00, -8.3753e-01, -1.2184e+00, + 7.6461e-01, 1.6704e+00, 5.8565e-02, 3.2151e-01, + -3.4935e-01, -1.3887e+00, -8.2473e-01, 3.7228e-01, + -6.7665e-01, 2.2524e-01, -7.5009e-01, -8.2533e-01, + 5.4638e-01, 1.1608e+00, 2.4551e-01, 1.0442e+00, + -2.2091e+00, 1.5339e-01, 1.5135e+00, -7.4042e-01, + -9.1126e-02, 1.7649e-01, 2.6282e-02, -2.6788e-01, + 1.3826e+00, 2.0871e+00, 2.4985e-01, -7.5911e-01, + 3.0764e-01, -1.5976e+00, -1.0223e+00, -1.1064e+00, + -2.6039e-01, -5.4336e-01, 1.6761e+00, -8.9946e-01, + 2.3798e+00, 4.4901e-01, 7.2322e-01, -7.8520e-01, + -4.4113e-01, 1.0453e+00, -6.8131e-01, -8.3771e-02, + -8.1761e-01, -5.7947e-01, 2.8470e-01, 4.8130e-01, + -5.9792e-02, 7.4798e-02, 8.3214e-01, -1.6115e-02, + 2.1004e+00, -1.9277e+00, -5.8264e-01, -4.4842e-01, + 1.0546e-01, -5.0709e-02, 6.4367e-01, -4.9936e-01, + -1.3981e+00, 3.9247e-01, 5.3691e-01, 4.6383e-01, + -1.0534e+00, -4.6643e-01, 1.0515e+00, -1.1465e+00, + -1.4271e+00, 1.4321e+00, 1.3445e+00, -2.0552e+00, + -2.2965e+00, -1.7854e+00, 7.7557e-01, -7.9617e-01, + -7.9508e-01, -4.8400e-01, 7.8004e-01, -1.9954e+00, + 2.0904e+00, -4.6499e-01, -7.3970e-01, -4.6092e-01, + 3.0531e-01, 4.0282e-01, 2.2734e+00, 8.2466e-01, + 2.2024e-01, 6.3515e-01, 3.7627e-02, -3.1571e-01, + -1.1474e+00, -5.6880e-01, 8.6719e-01, -5.1288e-01, + -8.5742e-01, 8.6148e-01, 1.3342e+00, 1.1824e+00, + 2.0991e+00, -8.3636e-01, -8.4462e-01, 1.0354e+00, + 1.8701e+00, -5.5031e-01, 9.0735e-01, -9.4435e-01, + 1.3093e+00, 1.6520e+00, -8.6865e-02, 3.1265e-01, + 7.9198e-01, 3.0680e-01, -1.0098e+00, 9.7298e-01, + 1.0101e-01, -8.9944e-01, 3.0745e-01, 3.8583e-01, + -2.3706e+00, 2.0500e-03, -7.5115e-01, -1.3732e+00, + -1.0134e+00, -1.7605e+00, 1.6164e+00, -7.0092e-01, + -2.2740e+00, -2.1053e-01, 9.1323e-01, -8.7235e-02, + 1.4652e+00, -7.0488e-01, 1.0087e+00, -4.5216e-01, + 7.7164e-01, 8.0955e-01, 3.6573e-01, 6.3805e-01, + -7.2943e-01, -1.7263e+00, 2.7461e-01, -9.7021e-01, + 1.1009e+00, -7.0909e-01, 3.4298e-01, 3.8915e-01, + 2.1427e+00, -8.9532e-01, -9.1108e-01, -6.1773e-01, + 1.0636e+00, -1.0135e+00, 1.2431e+00, -3.9389e-01, + -1.5275e-01, -1.1259e+00, -1.2357e+00, 1.0335e+00, + -1.7496e-01, 8.2169e-01, -7.1848e-01, -1.3329e+00, + 1.3217e+00, -2.3043e-01, -8.0953e-02, -2.0720e+00, + -1.6196e-01, -8.8473e-02, -7.4153e-01, 1.3605e+00, + 7.8564e-01, -9.0746e-02, -1.8390e+00, -1.0966e+00, + -1.1471e+00, -9.3499e-01, 2.4971e+00, -1.7359e-01, + 6.8960e-01, 1.3091e-02, 8.0217e-01, 5.4156e-01, + -9.5815e-01, 1.3365e+00, 2.1937e-03, 1.3349e+00, + -1.3831e+00, -5.8637e-02, 9.8449e-01, -4.7135e-01, + -1.6031e+00, -4.1448e-01, 9.2374e-01, -1.5946e+00, + 2.0805e-01, -2.8803e-01, -2.7629e-01, 1.2531e+00, + 2.8596e-01, 9.8368e-01, 3.9480e-01, -1.4442e+00, + 4.7663e-01, -2.4717e-01, 9.5427e-01, 1.4055e-01, + -6.5796e-02, 4.9864e-02, -2.1816e+00, -2.4210e+00, + 7.0967e-01, 3.4625e-01, -5.3392e-01, 2.1058e-01, + 1.1355e+00, 1.0136e+00, 7.2296e-01, -1.3099e-01, + 3.7469e-01, -4.6614e-01, 1.2097e+00, 1.6545e+00, + 1.5209e+00, 1.3079e-01, 2.7652e-01, 1.1988e-01, + -2.3084e+00, 1.1074e+00, -2.7647e-01, -7.8619e-02, + 5.8584e-02, 5.4417e-01, 3.4919e-01, 4.0257e-01, + -8.0396e-01, 1.2973e+00, -6.0563e-01, -6.4760e-02, + 9.9923e-02, 1.4287e+00, -1.0946e-02, -1.2329e+00, + -2.2836e+00, 2.0995e-01, 9.8164e-01, 7.9413e-01, + -1.5134e+00, -1.2562e+00, 5.8957e-02, 4.7070e-01, + 2.3419e+00, -1.3139e+00, 1.8747e+00, 2.0700e+00, + -1.5023e+00, -1.9998e-01, -8.8775e-01, 1.0809e+00, + 1.0537e-01, 1.9242e+00, 1.5848e+00, 7.6454e-01, + -1.9968e+00, 7.0724e-01, 4.8470e-01, 5.9933e-01, + 7.9559e-01, -7.2783e-01, -7.7681e-01, 2.4647e-01, + 4.4276e-02, -2.3025e+00, 1.0871e+00, -1.4844e-01, + -2.4633e-01, 2.5657e-02, -8.2268e-01, -6.0448e-02, + -1.0613e+00, 1.7994e+00, 5.3950e-01, -4.4513e-01, + -1.0966e+00, -1.8238e+00, -4.2284e-01, -5.9390e-01, + 4.4126e-02, -7.3443e-02, 5.1138e-01, 2.5205e+00, + 4.2606e-01, -8.1304e-01, -3.0688e-02, -1.4845e+00, + -8.8771e-01, 1.1266e+00, -3.0798e-01, -7.1247e-01, + 1.4196e+00, -8.6671e-01, -6.0795e-01, 1.2660e+00, + -1.8673e-01, -4.3188e-01, 8.3412e-01, -8.6337e-02, + -2.4473e+00, 7.9490e-01, 3.0068e-01, 1.0345e+00, + -8.6877e-01, -1.4248e+00, 9.1902e-01, 4.3338e-01, + 1.2110e-01, 8.6950e-02, -9.0798e-04, 1.5272e+00, + 1.2326e+00, -1.5178e-01, -6.9644e-01, 7.0378e-01, + -1.4619e+00, 1.4223e+00, -5.8794e-01, -1.7062e+00, + -8.8796e-02, 2.9008e-01, -1.8979e-01, 2.0930e-01, + 1.3557e+00, 6.7349e-01, -4.6404e-01, -7.7619e-01, + -4.2321e-01, 1.6817e-01, -3.0062e-01, 1.1289e+00, + 3.3492e-01, -2.5341e-01, -2.0937e+00, -5.3060e-02, + -1.2958e+00, -1.9158e+00, 6.0796e-01, 1.5373e+00, + 5.5875e-01, -2.0739e-01, 1.0779e+00, -5.2903e-01, + -4.8817e-01, 4.0454e-01, -1.9102e-01, 1.6527e+00, + -1.5796e-01, -1.2526e+00, 9.4571e-02, -4.0707e-01, + -1.2596e+00, 7.2539e-01, 1.5785e+00, -1.3305e+00, + -2.5043e-01, 4.2680e-01, 1.7951e+00, -7.7350e-01, + 1.4649e+00, -1.6265e+00, 3.9058e-01, 1.5078e+00, + 6.1269e-01, -1.0664e+00, -1.1686e+00, -1.2193e+00, + 3.2467e-01, 3.9051e-01, -5.5200e-01, 1.3785e+00, + -3.9128e-01, 6.9070e-01, -1.1654e+00, -3.5444e-01, + -1.6195e-01, 3.0730e-01, -9.6870e-01, 4.2070e-01, + 4.0745e-01, 2.2876e+00, 1.4650e+00, -3.3493e-02, + -1.1086e+00, 2.3923e+00, -1.0399e+00, -1.3889e+00, + -1.3925e+00, 3.5987e-01, 5.6371e-01, 3.2694e-01, + 1.9104e+00, 6.0407e-02, -5.7308e-02, -6.5825e-01, + 7.2978e-01, -6.2420e-01, 5.6086e-01, 2.9303e+00, + 9.5242e-01, 1.3335e+00, 3.6687e-01, 2.7254e-01, + 5.5345e-01, 1.2182e+00, 2.4368e+00, 3.9376e-01, + -2.6783e-01, 1.3739e+00, 8.0999e-02, -8.6219e-01, + -3.9697e-02, 3.9133e-01, -1.1332e-01, 6.4560e-01, + 4.2850e-01, -1.3259e+00, -8.0176e-01, -3.4058e-01, + 2.8782e-01, 1.3521e+00, -1.7105e+00, 1.7491e-01, + -7.6611e-02, 3.8719e-01, -1.2320e+00, 8.5197e-01, + 3.9473e-01, -3.5982e-01, 4.8003e-01, -4.2240e-01, + -9.5596e-02, -3.6750e-01, 1.1325e-02, -3.0086e-02, + 1.8793e+00, -1.9815e+00, 5.6579e-01, -8.1610e-01, + -1.7869e+00, 1.6062e+00, -6.7557e-01, 4.3911e-01, + -5.0054e-01, 8.8378e-01, 2.7626e+00, 6.4125e-01, + 2.8855e-01, 8.1351e-01, 6.6238e-01, 2.8131e+00, + -3.6867e-01, 1.5074e+00, 2.6711e-01, -1.6078e+00, + 2.0317e-01, 6.2640e-02, -2.0642e-01, 1.7985e+00, + 2.0713e-01, 3.2554e-01, -5.3402e-01, -6.9594e-01, + -5.1038e-01, 1.1465e+00, -1.3147e+00, -1.2995e+00, + 6.0172e-02, 1.4313e+00, 1.6462e+00, -6.9562e-01, + -1.6168e+00, 2.3005e-01, -1.0662e-01, -1.0360e+00, + -4.2360e-01, 7.7584e-02, 1.5929e-01, 1.3169e+00, + 1.0285e+00, 2.8344e-01, -2.1833e-01, -1.5123e+00, + 5.0198e-01, 5.9931e-01, -1.0409e+00, -8.7931e-02, + -1.6818e+00, -2.5217e-01, -4.7770e-02, 4.6147e-01, + 1.0181e+00, -6.7574e-01, 1.6547e+00, 1.4723e+00, + -7.8444e-02, 5.1298e-01, -1.5199e+00, 4.7903e-01, + -6.4973e-01, 1.3039e-01, 1.7015e+00, 3.7630e-02, + -1.4349e+00, -9.7091e-01, 3.1178e-01, -5.2630e-01, + -2.1096e-02, -2.5317e-01, 1.2015e+00, -4.3441e-01, + -8.1679e-01, 2.9420e-01, 2.1152e-01, 1.1556e-01, + -1.0851e+00, 1.3174e+00, -4.9545e-02, 5.5209e-01, + 1.2620e-02, 3.6496e-01, -7.2885e-01, -5.8240e-01, + -1.6989e+00, 2.3335e-01, 5.2563e-01, -1.1906e+00, + -8.2775e-01, -7.5084e-01, 6.4604e-02, 8.7449e-02, + 1.1698e+00, 3.9196e-01, 1.4219e-01, -1.9752e+00, + -9.2015e-01, 7.0171e-02, -9.8036e-01, 7.3232e-02, + 3.9501e-01, 5.8356e-03, 1.0470e+00, -1.3574e-01, + 1.8646e-01, 6.1086e-02, 1.5866e+00, -5.6900e-01, + -1.3039e+00, 3.1103e-01, -1.8408e-01, 4.9445e-01, + 3.8955e-02, 6.4997e-01, 7.2714e-01, -8.3344e-01, + 2.3698e-01, -1.1504e+00, -1.8965e+00, 2.7283e-01, + -2.4724e-01, -1.3143e-01, -4.3957e-01, -6.3226e-01, + 7.6214e-01, -8.4973e-01, 1.0677e+00, -5.5402e-01, + -1.4500e+00, 1.0347e-01, 5.6076e-01, -1.4678e+00, + -6.3220e-01, 1.1301e+00, -3.3657e-01, -7.9221e-01, + 8.9233e-01, -7.2263e-01, -4.1515e-01, -1.0156e+00, + -6.1813e-01, 8.7238e-01, 9.2822e-01, 4.2539e-01, + 3.7094e-01, 9.9011e-01, 8.8612e-01, 1.5185e+00, + 8.0851e-01, 6.1060e-01, -1.1706e+00, -1.3243e+00]), + size=(10000, 10000), nnz=1000, layout=torch.sparse_csr) +tensor([0.2908, 0.6605, 0.7225, ..., 0.3770, 0.3334, 0.5813]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 1000 +Density: 1e-05 +Time: 11.187696933746338 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.json new file mode 100644 index 0000000..ef35488 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 440665, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [10000, 10000], "MATRIX_SIZE": 100000000, "MATRIX_NNZ": 5000, "MATRIX_DENSITY": 5e-05, "TIME_S": 10.798494100570679, "TIME_S_1KI": 0.024504996086756787, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 793.5612908649445, "W": 74.13, "J_1KI": 1.8008266843632794, "W_1KI": 0.1682230265621277, "W_D": 64.96625, "J_D": 695.4633915102482, "W_D_1KI": 0.14742775123960378, "J_D_1KI": 0.00033455743306049667} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.output new file mode 100644 index 0000000..6a3a172 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_10000_5e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 5000, 5000, 5000]), + col_indices=tensor([8266, 4110, 1127, ..., 2062, 4056, 905]), + values=tensor([-0.6934, -1.5934, 0.4073, ..., -1.3866, -0.1937, + -1.3300]), size=(10000, 10000), nnz=5000, + layout=torch.sparse_csr) +tensor([0.9293, 0.8596, 0.7145, ..., 0.3618, 0.3352, 0.4743]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([10000, 10000]) +Size: 100000000 +NNZ: 5000 +Density: 5e-05 +Time: 10.798494100570679 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.json b/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.json new file mode 100644 index 0000000..a486cc2 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 259904, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 39996, "MATRIX_DENSITY": 9.999e-05, "TIME_S": 10.36286997795105, "TIME_S_1KI": 0.039871914160424814, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 801.7571017837524, "W": 77.24, "J_1KI": 3.0848201712315024, "W_1KI": 0.2971866535336124, "W_D": 68.00375, "J_D": 705.884121056795, "W_D_1KI": 0.2616494936591972, "J_D_1KI": 0.0010067159168739121} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.output b/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.output new file mode 100644 index 0000000..2eb4668 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_0.0001.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 6, ..., 39993, 39994, 39996]), + col_indices=tensor([12767, 8765, 9580, ..., 6145, 8828, 14685]), + values=tensor([ 0.1350, 0.9538, -0.5970, ..., 0.4847, 1.1186, + -0.3204]), size=(20000, 20000), nnz=39996, + layout=torch.sparse_csr) +tensor([0.2590, 0.5571, 0.5001, ..., 0.9763, 0.4254, 0.2867]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 39996 +Density: 9.999e-05 +Time: 10.36286997795105 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.json new file mode 100644 index 0000000..290e9c3 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 368614, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 4000, "MATRIX_DENSITY": 1e-05, "TIME_S": 10.64458179473877, "TIME_S_1KI": 0.028877312838738546, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 785.2860484743118, "W": 74.57, "J_1KI": 2.1303749951827977, "W_1KI": 0.20229833918407872, "W_D": 65.41749999999999, "J_D": 688.9023746287821, "W_D_1KI": 0.1774688427460704, "J_D_1KI": 0.0004814490028758278} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.output new file mode 100644 index 0000000..1e87d53 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_1e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 3999, 3999, 4000]), + col_indices=tensor([ 4783, 9851, 5325, ..., 15405, 3201, 3555]), + values=tensor([ 0.3085, 1.2735, -0.3529, ..., 1.1219, 1.0957, + -0.5773]), size=(20000, 20000), nnz=4000, + layout=torch.sparse_csr) +tensor([0.7769, 0.4542, 0.8807, ..., 0.1799, 0.8487, 0.1514]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 4000 +Density: 1e-05 +Time: 10.64458179473877 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.json new file mode 100644 index 0000000..2406ed8 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 283636, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [20000, 20000], "MATRIX_SIZE": 400000000, "MATRIX_NNZ": 20000, "MATRIX_DENSITY": 5e-05, "TIME_S": 10.500569581985474, "TIME_S_1KI": 0.03702128637403388, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 810.0879921865463, "W": 76.03, "J_1KI": 2.8560831212770816, "W_1KI": 0.26805483083952675, "W_D": 66.86375, "J_D": 712.4230039137601, "W_D_1KI": 0.23573788235625942, "J_D_1KI": 0.0008311282148819593} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.output new file mode 100644 index 0000000..97cd584 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_20000_5e-05.output @@ -0,0 +1,16 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 1, ..., 19998, 19999, 20000]), + col_indices=tensor([ 8922, 1520, 2083, ..., 15757, 10768, 6287]), + values=tensor([-1.1442, 0.4868, 0.6733, ..., 0.9094, -1.4024, + -0.9844]), size=(20000, 20000), nnz=20000, + layout=torch.sparse_csr) +tensor([0.7870, 0.5320, 0.2324, ..., 0.3230, 0.6000, 0.7397]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([20000, 20000]) +Size: 400000000 +NNZ: 20000 +Density: 5e-05 +Time: 10.500569581985474 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.json b/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.json new file mode 100644 index 0000000..f35f476 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 136743, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 249989, "MATRIX_DENSITY": 9.99956e-05, "TIME_S": 10.19824481010437, "TIME_S_1KI": 0.07457964802662198, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 912.4124284863472, "W": 84.05, "J_1KI": 6.672461687152887, "W_1KI": 0.6146566917502175, "W_D": 74.32124999999999, "J_D": 806.8010969737171, "W_D_1KI": 0.5435104539172023, "J_D_1KI": 0.0039746857529614115} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.output b/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.output new file mode 100644 index 0000000..731b6e4 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_0.0001.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 249977, 249981, + 249989]), + col_indices=tensor([ 1763, 15898, 31944, ..., 25767, 45730, 48074]), + values=tensor([ 0.6468, 0.2817, -0.4433, ..., 0.7389, -0.5082, + -0.4977]), size=(50000, 50000), nnz=249989, + layout=torch.sparse_csr) +tensor([0.3477, 0.0927, 0.9187, ..., 0.0955, 0.4581, 0.4152]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 249989 +Density: 9.99956e-05 +Time: 10.19824481010437 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.json new file mode 100644 index 0000000..41b7e71 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 213899, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 25000, "MATRIX_DENSITY": 1e-05, "TIME_S": 10.812883138656616, "TIME_S_1KI": 0.050551349649398156, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 809.0901684951782, "W": 74.49, "J_1KI": 3.7825804164356924, "W_1KI": 0.34824847240987566, "W_D": 65.00625, "J_D": 706.0802492380142, "W_D_1KI": 0.3039109579754931, "J_D_1KI": 0.0014208152351132689} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.output new file mode 100644 index 0000000..1438dd6 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_1e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 25000, 25000, 25000]), + col_indices=tensor([14105, 49794, 12275, ..., 28034, 32175, 49067]), + values=tensor([-0.2580, 2.2414, -0.4406, ..., 0.7293, 1.3018, + 0.4178]), size=(50000, 50000), nnz=25000, + layout=torch.sparse_csr) +tensor([6.5155e-01, 7.4521e-01, 5.7246e-01, ..., 2.1695e-01, 2.5153e-01, + 5.8967e-04]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 25000 +Density: 1e-05 +Time: 10.812883138656616 seconds + diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.json b/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.json new file mode 100644 index 0000000..b954a39 --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.json @@ -0,0 +1 @@ +{"CPU": "Xeon 4216", "ITERATIONS": 167105, "MATRIX_TYPE": "synthetic", "MATRIX_FORMAT": "csr", "MATRIX_SHAPE": [50000, 50000], "MATRIX_SIZE": 2500000000, "MATRIX_NNZ": 124998, "MATRIX_DENSITY": 4.99992e-05, "TIME_S": 10.385007858276367, "TIME_S_1KI": 0.06214660158748313, "BASELINE_TIME_S": 2, "BASELINE_DELAY_S": 10, "J": 840.060476000309, "W": 80.17, "J_1KI": 5.02714147392543, "W_1KI": 0.47975823583974153, "W_D": 70.9425, "J_D": 743.3702172714471, "W_D_1KI": 0.4245384638401005, "J_D_1KI": 0.0025405491388055448} diff --git a/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.output b/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.output new file mode 100644 index 0000000..976d4ca --- /dev/null +++ b/pytorch/output_test2/xeon_4216_10_2_10_50000_5e-05.output @@ -0,0 +1,17 @@ +/nfshomes/vut/ampere_research/pytorch/spmv.py:57: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at ../aten/src/ATen/SparseCsrTensorImpl.cpp:53.) + matrix = matrix.to_sparse_csr().type(torch.float32) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 124995, 124996, + 124998]), + col_indices=tensor([ 4431, 12161, 21642, ..., 46785, 7280, 21912]), + values=tensor([-0.0038, 0.8141, -1.0870, ..., -0.4364, -1.6532, + -0.2924]), size=(50000, 50000), nnz=124998, + layout=torch.sparse_csr) +tensor([0.7830, 0.7286, 0.1161, ..., 0.7505, 0.0868, 0.3751]) +Matrix: synthetic +Matrix: csr +Shape: torch.Size([50000, 50000]) +Size: 2500000000 +NNZ: 124998 +Density: 4.99992e-05 +Time: 10.385007858276367 seconds +