From 01b1b0fc0cc5d907d2e75aea3d0636047588c183 Mon Sep 17 00:00:00 2001 From: cephi Date: Tue, 3 Dec 2024 00:20:09 -0500 Subject: [PATCH] Got output! --- ...altra_100_soc-sign-Slashdot090221_2_2.json | 1 - pytorch/output/altra_2_2_Oregon-2_100.json | 1 + pytorch/output/altra_2_2_Oregon-2_100.output | 153 ++++++++++++++++ pytorch/output/altra_2_2_as-caida_100.json | 1 + pytorch/output/altra_2_2_as-caida_100.output | 158 ++++++++++++++++ pytorch/output/altra_2_2_dc2_100.json | 1 + pytorch/output/altra_2_2_dc2_100.output | 168 ++++++++++++++++++ pytorch/output/altra_2_2_de2010_100.json | 1 + pytorch/output/altra_2_2_de2010_100.output | 163 +++++++++++++++++ pytorch/output/altra_2_2_email-Enron_100.json | 1 + .../output/altra_2_2_email-Enron_100.output | 158 ++++++++++++++++ .../output/altra_2_2_p2p-Gnutella04_100.json | 1 + .../altra_2_2_p2p-Gnutella04_100.output | 153 ++++++++++++++++ .../output/altra_2_2_p2p-Gnutella24_100.json | 1 + .../altra_2_2_p2p-Gnutella24_100.output | 153 ++++++++++++++++ .../output/altra_2_2_p2p-Gnutella25_100.json | 1 + .../altra_2_2_p2p-Gnutella25_100.output | 153 ++++++++++++++++ .../output/altra_2_2_p2p-Gnutella30_100.json | 1 + .../altra_2_2_p2p-Gnutella30_100.output | 153 ++++++++++++++++ pytorch/output/altra_2_2_ri2010_100.json | 1 + pytorch/output/altra_2_2_ri2010_100.output | 158 ++++++++++++++++ ...altra_2_2_soc-sign-Slashdot090216_100.json | 1 + ...tra_2_2_soc-sign-Slashdot090216_100.output | 158 ++++++++++++++++ ...altra_2_2_soc-sign-Slashdot090221_100.json | 1 + ...ra_2_2_soc-sign-Slashdot090221_100.output} | 108 +++++------ .../altra_2_2_soc-sign-epinions_100.json | 1 + .../altra_2_2_soc-sign-epinions_100.output | 163 +++++++++++++++++ .../output/altra_2_2_sx-mathoverflow_100.json | 1 + .../altra_2_2_sx-mathoverflow_100.output | 158 ++++++++++++++++ pytorch/output/altra_2_2_ut2010_100.json | 1 + pytorch/output/altra_2_2_ut2010_100.output | 168 ++++++++++++++++++ pytorch/output/altra_2_2_vt2010_100.json | 1 + pytorch/output/altra_2_2_vt2010_100.output | 158 ++++++++++++++++ 33 files changed, 2445 insertions(+), 55 deletions(-) delete mode 100644 pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json create mode 100644 pytorch/output/altra_2_2_Oregon-2_100.json create mode 100644 pytorch/output/altra_2_2_Oregon-2_100.output create mode 100644 pytorch/output/altra_2_2_as-caida_100.json create mode 100644 pytorch/output/altra_2_2_as-caida_100.output create mode 100644 pytorch/output/altra_2_2_dc2_100.json create mode 100644 pytorch/output/altra_2_2_dc2_100.output create mode 100644 pytorch/output/altra_2_2_de2010_100.json create mode 100644 pytorch/output/altra_2_2_de2010_100.output create mode 100644 pytorch/output/altra_2_2_email-Enron_100.json create mode 100644 pytorch/output/altra_2_2_email-Enron_100.output create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella04_100.json create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella04_100.output create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella24_100.json create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella24_100.output create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella25_100.json create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella25_100.output create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella30_100.json create mode 100644 pytorch/output/altra_2_2_p2p-Gnutella30_100.output create mode 100644 pytorch/output/altra_2_2_ri2010_100.json create mode 100644 pytorch/output/altra_2_2_ri2010_100.output create mode 100644 pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json 
create mode 100644 pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output create mode 100644 pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json rename pytorch/output/{altra_100_soc-sign-Slashdot090221_2_2.output => altra_2_2_soc-sign-Slashdot090221_100.output} (67%) create mode 100644 pytorch/output/altra_2_2_soc-sign-epinions_100.json create mode 100644 pytorch/output/altra_2_2_soc-sign-epinions_100.output create mode 100644 pytorch/output/altra_2_2_sx-mathoverflow_100.json create mode 100644 pytorch/output/altra_2_2_sx-mathoverflow_100.output create mode 100644 pytorch/output/altra_2_2_ut2010_100.json create mode 100644 pytorch/output/altra_2_2_ut2010_100.output create mode 100644 pytorch/output/altra_2_2_vt2010_100.json create mode 100644 pytorch/output/altra_2_2_vt2010_100.output diff --git a/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json b/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json deleted file mode 100644 index c548473..0000000 --- a/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json +++ /dev/null @@ -1 +0,0 @@ -{"power_before": [20.2, 20.32], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.6952476501464844, "power": [44.68, 57.12, 64.2, 67.04, 55.68, 42.76], "power_after": [53.56, 52.52], "task clock (msec)": 61.63, "page faults": 3293, "cycles": 41677750, "instructions": 91767205, "branch mispredictions": 329386, "branches": 19813961, "ITLB accesses": 27944146, "ITLB misses": 6811, "DTLB misses": 18962, "DTLB accesses": 37689058, "L1I cache accesses": 31746573, "L1I cache misses": 290044, "L1D cache misses": 471100, "L1D cache accesses": 33271575, "LL cache misses": 550308, "LL cache accesses": 564981, "L2D TLB accesses": 168456, "L2D TLB misses": 20450, "L2D cache misses": 306309, "L2D cache accesses": 1745776, "instructions per cycle": 2.201827233955768, "branch miss rate": 0.01662393501228755, "ITLB miss rate": 0.00024373620149279208, "DTLB miss rate": 0.0005031168462740565, "L2D TLB miss rate": 0.12139668518782352, "L1I cache miss rate": 0.009136230231842662, "L1D cache miss rate": 0.014159233519904002, "L2D cache miss rate": 0.17545721787904062, "LL cache miss rate": 0.9740292151417481} diff --git a/pytorch/output/altra_2_2_Oregon-2_100.json b/pytorch/output/altra_2_2_Oregon-2_100.json new file mode 100644 index 0000000..1639db3 --- /dev/null +++ b/pytorch/output/altra_2_2_Oregon-2_100.json @@ -0,0 +1 @@ +{"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 
0.9615458509371094} diff --git a/pytorch/output/altra_2_2_Oregon-2_100.output b/pytorch/output/altra_2_2_Oregon-2_100.output new file mode 100644 index 0000000..68f1fba --- /dev/null +++ b/pytorch/output/altra_2_2_Oregon-2_100.output @@ -0,0 +1,153 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394148 queued and waiting for resources +srun: job 3394148 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]), + col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806), + nnz=65460, layout=torch.sparse_csr) +tensor([0.3190, 0.2829, 0.6210, ..., 0.9278, 0.7514, 0.5737]) +Shape: torch.Size([11806, 11806]) +NNZ: 65460 +Density: 0.0004696458003979807 +Time: 0.22389841079711914 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100': + + 42.01 msec task-clock:u # 0.012 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,263 page-faults:u # 77.672 K/sec + 47,084,933 cycles:u # 1.121 GHz (65.90%) + 77,895,119 instructions:u # 1.65 insn per cycle (85.49%) + branches:u + 352,740 branch-misses:u + 30,958,922 L1-dcache-loads:u # 736.946 M/sec + 442,351 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 29,506,648 L1-icache-loads:u # 702.376 M/sec + 272,063 L1-icache-load-misses:u # 0.92% of all L1-icache accesses + 51,646,382 dTLB-loads:u # 1.229 G/sec (15.87%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.513156571 seconds time elapsed + + 15.150380000 seconds user + 32.922923000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]), + col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806), + nnz=65460, layout=torch.sparse_csr) +tensor([0.0741, 0.5476, 0.1060, ..., 0.8459, 0.8270, 0.8313]) +Shape: torch.Size([11806, 11806]) +NNZ: 65460 +Density: 0.0004696458003979807 +Time: 0.20610284805297852 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100': + + 330,923 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,740,519 BR_RETIRED:u + + 3.639725976 seconds time elapsed + + 15.493122000 seconds user + 27.617441000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]), + col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806), + nnz=65460, layout=torch.sparse_csr) +tensor([0.9699, 0.9368, 0.7284, ..., 0.7182, 0.5308, 0.9833]) +Shape: torch.Size([11806, 11806]) +NNZ: 65460 +Density: 0.0004696458003979807 +Time: 0.15960955619812012 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100': + + 27,761,239 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,471 ITLB_WALK:u + 17,268 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,993,265 L1D_TLB:u + + 3.455602215 seconds time elapsed + + 15.015027000 seconds user + 27.930709000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]), + col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806), + nnz=65460, layout=torch.sparse_csr) +tensor([0.5851, 0.3425, 0.8120, ..., 0.0829, 0.5823, 0.2256]) +Shape: torch.Size([11806, 11806]) +NNZ: 65460 +Density: 0.0004696458003979807 +Time: 0.15697884559631348 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100': + + 31,834,980 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 298,333 L1I_CACHE_REFILL:u + 466,901 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 33,528,976 L1D_CACHE:u + + 3.452279902 seconds time elapsed + + 14.635240000 seconds user + 28.262858000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]), + col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806), + nnz=65460, layout=torch.sparse_csr) +tensor([0.0772, 0.9112, 0.0293, ..., 0.4016, 0.4357, 0.5368]) +Shape: torch.Size([11806, 11806]) +NNZ: 65460 +Density: 0.0004696458003979807 +Time: 0.20962285995483398 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100': + + 525,505 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 546,521 LL_CACHE_RD:u + 184,884 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 22,933 L2D_TLB_REFILL:u + 292,367 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,706,226 L2D_CACHE:u + + 3.566096255 seconds time elapsed + + 15.763579000 seconds user + 28.620423000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_as-caida_100.json b/pytorch/output/altra_2_2_as-caida_100.json new file mode 100644 index 0000000..d1ddf90 --- /dev/null +++ b/pytorch/output/altra_2_2_as-caida_100.json @@ -0,0 +1 @@ +{"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491} diff --git a/pytorch/output/altra_2_2_as-caida_100.output b/pytorch/output/altra_2_2_as-caida_100.output new file mode 100644 index 0000000..490529e --- /dev/null +++ b/pytorch/output/altra_2_2_as-caida_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394150 queued and waiting for resources +srun: job 3394150 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. 
If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761, + 106762]), + col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379), + nnz=106762, layout=torch.sparse_csr) +tensor([0.7672, 0.5818, 0.6775, ..., 0.1052, 0.2539, 0.4347]) +Shape: torch.Size([31379, 31379]) +NNZ: 106762 +Density: 0.00010842726485909405 +Time: 0.28373050689697266 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100': + + 60.78 msec task-clock:u # 0.017 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,300 page-faults:u # 54.293 K/sec + 66,733,059 cycles:u # 1.098 GHz (58.34%) + 87,889,334 instructions:u # 1.32 insn per cycle (93.45%) + branches:u + 369,909 branch-misses:u + 31,872,708 L1-dcache-loads:u # 524.386 M/sec + 465,719 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 30,443,353 L1-icache-loads:u # 500.870 M/sec + 292,371 L1-icache-load-misses:u # 0.96% of all L1-icache accesses + 34,702,735 dTLB-loads:u # 570.947 M/sec (6.96%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.683429807 seconds time elapsed + + 15.161162000 seconds user + 31.335288000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761, + 106762]), + col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379), + nnz=106762, layout=torch.sparse_csr) +tensor([0.2708, 0.2455, 0.7615, ..., 0.1172, 0.4072, 0.8970]) +Shape: torch.Size([31379, 31379]) +NNZ: 106762 +Density: 0.00010842726485909405 +Time: 0.32511067390441895 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100': + + 326,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,832,700 BR_RETIRED:u + + 3.755497210 seconds time elapsed + + 14.681699000 seconds user + 29.413955000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761, + 106762]), + col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379), + nnz=106762, layout=torch.sparse_csr) +tensor([0.9417, 0.0965, 0.8551, ..., 0.6665, 0.0164, 0.5102]) +Shape: torch.Size([31379, 31379]) +NNZ: 106762 +Density: 0.00010842726485909405 +Time: 0.33124780654907227 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100': + + 27,233,629 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 5,868 ITLB_WALK:u + 16,893 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,409,508 L1D_TLB:u + + 3.751203540 seconds time elapsed + + 14.849342000 seconds user + 27.706396000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761, + 106762]), + col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379), + nnz=106762, layout=torch.sparse_csr) +tensor([0.9215, 0.4139, 0.1789, ..., 0.0245, 0.0029, 0.2129]) +Shape: torch.Size([31379, 31379]) +NNZ: 106762 +Density: 0.00010842726485909405 +Time: 0.3386805057525635 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100': + + 30,924,532 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 288,199 L1I_CACHE_REFILL:u + 462,816 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 32,428,375 L1D_CACHE:u + + 3.628443937 seconds time elapsed + + 15.430937000 seconds user + 30.878583000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761, + 106762]), + col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379), + nnz=106762, layout=torch.sparse_csr) +tensor([0.4983, 0.0268, 0.1695, ..., 0.6987, 0.7224, 0.8577]) +Shape: torch.Size([31379, 31379]) +NNZ: 106762 +Density: 0.00010842726485909405 +Time: 0.3289623260498047 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100': + + 551,997 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 568,528 LL_CACHE_RD:u + 193,991 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 24,353 L2D_TLB_REFILL:u + 312,207 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,821,196 L2D_CACHE:u + + 3.698790384 seconds time elapsed + + 15.745189000 seconds user + 31.063512000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_dc2_100.json b/pytorch/output/altra_2_2_dc2_100.json new file mode 100644 index 0000000..28dce2a --- /dev/null +++ b/pytorch/output/altra_2_2_dc2_100.json @@ -0,0 +1 @@ +{"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127} diff --git a/pytorch/output/altra_2_2_dc2_100.output b/pytorch/output/altra_2_2_dc2_100.output new file mode 100644 index 0000000..9a95126 --- /dev/null +++ b/pytorch/output/altra_2_2_dc2_100.output @@ -0,0 +1,168 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394149 queued and waiting for resources +srun: job 3394149 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394, + 766396]), + col_indices=tensor([ 0, 1, 2, ..., 116833, 89, + 116834]), + values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ..., + 1.0331e+01, -1.0000e-03, 1.0000e-03]), + size=(116835, 116835), nnz=766396, layout=torch.sparse_csr) +tensor([0.4749, 0.3788, 0.8812, ..., 0.8281, 0.8889, 0.4945]) +Shape: torch.Size([116835, 116835]) +NNZ: 766396 +Density: 5.614451099680581e-05 +Time: 2.2480316162109375 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100': + + 50.43 msec task-clock:u # 0.009 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,285 page-faults:u # 65.135 K/sec + 54,118,679 cycles:u # 1.073 GHz (60.92%) + 77,692,421 instructions:u # 1.44 insn per cycle (82.73%) + branches:u + 367,999 branch-misses:u + 32,182,371 L1-dcache-loads:u # 638.112 M/sec + 491,960 L1-dcache-load-misses:u # 1.53% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 30,682,258 L1-icache-loads:u # 608.367 M/sec + 300,874 L1-icache-load-misses:u # 0.98% of all L1-icache accesses + 55,244,523 dTLB-loads:u # 1.095 G/sec (19.09%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 5.813837947 seconds time elapsed + + 28.815118000 seconds user + 213.749674000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394, + 766396]), + col_indices=tensor([ 0, 1, 2, ..., 116833, 89, + 116834]), + values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ..., + 1.0331e+01, -1.0000e-03, 1.0000e-03]), + size=(116835, 116835), nnz=766396, layout=torch.sparse_csr) +tensor([0.9715, 0.3920, 0.0297, ..., 0.1819, 0.5744, 0.8105]) +Shape: torch.Size([116835, 116835]) +NNZ: 766396 +Density: 5.614451099680581e-05 +Time: 2.2333595752716064 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100': + + 325,039 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,383,216 BR_RETIRED:u + + 5.973132269 seconds time elapsed + + 29.719778000 seconds user + 213.706315000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394, + 766396]), + col_indices=tensor([ 0, 1, 2, ..., 116833, 89, + 116834]), + values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ..., + 1.0331e+01, -1.0000e-03, 1.0000e-03]), + size=(116835, 116835), nnz=766396, layout=torch.sparse_csr) +tensor([0.3371, 0.4985, 0.9905, ..., 0.6075, 0.1568, 0.3782]) +Shape: torch.Size([116835, 116835]) +NNZ: 766396 +Density: 5.614451099680581e-05 +Time: 1.9790923595428467 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100': + + 26,060,519 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 4,749 ITLB_WALK:u + 16,865 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 34,819,729 L1D_TLB:u + + 5.575020445 seconds time elapsed + + 26.769391000 seconds user + 188.138935000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394, + 766396]), + col_indices=tensor([ 0, 1, 2, ..., 116833, 89, + 116834]), + values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ..., + 1.0331e+01, -1.0000e-03, 1.0000e-03]), + size=(116835, 116835), nnz=766396, layout=torch.sparse_csr) +tensor([0.6806, 0.8858, 0.7035, ..., 0.6007, 0.0880, 0.4550]) +Shape: torch.Size([116835, 116835]) +NNZ: 766396 +Density: 5.614451099680581e-05 +Time: 1.5306556224822998 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100': + + 30,777,115 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 293,980 L1I_CACHE_REFILL:u + 461,522 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 32,216,597 L1D_CACHE:u + + 4.961298684 seconds time elapsed + + 23.946357000 seconds user + 156.598674000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394, + 766396]), + col_indices=tensor([ 0, 1, 2, ..., 116833, 89, + 116834]), + values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ..., + 1.0331e+01, -1.0000e-03, 1.0000e-03]), + size=(116835, 116835), nnz=766396, layout=torch.sparse_csr) +tensor([0.3029, 0.1908, 0.9816, ..., 0.0418, 0.8182, 0.5474]) +Shape: torch.Size([116835, 116835]) +NNZ: 766396 +Density: 5.614451099680581e-05 +Time: 2.28926944732666 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100': + + 567,700 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 588,689 LL_CACHE_RD:u + 189,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 22,360 L2D_TLB_REFILL:u + 328,306 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,908,607 L2D_CACHE:u + + 5.710829283 seconds time elapsed + + 28.671301000 seconds user + 213.960421000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_de2010_100.json b/pytorch/output/altra_2_2_de2010_100.json new file mode 100644 index 0000000..afb2e95 --- /dev/null +++ b/pytorch/output/altra_2_2_de2010_100.json @@ -0,0 +1 @@ +{"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218} diff --git a/pytorch/output/altra_2_2_de2010_100.output b/pytorch/output/altra_2_2_de2010_100.output new file mode 100644 index 0000000..30243dd --- /dev/null +++ b/pytorch/output/altra_2_2_de2010_100.output @@ -0,0 +1,163 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394139 queued and waiting for resources +srun: job 3394139 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051, + 116056]), + col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]), + values=tensor([ 14900., 33341., 20255., ..., 164227., 52413., + 16949.]), size=(24115, 24115), nnz=116056, + layout=torch.sparse_csr) +tensor([0.4207, 0.3943, 0.6543, ..., 0.2191, 0.5415, 0.1575]) +Shape: torch.Size([24115, 24115]) +NNZ: 116056 +Density: 0.0001995689928120616 +Time: 0.36042284965515137 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100': + + 59.88 msec task-clock:u # 0.016 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,313 page-faults:u # 55.328 K/sec + 58,169,777 cycles:u # 0.971 GHz (61.49%) + 57,993,431 instructions:u # 1.00 insn per cycle (81.67%) + branches:u + 341,266 branch-misses:u + 31,858,781 L1-dcache-loads:u # 532.049 M/sec + 467,486 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 30,461,310 L1-icache-loads:u # 508.711 M/sec + 294,156 L1-icache-load-misses:u # 0.97% of all L1-icache accesses + 43,828,130 dTLB-loads:u # 731.940 M/sec (40.26%) + 47,836 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (25.52%) + 0 iTLB-loads:u # 0.000 /sec (2.73%) + iTLB-load-misses:u (0.00%) + + 3.824054028 seconds time elapsed + + 15.099361000 seconds user + 28.830417000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051, + 116056]), + col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]), + values=tensor([ 14900., 33341., 20255., ..., 164227., 52413., + 16949.]), size=(24115, 24115), nnz=116056, + layout=torch.sparse_csr) +tensor([0.0456, 0.2095, 0.0276, ..., 0.4209, 0.6824, 0.5475]) +Shape: torch.Size([24115, 24115]) +NNZ: 116056 +Density: 0.0001995689928120616 +Time: 0.3598823547363281 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100': + + 330,494 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,578,427 BR_RETIRED:u + + 3.781234836 seconds time elapsed + + 14.965545000 seconds user + 29.444131000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051, + 116056]), + col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]), + values=tensor([ 14900., 33341., 20255., ..., 164227., 52413., + 16949.]), size=(24115, 24115), nnz=116056, + layout=torch.sparse_csr) +tensor([0.9882, 0.5477, 0.6307, ..., 0.1179, 0.6903, 0.1235]) +Shape: torch.Size([24115, 24115]) +NNZ: 116056 +Density: 0.0001995689928120616 +Time: 0.29088521003723145 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100': + + 27,982,097 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,614 ITLB_WALK:u + 17,270 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 37,728,899 L1D_TLB:u + + 3.576632300 seconds time elapsed + + 14.864601000 seconds user + 29.274547000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051, + 116056]), + col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]), + values=tensor([ 14900., 33341., 20255., ..., 164227., 52413., + 16949.]), size=(24115, 24115), nnz=116056, + layout=torch.sparse_csr) +tensor([0.3952, 0.0475, 0.1125, ..., 0.3481, 0.1290, 0.3495]) +Shape: torch.Size([24115, 24115]) +NNZ: 116056 +Density: 0.0001995689928120616 +Time: 0.30365920066833496 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100': + + 29,754,926 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 278,786 L1I_CACHE_REFILL:u + 454,742 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 31,173,246 L1D_CACHE:u + + 3.730995381 seconds time elapsed + + 15.213930000 seconds user + 30.995070000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051, + 116056]), + col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]), + values=tensor([ 14900., 33341., 20255., ..., 164227., 52413., + 16949.]), size=(24115, 24115), nnz=116056, + layout=torch.sparse_csr) +tensor([0.7266, 0.7537, 0.9729, ..., 0.3349, 0.3523, 0.6532]) +Shape: torch.Size([24115, 24115]) +NNZ: 116056 +Density: 0.0001995689928120616 +Time: 0.2798902988433838 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100': + + 543,243 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 560,716 LL_CACHE_RD:u + 162,281 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 19,847 L2D_TLB_REFILL:u + 300,577 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,696,278 L2D_CACHE:u + + 3.819959836 seconds time elapsed + + 15.346035000 seconds user + 29.199873000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_email-Enron_100.json b/pytorch/output/altra_2_2_email-Enron_100.json new file mode 100644 index 0000000..b0b9d38 --- /dev/null +++ b/pytorch/output/altra_2_2_email-Enron_100.json @@ -0,0 +1 @@ +{"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719} diff --git a/pytorch/output/altra_2_2_email-Enron_100.output b/pytorch/output/altra_2_2_email-Enron_100.output new file mode 100644 index 0000000..55d535a --- /dev/null +++ b/pytorch/output/altra_2_2_email-Enron_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394152 queued and waiting for resources +srun: job 3394152 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661, + 367662]), + col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692), + nnz=367662, layout=torch.sparse_csr) +tensor([0.3626, 0.7532, 0.0782, ..., 0.6679, 0.4308, 0.6586]) +Shape: torch.Size([36692, 36692]) +NNZ: 367662 +Density: 0.0002730901120626302 +Time: 1.3745801448822021 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100': + + 60.43 msec task-clock:u # 0.012 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,319 page-faults:u # 54.926 K/sec + 66,114,448 cycles:u # 1.094 GHz (58.10%) + 90,786,829 instructions:u # 1.37 insn per cycle (92.25%) + branches:u + 372,381 branch-misses:u + 32,997,410 L1-dcache-loads:u # 546.070 M/sec + 470,216 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,485,339 L1-icache-loads:u # 521.047 M/sec + 294,395 L1-icache-load-misses:u # 0.94% of all L1-icache accesses + 31,376,646 dTLB-loads:u # 519.248 M/sec (10.03%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 4.904488673 seconds time elapsed + + 22.874521000 seconds user + 139.276239000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661, + 367662]), + col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692), + nnz=367662, layout=torch.sparse_csr) +tensor([0.2040, 0.8252, 0.0215, ..., 0.2921, 0.9143, 0.8728]) +Shape: torch.Size([36692, 36692]) +NNZ: 367662 +Density: 0.0002730901120626302 +Time: 1.3087654113769531 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100': + + 341,625 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,129,354 BR_RETIRED:u + + 4.644873434 seconds time elapsed + + 22.729927000 seconds user + 132.278582000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661, + 367662]), + col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692), + nnz=367662, layout=torch.sparse_csr) +tensor([0.6154, 0.6641, 0.3794, ..., 0.9736, 0.0619, 0.4790]) +Shape: torch.Size([36692, 36692]) +NNZ: 367662 +Density: 0.0002730901120626302 +Time: 1.2701547145843506 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100': + + 27,441,303 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,807 ITLB_WALK:u + 20,551 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,867,114 L1D_TLB:u + + 4.861510767 seconds time elapsed + + 22.111354000 seconds user + 132.431608000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661, + 367662]), + col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692), + nnz=367662, layout=torch.sparse_csr) +tensor([0.4201, 0.4134, 0.8169, ..., 0.6631, 0.0087, 0.8439]) +Shape: torch.Size([36692, 36692]) +NNZ: 367662 +Density: 0.0002730901120626302 +Time: 1.1176586151123047 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100': + + 31,744,243 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 271,027 L1I_CACHE_REFILL:u + 464,135 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 33,441,141 L1D_CACHE:u + + 4.693803969 seconds time elapsed + + 21.724904000 seconds user + 119.873018000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661, + 367662]), + col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692), + nnz=367662, layout=torch.sparse_csr) +tensor([0.1285, 0.3989, 0.3903, ..., 0.7892, 0.2737, 0.2659]) +Shape: torch.Size([36692, 36692]) +NNZ: 367662 +Density: 0.0002730901120626302 +Time: 1.196892261505127 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100': + + 539,935 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 552,519 LL_CACHE_RD:u + 188,291 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 24,177 L2D_TLB_REFILL:u + 301,281 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,737,575 L2D_CACHE:u + + 4.741030347 seconds time elapsed + + 23.793930000 seconds user + 125.634838000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_p2p-Gnutella04_100.json b/pytorch/output/altra_2_2_p2p-Gnutella04_100.json new file mode 100644 index 0000000..34a22c0 --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella04_100.json @@ -0,0 +1 @@ +{"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197} diff --git a/pytorch/output/altra_2_2_p2p-Gnutella04_100.output b/pytorch/output/altra_2_2_p2p-Gnutella04_100.output new file mode 100644 index 0000000..ad45ac8 --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella04_100.output @@ -0,0 +1,153 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394153 queued and waiting for resources +srun: job 3394153 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]), + col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879), + nnz=39994, layout=torch.sparse_csr) +tensor([0.6982, 0.7263, 0.0064, ..., 0.9256, 0.7249, 0.5065]) +Shape: torch.Size([10879, 10879]) +NNZ: 39994 +Density: 0.0003379223282393842 +Time: 0.18009519577026367 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100': + + 67.56 msec task-clock:u # 0.019 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,829 page-faults:u # 56.674 K/sec + 47,862,000 cycles:u # 0.708 GHz (59.24%) + 84,392,375 instructions:u # 1.76 insn per cycle (87.61%) + branches:u + 368,432 branch-misses:u + 32,507,448 L1-dcache-loads:u # 481.147 M/sec + 481,389 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,030,656 L1-icache-loads:u # 459.289 M/sec + 308,582 L1-icache-load-misses:u # 0.99% of all L1-icache accesses + 34,988,046 dTLB-loads:u # 517.863 M/sec (20.00%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.538329547 seconds time elapsed + + 14.667604000 seconds user + 29.534487000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]), + col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879), + nnz=39994, layout=torch.sparse_csr) +tensor([0.4946, 0.3509, 0.5239, ..., 0.4520, 0.4206, 0.8181]) +Shape: torch.Size([10879, 10879]) +NNZ: 39994 +Density: 0.0003379223282393842 +Time: 0.18875432014465332 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100': + + 331,622 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,800,140 BR_RETIRED:u + + 3.556031790 seconds time elapsed + + 14.799719000 seconds user + 27.876987000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]), + col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879), + nnz=39994, layout=torch.sparse_csr) +tensor([0.2184, 0.4999, 0.9567, ..., 0.8794, 0.8213, 0.8713]) +Shape: torch.Size([10879, 10879]) +NNZ: 39994 +Density: 0.0003379223282393842 +Time: 0.1066896915435791 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100': + + 25,905,045 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,746 ITLB_WALK:u + 17,547 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 35,220,079 L1D_TLB:u + + 3.505367779 seconds time elapsed + + 14.557493000 seconds user + 29.642958000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]), + col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879), + nnz=39994, layout=torch.sparse_csr) +tensor([0.2180, 0.0881, 0.5532, ..., 0.4961, 0.0093, 0.4929]) +Shape: torch.Size([10879, 10879]) +NNZ: 39994 +Density: 0.0003379223282393842 +Time: 0.12433028221130371 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100': + + 30,359,576 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 283,204 L1I_CACHE_REFILL:u + 465,520 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 31,843,274 L1D_CACHE:u + + 3.565310130 seconds time elapsed + + 14.913239000 seconds user + 28.125605000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]), + col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879), + nnz=39994, layout=torch.sparse_csr) +tensor([0.6394, 0.6808, 0.7957, ..., 0.1529, 0.0561, 0.7834]) +Shape: torch.Size([10879, 10879]) +NNZ: 39994 +Density: 0.0003379223282393842 +Time: 0.13401126861572266 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100': + + 560,542 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 575,610 LL_CACHE_RD:u + 173,643 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 21,499 L2D_TLB_REFILL:u + 313,335 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,741,621 L2D_CACHE:u + + 3.503362704 seconds time elapsed + + 15.287949000 seconds user + 28.752303000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_p2p-Gnutella24_100.json b/pytorch/output/altra_2_2_p2p-Gnutella24_100.json new file mode 100644 index 0000000..af5a1ea --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella24_100.json @@ -0,0 +1 @@ +{"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281} diff --git a/pytorch/output/altra_2_2_p2p-Gnutella24_100.output b/pytorch/output/altra_2_2_p2p-Gnutella24_100.output new file mode 100644 index 0000000..6b8520b --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella24_100.output @@ -0,0 +1,153 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394141 queued and waiting for resources +srun: job 3394141 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]), + col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518), + nnz=65369, layout=torch.sparse_csr) +tensor([0.6616, 0.1149, 0.0110, ..., 0.2481, 0.7877, 0.5589]) +Shape: torch.Size([26518, 26518]) +NNZ: 65369 +Density: 9.295875717624285e-05 +Time: 0.16974925994873047 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100': + + 61.92 msec task-clock:u # 0.017 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,281 page-faults:u # 52.988 K/sec + 66,250,810 cycles:u # 1.070 GHz (62.94%) + 75,178,179 instructions:u # 1.13 insn per cycle (83.47%) + branches:u + 367,749 branch-misses:u + 33,064,095 L1-dcache-loads:u # 533.986 M/sec + 465,542 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,552,264 L1-icache-loads:u # 509.570 M/sec + 296,060 L1-icache-load-misses:u # 0.94% of all L1-icache accesses + 73,155,896 dTLB-loads:u # 1.181 G/sec (17.31%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.675971385 seconds time elapsed + + 14.857293000 seconds user + 29.791187000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]), + col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518), + nnz=65369, layout=torch.sparse_csr) +tensor([0.1683, 0.8999, 0.0578, ..., 0.5893, 0.0628, 0.8262]) +Shape: torch.Size([26518, 26518]) +NNZ: 65369 +Density: 9.295875717624285e-05 +Time: 0.2227163314819336 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100': + + 332,366 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,076,182 BR_RETIRED:u + + 3.532329673 seconds time elapsed + + 14.883993000 seconds user + 28.516661000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]), + col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518), + nnz=65369, layout=torch.sparse_csr) +tensor([0.8389, 0.5614, 0.9033, ..., 0.2231, 0.0349, 0.5167]) +Shape: torch.Size([26518, 26518]) +NNZ: 65369 +Density: 9.295875717624285e-05 +Time: 0.17095375061035156 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100': + + 27,005,133 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 4,791 ITLB_WALK:u + 13,403 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,457,054 L1D_TLB:u + + 3.579041343 seconds time elapsed + + 14.885159000 seconds user + 29.562650000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]), + col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518), + nnz=65369, layout=torch.sparse_csr) +tensor([0.8849, 0.5982, 0.0578, ..., 0.9975, 0.2204, 0.0718]) +Shape: torch.Size([26518, 26518]) +NNZ: 65369 +Density: 9.295875717624285e-05 +Time: 0.18003463745117188 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100': + + 32,367,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 287,524 L1I_CACHE_REFILL:u + 467,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 34,022,862 L1D_CACHE:u + + 3.405321132 seconds time elapsed + + 15.291636000 seconds user + 28.005015000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]), + col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518), + nnz=65369, layout=torch.sparse_csr) +tensor([0.2790, 0.1291, 0.6053, ..., 0.1651, 0.4973, 0.6821]) +Shape: torch.Size([26518, 26518]) +NNZ: 65369 +Density: 9.295875717624285e-05 +Time: 0.22036528587341309 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100': + + 535,707 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 556,316 LL_CACHE_RD:u + 150,149 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 18,418 L2D_TLB_REFILL:u + 297,042 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,687,364 L2D_CACHE:u + + 3.505209576 seconds time elapsed + + 15.297738000 seconds user + 29.848441000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_p2p-Gnutella25_100.json b/pytorch/output/altra_2_2_p2p-Gnutella25_100.json new file mode 100644 index 0000000..c65d5da --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella25_100.json @@ -0,0 +1 @@ +{"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573} diff --git a/pytorch/output/altra_2_2_p2p-Gnutella25_100.output b/pytorch/output/altra_2_2_p2p-Gnutella25_100.output new file mode 100644 index 0000000..c26ac74 --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella25_100.output @@ -0,0 +1,153 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394140 queued and waiting for resources +srun: job 3394140 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]), + col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687), + nnz=54705, layout=torch.sparse_csr) +tensor([0.8199, 0.9849, 0.4642, ..., 0.7594, 0.3568, 0.4020]) +Shape: torch.Size([22687, 22687]) +NNZ: 54705 +Density: 0.00010628522108964806 +Time: 0.19272208213806152 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100': + + 64.71 msec task-clock:u # 0.018 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,319 page-faults:u # 51.288 K/sec + 57,611,295 cycles:u # 0.890 GHz (39.00%) + 83,148,228 instructions:u # 1.44 insn per cycle (82.73%) + branches:u + 375,111 branch-misses:u + 32,759,228 L1-dcache-loads:u # 506.221 M/sec + 475,086 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,366,158 L1-icache-loads:u # 484.694 M/sec + 297,293 L1-icache-load-misses:u # 0.95% of all L1-icache accesses + 35,611,781 dTLB-loads:u # 550.301 M/sec (25.73%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.578384817 seconds time elapsed + + 14.435258000 seconds user + 27.700836000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]), + col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687), + nnz=54705, layout=torch.sparse_csr) +tensor([0.0069, 0.9904, 0.5316, ..., 0.2082, 0.4858, 0.4936]) +Shape: torch.Size([22687, 22687]) +NNZ: 54705 +Density: 0.00010628522108964806 +Time: 0.1423017978668213 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100': + + 318,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,233,431 BR_RETIRED:u + + 3.555753224 seconds time elapsed + + 14.642518000 seconds user + 30.112207000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]), + col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687), + nnz=54705, layout=torch.sparse_csr) +tensor([0.2250, 0.5676, 0.3018, ..., 0.5431, 0.7314, 0.5593]) +Shape: torch.Size([22687, 22687]) +NNZ: 54705 +Density: 0.00010628522108964806 +Time: 0.14638042449951172 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100': + + 27,039,805 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,375 ITLB_WALK:u + 17,290 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,688,544 L1D_TLB:u + + 3.566915241 seconds time elapsed + + 16.116565000 seconds user + 28.752519000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]), + col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687), + nnz=54705, layout=torch.sparse_csr) +tensor([0.0220, 0.7494, 0.7913, ..., 0.8924, 0.8542, 0.5491]) +Shape: torch.Size([22687, 22687]) +NNZ: 54705 +Density: 0.00010628522108964806 +Time: 0.17815685272216797 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100': + + 32,508,072 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 297,568 L1I_CACHE_REFILL:u + 477,654 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 34,044,579 L1D_CACHE:u + + 3.435706033 seconds time elapsed + + 14.690285000 seconds user + 28.763423000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]), + col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687), + nnz=54705, layout=torch.sparse_csr) +tensor([0.6277, 0.4955, 0.9335, ..., 0.1476, 0.2079, 0.0931]) +Shape: torch.Size([22687, 22687]) +NNZ: 54705 +Density: 0.00010628522108964806 +Time: 0.14432048797607422 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100': + + 549,474 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 561,939 LL_CACHE_RD:u + 185,622 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 23,295 L2D_TLB_REFILL:u + 305,878 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,763,089 L2D_CACHE:u + + 3.538826979 seconds time elapsed + + 15.006109000 seconds user + 29.644298000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_p2p-Gnutella30_100.json b/pytorch/output/altra_2_2_p2p-Gnutella30_100.json new file mode 100644 index 0000000..9265473 --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella30_100.json @@ -0,0 +1 @@ +{"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372} diff --git a/pytorch/output/altra_2_2_p2p-Gnutella30_100.output b/pytorch/output/altra_2_2_p2p-Gnutella30_100.output new file mode 100644 index 0000000..7ae8900 --- /dev/null +++ b/pytorch/output/altra_2_2_p2p-Gnutella30_100.output @@ -0,0 +1,153 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394142 queued and waiting for resources +srun: job 3394142 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]), + col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682), + nnz=88328, layout=torch.sparse_csr) +tensor([0.5867, 0.3729, 0.0718, ..., 0.5551, 0.6046, 0.6005]) +Shape: torch.Size([36682, 36682]) +NNZ: 88328 +Density: 6.564359899804003e-05 +Time: 0.3765556812286377 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100': + + 65.91 msec task-clock:u # 0.017 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,247 page-faults:u # 49.267 K/sec + 92,293,071 cycles:u # 1.400 GHz (58.72%) + 76,208,632 instructions:u # 0.83 insn per cycle (75.47%) + branches:u + 336,620 branch-misses:u (89.96%) + 33,256,017 L1-dcache-loads:u # 504.599 M/sec + 479,188 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,686,331 L1-icache-loads:u # 480.782 M/sec + 297,521 L1-icache-load-misses:u # 0.94% of all L1-icache accesses + 55,295,804 dTLB-loads:u # 839.012 M/sec (27.47%) + 103,616 dTLB-load-misses:u # 0.19% of all dTLB cache accesses (20.17%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.803094533 seconds time elapsed + + 16.585763000 seconds user + 62.703127000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]), + col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682), + nnz=88328, layout=torch.sparse_csr) +tensor([0.2027, 0.2128, 0.5093, ..., 0.8069, 0.6413, 0.1136]) +Shape: torch.Size([36682, 36682]) +NNZ: 88328 +Density: 6.564359899804003e-05 +Time: 0.2942969799041748 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100': + + 320,083 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,285,106 BR_RETIRED:u + + 3.763535833 seconds time elapsed + + 16.476022000 seconds user + 55.208213000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]), + col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682), + nnz=88328, layout=torch.sparse_csr) +tensor([0.5930, 0.8044, 0.8115, ..., 0.6366, 0.1026, 0.6914]) +Shape: torch.Size([36682, 36682]) +NNZ: 88328 +Density: 6.564359899804003e-05 +Time: 0.2431955337524414 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100': + + 26,853,940 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,728 ITLB_WALK:u + 13,955 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 37,111,059 L1D_TLB:u + + 3.752433570 seconds time elapsed + + 16.433982000 seconds user + 53.207908000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]), + col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682), + nnz=88328, layout=torch.sparse_csr) +tensor([0.9666, 0.8206, 0.6252, ..., 0.5180, 0.8170, 0.7406]) +Shape: torch.Size([36682, 36682]) +NNZ: 88328 +Density: 6.564359899804003e-05 +Time: 0.15313339233398438 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100': + + 32,554,796 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 298,729 L1I_CACHE_REFILL:u + 473,779 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 34,117,102 L1D_CACHE:u + + 3.595579651 seconds time elapsed + + 15.817851000 seconds user + 44.491315000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]), + col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682), + nnz=88328, layout=torch.sparse_csr) +tensor([0.9800, 0.9021, 0.5677, ..., 0.3869, 0.2468, 0.3286]) +Shape: torch.Size([36682, 36682]) +NNZ: 88328 +Density: 6.564359899804003e-05 +Time: 0.2539215087890625 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100': + + 535,040 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 547,502 LL_CACHE_RD:u + 179,876 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 21,809 L2D_TLB_REFILL:u + 298,620 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,722,959 L2D_CACHE:u + + 3.549060962 seconds time elapsed + + 16.570077000 seconds user + 52.238012000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_ri2010_100.json b/pytorch/output/altra_2_2_ri2010_100.json new file mode 100644 index 0000000..7c241c8 --- /dev/null +++ b/pytorch/output/altra_2_2_ri2010_100.json @@ -0,0 +1 @@ +{"power_before": [30.44, 35.52], "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 0.29622840881347656, "power": [23.84, 29.44, 33.0, 33.04], "power_after": [36.32, 30.0], "task clock (msec)": 60.77, "page faults": 3361, "cycles": 63493475, "instructions": 91578911, "branch mispredictions": 329084, "branches": 20406595, "ITLB accesses": 26859919, "ITLB misses": 6237, "DTLB misses": 16689, "DTLB accesses": 36348977, "L1I cache accesses": 30979764, "L1I cache misses": 292038, "L1D cache misses": 469219, "L1D cache accesses": 32411890, "LL cache misses": 571870, "LL cache accesses": 598306, "L2D TLB accesses": 205488, "L2D TLB misses": 26392, "L2D cache misses": 342141, "L2D cache accesses": 1857697, "instructions per cycle": 1.442335783322617, "branch miss rate": 0.01612635522976763, "ITLB miss rate": 0.00023220472109390948, "DTLB miss rate": 0.0004591325912693499, "L2D TLB miss rate": 0.12843572374055906, "L1I cache miss rate": 0.009426734173959492, "L1D cache miss rate": 0.014476755289494072, "L2D cache miss rate": 0.1841748142996409, "LL cache miss rate": 0.9558152517273769} diff --git a/pytorch/output/altra_2_2_ri2010_100.output b/pytorch/output/altra_2_2_ri2010_100.output new file mode 100644 index 0000000..c92847c --- /dev/null +++ b/pytorch/output/altra_2_2_ri2010_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394145 queued and waiting for resources +srun: job 3394145 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747, + 125750]), + col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]), + values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]), + size=(25181, 25181), nnz=125750, layout=torch.sparse_csr) +tensor([0.1402, 0.0708, 0.4576, ..., 0.4700, 0.5629, 0.9120]) +Shape: torch.Size([25181, 25181]) +NNZ: 125750 +Density: 0.00019831796057928155 +Time: 0.3585643768310547 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100': + + 60.77 msec task-clock:u # 0.016 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,361 page-faults:u # 55.311 K/sec + 63,493,475 cycles:u # 1.045 GHz (49.59%) + 91,578,911 instructions:u # 1.44 insn per cycle (92.22%) + branches:u + 374,941 branch-misses:u + 33,905,978 L1-dcache-loads:u # 557.979 M/sec + 470,553 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 32,247,376 L1-icache-loads:u # 530.684 M/sec + 299,037 L1-icache-load-misses:u # 0.93% of all L1-icache accesses + 27,428,635 dTLB-loads:u # 451.384 M/sec (13.50%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.818532962 seconds time elapsed + + 15.563570000 seconds user + 30.194882000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747, + 125750]), + col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]), + values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]), + size=(25181, 25181), nnz=125750, layout=torch.sparse_csr) +tensor([0.1841, 0.4436, 0.8281, ..., 0.0546, 0.5967, 0.9496]) +Shape: torch.Size([25181, 25181]) +NNZ: 125750 +Density: 0.00019831796057928155 +Time: 0.3050577640533447 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100': + + 329,084 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,406,595 BR_RETIRED:u + + 3.673527837 seconds time elapsed + + 15.520198000 seconds user + 29.068211000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747, + 125750]), + col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]), + values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]), + size=(25181, 25181), nnz=125750, layout=torch.sparse_csr) +tensor([0.1849, 0.5991, 0.5040, ..., 0.4916, 0.4789, 0.8887]) +Shape: torch.Size([25181, 25181]) +NNZ: 125750 +Density: 0.00019831796057928155 +Time: 0.3605458736419678 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100': + + 26,859,919 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,237 ITLB_WALK:u + 16,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,348,977 L1D_TLB:u + + 3.769690988 seconds time elapsed + + 15.173839000 seconds user + 29.963392000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747, + 125750]), + col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]), + values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]), + size=(25181, 25181), nnz=125750, layout=torch.sparse_csr) +tensor([0.0513, 0.4498, 0.6748, ..., 0.2114, 0.6847, 0.2188]) +Shape: torch.Size([25181, 25181]) +NNZ: 125750 +Density: 0.00019831796057928155 +Time: 0.3485410213470459 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100': + + 30,979,764 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 292,038 L1I_CACHE_REFILL:u + 469,219 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 32,411,890 L1D_CACHE:u + + 3.598754329 seconds time elapsed + + 16.139631000 seconds user + 29.287026000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747, + 125750]), + col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]), + values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]), + size=(25181, 25181), nnz=125750, layout=torch.sparse_csr) +tensor([0.7270, 0.7858, 0.3165, ..., 0.7139, 0.8270, 0.9478]) +Shape: torch.Size([25181, 25181]) +NNZ: 125750 +Density: 0.00019831796057928155 +Time: 0.3687746524810791 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100': + + 571,870 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 598,306 LL_CACHE_RD:u + 205,488 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 26,392 L2D_TLB_REFILL:u + 342,141 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,857,697 L2D_CACHE:u + + 3.726794738 seconds time elapsed + + 15.231331000 seconds user + 32.108693000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json new file mode 100644 index 0000000..c428bdf --- /dev/null +++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json @@ -0,0 +1 @@ +{"power_before": [16.52, 16.64], "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 1.3372814655303955, "power": [23.92, 38.6, 46.04, 48.2, 48.2], "power_after": [45.0, 44.08], "task clock (msec)": 59.01, "page faults": 3448, "cycles": 73062796, "instructions": 88329175, "branch mispredictions": 331091, "branches": 20013316, "ITLB accesses": 26330936, "ITLB misses": 5193, "DTLB misses": 16837, "DTLB accesses": 35930477, "L1I cache accesses": 31853890, "L1I cache misses": 306147, "L1D cache misses": 479933, "L1D cache accesses": 33426019, "LL cache misses": 540302, "LL cache accesses": 553181, "L2D TLB accesses": 173206, "L2D TLB misses": 21390, "L2D cache misses": 300032, "L2D cache accesses": 1739931, "instructions per cycle": 1.2089487377406143, "branch miss rate": 0.016543535314187813, "ITLB miss rate": 0.0001972204861991993, "DTLB miss rate": 0.000468599401004334, "L2D TLB miss rate": 0.12349456716280037, "L1I cache miss rate": 0.009610976869701, "L1D cache miss rate": 0.014358066391334247, "L2D cache miss rate": 0.17243902200719455, "LL cache miss rate": 0.9767182893121781} diff --git a/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output new file mode 100644 index 0000000..cc7f1c0 --- /dev/null +++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394151 queued and waiting for resources +srun: job 3394151 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. 
If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669, + 545671]), + col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871), + nnz=545671, layout=torch.sparse_csr) +tensor([0.3831, 0.6714, 0.8380, ..., 0.7892, 0.5274, 0.9035]) +Shape: torch.Size([81871, 81871]) +NNZ: 545671 +Density: 8.140867447881048e-05 +Time: 2.044952392578125 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100': + + 59.01 msec task-clock:u # 0.010 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,448 page-faults:u # 58.432 K/sec + 73,062,796 cycles:u # 1.238 GHz (59.95%) + 88,329,175 instructions:u # 1.21 insn per cycle (93.89%) + branches:u + 365,177 branch-misses:u + 31,850,867 L1-dcache-loads:u # 539.766 M/sec + 473,835 L1-dcache-load-misses:u # 1.49% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 30,385,913 L1-icache-loads:u # 514.940 M/sec + 299,969 L1-icache-load-misses:u # 0.99% of all L1-icache accesses + 24,365,554 dTLB-loads:u # 412.915 M/sec (8.42%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 5.680365622 seconds time elapsed + + 27.656957000 seconds user + 194.823873000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669, + 545671]), + col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871), + nnz=545671, layout=torch.sparse_csr) +tensor([0.6906, 0.4067, 0.7042, ..., 0.8333, 0.7120, 0.3519]) +Shape: torch.Size([81871, 81871]) +NNZ: 545671 +Density: 8.140867447881048e-05 +Time: 1.3788115978240967 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100': + + 331,091 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,013,316 BR_RETIRED:u + + 4.886021169 seconds time elapsed + + 23.105025000 seconds user + 141.491451000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669, + 545671]), + col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871), + nnz=545671, layout=torch.sparse_csr) +tensor([0.8755, 0.6165, 0.4104, ..., 0.6974, 0.9453, 0.9872]) +Shape: torch.Size([81871, 81871]) +NNZ: 545671 +Density: 8.140867447881048e-05 +Time: 2.8570749759674072 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100': + + 26,330,936 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 5,193 ITLB_WALK:u + 16,837 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 35,930,477 L1D_TLB:u + + 6.371573603 seconds time elapsed + + 30.986329000 seconds user + 254.347216000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669, + 545671]), + col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871), + nnz=545671, layout=torch.sparse_csr) +tensor([0.3573, 0.9331, 0.0611, ..., 0.9133, 0.6057, 0.2374]) +Shape: torch.Size([81871, 81871]) +NNZ: 545671 +Density: 8.140867447881048e-05 +Time: 2.311248540878296 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100': + + 31,853,890 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 306,147 L1I_CACHE_REFILL:u + 479,933 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 33,426,019 L1D_CACHE:u + + 5.718741260 seconds time elapsed + + 28.451593000 seconds user + 214.350594000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669, + 545671]), + col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]), + values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871), + nnz=545671, layout=torch.sparse_csr) +tensor([0.6021, 0.5679, 0.4538, ..., 0.9086, 0.9552, 0.5329]) +Shape: torch.Size([81871, 81871]) +NNZ: 545671 +Density: 8.140867447881048e-05 +Time: 1.8193013668060303 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100': + + 540,302 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 553,181 LL_CACHE_RD:u + 173,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 21,390 L2D_TLB_REFILL:u + 300,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,739,931 L2D_CACHE:u + + 5.546861941 seconds time elapsed + + 28.194596000 seconds user + 181.004698000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json new file mode 100644 index 0000000..e3d0cf9 --- /dev/null +++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json @@ -0,0 +1 @@ +{"power_before": [53.64, 46.88], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.2292509078979492, "power": [40.64, 52.44, 54.8, 54.96, 46.8], "power_after": [47.88, 47.08], "task clock (msec)": 61.26, "page faults": 3303, "cycles": 44515786, "instructions": 81513738, "branch mispredictions": 328019, "branches": 19893662, "ITLB accesses": 27248112, "ITLB misses": 5792, "DTLB misses": 16632, "DTLB accesses": 36929042, "L1I cache accesses": 31702830, "L1I cache misses": 295778, "L1D cache misses": 470423, "L1D cache accesses": 33155119, "LL cache misses": 545220, "LL cache accesses": 562139, "L2D TLB accesses": 192206, "L2D TLB misses": 24891, "L2D cache misses": 307033, "L2D cache accesses": 1782260, "instructions per cycle": 1.8311198189334452, "branch miss rate": 0.01648861833482443, "ITLB miss rate": 0.0002125651861677609, "DTLB miss rate": 0.0004503772396803578, "L2D TLB miss rate": 0.12950168048864238, "L1I cache miss rate": 0.009329703373484323, "L1D cache miss rate": 0.014188548079106578, "L2D cache miss rate": 0.17227172241984895, "LL cache miss rate": 0.9699024618466251} diff --git a/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.output b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.output similarity index 67% rename from pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.output rename to pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.output index 7933d54..42e6e96 100644 --- a/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.output +++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.output @@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur srun: # CPUs. Programs, environments, or other software that was built on x86_64 # srun: # nodes may need to be rebuilt to properly execute on these nodes. # srun: ################################################################################ -srun: job 3393718 queued and waiting for resources -srun: job 3393718 has been allocated resources +srun: job 3394147 queued and waiting for resources +srun: job 3394147 has been allocated resources /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. 
If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) ).to_sparse_csr().type(torch.float) tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, @@ -14,37 +14,37 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]), values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144), nnz=549202, layout=torch.sparse_csr) -tensor([0.8320, 0.8961, 0.3119, ..., 0.2600, 0.3720, 0.6950]) +tensor([0.2696, 0.6106, 0.1626, ..., 0.2215, 0.5107, 0.8609]) Shape: torch.Size([82144, 82144]) NNZ: 549202 Density: 8.13917555860553e-05 -Time: 3.012270212173462 seconds +Time: 1.4500706195831299 seconds Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100': - 61.63 msec task-clock:u # 0.008 CPUs utilized + 61.26 msec task-clock:u # 0.012 CPUs utilized 0 context-switches:u # 0.000 /sec 0 cpu-migrations:u # 0.000 /sec - 3,293 page-faults:u # 53.433 K/sec - 41,677,750 cycles:u # 0.676 GHz (43.47%) - 91,767,205 instructions:u # 2.20 insn per cycle (93.66%) + 3,303 page-faults:u # 53.917 K/sec + 44,515,786 cycles:u # 0.727 GHz (40.46%) + 81,513,738 instructions:u # 1.83 insn per cycle (73.51%) branches:u - 369,577 branch-misses:u - 33,184,885 L1-dcache-loads:u # 538.465 M/sec - 489,650 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses + 344,479 branch-misses:u (89.42%) + 34,411,073 L1-dcache-loads:u # 561.710 M/sec + 484,811 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses LLC-loads:u LLC-load-misses:u - 31,518,657 L1-icache-loads:u # 511.428 M/sec - 300,352 L1-icache-load-misses:u # 0.95% of all L1-icache accesses - 21,439,232 dTLB-loads:u # 347.878 M/sec (11.35%) - dTLB-load-misses:u (0.00%) + 32,789,672 L1-icache-loads:u # 535.243 M/sec + 293,487 L1-icache-load-misses:u # 0.90% of all L1-icache accesses + 47,065,740 dTLB-loads:u # 768.279 M/sec (32.81%) + 146,215 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (13.39%) iTLB-loads:u (0.00%) iTLB-load-misses:u (0.00%) - 7.285558270 seconds time elapsed + 4.966101053 seconds time elapsed - 30.820742000 seconds user - 271.093513000 seconds sys + 23.375418000 seconds user + 148.052989000 seconds sys @@ -55,21 +55,21 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]), values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144), nnz=549202, layout=torch.sparse_csr) -tensor([0.2625, 0.3727, 0.7700, ..., 0.9213, 0.0373, 0.4236]) +tensor([0.1999, 0.3932, 0.8035, ..., 0.5079, 0.5903, 0.7606]) Shape: torch.Size([82144, 82144]) NNZ: 549202 Density: 8.13917555860553e-05 -Time: 3.8292958736419678 seconds +Time: 1.9677543640136719 seconds Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100': - 329,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio - 19,813,961 BR_RETIRED:u + 328,019 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,893,662 BR_RETIRED:u - 7.818393438 seconds time elapsed + 5.529871590 seconds time elapsed - 35.952830000 seconds user - 333.700971000 seconds sys + 26.844356000 seconds user + 190.429440000 seconds sys @@ -80,23 +80,23 @@ 
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]), values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144), nnz=549202, layout=torch.sparse_csr) -tensor([0.0340, 0.2650, 0.1324, ..., 0.0868, 0.2162, 0.5618]) +tensor([0.2933, 0.6999, 0.0078, ..., 0.6213, 0.9377, 0.6359]) Shape: torch.Size([82144, 82144]) NNZ: 549202 Density: 8.13917555860553e-05 -Time: 3.464143753051758 seconds +Time: 1.4976201057434082 seconds Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100': - 27,944,146 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio - 6,811 ITLB_WALK:u - 18,962 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio - 37,689,058 L1D_TLB:u + 27,248,112 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 5,792 ITLB_WALK:u + 16,632 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,929,042 L1D_TLB:u - 7.541903779 seconds time elapsed + 4.971341163 seconds time elapsed - 32.666428000 seconds user - 309.938101000 seconds sys + 24.247480000 seconds user + 151.276717000 seconds sys @@ -107,23 +107,23 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]), values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144), nnz=549202, layout=torch.sparse_csr) -tensor([0.6118, 0.9275, 0.9072, ..., 0.7025, 0.2788, 0.7796]) +tensor([0.1310, 0.6695, 0.9479, ..., 0.3141, 0.9327, 0.2117]) Shape: torch.Size([82144, 82144]) NNZ: 549202 Density: 8.13917555860553e-05 -Time: 1.4259674549102783 seconds +Time: 1.0877256393432617 seconds Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100': - 31,746,573 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio - 290,044 L1I_CACHE_REFILL:u - 471,100 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio - 33,271,575 L1D_CACHE:u + 31,702,830 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 295,778 L1I_CACHE_REFILL:u + 470,423 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 33,155,119 L1D_CACHE:u - 5.333100815 seconds time elapsed + 4.675682406 seconds time elapsed - 24.606404000 seconds user - 142.184021000 seconds sys + 23.098007000 seconds user + 119.827712000 seconds sys @@ -134,25 +134,25 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200, col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]), values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144), nnz=549202, layout=torch.sparse_csr) -tensor([0.1819, 0.6831, 0.7926, ..., 0.2272, 0.8215, 0.3765]) +tensor([0.0860, 0.5402, 0.6738, ..., 0.3856, 0.5968, 0.4203]) Shape: torch.Size([82144, 82144]) NNZ: 549202 Density: 8.13917555860553e-05 -Time: 2.8267815113067627 seconds +Time: 1.2302696704864502 seconds Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100': - 550,308 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio - 564,981 LL_CACHE_RD:u - 168,456 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio - 20,450 L2D_TLB_REFILL:u - 306,309 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio - 1,745,776 L2D_CACHE:u + 545,220 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 562,139 LL_CACHE_RD:u + 192,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 24,891 L2D_TLB_REFILL:u + 
307,033 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,782,260 L2D_CACHE:u - 7.032343494 seconds time elapsed + 4.781838296 seconds time elapsed - 31.547129000 seconds user - 251.812633000 seconds sys + 23.716896000 seconds user + 130.971947000 seconds sys diff --git a/pytorch/output/altra_2_2_soc-sign-epinions_100.json b/pytorch/output/altra_2_2_soc-sign-epinions_100.json new file mode 100644 index 0000000..fc759b4 --- /dev/null +++ b/pytorch/output/altra_2_2_soc-sign-epinions_100.json @@ -0,0 +1 @@ +{"power_before": [30.48, 33.04], "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 2.848874092102051, "power": [65.52, 75.88, 71.16, 71.16, 59.72, 47.92, 48.68], "power_after": [68.68, 67.88], "task clock (msec)": 49.87, "page faults": 3300, "cycles": 51935476, "instructions": 83731856, "branch mispredictions": 326464, "branches": 20341367, "ITLB accesses": 27590154, "ITLB misses": 6210, "DTLB misses": 17536, "DTLB accesses": 36763243, "L1I cache accesses": 31663300, "L1I cache misses": 289727, "L1D cache misses": 462864, "L1D cache accesses": 33262254, "LL cache misses": 530272, "LL cache accesses": 551373, "L2D TLB accesses": 196152, "L2D TLB misses": 23542, "L2D cache misses": 301998, "L2D cache accesses": 1732662, "instructions per cycle": 1.6122285275675532, "branch miss rate": 0.01604926551888081, "ITLB miss rate": 0.000225080294948698, "DTLB miss rate": 0.0004769981799483794, "L2D TLB miss rate": 0.12001916880786329, "L1I cache miss rate": 0.00915024649989104, "L1D cache miss rate": 0.013915593332911234, "L2D cache miss rate": 0.17429712200071334, "LL cache miss rate": 0.9617300810884828} diff --git a/pytorch/output/altra_2_2_soc-sign-epinions_100.output b/pytorch/output/altra_2_2_soc-sign-epinions_100.output new file mode 100644 index 0000000..2d01331 --- /dev/null +++ b/pytorch/output/altra_2_2_soc-sign-epinions_100.output @@ -0,0 +1,163 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394154 queued and waiting for resources +srun: job 3394154 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371, + 841372]), + col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826, + 7714]), + values=tensor([-1., -1., 1., ..., 1., 1., 1.]), + size=(131828, 131828), nnz=841372, layout=torch.sparse_csr) +tensor([0.5842, 0.3042, 0.7358, ..., 0.7882, 0.7596, 0.5895]) +Shape: torch.Size([131828, 131828]) +NNZ: 841372 +Density: 4.841419648464106e-05 +Time: 2.4407293796539307 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100': + + 49.87 msec task-clock:u # 0.008 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,300 page-faults:u # 66.174 K/sec + 51,935,476 cycles:u # 1.041 GHz (65.00%) + 83,731,856 instructions:u # 1.61 insn per cycle (84.25%) + branches:u + 375,900 branch-misses:u + 34,169,837 L1-dcache-loads:u # 685.197 M/sec + 474,410 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 32,443,215 L1-icache-loads:u # 650.574 M/sec + 294,146 L1-icache-load-misses:u # 0.91% of all L1-icache accesses + 63,709,518 dTLB-loads:u # 1.278 G/sec (16.44%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 6.058862056 seconds time elapsed + + 29.101578000 seconds user + 224.790489000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371, + 841372]), + col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826, + 7714]), + values=tensor([-1., -1., 1., ..., 1., 1., 1.]), + size=(131828, 131828), nnz=841372, layout=torch.sparse_csr) +tensor([0.9696, 0.8139, 0.4858, ..., 0.2374, 0.1716, 0.9756]) +Shape: torch.Size([131828, 131828]) +NNZ: 841372 +Density: 4.841419648464106e-05 +Time: 2.0945546627044678 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100': + + 326,464 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,341,367 BR_RETIRED:u + + 5.525378890 seconds time elapsed + + 28.841740000 seconds user + 199.678982000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371, + 841372]), + col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826, + 7714]), + values=tensor([-1., -1., 1., ..., 1., 1., 1.]), + size=(131828, 131828), nnz=841372, layout=torch.sparse_csr) +tensor([0.3478, 0.0057, 0.8574, ..., 0.6409, 0.1876, 0.8429]) +Shape: torch.Size([131828, 131828]) +NNZ: 841372 +Density: 4.841419648464106e-05 +Time: 2.8504912853240967 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100': + + 27,590,154 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,210 ITLB_WALK:u + 17,536 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,763,243 L1D_TLB:u + + 6.425887143 seconds time elapsed + + 33.069094000 seconds user + 256.667850000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371, + 841372]), + col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826, + 7714]), + values=tensor([-1., -1., 1., ..., 1., 1., 1.]), + size=(131828, 131828), nnz=841372, layout=torch.sparse_csr) +tensor([0.5381, 0.6651, 0.4689, ..., 0.7251, 0.3759, 0.8516]) +Shape: torch.Size([131828, 131828]) +NNZ: 841372 +Density: 4.841419648464106e-05 +Time: 1.6941111087799072 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100': + + 31,663,300 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 289,727 L1I_CACHE_REFILL:u + 462,864 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 33,262,254 L1D_CACHE:u + + 5.304170809 seconds time elapsed + + 25.992245000 seconds user + 173.752913000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371, + 841372]), + col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826, + 7714]), + values=tensor([-1., -1., 1., ..., 1., 1., 1.]), + size=(131828, 131828), nnz=841372, layout=torch.sparse_csr) +tensor([0.4145, 0.8515, 0.7222, ..., 0.1386, 0.6641, 0.6662]) +Shape: torch.Size([131828, 131828]) +NNZ: 841372 +Density: 4.841419648464106e-05 +Time: 3.0850296020507812 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100': + + 530,272 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 551,373 LL_CACHE_RD:u + 196,152 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 23,542 L2D_TLB_REFILL:u + 301,998 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,732,662 L2D_CACHE:u + + 6.733517838 seconds time elapsed + + 34.030476000 seconds user + 271.397968000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_sx-mathoverflow_100.json b/pytorch/output/altra_2_2_sx-mathoverflow_100.json new file mode 100644 index 0000000..24b7797 --- /dev/null +++ b/pytorch/output/altra_2_2_sx-mathoverflow_100.json @@ -0,0 +1 @@ +{"power_before": [20.44, 20.2], "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 0.556269645690918, "power": [25.24, 32.16, 33.0, 32.52], "power_after": [34.24, 30.28], "task clock (msec)": 62.49, "page faults": 3312, "cycles": 76783170, "instructions": 77095702, "branch mispredictions": 323514, "branches": 19769937, "ITLB accesses": 26809325, "ITLB misses": 6925, "DTLB misses": 19003, "DTLB accesses": 36516965, "L1I cache accesses": 31104231, "L1I cache misses": 285499, "L1D cache misses": 468498, "L1D cache accesses": 32677465, "LL cache misses": 559358, "LL cache accesses": 571935, "L2D TLB accesses": 194840, "L2D TLB misses": 23481, "L2D cache misses": 313487, "L2D cache accesses": 1779730, "instructions per cycle": 1.004070319055595, "branch miss rate": 0.016363936819829016, "ITLB miss rate": 0.00025830564551699827, "DTLB miss rate": 0.0005203882633729282, "L2D TLB miss rate": 0.12051426811742968, "L1I cache miss rate": 0.009178783426601994, "L1D cache miss rate": 0.01433703624194839, "L2D cache miss rate": 0.1761430104566423, "LL cache miss rate": 0.9780097388689274} diff --git a/pytorch/output/altra_2_2_sx-mathoverflow_100.output b/pytorch/output/altra_2_2_sx-mathoverflow_100.output new file mode 100644 index 0000000..454bd20 --- /dev/null +++ b/pytorch/output/altra_2_2_sx-mathoverflow_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394144 queued and waiting for resources +srun: job 3394144 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. 
If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977, + 239978]), + col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]), + values=tensor([151., 17., 6., ..., 1., 1., 1.]), + size=(24818, 24818), nnz=239978, layout=torch.sparse_csr) +tensor([0.7658, 0.2874, 0.7506, ..., 0.3335, 0.5056, 0.9767]) +Shape: torch.Size([24818, 24818]) +NNZ: 239978 +Density: 0.00038961697406616504 +Time: 0.5561239719390869 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100': + + 62.49 msec task-clock:u # 0.015 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,312 page-faults:u # 53.003 K/sec + 76,783,170 cycles:u # 1.229 GHz (62.65%) + 77,095,702 instructions:u # 1.00 insn per cycle (80.20%) + branches:u + 370,891 branch-misses:u (94.99%) + 32,730,448 L1-dcache-loads:u # 523.800 M/sec + 467,718 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 31,548,469 L1-icache-loads:u # 504.885 M/sec + 298,966 L1-icache-load-misses:u # 0.95% of all L1-icache accesses + 61,098,419 dTLB-loads:u # 977.786 M/sec (20.67%) + 64,747 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (10.91%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 4.062782709 seconds time elapsed + + 16.106338000 seconds user + 32.399716000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977, + 239978]), + col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]), + values=tensor([151., 17., 6., ..., 1., 1., 1.]), + size=(24818, 24818), nnz=239978, layout=torch.sparse_csr) +tensor([0.7531, 0.4727, 0.4126, ..., 0.1574, 0.5247, 0.8875]) +Shape: torch.Size([24818, 24818]) +NNZ: 239978 +Density: 0.00038961697406616504 +Time: 0.6003477573394775 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100': + + 323,514 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 19,769,937 BR_RETIRED:u + + 4.061021393 seconds time elapsed + + 16.155442000 seconds user + 31.047278000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977, + 239978]), + col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]), + values=tensor([151., 17., 6., ..., 1., 1., 1.]), + size=(24818, 24818), nnz=239978, layout=torch.sparse_csr) +tensor([0.3067, 0.4335, 0.8814, ..., 0.2370, 0.1210, 0.7695]) +Shape: torch.Size([24818, 24818]) +NNZ: 239978 +Density: 0.00038961697406616504 +Time: 0.5404119491577148 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100': + + 26,809,325 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,925 ITLB_WALK:u + 19,003 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,516,965 L1D_TLB:u + + 4.031175418 seconds time elapsed + + 15.607232000 seconds user + 30.562258000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977, + 239978]), + col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]), + values=tensor([151., 17., 6., ..., 1., 1., 1.]), + size=(24818, 24818), nnz=239978, layout=torch.sparse_csr) +tensor([0.5013, 0.5961, 0.5565, ..., 0.3779, 0.1835, 0.6722]) +Shape: torch.Size([24818, 24818]) +NNZ: 239978 +Density: 0.00038961697406616504 +Time: 0.6185996532440186 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100': + + 31,104,231 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 285,499 L1I_CACHE_REFILL:u + 468,498 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 32,677,465 L1D_CACHE:u + + 4.083129305 seconds time elapsed + + 16.243642000 seconds user + 36.578375000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977, + 239978]), + col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]), + values=tensor([151., 17., 6., ..., 1., 1., 1.]), + size=(24818, 24818), nnz=239978, layout=torch.sparse_csr) +tensor([0.9075, 0.2788, 0.1365, ..., 0.4240, 0.8832, 0.1064]) +Shape: torch.Size([24818, 24818]) +NNZ: 239978 +Density: 0.00038961697406616504 +Time: 0.54673171043396 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100': + + 559,358 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 571,935 LL_CACHE_RD:u + 194,840 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 23,481 L2D_TLB_REFILL:u + 313,487 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,779,730 L2D_CACHE:u + + 3.961843929 seconds time elapsed + + 15.425912000 seconds user + 28.864046000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_ut2010_100.json b/pytorch/output/altra_2_2_ut2010_100.json new file mode 100644 index 0000000..aba2469 --- /dev/null +++ b/pytorch/output/altra_2_2_ut2010_100.json @@ -0,0 +1 @@ +{"power_before": [34.6, 37.16], "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 1.0817186832427979, "power": [34.32, 50.84, 52.12, 52.4, 52.76], "power_after": [49.0, 45.08], "task clock (msec)": 60.55, "page faults": 3490, "cycles": 49977496, "instructions": 78622993, "branch mispredictions": 327078, "branches": 20135808, "ITLB accesses": 27608093, "ITLB misses": 6616, "DTLB misses": 17185, "DTLB accesses": 36866957, "L1I cache accesses": 32639204, "L1I cache misses": 309643, "L1D cache misses": 478856, "L1D cache accesses": 34280618, "LL cache misses": 555275, "LL cache accesses": 578455, "L2D TLB accesses": 188723, "L2D TLB misses": 24635, "L2D cache misses": 319663, "L2D cache accesses": 1799940, "instructions per cycle": 1.573167911413569, "branch miss rate": 0.016243599462211798, "ITLB miss rate": 0.00023963987661154286, "DTLB miss rate": 0.00046613556958335347, "L2D TLB miss rate": 0.13053522888042263, "L1I cache miss rate": 0.009486842877663316, "L1D cache miss rate": 0.013968709665619214, "L2D cache miss rate": 0.17759647543807017, "LL cache miss rate": 0.9599277385449171} diff --git a/pytorch/output/altra_2_2_ut2010_100.output b/pytorch/output/altra_2_2_ut2010_100.output new file mode 100644 index 0000000..687c6a5 --- /dev/null +++ b/pytorch/output/altra_2_2_ut2010_100.output @@ -0,0 +1,168 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394146 queued and waiting for resources +srun: job 3394146 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061, + 572066]), + col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509, + 114602]), + values=tensor([160642., 31335., 282373., ..., 88393., 99485., + 18651.]), size=(115406, 115406), nnz=572066, + layout=torch.sparse_csr) +tensor([0.4608, 0.1516, 0.8492, ..., 0.8920, 0.4275, 0.8070]) +Shape: torch.Size([115406, 115406]) +NNZ: 572066 +Density: 4.295259032005559e-05 +Time: 1.3751039505004883 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100': + + 60.55 msec task-clock:u # 0.012 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,490 page-faults:u # 57.638 K/sec + 49,977,496 cycles:u # 0.825 GHz (40.93%) + 78,622,993 instructions:u # 1.57 insn per cycle (85.37%) + branches:u + 358,029 branch-misses:u + 31,478,500 L1-dcache-loads:u # 519.877 M/sec + 479,449 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 29,991,824 L1-icache-loads:u # 495.324 M/sec + 294,864 L1-icache-load-misses:u # 0.98% of all L1-icache accesses + 35,154,647 dTLB-loads:u # 580.589 M/sec (23.19%) + dTLB-load-misses:u (0.00%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 4.986156121 seconds time elapsed + + 23.724703000 seconds user + 145.034521000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061, + 572066]), + col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509, + 114602]), + values=tensor([160642., 31335., 282373., ..., 88393., 99485., + 18651.]), size=(115406, 115406), nnz=572066, + layout=torch.sparse_csr) +tensor([0.4697, 0.7121, 0.5987, ..., 0.2619, 0.7308, 0.3129]) +Shape: torch.Size([115406, 115406]) +NNZ: 572066 +Density: 4.295259032005559e-05 +Time: 1.6881086826324463 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100': + + 327,078 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,135,808 BR_RETIRED:u + + 5.374156677 seconds time elapsed + + 25.609168000 seconds user + 167.278028000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061, + 572066]), + col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509, + 114602]), + values=tensor([160642., 31335., 282373., ..., 88393., 99485., + 18651.]), size=(115406, 115406), nnz=572066, + layout=torch.sparse_csr) +tensor([0.9215, 0.6706, 0.8015, ..., 0.8507, 0.8546, 0.4441]) +Shape: torch.Size([115406, 115406]) +NNZ: 572066 +Density: 4.295259032005559e-05 +Time: 1.2785694599151611 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100': + + 27,608,093 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,616 ITLB_WALK:u + 17,185 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 36,866,957 L1D_TLB:u + + 4.861513311 seconds time elapsed + + 23.339077000 seconds user + 141.584760000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061, + 572066]), + col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509, + 114602]), + values=tensor([160642., 31335., 282373., ..., 88393., 99485., + 18651.]), size=(115406, 115406), nnz=572066, + layout=torch.sparse_csr) +tensor([0.8973, 0.5228, 0.4492, ..., 0.7677, 0.7722, 0.1700]) +Shape: torch.Size([115406, 115406]) +NNZ: 572066 +Density: 4.295259032005559e-05 +Time: 1.1654376983642578 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100': + + 32,639,204 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 309,643 L1I_CACHE_REFILL:u + 478,856 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 34,280,618 L1D_CACHE:u + + 4.677973310 seconds time elapsed + + 22.972655000 seconds user + 125.062401000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061, + 572066]), + col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509, + 114602]), + values=tensor([160642., 31335., 282373., ..., 88393., 99485., + 18651.]), size=(115406, 115406), nnz=572066, + layout=torch.sparse_csr) +tensor([0.4542, 0.7095, 0.5701, ..., 0.2172, 0.8829, 0.7757]) +Shape: torch.Size([115406, 115406]) +NNZ: 572066 +Density: 4.295259032005559e-05 +Time: 1.1153452396392822 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100': + + 555,275 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 578,455 LL_CACHE_RD:u + 188,723 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 24,635 L2D_TLB_REFILL:u + 319,663 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,799,940 L2D_CACHE:u + + 4.655024760 seconds time elapsed + + 23.104641000 seconds user + 122.294597000 seconds sys + + + diff --git a/pytorch/output/altra_2_2_vt2010_100.json b/pytorch/output/altra_2_2_vt2010_100.json new file mode 100644 index 0000000..e2d7ccf --- /dev/null +++ b/pytorch/output/altra_2_2_vt2010_100.json @@ -0,0 +1 @@ +{"power_before": [34.04, 43.96], "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 0.4164857864379883, "power": [23.72, 23.72, 29.88, 33.32], "power_after": [33.36, 32.52], "task clock (msec)": 61.63, "page faults": 3304, "cycles": 64734203, "instructions": 53597991, "branch mispredictions": 330777, "branches": 20357034, "ITLB accesses": 27381387, "ITLB misses": 6248, "DTLB misses": 17636, "DTLB accesses": 37436110, "L1I cache accesses": 32505993, "L1I cache misses": 303849, "L1D cache misses": 467426, "L1D cache accesses": 34241110, "LL cache misses": 550075, "LL cache accesses": 562829, "L2D TLB accesses": 199285, "L2D TLB misses": 24424, "L2D cache misses": 310155, "L2D cache accesses": 1783824, "instructions per cycle": 0.8279701999266138, "branch miss rate": 0.016248781625063848, "ITLB miss rate": 0.00022818420410916364, "DTLB miss rate": 0.00047109595521543235, "L2D TLB miss rate": 0.12255814536969667, "L1I cache miss rate": 0.009347476325365603, "L1D cache miss rate": 0.01365101773861887, "L2D cache miss rate": 0.17387085272986572, "LL cache miss rate": 0.9773394761108614} diff --git a/pytorch/output/altra_2_2_vt2010_100.output b/pytorch/output/altra_2_2_vt2010_100.output new file mode 100644 index 0000000..e8c12af --- /dev/null +++ b/pytorch/output/altra_2_2_vt2010_100.output @@ -0,0 +1,158 @@ +srun: Job time limit was unset; set to partition default of 60 minutes +srun: ################################################################################ +srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. # +srun: # All submission nodes and all other compute nodes have x86_64 architecture # +srun: # CPUs. Programs, environments, or other software that was built on x86_64 # +srun: # nodes may need to be rebuilt to properly execute on these nodes. # +srun: ################################################################################ +srun: job 3394143 queued and waiting for resources +srun: job 3394143 has been allocated resources +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. 
(Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592, + 155598]), + col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]), + values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]), + size=(32580, 32580), nnz=155598, layout=torch.sparse_csr) +tensor([0.9170, 0.7306, 0.1175, ..., 0.0616, 0.0147, 0.6403]) +Shape: torch.Size([32580, 32580]) +NNZ: 155598 +Density: 0.00014658915806621921 +Time: 0.4440653324127197 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100': + + 61.63 msec task-clock:u # 0.016 CPUs utilized + 0 context-switches:u # 0.000 /sec + 0 cpu-migrations:u # 0.000 /sec + 3,304 page-faults:u # 53.611 K/sec + 64,734,203 cycles:u # 1.050 GHz (50.46%) + 53,597,991 instructions:u # 0.83 insn per cycle (70.10%) + branches:u + 347,389 branch-misses:u (91.95%) + 31,363,842 L1-dcache-loads:u # 508.915 M/sec + 482,780 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses + LLC-loads:u + LLC-load-misses:u + 30,027,001 L1-icache-loads:u # 487.223 M/sec + 288,023 L1-icache-load-misses:u # 0.96% of all L1-icache accesses + 44,333,825 dTLB-loads:u # 719.368 M/sec (48.58%) + 74,525 dTLB-load-misses:u # 0.17% of all dTLB cache accesses (16.71%) + iTLB-loads:u (0.00%) + iTLB-load-misses:u (0.00%) + + 3.811654040 seconds time elapsed + + 15.616953000 seconds user + 30.906234000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592, + 155598]), + col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]), + values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]), + size=(32580, 32580), nnz=155598, layout=torch.sparse_csr) +tensor([0.5548, 0.3514, 0.6283, ..., 0.5672, 0.1575, 0.4493]) +Shape: torch.Size([32580, 32580]) +NNZ: 155598 +Density: 0.00014658915806621921 +Time: 0.44233155250549316 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100': + + 330,777 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio + 20,357,034 BR_RETIRED:u + + 3.835342404 seconds time elapsed + + 15.497637000 seconds user + 28.676763000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592, + 155598]), + col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]), + values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]), + size=(32580, 32580), nnz=155598, layout=torch.sparse_csr) +tensor([0.0953, 0.5790, 0.0112, ..., 0.9540, 0.3173, 0.4731]) +Shape: torch.Size([32580, 32580]) +NNZ: 155598 +Density: 0.00014658915806621921 +Time: 0.43302106857299805 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100': + + 27,381,387 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio + 6,248 ITLB_WALK:u + 17,636 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio + 37,436,110 L1D_TLB:u + + 3.828586094 seconds time elapsed + + 15.518057000 seconds user + 31.389361000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) + ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592, + 155598]), + col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]), + values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]), + size=(32580, 32580), nnz=155598, layout=torch.sparse_csr) +tensor([0.5456, 0.8708, 0.2037, ..., 0.8669, 0.9122, 0.2046]) +Shape: torch.Size([32580, 32580]) +NNZ: 155598 +Density: 0.00014658915806621921 +Time: 0.4426534175872803 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100': + + 32,505,993 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio + 303,849 L1I_CACHE_REFILL:u + 467,426 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio + 34,241,110 L1D_CACHE:u + + 3.811299200 seconds time elapsed + + 15.932195000 seconds user + 30.887870000 seconds sys + + + +/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.) 
+ ).to_sparse_csr().type(torch.float) +tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592, + 155598]), + col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]), + values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]), + size=(32580, 32580), nnz=155598, layout=torch.sparse_csr) +tensor([0.5024, 0.2304, 0.7925, ..., 0.1397, 0.5558, 0.6450]) +Shape: torch.Size([32580, 32580]) +NNZ: 155598 +Density: 0.00014658915806621921 +Time: 0.3671383857727051 seconds + + Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100': + + 550,075 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio + 562,829 LL_CACHE_RD:u + 199,285 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio + 24,424 L2D_TLB_REFILL:u + 310,155 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio + 1,783,824 L2D_CACHE:u + + 3.824434783 seconds time elapsed + + 15.754438000 seconds user + 28.226523000 seconds sys + + +
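
Note (editor, not part of the patch): the derived fields in the *.json files above ("instructions per cycle", the various miss rates) are simple ratios of the raw counters recorded in the same files. The snippet below is a minimal, hypothetical sketch of that post-processing; the actual script that produced the JSON is not included in this commit, and the helper name and dictionary layout are illustrative only. The example counters are copied from altra_2_2_soc-sign-epinions_100.json above, and the printed values match the ratios stored there.

def derived_metrics(c: dict) -> dict:
    """Reproduce the ratio fields of the JSON output from raw counter values."""
    ratio = lambda miss, total: miss / total if total else float("nan")
    return {
        "instructions per cycle": c["instructions"] / c["cycles"],
        "branch miss rate": ratio(c["branch mispredictions"], c["branches"]),
        "ITLB miss rate": ratio(c["ITLB misses"], c["ITLB accesses"]),
        "DTLB miss rate": ratio(c["DTLB misses"], c["DTLB accesses"]),
        "L2D TLB miss rate": ratio(c["L2D TLB misses"], c["L2D TLB accesses"]),
        "L1I cache miss rate": ratio(c["L1I cache misses"], c["L1I cache accesses"]),
        "L1D cache miss rate": ratio(c["L1D cache misses"], c["L1D cache accesses"]),
        "L2D cache miss rate": ratio(c["L2D cache misses"], c["L2D cache accesses"]),
        "LL cache miss rate": ratio(c["LL cache misses"], c["LL cache accesses"]),
    }

# Counters taken from altra_2_2_soc-sign-epinions_100.json above.
counters = {
    "cycles": 51935476, "instructions": 83731856,
    "branch mispredictions": 326464, "branches": 20341367,
    "ITLB misses": 6210, "ITLB accesses": 27590154,
    "DTLB misses": 17536, "DTLB accesses": 36763243,
    "L2D TLB misses": 23542, "L2D TLB accesses": 196152,
    "L1I cache misses": 289727, "L1I cache accesses": 31663300,
    "L1D cache misses": 462864, "L1D cache accesses": 33262254,
    "L2D cache misses": 301998, "L2D cache accesses": 1732662,
    "LL cache misses": 530272, "LL cache accesses": 551373,
}

print(derived_metrics(counters))
# instructions per cycle ~= 1.612, LL cache miss rate ~= 0.962, etc.,
# matching the values recorded in the JSON file.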