Got output!

cephi 2024-12-03 00:20:09 -05:00
parent c9ed7980b8
commit 01b1b0fc0c
33 changed files with 2445 additions and 55 deletions


@@ -1 +0,0 @@
{"power_before": [20.2, 20.32], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.6952476501464844, "power": [44.68, 57.12, 64.2, 67.04, 55.68, 42.76], "power_after": [53.56, 52.52], "task clock (msec)": 61.63, "page faults": 3293, "cycles": 41677750, "instructions": 91767205, "branch mispredictions": 329386, "branches": 19813961, "ITLB accesses": 27944146, "ITLB misses": 6811, "DTLB misses": 18962, "DTLB accesses": 37689058, "L1I cache accesses": 31746573, "L1I cache misses": 290044, "L1D cache misses": 471100, "L1D cache accesses": 33271575, "LL cache misses": 550308, "LL cache accesses": 564981, "L2D TLB accesses": 168456, "L2D TLB misses": 20450, "L2D cache misses": 306309, "L2D cache accesses": 1745776, "instructions per cycle": 2.201827233955768, "branch miss rate": 0.01662393501228755, "ITLB miss rate": 0.00024373620149279208, "DTLB miss rate": 0.0005031168462740565, "L2D TLB miss rate": 0.12139668518782352, "L1I cache miss rate": 0.009136230231842662, "L1D cache miss rate": 0.014159233519904002, "L2D cache miss rate": 0.17545721787904062, "LL cache miss rate": 0.9740292151417481}


@@ -0,0 +1 @@
{"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 0.9615458509371094}


@@ -0,0 +1,153 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394148 queued and waiting for resources
srun: job 3394148 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.3190, 0.2829, 0.6210, ..., 0.9278, 0.7514, 0.5737])
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.22389841079711914 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
42.01 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,263 page-faults:u # 77.672 K/sec
47,084,933 cycles:u # 1.121 GHz (65.90%)
77,895,119 instructions:u # 1.65 insn per cycle (85.49%)
<not supported> branches:u
352,740 branch-misses:u
30,958,922 L1-dcache-loads:u # 736.946 M/sec
442,351 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,506,648 L1-icache-loads:u # 702.376 M/sec
272,063 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
51,646,382 dTLB-loads:u # 1.229 G/sec (15.87%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.513156571 seconds time elapsed
15.150380000 seconds user
32.922923000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.0741, 0.5476, 0.1060, ..., 0.8459, 0.8270, 0.8313])
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.20610284805297852 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
330,923 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,740,519 BR_RETIRED:u
3.639725976 seconds time elapsed
15.493122000 seconds user
27.617441000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.9699, 0.9368, 0.7284, ..., 0.7182, 0.5308, 0.9833])
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.15960955619812012 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
27,761,239 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,471 ITLB_WALK:u
17,268 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,993,265 L1D_TLB:u
3.455602215 seconds time elapsed
15.015027000 seconds user
27.930709000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.5851, 0.3425, 0.8120, ..., 0.0829, 0.5823, 0.2256])
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.15697884559631348 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
31,834,980 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,333 L1I_CACHE_REFILL:u
466,901 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,528,976 L1D_CACHE:u
3.452279902 seconds time elapsed
14.635240000 seconds user
28.262858000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.0772, 0.9112, 0.0293, ..., 0.4016, 0.4357, 0.5368])
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.20962285995483398 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
525,505 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
546,521 LL_CACHE_RD:u
184,884 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,933 L2D_TLB_REFILL:u
292,367 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,706,226 L2D_CACHE:u
3.566096255 seconds time elapsed
15.763579000 seconds user
28.620423000 seconds sys
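
Each log in this commit is the same spmv.py run under a different perf event set; the script itself is not part of the diff. A minimal sketch consistent with the printed output (the CSR conversion that the UserWarning points at, followed by the Shape/NNZ/Density/Time lines), assuming scipy's Matrix Market reader and that the second command-line argument is the iteration count:

# Hypothetical reconstruction of the spmv.py driver; only the printed format is
# taken from the logs above, everything else is an assumption.
import sys
import time

import numpy as np
import torch
from scipy.io import mmread

path, iterations = sys.argv[1], int(sys.argv[2])   # e.g. matrices/Oregon-2.mtx 100

coo = mmread(path).tocoo()
indices = torch.from_numpy(np.vstack([coo.row, coo.col]).astype(np.int64))
A = torch.sparse_coo_tensor(
    indices, torch.from_numpy(coo.data), coo.shape
).coalesce().to_sparse_csr().type(torch.float)     # triggers the beta-state warning
x = torch.rand(A.shape[1])

print(A)
print(x)
print(f"Shape: {A.shape}")
print(f"NNZ: {coo.nnz}")
print(f"Density: {coo.nnz / (coo.shape[0] * coo.shape[1])}")

start = time.time()
for _ in range(iterations):
    y = A @ x                                      # sparse CSR matrix-vector product
print(f"Time: {time.time() - start} seconds")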


@@ -0,0 +1 @@
{"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491}


@@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394150 queued and waiting for resources
srun: job 3394150 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
106762]),
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.7672, 0.5818, 0.6775, ..., 0.1052, 0.2539, 0.4347])
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.28373050689697266 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
60.78 msec task-clock:u # 0.017 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,300 page-faults:u # 54.293 K/sec
66,733,059 cycles:u # 1.098 GHz (58.34%)
87,889,334 instructions:u # 1.32 insn per cycle (93.45%)
<not supported> branches:u
369,909 branch-misses:u
31,872,708 L1-dcache-loads:u # 524.386 M/sec
465,719 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,443,353 L1-icache-loads:u # 500.870 M/sec
292,371 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
34,702,735 dTLB-loads:u # 570.947 M/sec (6.96%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.683429807 seconds time elapsed
15.161162000 seconds user
31.335288000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
106762]),
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.2708, 0.2455, 0.7615, ..., 0.1172, 0.4072, 0.8970])
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.32511067390441895 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
326,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,832,700 BR_RETIRED:u
3.755497210 seconds time elapsed
14.681699000 seconds user
29.413955000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
106762]),
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.9417, 0.0965, 0.8551, ..., 0.6665, 0.0164, 0.5102])
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.33124780654907227 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
27,233,629 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,868 ITLB_WALK:u
16,893 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,409,508 L1D_TLB:u
3.751203540 seconds time elapsed
14.849342000 seconds user
27.706396000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
106762]),
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.9215, 0.4139, 0.1789, ..., 0.0245, 0.0029, 0.2129])
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.3386805057525635 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
30,924,532 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
288,199 L1I_CACHE_REFILL:u
462,816 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,428,375 L1D_CACHE:u
3.628443937 seconds time elapsed
15.430937000 seconds user
30.878583000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
106762]),
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.4983, 0.0268, 0.1695, ..., 0.6987, 0.7224, 0.8577])
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.3289623260498047 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
551,997 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
568,528 LL_CACHE_RD:u
193,991 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,353 L2D_TLB_REFILL:u
312,207 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,821,196 L2D_CACHE:u
3.698790384 seconds time elapsed
15.745189000 seconds user
31.063512000 seconds sys


@@ -0,0 +1 @@
{"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127}


@@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394149 queued and waiting for resources
srun: job 3394149 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
766396]),
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
116834]),
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.4749, 0.3788, 0.8812, ..., 0.8281, 0.8889, 0.4945])
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.2480316162109375 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
50.43 msec task-clock:u # 0.009 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,285 page-faults:u # 65.135 K/sec
54,118,679 cycles:u # 1.073 GHz (60.92%)
77,692,421 instructions:u # 1.44 insn per cycle (82.73%)
<not supported> branches:u
367,999 branch-misses:u
32,182,371 L1-dcache-loads:u # 638.112 M/sec
491,960 L1-dcache-load-misses:u # 1.53% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,682,258 L1-icache-loads:u # 608.367 M/sec
300,874 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
55,244,523 dTLB-loads:u # 1.095 G/sec (19.09%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
5.813837947 seconds time elapsed
28.815118000 seconds user
213.749674000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
766396]),
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
116834]),
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.9715, 0.3920, 0.0297, ..., 0.1819, 0.5744, 0.8105])
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.2333595752716064 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
325,039 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,383,216 BR_RETIRED:u
5.973132269 seconds time elapsed
29.719778000 seconds user
213.706315000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
766396]),
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
116834]),
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.3371, 0.4985, 0.9905, ..., 0.6075, 0.1568, 0.3782])
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 1.9790923595428467 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
26,060,519 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,749 ITLB_WALK:u
16,865 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
34,819,729 L1D_TLB:u
5.575020445 seconds time elapsed
26.769391000 seconds user
188.138935000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
766396]),
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
116834]),
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.6806, 0.8858, 0.7035, ..., 0.6007, 0.0880, 0.4550])
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 1.5306556224822998 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
30,777,115 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,980 L1I_CACHE_REFILL:u
461,522 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,216,597 L1D_CACHE:u
4.961298684 seconds time elapsed
23.946357000 seconds user
156.598674000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
766396]),
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
116834]),
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.3029, 0.1908, 0.9816, ..., 0.0418, 0.8182, 0.5474])
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.28926944732666 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
567,700 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
588,689 LL_CACHE_RD:u
189,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,360 L2D_TLB_REFILL:u
328,306 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,908,607 L2D_CACHE:u
5.710829283 seconds time elapsed
28.671301000 seconds user
213.960421000 seconds sys


@@ -0,0 +1 @@
{"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218}


@@ -0,0 +1,163 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394139 queued and waiting for resources
srun: job 3394139 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.4207, 0.3943, 0.6543, ..., 0.2191, 0.5415, 0.1575])
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.36042284965515137 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
59.88 msec task-clock:u # 0.016 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,313 page-faults:u # 55.328 K/sec
58,169,777 cycles:u # 0.971 GHz (61.49%)
57,993,431 instructions:u # 1.00 insn per cycle (81.67%)
<not supported> branches:u
341,266 branch-misses:u
31,858,781 L1-dcache-loads:u # 532.049 M/sec
467,486 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,461,310 L1-icache-loads:u # 508.711 M/sec
294,156 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
43,828,130 dTLB-loads:u # 731.940 M/sec (40.26%)
47,836 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (25.52%)
0 iTLB-loads:u # 0.000 /sec (2.73%)
<not counted> iTLB-load-misses:u (0.00%)
3.824054028 seconds time elapsed
15.099361000 seconds user
28.830417000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.0456, 0.2095, 0.0276, ..., 0.4209, 0.6824, 0.5475])
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.3598823547363281 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
330,494 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,578,427 BR_RETIRED:u
3.781234836 seconds time elapsed
14.965545000 seconds user
29.444131000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.9882, 0.5477, 0.6307, ..., 0.1179, 0.6903, 0.1235])
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.29088521003723145 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
27,982,097 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,614 ITLB_WALK:u
17,270 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,728,899 L1D_TLB:u
3.576632300 seconds time elapsed
14.864601000 seconds user
29.274547000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.3952, 0.0475, 0.1125, ..., 0.3481, 0.1290, 0.3495])
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.30365920066833496 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
29,754,926 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
278,786 L1I_CACHE_REFILL:u
454,742 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,173,246 L1D_CACHE:u
3.730995381 seconds time elapsed
15.213930000 seconds user
30.995070000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.7266, 0.7537, 0.9729, ..., 0.3349, 0.3523, 0.6532])
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.2798902988433838 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
543,243 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
560,716 LL_CACHE_RD:u
162,281 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
19,847 L2D_TLB_REFILL:u
300,577 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,696,278 L2D_CACHE:u
3.819959836 seconds time elapsed
15.346035000 seconds user
29.199873000 seconds sys


@@ -0,0 +1 @@
{"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719}


@@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394152 queued and waiting for resources
srun: job 3394152 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
367662]),
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.3626, 0.7532, 0.0782, ..., 0.6679, 0.4308, 0.6586])
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.3745801448822021 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
60.43 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,319 page-faults:u # 54.926 K/sec
66,114,448 cycles:u # 1.094 GHz (58.10%)
90,786,829 instructions:u # 1.37 insn per cycle (92.25%)
<not supported> branches:u
372,381 branch-misses:u
32,997,410 L1-dcache-loads:u # 546.070 M/sec
470,216 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,485,339 L1-icache-loads:u # 521.047 M/sec
294,395 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
31,376,646 dTLB-loads:u # 519.248 M/sec (10.03%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.904488673 seconds time elapsed
22.874521000 seconds user
139.276239000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
367662]),
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.2040, 0.8252, 0.0215, ..., 0.2921, 0.9143, 0.8728])
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.3087654113769531 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
341,625 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,129,354 BR_RETIRED:u
4.644873434 seconds time elapsed
22.729927000 seconds user
132.278582000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
367662]),
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.6154, 0.6641, 0.3794, ..., 0.9736, 0.0619, 0.4790])
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.2701547145843506 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
27,441,303 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,807 ITLB_WALK:u
20,551 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,867,114 L1D_TLB:u
4.861510767 seconds time elapsed
22.111354000 seconds user
132.431608000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
367662]),
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.4201, 0.4134, 0.8169, ..., 0.6631, 0.0087, 0.8439])
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.1176586151123047 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
31,744,243 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
271,027 L1I_CACHE_REFILL:u
464,135 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,441,141 L1D_CACHE:u
4.693803969 seconds time elapsed
21.724904000 seconds user
119.873018000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
367662]),
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.1285, 0.3989, 0.3903, ..., 0.7892, 0.2737, 0.2659])
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.196892261505127 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
539,935 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
552,519 LL_CACHE_RD:u
188,291 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,177 L2D_TLB_REFILL:u
301,281 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,737,575 L2D_CACHE:u
4.741030347 seconds time elapsed
23.793930000 seconds user
125.634838000 seconds sys


@@ -0,0 +1 @@
{"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197}


@@ -0,0 +1,153 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394153 queued and waiting for resources
srun: job 3394153 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.6982, 0.7263, 0.0064, ..., 0.9256, 0.7249, 0.5065])
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.18009519577026367 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
67.56 msec task-clock:u # 0.019 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,829 page-faults:u # 56.674 K/sec
47,862,000 cycles:u # 0.708 GHz (59.24%)
84,392,375 instructions:u # 1.76 insn per cycle (87.61%)
<not supported> branches:u
368,432 branch-misses:u
32,507,448 L1-dcache-loads:u # 481.147 M/sec
481,389 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,030,656 L1-icache-loads:u # 459.289 M/sec
308,582 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
34,988,046 dTLB-loads:u # 517.863 M/sec (20.00%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.538329547 seconds time elapsed
14.667604000 seconds user
29.534487000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.4946, 0.3509, 0.5239, ..., 0.4520, 0.4206, 0.8181])
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.18875432014465332 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
331,622 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,800,140 BR_RETIRED:u
3.556031790 seconds time elapsed
14.799719000 seconds user
27.876987000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.2184, 0.4999, 0.9567, ..., 0.8794, 0.8213, 0.8713])
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.1066896915435791 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
25,905,045 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,746 ITLB_WALK:u
17,547 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,220,079 L1D_TLB:u
3.505367779 seconds time elapsed
14.557493000 seconds user
29.642958000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.2180, 0.0881, 0.5532, ..., 0.4961, 0.0093, 0.4929])
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.12433028221130371 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
30,359,576 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
283,204 L1I_CACHE_REFILL:u
465,520 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,843,274 L1D_CACHE:u
3.565310130 seconds time elapsed
14.913239000 seconds user
28.125605000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.6394, 0.6808, 0.7957, ..., 0.1529, 0.0561, 0.7834])
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.13401126861572266 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
560,542 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
575,610 LL_CACHE_RD:u
173,643 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,499 L2D_TLB_REFILL:u
313,335 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,741,621 L2D_CACHE:u
3.503362704 seconds time elapsed
15.287949000 seconds user
28.752303000 seconds sys


@@ -0,0 +1 @@
{"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281}


@@ -0,0 +1,153 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394141 queued and waiting for resources
srun: job 3394141 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.6616, 0.1149, 0.0110, ..., 0.2481, 0.7877, 0.5589])
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.16974925994873047 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
61.92 msec task-clock:u # 0.017 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,281 page-faults:u # 52.988 K/sec
66,250,810 cycles:u # 1.070 GHz (62.94%)
75,178,179 instructions:u # 1.13 insn per cycle (83.47%)
<not supported> branches:u
367,749 branch-misses:u
33,064,095 L1-dcache-loads:u # 533.986 M/sec
465,542 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,552,264 L1-icache-loads:u # 509.570 M/sec
296,060 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
73,155,896 dTLB-loads:u # 1.181 G/sec (17.31%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.675971385 seconds time elapsed
14.857293000 seconds user
29.791187000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.1683, 0.8999, 0.0578, ..., 0.5893, 0.0628, 0.8262])
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.2227163314819336 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
332,366 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,076,182 BR_RETIRED:u
3.532329673 seconds time elapsed
14.883993000 seconds user
28.516661000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.8389, 0.5614, 0.9033, ..., 0.2231, 0.0349, 0.5167])
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.17095375061035156 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
27,005,133 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,791 ITLB_WALK:u
13,403 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,457,054 L1D_TLB:u
3.579041343 seconds time elapsed
14.885159000 seconds user
29.562650000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.8849, 0.5982, 0.0578, ..., 0.9975, 0.2204, 0.0718])
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.18003463745117188 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
32,367,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
287,524 L1I_CACHE_REFILL:u
467,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,022,862 L1D_CACHE:u
3.405321132 seconds time elapsed
15.291636000 seconds user
28.005015000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2790, 0.1291, 0.6053, ..., 0.1651, 0.4973, 0.6821])
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.22036528587341309 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
535,707 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
556,316 LL_CACHE_RD:u
150,149 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
18,418 L2D_TLB_REFILL:u
297,042 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,687,364 L2D_CACHE:u
3.505209576 seconds time elapsed
15.297738000 seconds user
29.848441000 seconds sys
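The five perf-stat runs above all drive the same benchmark: build a CSR tensor from matrices/p2p-Gnutella24.mtx, then time sparse matrix-vector products against a random dense vector. A minimal sketch of that workflow, assuming spmv.py loads a Matrix Market file with SciPy and that the printed Time covers the multiply loop; the loader, variable names, and loop placement here are assumptions, not the repository's actual script:

# Hedged sketch of the SpMV benchmark invoked as: python spmv.py <matrix.mtx> <iters>
import sys
import time

import numpy as np
import torch
from scipy.io import mmread  # assumes the .mtx inputs are Matrix Market files

path, iters = sys.argv[1], int(sys.argv[2])          # e.g. matrices/p2p-Gnutella24.mtx 100
coo = mmread(path).tocoo()
indices = torch.from_numpy(np.vstack([coo.row, coo.col])).long()
a = torch.sparse_coo_tensor(indices, torch.from_numpy(coo.data), coo.shape) \
        .to_sparse_csr().type(torch.float)           # same conversion the warning above points at
x = torch.rand(a.shape[1])                           # random dense vector, as printed in each run

start = time.time()
for _ in range(iters):
    y = a @ x                                        # CSR sparse matrix-vector product
elapsed = time.time() - start

print(f"Shape: {a.shape}")
print(f"NNZ: {a.values().shape[0]}")
print(f"Density: {a.values().shape[0] / (a.shape[0] * a.shape[1])}")
print(f"Time: {elapsed} seconds")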

View File

@ -0,0 +1 @@
{"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573}

View File

@ -0,0 +1,153 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394140 queued and waiting for resources
srun: job 3394140 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.8199, 0.9849, 0.4642, ..., 0.7594, 0.3568, 0.4020])
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.19272208213806152 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
64.71 msec task-clock:u # 0.018 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,319 page-faults:u # 51.288 K/sec
57,611,295 cycles:u # 0.890 GHz (39.00%)
83,148,228 instructions:u # 1.44 insn per cycle (82.73%)
<not supported> branches:u
375,111 branch-misses:u
32,759,228 L1-dcache-loads:u # 506.221 M/sec
475,086 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,366,158 L1-icache-loads:u # 484.694 M/sec
297,293 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
35,611,781 dTLB-loads:u # 550.301 M/sec (25.73%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.578384817 seconds time elapsed
14.435258000 seconds user
27.700836000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.0069, 0.9904, 0.5316, ..., 0.2082, 0.4858, 0.4936])
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.1423017978668213 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
318,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,233,431 BR_RETIRED:u
3.555753224 seconds time elapsed
14.642518000 seconds user
30.112207000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.2250, 0.5676, 0.3018, ..., 0.5431, 0.7314, 0.5593])
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.14638042449951172 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
27,039,805 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,375 ITLB_WALK:u
17,290 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,688,544 L1D_TLB:u
3.566915241 seconds time elapsed
16.116565000 seconds user
28.752519000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.0220, 0.7494, 0.7913, ..., 0.8924, 0.8542, 0.5491])
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.17815685272216797 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
32,508,072 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
297,568 L1I_CACHE_REFILL:u
477,654 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,044,579 L1D_CACHE:u
3.435706033 seconds time elapsed
14.690285000 seconds user
28.763423000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.6277, 0.4955, 0.9335, ..., 0.1476, 0.2079, 0.0931])
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.14432048797607422 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
549,474 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
561,939 LL_CACHE_RD:u
185,622 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,295 L2D_TLB_REFILL:u
305,878 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,763,089 L2D_CACHE:u
3.538826979 seconds time elapsed
15.006109000 seconds user
29.644298000 seconds sys
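The JSON summaries in this commit (for example the 22687x22687 p2p-Gnutella25 entry above) store both the raw PMU counts and derived ratios; each derived field is a plain division of two raw counters reported by perf. A sketch of how those fields could be reproduced; the key names follow the JSON files, but the original aggregation script is not part of this log:

# Reproduce the derived ratio fields of a JSON summary from its raw counters.
def derive(raw: dict) -> dict:
    return {
        "instructions per cycle": raw["instructions"] / raw["cycles"],
        "branch miss rate": raw["branch mispredictions"] / raw["branches"],
        "ITLB miss rate": raw["ITLB misses"] / raw["ITLB accesses"],
        "DTLB miss rate": raw["DTLB misses"] / raw["DTLB accesses"],
        "L2D TLB miss rate": raw["L2D TLB misses"] / raw["L2D TLB accesses"],
        "L1I cache miss rate": raw["L1I cache misses"] / raw["L1I cache accesses"],
        "L1D cache miss rate": raw["L1D cache misses"] / raw["L1D cache accesses"],
        "L2D cache miss rate": raw["L2D cache misses"] / raw["L2D cache accesses"],
        "LL cache miss rate": raw["LL cache misses"] / raw["LL cache accesses"],
    }

# For the p2p-Gnutella25 run: 83148228 instructions / 57611295 cycles is about 1.44 insn per cycle,
# matching both the JSON field and perf's "1.44 insn per cycle" annotation above.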

View File

@ -0,0 +1 @@
{"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372}

View File

@ -0,0 +1,153 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394142 queued and waiting for resources
srun: job 3394142 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.5867, 0.3729, 0.0718, ..., 0.5551, 0.6046, 0.6005])
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.3765556812286377 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
65.91 msec task-clock:u # 0.017 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,247 page-faults:u # 49.267 K/sec
92,293,071 cycles:u # 1.400 GHz (58.72%)
76,208,632 instructions:u # 0.83 insn per cycle (75.47%)
<not supported> branches:u
336,620 branch-misses:u (89.96%)
33,256,017 L1-dcache-loads:u # 504.599 M/sec
479,188 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,686,331 L1-icache-loads:u # 480.782 M/sec
297,521 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
55,295,804 dTLB-loads:u # 839.012 M/sec (27.47%)
103,616 dTLB-load-misses:u # 0.19% of all dTLB cache accesses (20.17%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.803094533 seconds time elapsed
16.585763000 seconds user
62.703127000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.2027, 0.2128, 0.5093, ..., 0.8069, 0.6413, 0.1136])
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2942969799041748 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
320,083 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,285,106 BR_RETIRED:u
3.763535833 seconds time elapsed
16.476022000 seconds user
55.208213000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.5930, 0.8044, 0.8115, ..., 0.6366, 0.1026, 0.6914])
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2431955337524414 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
26,853,940 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,728 ITLB_WALK:u
13,955 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,111,059 L1D_TLB:u
3.752433570 seconds time elapsed
16.433982000 seconds user
53.207908000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9666, 0.8206, 0.6252, ..., 0.5180, 0.8170, 0.7406])
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.15313339233398438 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
32,554,796 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,729 L1I_CACHE_REFILL:u
473,779 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,117,102 L1D_CACHE:u
3.595579651 seconds time elapsed
15.817851000 seconds user
44.491315000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9800, 0.9021, 0.5677, ..., 0.3869, 0.2468, 0.3286])
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2539215087890625 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
535,040 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
547,502 LL_CACHE_RD:u
179,876 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,809 L2D_TLB_REFILL:u
298,620 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,722,959 L2D_CACHE:u
3.549060962 seconds time elapsed
16.570077000 seconds user
52.238012000 seconds sys

View File

@ -0,0 +1 @@
{"power_before": [30.44, 35.52], "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 0.29622840881347656, "power": [23.84, 29.44, 33.0, 33.04], "power_after": [36.32, 30.0], "task clock (msec)": 60.77, "page faults": 3361, "cycles": 63493475, "instructions": 91578911, "branch mispredictions": 329084, "branches": 20406595, "ITLB accesses": 26859919, "ITLB misses": 6237, "DTLB misses": 16689, "DTLB accesses": 36348977, "L1I cache accesses": 30979764, "L1I cache misses": 292038, "L1D cache misses": 469219, "L1D cache accesses": 32411890, "LL cache misses": 571870, "LL cache accesses": 598306, "L2D TLB accesses": 205488, "L2D TLB misses": 26392, "L2D cache misses": 342141, "L2D cache accesses": 1857697, "instructions per cycle": 1.442335783322617, "branch miss rate": 0.01612635522976763, "ITLB miss rate": 0.00023220472109390948, "DTLB miss rate": 0.0004591325912693499, "L2D TLB miss rate": 0.12843572374055906, "L1I cache miss rate": 0.009426734173959492, "L1D cache miss rate": 0.014476755289494072, "L2D cache miss rate": 0.1841748142996409, "LL cache miss rate": 0.9558152517273769}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394145 queued and waiting for resources
srun: job 3394145 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1402, 0.0708, 0.4576, ..., 0.4700, 0.5629, 0.9120])
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3585643768310547 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
60.77 msec task-clock:u # 0.016 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,361 page-faults:u # 55.311 K/sec
63,493,475 cycles:u # 1.045 GHz (49.59%)
91,578,911 instructions:u # 1.44 insn per cycle (92.22%)
<not supported> branches:u
374,941 branch-misses:u
33,905,978 L1-dcache-loads:u # 557.979 M/sec
470,553 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,247,376 L1-icache-loads:u # 530.684 M/sec
299,037 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
27,428,635 dTLB-loads:u # 451.384 M/sec (13.50%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.818532962 seconds time elapsed
15.563570000 seconds user
30.194882000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1841, 0.4436, 0.8281, ..., 0.0546, 0.5967, 0.9496])
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3050577640533447 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
329,084 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,406,595 BR_RETIRED:u
3.673527837 seconds time elapsed
15.520198000 seconds user
29.068211000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1849, 0.5991, 0.5040, ..., 0.4916, 0.4789, 0.8887])
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3605458736419678 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
26,859,919 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,237 ITLB_WALK:u
16,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,348,977 L1D_TLB:u
3.769690988 seconds time elapsed
15.173839000 seconds user
29.963392000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.0513, 0.4498, 0.6748, ..., 0.2114, 0.6847, 0.2188])
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3485410213470459 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
30,979,764 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
292,038 L1I_CACHE_REFILL:u
469,219 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,411,890 L1D_CACHE:u
3.598754329 seconds time elapsed
16.139631000 seconds user
29.287026000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.7270, 0.7858, 0.3165, ..., 0.7139, 0.8270, 0.9478])
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3687746524810791 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
571,870 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
598,306 LL_CACHE_RD:u
205,488 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
26,392 L2D_TLB_REFILL:u
342,141 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,857,697 L2D_CACHE:u
3.726794738 seconds time elapsed
15.231331000 seconds user
32.108693000 seconds sys

View File

@ -0,0 +1 @@
{"power_before": [16.52, 16.64], "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 1.3372814655303955, "power": [23.92, 38.6, 46.04, 48.2, 48.2], "power_after": [45.0, 44.08], "task clock (msec)": 59.01, "page faults": 3448, "cycles": 73062796, "instructions": 88329175, "branch mispredictions": 331091, "branches": 20013316, "ITLB accesses": 26330936, "ITLB misses": 5193, "DTLB misses": 16837, "DTLB accesses": 35930477, "L1I cache accesses": 31853890, "L1I cache misses": 306147, "L1D cache misses": 479933, "L1D cache accesses": 33426019, "LL cache misses": 540302, "LL cache accesses": 553181, "L2D TLB accesses": 173206, "L2D TLB misses": 21390, "L2D cache misses": 300032, "L2D cache accesses": 1739931, "instructions per cycle": 1.2089487377406143, "branch miss rate": 0.016543535314187813, "ITLB miss rate": 0.0001972204861991993, "DTLB miss rate": 0.000468599401004334, "L2D TLB miss rate": 0.12349456716280037, "L1I cache miss rate": 0.009610976869701, "L1D cache miss rate": 0.014358066391334247, "L2D cache miss rate": 0.17243902200719455, "LL cache miss rate": 0.9767182893121781}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394151 queued and waiting for resources
srun: job 3394151 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
545671]),
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.3831, 0.6714, 0.8380, ..., 0.7892, 0.5274, 0.9035])
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.044952392578125 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
59.01 msec task-clock:u # 0.010 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,448 page-faults:u # 58.432 K/sec
73,062,796 cycles:u # 1.238 GHz (59.95%)
88,329,175 instructions:u # 1.21 insn per cycle (93.89%)
<not supported> branches:u
365,177 branch-misses:u
31,850,867 L1-dcache-loads:u # 539.766 M/sec
473,835 L1-dcache-load-misses:u # 1.49% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,385,913 L1-icache-loads:u # 514.940 M/sec
299,969 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
24,365,554 dTLB-loads:u # 412.915 M/sec (8.42%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
5.680365622 seconds time elapsed
27.656957000 seconds user
194.823873000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
545671]),
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.6906, 0.4067, 0.7042, ..., 0.8333, 0.7120, 0.3519])
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 1.3788115978240967 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
331,091 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,013,316 BR_RETIRED:u
4.886021169 seconds time elapsed
23.105025000 seconds user
141.491451000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
545671]),
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.8755, 0.6165, 0.4104, ..., 0.6974, 0.9453, 0.9872])
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.8570749759674072 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
26,330,936 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,193 ITLB_WALK:u
16,837 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,930,477 L1D_TLB:u
6.371573603 seconds time elapsed
30.986329000 seconds user
254.347216000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
545671]),
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.3573, 0.9331, 0.0611, ..., 0.9133, 0.6057, 0.2374])
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.311248540878296 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
31,853,890 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
306,147 L1I_CACHE_REFILL:u
479,933 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,426,019 L1D_CACHE:u
5.718741260 seconds time elapsed
28.451593000 seconds user
214.350594000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
545671]),
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.6021, 0.5679, 0.4538, ..., 0.9086, 0.9552, 0.5329])
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 1.8193013668060303 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
540,302 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
553,181 LL_CACHE_RD:u
173,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,390 L2D_TLB_REFILL:u
300,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,739,931 L2D_CACHE:u
5.546861941 seconds time elapsed
28.194596000 seconds user
181.004698000 seconds sys
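The crow_indices / col_indices / values triples printed at the top of each run are the standard CSR encoding: crow_indices has one entry per row plus one, and row i owns the slice [crow_indices[i], crow_indices[i+1]) of col_indices and values, so repeated crow values (as in the Slashdot matrices above) mark empty rows. A small self-contained illustration:

import torch

a = torch.tensor([[0., 2., 0.],
                  [1., 0., 3.]]).to_sparse_csr()
print(a.crow_indices())          # tensor([0, 1, 3]): row 0 holds 1 nonzero, row 1 holds 2
print(a.col_indices())           # tensor([1, 0, 2])
print(a.values())                # tensor([2., 1., 3.])
print(a.crow_indices().diff())   # nonzeros per row; an empty row would show up here as 0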

View File

@ -0,0 +1 @@
{"power_before": [53.64, 46.88], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.2292509078979492, "power": [40.64, 52.44, 54.8, 54.96, 46.8], "power_after": [47.88, 47.08], "task clock (msec)": 61.26, "page faults": 3303, "cycles": 44515786, "instructions": 81513738, "branch mispredictions": 328019, "branches": 19893662, "ITLB accesses": 27248112, "ITLB misses": 5792, "DTLB misses": 16632, "DTLB accesses": 36929042, "L1I cache accesses": 31702830, "L1I cache misses": 295778, "L1D cache misses": 470423, "L1D cache accesses": 33155119, "LL cache misses": 545220, "LL cache accesses": 562139, "L2D TLB accesses": 192206, "L2D TLB misses": 24891, "L2D cache misses": 307033, "L2D cache accesses": 1782260, "instructions per cycle": 1.8311198189334452, "branch miss rate": 0.01648861833482443, "ITLB miss rate": 0.0002125651861677609, "DTLB miss rate": 0.0004503772396803578, "L2D TLB miss rate": 0.12950168048864238, "L1I cache miss rate": 0.009329703373484323, "L1D cache miss rate": 0.014188548079106578, "L2D cache miss rate": 0.17227172241984895, "LL cache miss rate": 0.9699024618466251}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3393718 queued and waiting for resources
srun: job 3393718 has been allocated resources
srun: job 3394147 queued and waiting for resources
srun: job 3394147 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
@ -14,37 +14,37 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.8320, 0.8961, 0.3119, ..., 0.2600, 0.3720, 0.6950])
tensor([0.2696, 0.6106, 0.1626, ..., 0.2215, 0.5107, 0.8609])
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 3.012270212173462 seconds
Time: 1.4500706195831299 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
61.63 msec task-clock:u # 0.008 CPUs utilized
61.26 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,293 page-faults:u # 53.433 K/sec
41,677,750 cycles:u # 0.676 GHz (43.47%)
91,767,205 instructions:u # 2.20 insn per cycle (93.66%)
3,303 page-faults:u # 53.917 K/sec
44,515,786 cycles:u # 0.727 GHz (40.46%)
81,513,738 instructions:u # 1.83 insn per cycle (73.51%)
<not supported> branches:u
369,577 branch-misses:u
33,184,885 L1-dcache-loads:u # 538.465 M/sec
489,650 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses
344,479 branch-misses:u (89.42%)
34,411,073 L1-dcache-loads:u # 561.710 M/sec
484,811 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,518,657 L1-icache-loads:u # 511.428 M/sec
300,352 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
21,439,232 dTLB-loads:u # 347.878 M/sec (11.35%)
<not counted> dTLB-load-misses:u (0.00%)
32,789,672 L1-icache-loads:u # 535.243 M/sec
293,487 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
47,065,740 dTLB-loads:u # 768.279 M/sec (32.81%)
146,215 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (13.39%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
7.285558270 seconds time elapsed
4.966101053 seconds time elapsed
30.820742000 seconds user
271.093513000 seconds sys
23.375418000 seconds user
148.052989000 seconds sys
@ -55,21 +55,21 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.2625, 0.3727, 0.7700, ..., 0.9213, 0.0373, 0.4236])
tensor([0.1999, 0.3932, 0.8035, ..., 0.5079, 0.5903, 0.7606])
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 3.8292958736419678 seconds
Time: 1.9677543640136719 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
329,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,813,961 BR_RETIRED:u
328,019 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,893,662 BR_RETIRED:u
7.818393438 seconds time elapsed
5.529871590 seconds time elapsed
35.952830000 seconds user
333.700971000 seconds sys
26.844356000 seconds user
190.429440000 seconds sys
@ -80,23 +80,23 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.0340, 0.2650, 0.1324, ..., 0.0868, 0.2162, 0.5618])
tensor([0.2933, 0.6999, 0.0078, ..., 0.6213, 0.9377, 0.6359])
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 3.464143753051758 seconds
Time: 1.4976201057434082 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
27,944,146 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,811 ITLB_WALK:u
18,962 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,689,058 L1D_TLB:u
27,248,112 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,792 ITLB_WALK:u
16,632 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,929,042 L1D_TLB:u
7.541903779 seconds time elapsed
4.971341163 seconds time elapsed
32.666428000 seconds user
309.938101000 seconds sys
24.247480000 seconds user
151.276717000 seconds sys
@ -107,23 +107,23 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.6118, 0.9275, 0.9072, ..., 0.7025, 0.2788, 0.7796])
tensor([0.1310, 0.6695, 0.9479, ..., 0.3141, 0.9327, 0.2117])
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.4259674549102783 seconds
Time: 1.0877256393432617 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
31,746,573 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
290,044 L1I_CACHE_REFILL:u
471,100 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,271,575 L1D_CACHE:u
31,702,830 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
295,778 L1I_CACHE_REFILL:u
470,423 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,155,119 L1D_CACHE:u
5.333100815 seconds time elapsed
4.675682406 seconds time elapsed
24.606404000 seconds user
142.184021000 seconds sys
23.098007000 seconds user
119.827712000 seconds sys
@ -134,25 +134,25 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.1819, 0.6831, 0.7926, ..., 0.2272, 0.8215, 0.3765])
tensor([0.0860, 0.5402, 0.6738, ..., 0.3856, 0.5968, 0.4203])
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 2.8267815113067627 seconds
Time: 1.2302696704864502 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
550,308 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
564,981 LL_CACHE_RD:u
168,456 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
20,450 L2D_TLB_REFILL:u
306,309 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,745,776 L2D_CACHE:u
545,220 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
562,139 LL_CACHE_RD:u
192,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,891 L2D_TLB_REFILL:u
307,033 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,782,260 L2D_CACHE:u
7.032343494 seconds time elapsed
4.781838296 seconds time elapsed
31.547129000 seconds user
251.812633000 seconds sys
23.716896000 seconds user
130.971947000 seconds sys

View File

@ -0,0 +1 @@
{"power_before": [30.48, 33.04], "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 2.848874092102051, "power": [65.52, 75.88, 71.16, 71.16, 59.72, 47.92, 48.68], "power_after": [68.68, 67.88], "task clock (msec)": 49.87, "page faults": 3300, "cycles": 51935476, "instructions": 83731856, "branch mispredictions": 326464, "branches": 20341367, "ITLB accesses": 27590154, "ITLB misses": 6210, "DTLB misses": 17536, "DTLB accesses": 36763243, "L1I cache accesses": 31663300, "L1I cache misses": 289727, "L1D cache misses": 462864, "L1D cache accesses": 33262254, "LL cache misses": 530272, "LL cache accesses": 551373, "L2D TLB accesses": 196152, "L2D TLB misses": 23542, "L2D cache misses": 301998, "L2D cache accesses": 1732662, "instructions per cycle": 1.6122285275675532, "branch miss rate": 0.01604926551888081, "ITLB miss rate": 0.000225080294948698, "DTLB miss rate": 0.0004769981799483794, "L2D TLB miss rate": 0.12001916880786329, "L1I cache miss rate": 0.00915024649989104, "L1D cache miss rate": 0.013915593332911234, "L2D cache miss rate": 0.17429712200071334, "LL cache miss rate": 0.9617300810884828}

View File

@ -0,0 +1,163 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394154 queued and waiting for resources
srun: job 3394154 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
841372]),
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.5842, 0.3042, 0.7358, ..., 0.7882, 0.7596, 0.5895])
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.4407293796539307 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
49.87 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,300 page-faults:u # 66.174 K/sec
51,935,476 cycles:u # 1.041 GHz (65.00%)
83,731,856 instructions:u # 1.61 insn per cycle (84.25%)
<not supported> branches:u
375,900 branch-misses:u
34,169,837 L1-dcache-loads:u # 685.197 M/sec
474,410 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,443,215 L1-icache-loads:u # 650.574 M/sec
294,146 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
63,709,518 dTLB-loads:u # 1.278 G/sec (16.44%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
6.058862056 seconds time elapsed
29.101578000 seconds user
224.790489000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
841372]),
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.9696, 0.8139, 0.4858, ..., 0.2374, 0.1716, 0.9756])
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.0945546627044678 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
326,464 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,341,367 BR_RETIRED:u
5.525378890 seconds time elapsed
28.841740000 seconds user
199.678982000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
841372]),
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.3478, 0.0057, 0.8574, ..., 0.6409, 0.1876, 0.8429])
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.8504912853240967 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
27,590,154 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,210 ITLB_WALK:u
17,536 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,763,243 L1D_TLB:u
6.425887143 seconds time elapsed
33.069094000 seconds user
256.667850000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
841372]),
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.5381, 0.6651, 0.4689, ..., 0.7251, 0.3759, 0.8516])
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 1.6941111087799072 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
31,663,300 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
289,727 L1I_CACHE_REFILL:u
462,864 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,262,254 L1D_CACHE:u
5.304170809 seconds time elapsed
25.992245000 seconds user
173.752913000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
841372]),
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.4145, 0.8515, 0.7222, ..., 0.1386, 0.6641, 0.6662])
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 3.0850296020507812 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
530,272 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
551,373 LL_CACHE_RD:u
196,152 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,542 L2D_TLB_REFILL:u
301,998 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,732,662 L2D_CACHE:u
6.733517838 seconds time elapsed
34.030476000 seconds user
271.397968000 seconds sys

View File

@ -0,0 +1 @@
{"power_before": [20.44, 20.2], "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 0.556269645690918, "power": [25.24, 32.16, 33.0, 32.52], "power_after": [34.24, 30.28], "task clock (msec)": 62.49, "page faults": 3312, "cycles": 76783170, "instructions": 77095702, "branch mispredictions": 323514, "branches": 19769937, "ITLB accesses": 26809325, "ITLB misses": 6925, "DTLB misses": 19003, "DTLB accesses": 36516965, "L1I cache accesses": 31104231, "L1I cache misses": 285499, "L1D cache misses": 468498, "L1D cache accesses": 32677465, "LL cache misses": 559358, "LL cache accesses": 571935, "L2D TLB accesses": 194840, "L2D TLB misses": 23481, "L2D cache misses": 313487, "L2D cache accesses": 1779730, "instructions per cycle": 1.004070319055595, "branch miss rate": 0.016363936819829016, "ITLB miss rate": 0.00025830564551699827, "DTLB miss rate": 0.0005203882633729282, "L2D TLB miss rate": 0.12051426811742968, "L1I cache miss rate": 0.009178783426601994, "L1D cache miss rate": 0.01433703624194839, "L2D cache miss rate": 0.1761430104566423, "LL cache miss rate": 0.9780097388689274}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394144 queued and waiting for resources
srun: job 3394144 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
239978]),
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.7658, 0.2874, 0.7506, ..., 0.3335, 0.5056, 0.9767])
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.5561239719390869 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
62.49 msec task-clock:u # 0.015 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,312 page-faults:u # 53.003 K/sec
76,783,170 cycles:u # 1.229 GHz (62.65%)
77,095,702 instructions:u # 1.00 insn per cycle (80.20%)
<not supported> branches:u
370,891 branch-misses:u (94.99%)
32,730,448 L1-dcache-loads:u # 523.800 M/sec
467,718 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,548,469 L1-icache-loads:u # 504.885 M/sec
298,966 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
61,098,419 dTLB-loads:u # 977.786 M/sec (20.67%)
64,747 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (10.91%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.062782709 seconds time elapsed
16.106338000 seconds user
32.399716000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
239978]),
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.7531, 0.4727, 0.4126, ..., 0.1574, 0.5247, 0.8875])
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.6003477573394775 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
323,514 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,769,937 BR_RETIRED:u
4.061021393 seconds time elapsed
16.155442000 seconds user
31.047278000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
239978]),
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.3067, 0.4335, 0.8814, ..., 0.2370, 0.1210, 0.7695])
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.5404119491577148 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
26,809,325 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,925 ITLB_WALK:u
19,003 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,516,965 L1D_TLB:u
4.031175418 seconds time elapsed
15.607232000 seconds user
30.562258000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
239978]),
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.5013, 0.5961, 0.5565, ..., 0.3779, 0.1835, 0.6722])
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.6185996532440186 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
31,104,231 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
285,499 L1I_CACHE_REFILL:u
468,498 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,677,465 L1D_CACHE:u
4.083129305 seconds time elapsed
16.243642000 seconds user
36.578375000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
239978]),
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.9075, 0.2788, 0.1365, ..., 0.4240, 0.8832, 0.1064])
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.54673171043396 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
559,358 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
571,935 LL_CACHE_RD:u
194,840 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,481 L2D_TLB_REFILL:u
313,487 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,779,730 L2D_CACHE:u
3.961843929 seconds time elapsed
15.425912000 seconds user
28.864046000 seconds sys

View File

@ -0,0 +1 @@
{"power_before": [34.6, 37.16], "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 1.0817186832427979, "power": [34.32, 50.84, 52.12, 52.4, 52.76], "power_after": [49.0, 45.08], "task clock (msec)": 60.55, "page faults": 3490, "cycles": 49977496, "instructions": 78622993, "branch mispredictions": 327078, "branches": 20135808, "ITLB accesses": 27608093, "ITLB misses": 6616, "DTLB misses": 17185, "DTLB accesses": 36866957, "L1I cache accesses": 32639204, "L1I cache misses": 309643, "L1D cache misses": 478856, "L1D cache accesses": 34280618, "LL cache misses": 555275, "LL cache accesses": 578455, "L2D TLB accesses": 188723, "L2D TLB misses": 24635, "L2D cache misses": 319663, "L2D cache accesses": 1799940, "instructions per cycle": 1.573167911413569, "branch miss rate": 0.016243599462211798, "ITLB miss rate": 0.00023963987661154286, "DTLB miss rate": 0.00046613556958335347, "L2D TLB miss rate": 0.13053522888042263, "L1I cache miss rate": 0.009486842877663316, "L1D cache miss rate": 0.013968709665619214, "L2D cache miss rate": 0.17759647543807017, "LL cache miss rate": 0.9599277385449171}

View File

@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394146 queued and waiting for resources
srun: job 3394146 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4608, 0.1516, 0.8492, ..., 0.8920, 0.4275, 0.8070])
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.3751039505004883 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
60.55 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,490 page-faults:u # 57.638 K/sec
49,977,496 cycles:u # 0.825 GHz (40.93%)
78,622,993 instructions:u # 1.57 insn per cycle (85.37%)
<not supported> branches:u
358,029 branch-misses:u
31,478,500 L1-dcache-loads:u # 519.877 M/sec
479,449 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,991,824 L1-icache-loads:u # 495.324 M/sec
294,864 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
35,154,647 dTLB-loads:u # 580.589 M/sec (23.19%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.986156121 seconds time elapsed
23.724703000 seconds user
145.034521000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4697, 0.7121, 0.5987, ..., 0.2619, 0.7308, 0.3129])
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.6881086826324463 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
327,078 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,135,808 BR_RETIRED:u
5.374156677 seconds time elapsed
25.609168000 seconds user
167.278028000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.9215, 0.6706, 0.8015, ..., 0.8507, 0.8546, 0.4441])
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.2785694599151611 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
27,608,093 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,616 ITLB_WALK:u
17,185 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,866,957 L1D_TLB:u
4.861513311 seconds time elapsed
23.339077000 seconds user
141.584760000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.8973, 0.5228, 0.4492, ..., 0.7677, 0.7722, 0.1700])
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.1654376983642578 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
32,639,204 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
309,643 L1I_CACHE_REFILL:u
478,856 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,280,618 L1D_CACHE:u
4.677973310 seconds time elapsed
22.972655000 seconds user
125.062401000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4542, 0.7095, 0.5701, ..., 0.2172, 0.8829, 0.7757])
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.1153452396392822 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
555,275 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
578,455 LL_CACHE_RD:u
188,723 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,635 L2D_TLB_REFILL:u
319,663 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,799,940 L2D_CACHE:u
4.655024760 seconds time elapsed
23.104641000 seconds user
122.294597000 seconds sys
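
(Annotation, not part of the captured log.) Each matrix is run several times, once per perf event group (branches, TLB walks, L1 caches, LL/L2 caches), presumably because only a few PMU counters can be programmed at once; the per-matrix JSON record then merges the groups. A hedged sketch of that merge step, assuming perf stat output in the textual form shown above:

import re

# Fold the per-group `perf stat` outputs into one counter record.
# Line format taken from the logs: "   327,078   BR_MIS_PRED_RETIRED:u  # ...".
EVENT_LINE = re.compile(r"^\s*([\d,]+)\s+([A-Za-z0-9_]+):u\b")

def parse_counts(perf_output: str) -> dict:
    """Return {event_name: count} for every '<number> EVENT:u' line."""
    counts = {}
    for line in perf_output.splitlines():
        m = EVENT_LINE.match(line)
        if m:
            counts[m.group(2)] = int(m.group(1).replace(",", ""))
    return counts

# Example: two of the ut2010 groups above, merged into one dict.
branch_group = """
       327,078      BR_MIS_PRED_RETIRED:u   # 0.0 per branch branch_misprediction_ratio
    20,135,808      BR_RETIRED:u
"""
l2_group = """
       319,663      L2D_CACHE_REFILL:u      # 0.2 per cache access l2_cache_miss_ratio
     1,799,940      L2D_CACHE:u
"""

merged = {}
for group in (branch_group, l2_group):
    merged.update(parse_counts(group))

print(merged["BR_MIS_PRED_RETIRED"] / merged["BR_RETIRED"])  # ~0.01624, the stored branch miss rate
print(merged["L2D_CACHE_REFILL"] / merged["L2D_CACHE"])      # ~0.17760, the stored L2D cache miss rate
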

View File

@ -0,0 +1 @@
{"power_before": [34.04, 43.96], "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 0.4164857864379883, "power": [23.72, 23.72, 29.88, 33.32], "power_after": [33.36, 32.52], "task clock (msec)": 61.63, "page faults": 3304, "cycles": 64734203, "instructions": 53597991, "branch mispredictions": 330777, "branches": 20357034, "ITLB accesses": 27381387, "ITLB misses": 6248, "DTLB misses": 17636, "DTLB accesses": 37436110, "L1I cache accesses": 32505993, "L1I cache misses": 303849, "L1D cache misses": 467426, "L1D cache accesses": 34241110, "LL cache misses": 550075, "LL cache accesses": 562829, "L2D TLB accesses": 199285, "L2D TLB misses": 24424, "L2D cache misses": 310155, "L2D cache accesses": 1783824, "instructions per cycle": 0.8279701999266138, "branch miss rate": 0.016248781625063848, "ITLB miss rate": 0.00022818420410916364, "DTLB miss rate": 0.00047109595521543235, "L2D TLB miss rate": 0.12255814536969667, "L1I cache miss rate": 0.009347476325365603, "L1D cache miss rate": 0.01365101773861887, "L2D cache miss rate": 0.17387085272986572, "LL cache miss rate": 0.9773394761108614}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394143 queued and waiting for resources
srun: job 3394143 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.9170, 0.7306, 0.1175, ..., 0.0616, 0.0147, 0.6403])
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.4440653324127197 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
61.63 msec task-clock:u # 0.016 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,304 page-faults:u # 53.611 K/sec
64,734,203 cycles:u # 1.050 GHz (50.46%)
53,597,991 instructions:u # 0.83 insn per cycle (70.10%)
<not supported> branches:u
347,389 branch-misses:u (91.95%)
31,363,842 L1-dcache-loads:u # 508.915 M/sec
482,780 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,027,001 L1-icache-loads:u # 487.223 M/sec
288,023 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
44,333,825 dTLB-loads:u # 719.368 M/sec (48.58%)
74,525 dTLB-load-misses:u # 0.17% of all dTLB cache accesses (16.71%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.811654040 seconds time elapsed
15.616953000 seconds user
30.906234000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5548, 0.3514, 0.6283, ..., 0.5672, 0.1575, 0.4493])
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.44233155250549316 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
330,777 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,357,034 BR_RETIRED:u
3.835342404 seconds time elapsed
15.497637000 seconds user
28.676763000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.0953, 0.5790, 0.0112, ..., 0.9540, 0.3173, 0.4731])
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.43302106857299805 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
27,381,387 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,248 ITLB_WALK:u
17,636 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,436,110 L1D_TLB:u
3.828586094 seconds time elapsed
15.518057000 seconds user
31.389361000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5456, 0.8708, 0.2037, ..., 0.8669, 0.9122, 0.2046])
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.4426534175872803 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
32,505,993 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
303,849 L1I_CACHE_REFILL:u
467,426 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,241,110 L1D_CACHE:u
3.811299200 seconds time elapsed
15.932195000 seconds user
30.887870000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5024, 0.2304, 0.7925, ..., 0.1397, 0.5558, 0.6450])
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.3671383857727051 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
550,075 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
562,829 LL_CACHE_RD:u
199,285 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,424 L2D_TLB_REFILL:u
310,155 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,783,824 L2D_CACHE:u
3.824434783 seconds time elapsed
15.754438000 seconds user
28.226523000 seconds sys