Got output!

2024-12-03 00:20:09 -05:00 · 2024-12-03 00:20:09 -05:00 · 01b1b0fc0c
commit 01b1b0fc0c
parent c9ed7980b8
33 changed files with 2445 additions and 55 deletions
--- a/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json
+++ b/pytorch/output/altra_100_soc-sign-Slashdot090221_2_2.json
@ -1 +0,0 @@
 {"power_before": [20.2, 20.32], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.6952476501464844, "power": [44.68, 57.12, 64.2, 67.04, 55.68, 42.76], "power_after": [53.56, 52.52], "task clock (msec)": 61.63, "page faults": 3293, "cycles": 41677750, "instructions": 91767205, "branch mispredictions": 329386, "branches": 19813961, "ITLB accesses": 27944146, "ITLB misses": 6811, "DTLB misses": 18962, "DTLB accesses": 37689058, "L1I cache accesses": 31746573, "L1I cache misses": 290044, "L1D cache misses": 471100, "L1D cache accesses": 33271575, "LL cache misses": 550308, "LL cache accesses": 564981, "L2D TLB accesses": 168456, "L2D TLB misses": 20450, "L2D cache misses": 306309, "L2D cache accesses": 1745776, "instructions per cycle": 2.201827233955768, "branch miss rate": 0.01662393501228755, "ITLB miss rate": 0.00024373620149279208, "DTLB miss rate": 0.0005031168462740565, "L2D TLB miss rate": 0.12139668518782352, "L1I cache miss rate": 0.009136230231842662, "L1D cache miss rate": 0.014159233519904002, "L2D cache miss rate": 0.17545721787904062, "LL cache miss rate": 0.9740292151417481}
--- a/pytorch/output/altra_2_2_Oregon-2_100.json
+++ b/pytorch/output/altra_2_2_Oregon-2_100.json
@ -0,0 +1 @@
 {"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 0.9615458509371094}
--- a/pytorch/output/altra_2_2_Oregon-2_100.output
+++ b/pytorch/output/altra_2_2_Oregon-2_100.output
@ -0,0 +1,153 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394148 queued and waiting for resources
 srun: job 3394148 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.3190, 0.2829, 0.6210,  ..., 0.9278, 0.7514, 0.5737])
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.22389841079711914 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
             42.01 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,263      page-faults:u                    #   77.672 K/sec                     
        47,084,933      cycles:u                         #    1.121 GHz                         (65.90%)
        77,895,119      instructions:u                   #    1.65  insn per cycle              (85.49%)
   <not supported>      branches:u                                                            
           352,740      branch-misses:u                                                       
        30,958,922      L1-dcache-loads:u                #  736.946 M/sec                     
           442,351      L1-dcache-load-misses:u          #    1.43% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        29,506,648      L1-icache-loads:u                #  702.376 M/sec                     
           272,063      L1-icache-load-misses:u          #    0.92% of all L1-icache accesses 
        51,646,382      dTLB-loads:u                     #    1.229 G/sec                       (15.87%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.513156571 seconds time elapsed
      15.150380000 seconds user
      32.922923000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.0741, 0.5476, 0.1060,  ..., 0.8459, 0.8270, 0.8313])
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.20610284805297852 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
           330,923      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,740,519      BR_RETIRED:u                                                          
       3.639725976 seconds time elapsed
      15.493122000 seconds user
      27.617441000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.9699, 0.9368, 0.7284,  ..., 0.7182, 0.5308, 0.9833])
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.15960955619812012 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
        27,761,239      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,471      ITLB_WALK:u                                                           
            17,268      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,993,265      L1D_TLB:u                                                             
       3.455602215 seconds time elapsed
      15.015027000 seconds user
      27.930709000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.5851, 0.3425, 0.8120,  ..., 0.0829, 0.5823, 0.2256])
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.15697884559631348 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
        31,834,980      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           298,333      L1I_CACHE_REFILL:u                                                    
           466,901      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,528,976      L1D_CACHE:u                                                           
       3.452279902 seconds time elapsed
      14.635240000 seconds user
      28.262858000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.0772, 0.9112, 0.0293,  ..., 0.4016, 0.4357, 0.5368])
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.20962285995483398 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
           525,505      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           546,521      LL_CACHE_RD:u                                                         
           184,884      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            22,933      L2D_TLB_REFILL:u                                                      
           292,367      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,706,226      L2D_CACHE:u                                                           
       3.566096255 seconds time elapsed
      15.763579000 seconds user
      28.620423000 seconds sys
--- a/pytorch/output/altra_2_2_as-caida_100.json
+++ b/pytorch/output/altra_2_2_as-caida_100.json
@ -0,0 +1 @@
 {"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491}
--- a/pytorch/output/altra_2_2_as-caida_100.output
+++ b/pytorch/output/altra_2_2_as-caida_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394150 queued and waiting for resources
 srun: job 3394150 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.7672, 0.5818, 0.6775,  ..., 0.1052, 0.2539, 0.4347])
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 0.28373050689697266 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
             60.78 msec task-clock:u                     #    0.017 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,300      page-faults:u                    #   54.293 K/sec                     
        66,733,059      cycles:u                         #    1.098 GHz                         (58.34%)
        87,889,334      instructions:u                   #    1.32  insn per cycle              (93.45%)
   <not supported>      branches:u                                                            
           369,909      branch-misses:u                                                       
        31,872,708      L1-dcache-loads:u                #  524.386 M/sec                     
           465,719      L1-dcache-load-misses:u          #    1.46% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,443,353      L1-icache-loads:u                #  500.870 M/sec                     
           292,371      L1-icache-load-misses:u          #    0.96% of all L1-icache accesses 
        34,702,735      dTLB-loads:u                     #  570.947 M/sec                       (6.96%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.683429807 seconds time elapsed
      15.161162000 seconds user
      31.335288000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.2708, 0.2455, 0.7615,  ..., 0.1172, 0.4072, 0.8970])
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 0.32511067390441895 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
           326,300      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,832,700      BR_RETIRED:u                                                          
       3.755497210 seconds time elapsed
      14.681699000 seconds user
      29.413955000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.9417, 0.0965, 0.8551,  ..., 0.6665, 0.0164, 0.5102])
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 0.33124780654907227 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
        27,233,629      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,868      ITLB_WALK:u                                                           
            16,893      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,409,508      L1D_TLB:u                                                             
       3.751203540 seconds time elapsed
      14.849342000 seconds user
      27.706396000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.9215, 0.4139, 0.1789,  ..., 0.0245, 0.0029, 0.2129])
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 0.3386805057525635 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
        30,924,532      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           288,199      L1I_CACHE_REFILL:u                                                    
           462,816      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,428,375      L1D_CACHE:u                                                           
       3.628443937 seconds time elapsed
      15.430937000 seconds user
      30.878583000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.4983, 0.0268, 0.1695,  ..., 0.6987, 0.7224, 0.8577])
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 0.3289623260498047 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
           551,997      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           568,528      LL_CACHE_RD:u                                                         
           193,991      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,353      L2D_TLB_REFILL:u                                                      
           312,207      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,821,196      L2D_CACHE:u                                                           
       3.698790384 seconds time elapsed
      15.745189000 seconds user
      31.063512000 seconds sys
--- a/pytorch/output/altra_2_2_dc2_100.json
+++ b/pytorch/output/altra_2_2_dc2_100.json
@ -0,0 +1 @@
 {"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127}
--- a/pytorch/output/altra_2_2_dc2_100.output
+++ b/pytorch/output/altra_2_2_dc2_100.output
@ -0,0 +1,168 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394149 queued and waiting for resources
 srun: job 3394149 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.4749, 0.3788, 0.8812,  ..., 0.8281, 0.8889, 0.4945])
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 2.2480316162109375 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
             50.43 msec task-clock:u                     #    0.009 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,285      page-faults:u                    #   65.135 K/sec                     
        54,118,679      cycles:u                         #    1.073 GHz                         (60.92%)
        77,692,421      instructions:u                   #    1.44  insn per cycle              (82.73%)
   <not supported>      branches:u                                                            
           367,999      branch-misses:u                                                       
        32,182,371      L1-dcache-loads:u                #  638.112 M/sec                     
           491,960      L1-dcache-load-misses:u          #    1.53% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,682,258      L1-icache-loads:u                #  608.367 M/sec                     
           300,874      L1-icache-load-misses:u          #    0.98% of all L1-icache accesses 
        55,244,523      dTLB-loads:u                     #    1.095 G/sec                       (19.09%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       5.813837947 seconds time elapsed
      28.815118000 seconds user
     213.749674000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.9715, 0.3920, 0.0297,  ..., 0.1819, 0.5744, 0.8105])
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 2.2333595752716064 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
           325,039      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,383,216      BR_RETIRED:u                                                          
       5.973132269 seconds time elapsed
      29.719778000 seconds user
     213.706315000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.3371, 0.4985, 0.9905,  ..., 0.6075, 0.1568, 0.3782])
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 1.9790923595428467 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
        26,060,519      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             4,749      ITLB_WALK:u                                                           
            16,865      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        34,819,729      L1D_TLB:u                                                             
       5.575020445 seconds time elapsed
      26.769391000 seconds user
     188.138935000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.6806, 0.8858, 0.7035,  ..., 0.6007, 0.0880, 0.4550])
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 1.5306556224822998 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
        30,777,115      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           293,980      L1I_CACHE_REFILL:u                                                    
           461,522      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,216,597      L1D_CACHE:u                                                           
       4.961298684 seconds time elapsed
      23.946357000 seconds user
     156.598674000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.3029, 0.1908, 0.9816,  ..., 0.0418, 0.8182, 0.5474])
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 2.28926944732666 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
           567,700      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           588,689      LL_CACHE_RD:u                                                         
           189,417      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            22,360      L2D_TLB_REFILL:u                                                      
           328,306      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,908,607      L2D_CACHE:u                                                           
       5.710829283 seconds time elapsed
      28.671301000 seconds user
     213.960421000 seconds sys
--- a/pytorch/output/altra_2_2_de2010_100.json
+++ b/pytorch/output/altra_2_2_de2010_100.json
@ -0,0 +1 @@
 {"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218}
--- a/pytorch/output/altra_2_2_de2010_100.output
+++ b/pytorch/output/altra_2_2_de2010_100.output
@ -0,0 +1,163 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394139 queued and waiting for resources
 srun: job 3394139 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.4207, 0.3943, 0.6543,  ..., 0.2191, 0.5415, 0.1575])
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 0.36042284965515137 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
             59.88 msec task-clock:u                     #    0.016 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,313      page-faults:u                    #   55.328 K/sec                     
        58,169,777      cycles:u                         #    0.971 GHz                         (61.49%)
        57,993,431      instructions:u                   #    1.00  insn per cycle              (81.67%)
   <not supported>      branches:u                                                            
           341,266      branch-misses:u                                                       
        31,858,781      L1-dcache-loads:u                #  532.049 M/sec                     
           467,486      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,461,310      L1-icache-loads:u                #  508.711 M/sec                     
           294,156      L1-icache-load-misses:u          #    0.97% of all L1-icache accesses 
        43,828,130      dTLB-loads:u                     #  731.940 M/sec                       (40.26%)
            47,836      dTLB-load-misses:u               #    0.11% of all dTLB cache accesses  (25.52%)
                 0      iTLB-loads:u                     #    0.000 /sec                        (2.73%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.824054028 seconds time elapsed
      15.099361000 seconds user
      28.830417000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.0456, 0.2095, 0.0276,  ..., 0.4209, 0.6824, 0.5475])
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 0.3598823547363281 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
           330,494      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,578,427      BR_RETIRED:u                                                          
       3.781234836 seconds time elapsed
      14.965545000 seconds user
      29.444131000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.9882, 0.5477, 0.6307,  ..., 0.1179, 0.6903, 0.1235])
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 0.29088521003723145 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
        27,982,097      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,614      ITLB_WALK:u                                                           
            17,270      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,728,899      L1D_TLB:u                                                             
       3.576632300 seconds time elapsed
      14.864601000 seconds user
      29.274547000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.3952, 0.0475, 0.1125,  ..., 0.3481, 0.1290, 0.3495])
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 0.30365920066833496 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
        29,754,926      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           278,786      L1I_CACHE_REFILL:u                                                    
           454,742      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,173,246      L1D_CACHE:u                                                           
       3.730995381 seconds time elapsed
      15.213930000 seconds user
      30.995070000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.7266, 0.7537, 0.9729,  ..., 0.3349, 0.3523, 0.6532])
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 0.2798902988433838 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
           543,243      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           560,716      LL_CACHE_RD:u                                                         
           162,281      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            19,847      L2D_TLB_REFILL:u                                                      
           300,577      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,696,278      L2D_CACHE:u                                                           
       3.819959836 seconds time elapsed
      15.346035000 seconds user
      29.199873000 seconds sys
--- a/pytorch/output/altra_2_2_email-Enron_100.json
+++ b/pytorch/output/altra_2_2_email-Enron_100.json
@ -0,0 +1 @@
 {"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719}
--- a/pytorch/output/altra_2_2_email-Enron_100.output
+++ b/pytorch/output/altra_2_2_email-Enron_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394152 queued and waiting for resources
 srun: job 3394152 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.3626, 0.7532, 0.0782,  ..., 0.6679, 0.4308, 0.6586])
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.3745801448822021 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
             60.43 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,319      page-faults:u                    #   54.926 K/sec                     
        66,114,448      cycles:u                         #    1.094 GHz                         (58.10%)
        90,786,829      instructions:u                   #    1.37  insn per cycle              (92.25%)
   <not supported>      branches:u                                                            
           372,381      branch-misses:u                                                       
        32,997,410      L1-dcache-loads:u                #  546.070 M/sec                     
           470,216      L1-dcache-load-misses:u          #    1.43% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,485,339      L1-icache-loads:u                #  521.047 M/sec                     
           294,395      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        31,376,646      dTLB-loads:u                     #  519.248 M/sec                       (10.03%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.904488673 seconds time elapsed
      22.874521000 seconds user
     139.276239000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.2040, 0.8252, 0.0215,  ..., 0.2921, 0.9143, 0.8728])
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.3087654113769531 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
           341,625      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,129,354      BR_RETIRED:u                                                          
       4.644873434 seconds time elapsed
      22.729927000 seconds user
     132.278582000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.6154, 0.6641, 0.3794,  ..., 0.9736, 0.0619, 0.4790])
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.2701547145843506 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
        27,441,303      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,807      ITLB_WALK:u                                                           
            20,551      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,867,114      L1D_TLB:u                                                             
       4.861510767 seconds time elapsed
      22.111354000 seconds user
     132.431608000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.4201, 0.4134, 0.8169,  ..., 0.6631, 0.0087, 0.8439])
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.1176586151123047 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
        31,744,243      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           271,027      L1I_CACHE_REFILL:u                                                    
           464,135      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,441,141      L1D_CACHE:u                                                           
       4.693803969 seconds time elapsed
      21.724904000 seconds user
     119.873018000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.1285, 0.3989, 0.3903,  ..., 0.7892, 0.2737, 0.2659])
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.196892261505127 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
           539,935      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           552,519      LL_CACHE_RD:u                                                         
           188,291      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,177      L2D_TLB_REFILL:u                                                      
           301,281      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,737,575      L2D_CACHE:u                                                           
       4.741030347 seconds time elapsed
      23.793930000 seconds user
     125.634838000 seconds sys
--- a/pytorch/output/altra_2_2_p2p-Gnutella04_100.json
+++ b/pytorch/output/altra_2_2_p2p-Gnutella04_100.json
@ -0,0 +1 @@
 {"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197}
--- a/pytorch/output/altra_2_2_p2p-Gnutella04_100.output
+++ b/pytorch/output/altra_2_2_p2p-Gnutella04_100.output
@ -0,0 +1,153 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394153 queued and waiting for resources
 srun: job 3394153 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.6982, 0.7263, 0.0064,  ..., 0.9256, 0.7249, 0.5065])
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.18009519577026367 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
             67.56 msec task-clock:u                     #    0.019 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,829      page-faults:u                    #   56.674 K/sec                     
        47,862,000      cycles:u                         #    0.708 GHz                         (59.24%)
        84,392,375      instructions:u                   #    1.76  insn per cycle              (87.61%)
   <not supported>      branches:u                                                            
           368,432      branch-misses:u                                                       
        32,507,448      L1-dcache-loads:u                #  481.147 M/sec                     
           481,389      L1-dcache-load-misses:u          #    1.48% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,030,656      L1-icache-loads:u                #  459.289 M/sec                     
           308,582      L1-icache-load-misses:u          #    0.99% of all L1-icache accesses 
        34,988,046      dTLB-loads:u                     #  517.863 M/sec                       (20.00%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.538329547 seconds time elapsed
      14.667604000 seconds user
      29.534487000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.4946, 0.3509, 0.5239,  ..., 0.4520, 0.4206, 0.8181])
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.18875432014465332 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
           331,622      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,800,140      BR_RETIRED:u                                                          
       3.556031790 seconds time elapsed
      14.799719000 seconds user
      27.876987000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.2184, 0.4999, 0.9567,  ..., 0.8794, 0.8213, 0.8713])
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.1066896915435791 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
        25,905,045      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,746      ITLB_WALK:u                                                           
            17,547      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,220,079      L1D_TLB:u                                                             
       3.505367779 seconds time elapsed
      14.557493000 seconds user
      29.642958000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.2180, 0.0881, 0.5532,  ..., 0.4961, 0.0093, 0.4929])
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.12433028221130371 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
        30,359,576      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           283,204      L1I_CACHE_REFILL:u                                                    
           465,520      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,843,274      L1D_CACHE:u                                                           
       3.565310130 seconds time elapsed
      14.913239000 seconds user
      28.125605000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.6394, 0.6808, 0.7957,  ..., 0.1529, 0.0561, 0.7834])
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.13401126861572266 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
           560,542      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           575,610      LL_CACHE_RD:u                                                         
           173,643      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            21,499      L2D_TLB_REFILL:u                                                      
           313,335      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,741,621      L2D_CACHE:u                                                           
       3.503362704 seconds time elapsed
      15.287949000 seconds user
      28.752303000 seconds sys
--- a/pytorch/output/altra_2_2_p2p-Gnutella24_100.json
+++ b/pytorch/output/altra_2_2_p2p-Gnutella24_100.json
@ -0,0 +1 @@
 {"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281}
--- a/pytorch/output/altra_2_2_p2p-Gnutella24_100.output
+++ b/pytorch/output/altra_2_2_p2p-Gnutella24_100.output
@ -0,0 +1,153 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394141 queued and waiting for resources
 srun: job 3394141 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.6616, 0.1149, 0.0110,  ..., 0.2481, 0.7877, 0.5589])
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.16974925994873047 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
             61.92 msec task-clock:u                     #    0.017 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,281      page-faults:u                    #   52.988 K/sec                     
        66,250,810      cycles:u                         #    1.070 GHz                         (62.94%)
        75,178,179      instructions:u                   #    1.13  insn per cycle              (83.47%)
   <not supported>      branches:u                                                            
           367,749      branch-misses:u                                                       
        33,064,095      L1-dcache-loads:u                #  533.986 M/sec                     
           465,542      L1-dcache-load-misses:u          #    1.41% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,552,264      L1-icache-loads:u                #  509.570 M/sec                     
           296,060      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        73,155,896      dTLB-loads:u                     #    1.181 G/sec                       (17.31%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.675971385 seconds time elapsed
      14.857293000 seconds user
      29.791187000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.1683, 0.8999, 0.0578,  ..., 0.5893, 0.0628, 0.8262])
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.2227163314819336 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
           332,366      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,076,182      BR_RETIRED:u                                                          
       3.532329673 seconds time elapsed
      14.883993000 seconds user
      28.516661000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.8389, 0.5614, 0.9033,  ..., 0.2231, 0.0349, 0.5167])
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.17095375061035156 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
        27,005,133      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             4,791      ITLB_WALK:u                                                           
            13,403      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,457,054      L1D_TLB:u                                                             
       3.579041343 seconds time elapsed
      14.885159000 seconds user
      29.562650000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.8849, 0.5982, 0.0578,  ..., 0.9975, 0.2204, 0.0718])
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.18003463745117188 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
        32,367,686      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           287,524      L1I_CACHE_REFILL:u                                                    
           467,557      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,022,862      L1D_CACHE:u                                                           
       3.405321132 seconds time elapsed
      15.291636000 seconds user
      28.005015000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.2790, 0.1291, 0.6053,  ..., 0.1651, 0.4973, 0.6821])
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.22036528587341309 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
           535,707      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           556,316      LL_CACHE_RD:u                                                         
           150,149      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            18,418      L2D_TLB_REFILL:u                                                      
           297,042      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,687,364      L2D_CACHE:u                                                           
       3.505209576 seconds time elapsed
      15.297738000 seconds user
      29.848441000 seconds sys
--- a/pytorch/output/altra_2_2_p2p-Gnutella25_100.json
+++ b/pytorch/output/altra_2_2_p2p-Gnutella25_100.json
@ -0,0 +1 @@
 {"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573}
--- a/pytorch/output/altra_2_2_p2p-Gnutella25_100.output
+++ b/pytorch/output/altra_2_2_p2p-Gnutella25_100.output
@ -0,0 +1,153 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394140 queued and waiting for resources
 srun: job 3394140 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.8199, 0.9849, 0.4642,  ..., 0.7594, 0.3568, 0.4020])
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 0.19272208213806152 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
             64.71 msec task-clock:u                     #    0.018 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,319      page-faults:u                    #   51.288 K/sec                     
        57,611,295      cycles:u                         #    0.890 GHz                         (39.00%)
        83,148,228      instructions:u                   #    1.44  insn per cycle              (82.73%)
   <not supported>      branches:u                                                            
           375,111      branch-misses:u                                                       
        32,759,228      L1-dcache-loads:u                #  506.221 M/sec                     
           475,086      L1-dcache-load-misses:u          #    1.45% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,366,158      L1-icache-loads:u                #  484.694 M/sec                     
           297,293      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        35,611,781      dTLB-loads:u                     #  550.301 M/sec                       (25.73%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.578384817 seconds time elapsed
      14.435258000 seconds user
      27.700836000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.0069, 0.9904, 0.5316,  ..., 0.2082, 0.4858, 0.4936])
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 0.1423017978668213 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
           318,386      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,233,431      BR_RETIRED:u                                                          
       3.555753224 seconds time elapsed
      14.642518000 seconds user
      30.112207000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.2250, 0.5676, 0.3018,  ..., 0.5431, 0.7314, 0.5593])
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 0.14638042449951172 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
        27,039,805      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,375      ITLB_WALK:u                                                           
            17,290      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,688,544      L1D_TLB:u                                                             
       3.566915241 seconds time elapsed
      16.116565000 seconds user
      28.752519000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.0220, 0.7494, 0.7913,  ..., 0.8924, 0.8542, 0.5491])
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 0.17815685272216797 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
        32,508,072      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           297,568      L1I_CACHE_REFILL:u                                                    
           477,654      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,044,579      L1D_CACHE:u                                                           
       3.435706033 seconds time elapsed
      14.690285000 seconds user
      28.763423000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.6277, 0.4955, 0.9335,  ..., 0.1476, 0.2079, 0.0931])
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 0.14432048797607422 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
           549,474      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           561,939      LL_CACHE_RD:u                                                         
           185,622      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,295      L2D_TLB_REFILL:u                                                      
           305,878      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,763,089      L2D_CACHE:u                                                           
       3.538826979 seconds time elapsed
      15.006109000 seconds user
      29.644298000 seconds sys
--- a/pytorch/output/altra_2_2_p2p-Gnutella30_100.json
+++ b/pytorch/output/altra_2_2_p2p-Gnutella30_100.json
@ -0,0 +1 @@
 {"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372}
--- a/pytorch/output/altra_2_2_p2p-Gnutella30_100.output
+++ b/pytorch/output/altra_2_2_p2p-Gnutella30_100.output
@ -0,0 +1,153 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394142 queued and waiting for resources
 srun: job 3394142 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.5867, 0.3729, 0.0718,  ..., 0.5551, 0.6046, 0.6005])
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 0.3765556812286377 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
             65.91 msec task-clock:u                     #    0.017 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,247      page-faults:u                    #   49.267 K/sec                     
        92,293,071      cycles:u                         #    1.400 GHz                         (58.72%)
        76,208,632      instructions:u                   #    0.83  insn per cycle              (75.47%)
   <not supported>      branches:u                                                            
           336,620      branch-misses:u                                                         (89.96%)
        33,256,017      L1-dcache-loads:u                #  504.599 M/sec                     
           479,188      L1-dcache-load-misses:u          #    1.44% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,686,331      L1-icache-loads:u                #  480.782 M/sec                     
           297,521      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        55,295,804      dTLB-loads:u                     #  839.012 M/sec                       (27.47%)
           103,616      dTLB-load-misses:u               #    0.19% of all dTLB cache accesses  (20.17%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.803094533 seconds time elapsed
      16.585763000 seconds user
      62.703127000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.2027, 0.2128, 0.5093,  ..., 0.8069, 0.6413, 0.1136])
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 0.2942969799041748 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
           320,083      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,285,106      BR_RETIRED:u                                                          
       3.763535833 seconds time elapsed
      16.476022000 seconds user
      55.208213000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.5930, 0.8044, 0.8115,  ..., 0.6366, 0.1026, 0.6914])
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 0.2431955337524414 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
        26,853,940      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,728      ITLB_WALK:u                                                           
            13,955      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,111,059      L1D_TLB:u                                                             
       3.752433570 seconds time elapsed
      16.433982000 seconds user
      53.207908000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.9666, 0.8206, 0.6252,  ..., 0.5180, 0.8170, 0.7406])
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 0.15313339233398438 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
        32,554,796      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           298,729      L1I_CACHE_REFILL:u                                                    
           473,779      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,117,102      L1D_CACHE:u                                                           
       3.595579651 seconds time elapsed
      15.817851000 seconds user
      44.491315000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.9800, 0.9021, 0.5677,  ..., 0.3869, 0.2468, 0.3286])
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 0.2539215087890625 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
           535,040      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           547,502      LL_CACHE_RD:u                                                         
           179,876      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            21,809      L2D_TLB_REFILL:u                                                      
           298,620      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,722,959      L2D_CACHE:u                                                           
       3.549060962 seconds time elapsed
      16.570077000 seconds user
      52.238012000 seconds sys
--- a/pytorch/output/altra_2_2_ri2010_100.json
+++ b/pytorch/output/altra_2_2_ri2010_100.json
@ -0,0 +1 @@
 {"power_before": [30.44, 35.52], "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 0.29622840881347656, "power": [23.84, 29.44, 33.0, 33.04], "power_after": [36.32, 30.0], "task clock (msec)": 60.77, "page faults": 3361, "cycles": 63493475, "instructions": 91578911, "branch mispredictions": 329084, "branches": 20406595, "ITLB accesses": 26859919, "ITLB misses": 6237, "DTLB misses": 16689, "DTLB accesses": 36348977, "L1I cache accesses": 30979764, "L1I cache misses": 292038, "L1D cache misses": 469219, "L1D cache accesses": 32411890, "LL cache misses": 571870, "LL cache accesses": 598306, "L2D TLB accesses": 205488, "L2D TLB misses": 26392, "L2D cache misses": 342141, "L2D cache accesses": 1857697, "instructions per cycle": 1.442335783322617, "branch miss rate": 0.01612635522976763, "ITLB miss rate": 0.00023220472109390948, "DTLB miss rate": 0.0004591325912693499, "L2D TLB miss rate": 0.12843572374055906, "L1I cache miss rate": 0.009426734173959492, "L1D cache miss rate": 0.014476755289494072, "L2D cache miss rate": 0.1841748142996409, "LL cache miss rate": 0.9558152517273769}
--- a/pytorch/output/altra_2_2_ri2010_100.output
+++ b/pytorch/output/altra_2_2_ri2010_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394145 queued and waiting for resources
 srun: job 3394145 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.1402, 0.0708, 0.4576,  ..., 0.4700, 0.5629, 0.9120])
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 0.3585643768310547 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
             60.77 msec task-clock:u                     #    0.016 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,361      page-faults:u                    #   55.311 K/sec                     
        63,493,475      cycles:u                         #    1.045 GHz                         (49.59%)
        91,578,911      instructions:u                   #    1.44  insn per cycle              (92.22%)
   <not supported>      branches:u                                                            
           374,941      branch-misses:u                                                       
        33,905,978      L1-dcache-loads:u                #  557.979 M/sec                     
           470,553      L1-dcache-load-misses:u          #    1.39% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,247,376      L1-icache-loads:u                #  530.684 M/sec                     
           299,037      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        27,428,635      dTLB-loads:u                     #  451.384 M/sec                       (13.50%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.818532962 seconds time elapsed
      15.563570000 seconds user
      30.194882000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.1841, 0.4436, 0.8281,  ..., 0.0546, 0.5967, 0.9496])
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 0.3050577640533447 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
           329,084      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,406,595      BR_RETIRED:u                                                          
       3.673527837 seconds time elapsed
      15.520198000 seconds user
      29.068211000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.1849, 0.5991, 0.5040,  ..., 0.4916, 0.4789, 0.8887])
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 0.3605458736419678 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
        26,859,919      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,237      ITLB_WALK:u                                                           
            16,689      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,348,977      L1D_TLB:u                                                             
       3.769690988 seconds time elapsed
      15.173839000 seconds user
      29.963392000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.0513, 0.4498, 0.6748,  ..., 0.2114, 0.6847, 0.2188])
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 0.3485410213470459 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
        30,979,764      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           292,038      L1I_CACHE_REFILL:u                                                    
           469,219      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,411,890      L1D_CACHE:u                                                           
       3.598754329 seconds time elapsed
      16.139631000 seconds user
      29.287026000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.7270, 0.7858, 0.3165,  ..., 0.7139, 0.8270, 0.9478])
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 0.3687746524810791 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
           571,870      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           598,306      LL_CACHE_RD:u                                                         
           205,488      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            26,392      L2D_TLB_REFILL:u                                                      
           342,141      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,857,697      L2D_CACHE:u                                                           
       3.726794738 seconds time elapsed
      15.231331000 seconds user
      32.108693000 seconds sys
--- a/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json
+++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.json
@ -0,0 +1 @@
 {"power_before": [16.52, 16.64], "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 1.3372814655303955, "power": [23.92, 38.6, 46.04, 48.2, 48.2], "power_after": [45.0, 44.08], "task clock (msec)": 59.01, "page faults": 3448, "cycles": 73062796, "instructions": 88329175, "branch mispredictions": 331091, "branches": 20013316, "ITLB accesses": 26330936, "ITLB misses": 5193, "DTLB misses": 16837, "DTLB accesses": 35930477, "L1I cache accesses": 31853890, "L1I cache misses": 306147, "L1D cache misses": 479933, "L1D cache accesses": 33426019, "LL cache misses": 540302, "LL cache accesses": 553181, "L2D TLB accesses": 173206, "L2D TLB misses": 21390, "L2D cache misses": 300032, "L2D cache accesses": 1739931, "instructions per cycle": 1.2089487377406143, "branch miss rate": 0.016543535314187813, "ITLB miss rate": 0.0001972204861991993, "DTLB miss rate": 0.000468599401004334, "L2D TLB miss rate": 0.12349456716280037, "L1I cache miss rate": 0.009610976869701, "L1D cache miss rate": 0.014358066391334247, "L2D cache miss rate": 0.17243902200719455, "LL cache miss rate": 0.9767182893121781}
--- a/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output
+++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090216_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394151 queued and waiting for resources
 srun: job 3394151 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.3831, 0.6714, 0.8380,  ..., 0.7892, 0.5274, 0.9035])
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 2.044952392578125 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
             59.01 msec task-clock:u                     #    0.010 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,448      page-faults:u                    #   58.432 K/sec                     
        73,062,796      cycles:u                         #    1.238 GHz                         (59.95%)
        88,329,175      instructions:u                   #    1.21  insn per cycle              (93.89%)
   <not supported>      branches:u                                                            
           365,177      branch-misses:u                                                       
        31,850,867      L1-dcache-loads:u                #  539.766 M/sec                     
           473,835      L1-dcache-load-misses:u          #    1.49% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,385,913      L1-icache-loads:u                #  514.940 M/sec                     
           299,969      L1-icache-load-misses:u          #    0.99% of all L1-icache accesses 
        24,365,554      dTLB-loads:u                     #  412.915 M/sec                       (8.42%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       5.680365622 seconds time elapsed
      27.656957000 seconds user
     194.823873000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.6906, 0.4067, 0.7042,  ..., 0.8333, 0.7120, 0.3519])
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 1.3788115978240967 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
           331,091      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,013,316      BR_RETIRED:u                                                          
       4.886021169 seconds time elapsed
      23.105025000 seconds user
     141.491451000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.8755, 0.6165, 0.4104,  ..., 0.6974, 0.9453, 0.9872])
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 2.8570749759674072 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
        26,330,936      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,193      ITLB_WALK:u                                                           
            16,837      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,930,477      L1D_TLB:u                                                             
       6.371573603 seconds time elapsed
      30.986329000 seconds user
     254.347216000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.3573, 0.9331, 0.0611,  ..., 0.9133, 0.6057, 0.2374])
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 2.311248540878296 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
        31,853,890      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           306,147      L1I_CACHE_REFILL:u                                                    
           479,933      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,426,019      L1D_CACHE:u                                                           
       5.718741260 seconds time elapsed
      28.451593000 seconds user
     214.350594000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.6021, 0.5679, 0.4538,  ..., 0.9086, 0.9552, 0.5329])
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 1.8193013668060303 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
           540,302      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           553,181      LL_CACHE_RD:u                                                         
           173,206      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            21,390      L2D_TLB_REFILL:u                                                      
           300,032      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,739,931      L2D_CACHE:u                                                           
       5.546861941 seconds time elapsed
      28.194596000 seconds user
     181.004698000 seconds sys
--- a/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json
+++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.json
@ -0,0 +1 @@
 {"power_before": [53.64, 46.88], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.2292509078979492, "power": [40.64, 52.44, 54.8, 54.96, 46.8], "power_after": [47.88, 47.08], "task clock (msec)": 61.26, "page faults": 3303, "cycles": 44515786, "instructions": 81513738, "branch mispredictions": 328019, "branches": 19893662, "ITLB accesses": 27248112, "ITLB misses": 5792, "DTLB misses": 16632, "DTLB accesses": 36929042, "L1I cache accesses": 31702830, "L1I cache misses": 295778, "L1D cache misses": 470423, "L1D cache accesses": 33155119, "LL cache misses": 545220, "LL cache accesses": 562139, "L2D TLB accesses": 192206, "L2D TLB misses": 24891, "L2D cache misses": 307033, "L2D cache accesses": 1782260, "instructions per cycle": 1.8311198189334452, "branch miss rate": 0.01648861833482443, "ITLB miss rate": 0.0002125651861677609, "DTLB miss rate": 0.0004503772396803578, "L2D TLB miss rate": 0.12950168048864238, "L1I cache miss rate": 0.009329703373484323, "L1D cache miss rate": 0.014188548079106578, "L2D cache miss rate": 0.17227172241984895, "LL cache miss rate": 0.9699024618466251}
--- a/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.output
+++ b/pytorch/output/altra_2_2_soc-sign-Slashdot090221_100.output
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
-srun: job 3393718 queued and waiting for resources
+srun: job 3394147 queued and waiting for resources
-srun: job 3393718 has been allocated resources
+srun: job 3394147 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
@ -14,37 +14,37 @@ tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
-tensor([0.8320, 0.8961, 0.3119,  ..., 0.2600, 0.3720, 0.6950])
+tensor([0.2696, 0.6106, 0.1626,  ..., 0.2215, 0.5107, 0.8609])
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
-Time: 3.012270212173462 seconds
+Time: 1.4500706195831299 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
-             61.63 msec task-clock:u                     #    0.008 CPUs utilized             
+             61.26 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
-             3,293      page-faults:u                    #   53.433 K/sec                     
+             3,303      page-faults:u                    #   53.917 K/sec                     
-        41,677,750      cycles:u                         #    0.676 GHz                         (43.47%)
+        44,515,786      cycles:u                         #    0.727 GHz                         (40.46%)
-        91,767,205      instructions:u                   #    2.20  insn per cycle              (93.66%)
+        81,513,738      instructions:u                   #    1.83  insn per cycle              (73.51%)
   <not supported>      branches:u                                                            
-           369,577      branch-misses:u                                                       
+           344,479      branch-misses:u                                                         (89.42%)
-        33,184,885      L1-dcache-loads:u                #  538.465 M/sec                     
+        34,411,073      L1-dcache-loads:u                #  561.710 M/sec                     
-           489,650      L1-dcache-load-misses:u          #    1.48% of all L1-dcache accesses 
+           484,811      L1-dcache-load-misses:u          #    1.41% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
-        31,518,657      L1-icache-loads:u                #  511.428 M/sec                     
+        32,789,672      L1-icache-loads:u                #  535.243 M/sec                     
-           300,352      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
+           293,487      L1-icache-load-misses:u          #    0.90% of all L1-icache accesses 
-        21,439,232      dTLB-loads:u                     #  347.878 M/sec                       (11.35%)
+        47,065,740      dTLB-loads:u                     #  768.279 M/sec                       (32.81%)
-     <not counted>      dTLB-load-misses:u                                                      (0.00%)
+           146,215      dTLB-load-misses:u               #    0.31% of all dTLB cache accesses  (13.39%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
-       7.285558270 seconds time elapsed
+       4.966101053 seconds time elapsed
-      30.820742000 seconds user
+      23.375418000 seconds user
-     271.093513000 seconds sys
+     148.052989000 seconds sys
@ -55,21 +55,21 @@ tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
-tensor([0.2625, 0.3727, 0.7700,  ..., 0.9213, 0.0373, 0.4236])
+tensor([0.1999, 0.3932, 0.8035,  ..., 0.5079, 0.5903, 0.7606])
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
-Time: 3.8292958736419678 seconds
+Time: 1.9677543640136719 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
-           329,386      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
+           328,019      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
-        19,813,961      BR_RETIRED:u                                                          
+        19,893,662      BR_RETIRED:u                                                          
-       7.818393438 seconds time elapsed
+       5.529871590 seconds time elapsed
-      35.952830000 seconds user
+      26.844356000 seconds user
-     333.700971000 seconds sys
+     190.429440000 seconds sys
@ -80,23 +80,23 @@ tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
-tensor([0.0340, 0.2650, 0.1324,  ..., 0.0868, 0.2162, 0.5618])
+tensor([0.2933, 0.6999, 0.0078,  ..., 0.6213, 0.9377, 0.6359])
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
-Time: 3.464143753051758 seconds
+Time: 1.4976201057434082 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
-        27,944,146      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
+        27,248,112      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
-             6,811      ITLB_WALK:u                                                           
+             5,792      ITLB_WALK:u                                                           
-            18,962      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
+            16,632      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
-        37,689,058      L1D_TLB:u                                                             
+        36,929,042      L1D_TLB:u                                                             
-       7.541903779 seconds time elapsed
+       4.971341163 seconds time elapsed
-      32.666428000 seconds user
+      24.247480000 seconds user
-     309.938101000 seconds sys
+     151.276717000 seconds sys
@ -107,23 +107,23 @@ tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
-tensor([0.6118, 0.9275, 0.9072,  ..., 0.7025, 0.2788, 0.7796])
+tensor([0.1310, 0.6695, 0.9479,  ..., 0.3141, 0.9327, 0.2117])
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
-Time: 1.4259674549102783 seconds
+Time: 1.0877256393432617 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
-        31,746,573      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
+        31,702,830      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
-           290,044      L1I_CACHE_REFILL:u                                                    
+           295,778      L1I_CACHE_REFILL:u                                                    
-           471,100      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
+           470,423      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
-        33,271,575      L1D_CACHE:u                                                           
+        33,155,119      L1D_CACHE:u                                                           
-       5.333100815 seconds time elapsed
+       4.675682406 seconds time elapsed
-      24.606404000 seconds user
+      23.098007000 seconds user
-     142.184021000 seconds sys
+     119.827712000 seconds sys
@ -134,25 +134,25 @@ tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
-tensor([0.1819, 0.6831, 0.7926,  ..., 0.2272, 0.8215, 0.3765])
+tensor([0.0860, 0.5402, 0.6738,  ..., 0.3856, 0.5968, 0.4203])
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
-Time: 2.8267815113067627 seconds
+Time: 1.2302696704864502 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
-           550,308      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
+           545,220      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
-           564,981      LL_CACHE_RD:u                                                         
+           562,139      LL_CACHE_RD:u                                                         
-           168,456      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
+           192,206      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
-            20,450      L2D_TLB_REFILL:u                                                      
+            24,891      L2D_TLB_REFILL:u                                                      
-           306,309      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
+           307,033      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
-         1,745,776      L2D_CACHE:u                                                           
+         1,782,260      L2D_CACHE:u                                                           
-       7.032343494 seconds time elapsed
+       4.781838296 seconds time elapsed
-      31.547129000 seconds user
+      23.716896000 seconds user
-     251.812633000 seconds sys
+     130.971947000 seconds sys
--- a/pytorch/output/altra_2_2_soc-sign-epinions_100.json
+++ b/pytorch/output/altra_2_2_soc-sign-epinions_100.json
@ -0,0 +1 @@
 {"power_before": [30.48, 33.04], "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 2.848874092102051, "power": [65.52, 75.88, 71.16, 71.16, 59.72, 47.92, 48.68], "power_after": [68.68, 67.88], "task clock (msec)": 49.87, "page faults": 3300, "cycles": 51935476, "instructions": 83731856, "branch mispredictions": 326464, "branches": 20341367, "ITLB accesses": 27590154, "ITLB misses": 6210, "DTLB misses": 17536, "DTLB accesses": 36763243, "L1I cache accesses": 31663300, "L1I cache misses": 289727, "L1D cache misses": 462864, "L1D cache accesses": 33262254, "LL cache misses": 530272, "LL cache accesses": 551373, "L2D TLB accesses": 196152, "L2D TLB misses": 23542, "L2D cache misses": 301998, "L2D cache accesses": 1732662, "instructions per cycle": 1.6122285275675532, "branch miss rate": 0.01604926551888081, "ITLB miss rate": 0.000225080294948698, "DTLB miss rate": 0.0004769981799483794, "L2D TLB miss rate": 0.12001916880786329, "L1I cache miss rate": 0.00915024649989104, "L1D cache miss rate": 0.013915593332911234, "L2D cache miss rate": 0.17429712200071334, "LL cache miss rate": 0.9617300810884828}
--- a/pytorch/output/altra_2_2_soc-sign-epinions_100.output
+++ b/pytorch/output/altra_2_2_soc-sign-epinions_100.output
@ -0,0 +1,163 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394154 queued and waiting for resources
 srun: job 3394154 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.5842, 0.3042, 0.7358,  ..., 0.7882, 0.7596, 0.5895])
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 2.4407293796539307 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
             49.87 msec task-clock:u                     #    0.008 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,300      page-faults:u                    #   66.174 K/sec                     
        51,935,476      cycles:u                         #    1.041 GHz                         (65.00%)
        83,731,856      instructions:u                   #    1.61  insn per cycle              (84.25%)
   <not supported>      branches:u                                                            
           375,900      branch-misses:u                                                       
        34,169,837      L1-dcache-loads:u                #  685.197 M/sec                     
           474,410      L1-dcache-load-misses:u          #    1.39% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,443,215      L1-icache-loads:u                #  650.574 M/sec                     
           294,146      L1-icache-load-misses:u          #    0.91% of all L1-icache accesses 
        63,709,518      dTLB-loads:u                     #    1.278 G/sec                       (16.44%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.058862056 seconds time elapsed
      29.101578000 seconds user
     224.790489000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.9696, 0.8139, 0.4858,  ..., 0.2374, 0.1716, 0.9756])
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 2.0945546627044678 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
           326,464      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,341,367      BR_RETIRED:u                                                          
       5.525378890 seconds time elapsed
      28.841740000 seconds user
     199.678982000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.3478, 0.0057, 0.8574,  ..., 0.6409, 0.1876, 0.8429])
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 2.8504912853240967 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
        27,590,154      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,210      ITLB_WALK:u                                                           
            17,536      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,763,243      L1D_TLB:u                                                             
       6.425887143 seconds time elapsed
      33.069094000 seconds user
     256.667850000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.5381, 0.6651, 0.4689,  ..., 0.7251, 0.3759, 0.8516])
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 1.6941111087799072 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
        31,663,300      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           289,727      L1I_CACHE_REFILL:u                                                    
           462,864      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,262,254      L1D_CACHE:u                                                           
       5.304170809 seconds time elapsed
      25.992245000 seconds user
     173.752913000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.4145, 0.8515, 0.7222,  ..., 0.1386, 0.6641, 0.6662])
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 3.0850296020507812 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
           530,272      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           551,373      LL_CACHE_RD:u                                                         
           196,152      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,542      L2D_TLB_REFILL:u                                                      
           301,998      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,732,662      L2D_CACHE:u                                                           
       6.733517838 seconds time elapsed
      34.030476000 seconds user
     271.397968000 seconds sys
--- a/pytorch/output/altra_2_2_sx-mathoverflow_100.json
+++ b/pytorch/output/altra_2_2_sx-mathoverflow_100.json
@ -0,0 +1 @@
 {"power_before": [20.44, 20.2], "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 0.556269645690918, "power": [25.24, 32.16, 33.0, 32.52], "power_after": [34.24, 30.28], "task clock (msec)": 62.49, "page faults": 3312, "cycles": 76783170, "instructions": 77095702, "branch mispredictions": 323514, "branches": 19769937, "ITLB accesses": 26809325, "ITLB misses": 6925, "DTLB misses": 19003, "DTLB accesses": 36516965, "L1I cache accesses": 31104231, "L1I cache misses": 285499, "L1D cache misses": 468498, "L1D cache accesses": 32677465, "LL cache misses": 559358, "LL cache accesses": 571935, "L2D TLB accesses": 194840, "L2D TLB misses": 23481, "L2D cache misses": 313487, "L2D cache accesses": 1779730, "instructions per cycle": 1.004070319055595, "branch miss rate": 0.016363936819829016, "ITLB miss rate": 0.00025830564551699827, "DTLB miss rate": 0.0005203882633729282, "L2D TLB miss rate": 0.12051426811742968, "L1I cache miss rate": 0.009178783426601994, "L1D cache miss rate": 0.01433703624194839, "L2D cache miss rate": 0.1761430104566423, "LL cache miss rate": 0.9780097388689274}
--- a/pytorch/output/altra_2_2_sx-mathoverflow_100.output
+++ b/pytorch/output/altra_2_2_sx-mathoverflow_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394144 queued and waiting for resources
 srun: job 3394144 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.7658, 0.2874, 0.7506,  ..., 0.3335, 0.5056, 0.9767])
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 0.5561239719390869 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
             62.49 msec task-clock:u                     #    0.015 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,312      page-faults:u                    #   53.003 K/sec                     
        76,783,170      cycles:u                         #    1.229 GHz                         (62.65%)
        77,095,702      instructions:u                   #    1.00  insn per cycle              (80.20%)
   <not supported>      branches:u                                                            
           370,891      branch-misses:u                                                         (94.99%)
        32,730,448      L1-dcache-loads:u                #  523.800 M/sec                     
           467,718      L1-dcache-load-misses:u          #    1.43% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,548,469      L1-icache-loads:u                #  504.885 M/sec                     
           298,966      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        61,098,419      dTLB-loads:u                     #  977.786 M/sec                       (20.67%)
            64,747      dTLB-load-misses:u               #    0.11% of all dTLB cache accesses  (10.91%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.062782709 seconds time elapsed
      16.106338000 seconds user
      32.399716000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.7531, 0.4727, 0.4126,  ..., 0.1574, 0.5247, 0.8875])
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 0.6003477573394775 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
           323,514      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,769,937      BR_RETIRED:u                                                          
       4.061021393 seconds time elapsed
      16.155442000 seconds user
      31.047278000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.3067, 0.4335, 0.8814,  ..., 0.2370, 0.1210, 0.7695])
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 0.5404119491577148 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
        26,809,325      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,925      ITLB_WALK:u                                                           
            19,003      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,516,965      L1D_TLB:u                                                             
       4.031175418 seconds time elapsed
      15.607232000 seconds user
      30.562258000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.5013, 0.5961, 0.5565,  ..., 0.3779, 0.1835, 0.6722])
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 0.6185996532440186 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
        31,104,231      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           285,499      L1I_CACHE_REFILL:u                                                    
           468,498      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,677,465      L1D_CACHE:u                                                           
       4.083129305 seconds time elapsed
      16.243642000 seconds user
      36.578375000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.9075, 0.2788, 0.1365,  ..., 0.4240, 0.8832, 0.1064])
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 0.54673171043396 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
           559,358      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           571,935      LL_CACHE_RD:u                                                         
           194,840      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,481      L2D_TLB_REFILL:u                                                      
           313,487      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,779,730      L2D_CACHE:u                                                           
       3.961843929 seconds time elapsed
      15.425912000 seconds user
      28.864046000 seconds sys
--- a/pytorch/output/altra_2_2_ut2010_100.json
+++ b/pytorch/output/altra_2_2_ut2010_100.json
@ -0,0 +1 @@
 {"power_before": [34.6, 37.16], "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 1.0817186832427979, "power": [34.32, 50.84, 52.12, 52.4, 52.76], "power_after": [49.0, 45.08], "task clock (msec)": 60.55, "page faults": 3490, "cycles": 49977496, "instructions": 78622993, "branch mispredictions": 327078, "branches": 20135808, "ITLB accesses": 27608093, "ITLB misses": 6616, "DTLB misses": 17185, "DTLB accesses": 36866957, "L1I cache accesses": 32639204, "L1I cache misses": 309643, "L1D cache misses": 478856, "L1D cache accesses": 34280618, "LL cache misses": 555275, "LL cache accesses": 578455, "L2D TLB accesses": 188723, "L2D TLB misses": 24635, "L2D cache misses": 319663, "L2D cache accesses": 1799940, "instructions per cycle": 1.573167911413569, "branch miss rate": 0.016243599462211798, "ITLB miss rate": 0.00023963987661154286, "DTLB miss rate": 0.00046613556958335347, "L2D TLB miss rate": 0.13053522888042263, "L1I cache miss rate": 0.009486842877663316, "L1D cache miss rate": 0.013968709665619214, "L2D cache miss rate": 0.17759647543807017, "LL cache miss rate": 0.9599277385449171}
--- a/pytorch/output/altra_2_2_ut2010_100.output
+++ b/pytorch/output/altra_2_2_ut2010_100.output
@ -0,0 +1,168 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394146 queued and waiting for resources
 srun: job 3394146 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.4608, 0.1516, 0.8492,  ..., 0.8920, 0.4275, 0.8070])
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.3751039505004883 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
             60.55 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,490      page-faults:u                    #   57.638 K/sec                     
        49,977,496      cycles:u                         #    0.825 GHz                         (40.93%)
        78,622,993      instructions:u                   #    1.57  insn per cycle              (85.37%)
   <not supported>      branches:u                                                            
           358,029      branch-misses:u                                                       
        31,478,500      L1-dcache-loads:u                #  519.877 M/sec                     
           479,449      L1-dcache-load-misses:u          #    1.52% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        29,991,824      L1-icache-loads:u                #  495.324 M/sec                     
           294,864      L1-icache-load-misses:u          #    0.98% of all L1-icache accesses 
        35,154,647      dTLB-loads:u                     #  580.589 M/sec                       (23.19%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.986156121 seconds time elapsed
      23.724703000 seconds user
     145.034521000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.4697, 0.7121, 0.5987,  ..., 0.2619, 0.7308, 0.3129])
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.6881086826324463 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
           327,078      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,135,808      BR_RETIRED:u                                                          
       5.374156677 seconds time elapsed
      25.609168000 seconds user
     167.278028000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.9215, 0.6706, 0.8015,  ..., 0.8507, 0.8546, 0.4441])
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.2785694599151611 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
        27,608,093      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,616      ITLB_WALK:u                                                           
            17,185      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,866,957      L1D_TLB:u                                                             
       4.861513311 seconds time elapsed
      23.339077000 seconds user
     141.584760000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.8973, 0.5228, 0.4492,  ..., 0.7677, 0.7722, 0.1700])
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.1654376983642578 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
        32,639,204      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           309,643      L1I_CACHE_REFILL:u                                                    
           478,856      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,280,618      L1D_CACHE:u                                                           
       4.677973310 seconds time elapsed
      22.972655000 seconds user
     125.062401000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.4542, 0.7095, 0.5701,  ..., 0.2172, 0.8829, 0.7757])
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.1153452396392822 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
           555,275      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           578,455      LL_CACHE_RD:u                                                         
           188,723      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,635      L2D_TLB_REFILL:u                                                      
           319,663      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,799,940      L2D_CACHE:u                                                           
       4.655024760 seconds time elapsed
      23.104641000 seconds user
     122.294597000 seconds sys
--- a/pytorch/output/altra_2_2_vt2010_100.json
+++ b/pytorch/output/altra_2_2_vt2010_100.json
@ -0,0 +1 @@
 {"power_before": [34.04, 43.96], "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 0.4164857864379883, "power": [23.72, 23.72, 29.88, 33.32], "power_after": [33.36, 32.52], "task clock (msec)": 61.63, "page faults": 3304, "cycles": 64734203, "instructions": 53597991, "branch mispredictions": 330777, "branches": 20357034, "ITLB accesses": 27381387, "ITLB misses": 6248, "DTLB misses": 17636, "DTLB accesses": 37436110, "L1I cache accesses": 32505993, "L1I cache misses": 303849, "L1D cache misses": 467426, "L1D cache accesses": 34241110, "LL cache misses": 550075, "LL cache accesses": 562829, "L2D TLB accesses": 199285, "L2D TLB misses": 24424, "L2D cache misses": 310155, "L2D cache accesses": 1783824, "instructions per cycle": 0.8279701999266138, "branch miss rate": 0.016248781625063848, "ITLB miss rate": 0.00022818420410916364, "DTLB miss rate": 0.00047109595521543235, "L2D TLB miss rate": 0.12255814536969667, "L1I cache miss rate": 0.009347476325365603, "L1D cache miss rate": 0.01365101773861887, "L2D cache miss rate": 0.17387085272986572, "LL cache miss rate": 0.9773394761108614}
--- a/pytorch/output/altra_2_2_vt2010_100.output
+++ b/pytorch/output/altra_2_2_vt2010_100.output
@ -0,0 +1,158 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394143 queued and waiting for resources
 srun: job 3394143 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.9170, 0.7306, 0.1175,  ..., 0.0616, 0.0147, 0.6403])
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 0.4440653324127197 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
             61.63 msec task-clock:u                     #    0.016 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,304      page-faults:u                    #   53.611 K/sec                     
        64,734,203      cycles:u                         #    1.050 GHz                         (50.46%)
        53,597,991      instructions:u                   #    0.83  insn per cycle              (70.10%)
   <not supported>      branches:u                                                            
           347,389      branch-misses:u                                                         (91.95%)
        31,363,842      L1-dcache-loads:u                #  508.915 M/sec                     
           482,780      L1-dcache-load-misses:u          #    1.54% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,027,001      L1-icache-loads:u                #  487.223 M/sec                     
           288,023      L1-icache-load-misses:u          #    0.96% of all L1-icache accesses 
        44,333,825      dTLB-loads:u                     #  719.368 M/sec                       (48.58%)
            74,525      dTLB-load-misses:u               #    0.17% of all dTLB cache accesses  (16.71%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       3.811654040 seconds time elapsed
      15.616953000 seconds user
      30.906234000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.5548, 0.3514, 0.6283,  ..., 0.5672, 0.1575, 0.4493])
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 0.44233155250549316 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
           330,777      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,357,034      BR_RETIRED:u                                                          
       3.835342404 seconds time elapsed
      15.497637000 seconds user
      28.676763000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.0953, 0.5790, 0.0112,  ..., 0.9540, 0.3173, 0.4731])
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 0.43302106857299805 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
        27,381,387      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,248      ITLB_WALK:u                                                           
            17,636      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,436,110      L1D_TLB:u                                                             
       3.828586094 seconds time elapsed
      15.518057000 seconds user
      31.389361000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.5456, 0.8708, 0.2037,  ..., 0.8669, 0.9122, 0.2046])
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 0.4426534175872803 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
        32,505,993      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           303,849      L1I_CACHE_REFILL:u                                                    
           467,426      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,241,110      L1D_CACHE:u                                                           
       3.811299200 seconds time elapsed
      15.932195000 seconds user
      30.887870000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.5024, 0.2304, 0.7925,  ..., 0.1397, 0.5558, 0.6450])
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 0.3671383857727051 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
           550,075      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           562,829      LL_CACHE_RD:u                                                         
           199,285      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,424      L2D_TLB_REFILL:u                                                      
           310,155      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,783,824      L2D_CACHE:u                                                           
       3.824434783 seconds time elapsed
      15.754438000 seconds user
      28.226523000 seconds sys
		`@ -1 +0,0 @@`
			{"power_before": [20.2, 20.32], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.6952476501464844, "power": [44.68, 57.12, 64.2, 67.04, 55.68, 42.76], "power_after": [53.56, 52.52], "task clock (msec)": 61.63, "page faults": 3293, "cycles": 41677750, "instructions": 91767205, "branch mispredictions": 329386, "branches": 19813961, "ITLB accesses": 27944146, "ITLB misses": 6811, "DTLB misses": 18962, "DTLB accesses": 37689058, "L1I cache accesses": 31746573, "L1I cache misses": 290044, "L1D cache misses": 471100, "L1D cache accesses": 33271575, "LL cache misses": 550308, "LL cache accesses": 564981, "L2D TLB accesses": 168456, "L2D TLB misses": 20450, "L2D cache misses": 306309, "L2D cache accesses": 1745776, "instructions per cycle": 2.201827233955768, "branch miss rate": 0.01662393501228755, "ITLB miss rate": 0.00024373620149279208, "DTLB miss rate": 0.0005031168462740565, "L2D TLB miss rate": 0.12139668518782352, "L1I cache miss rate": 0.009136230231842662, "L1D cache miss rate": 0.014159233519904002, "L2D cache miss rate": 0.17545721787904062, "LL cache miss rate": 0.9740292151417481}
		`@ -0,0 +1 @@`
							{"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 0.9615458509371094}
		`@ -0,0 +1 @@`
							{"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491}
		`@ -0,0 +1 @@`
							{"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127}
		`@ -0,0 +1 @@`
							{"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218}
		`@ -0,0 +1 @@`
							{"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719}
		`@ -0,0 +1 @@`
							{"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197}
		`@ -0,0 +1 @@`
							{"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281}
		`@ -0,0 +1 @@`
							{"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573}
		`@ -0,0 +1 @@`
							{"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372}