Output!

2024-12-12 02:35:03 -05:00 · 2024-12-12 02:35:03 -05:00 · b402503c31
commit b402503c31
parent 66b0699fa8
377 changed files with 505 additions and 8073 deletions
--- a/pytorch/output/altra_10_30_Oregon-2_1000.json
+++ b/pytorch/output/altra_10_30_Oregon-2_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.36, 22.88, 22.36, 22.72, 22.52, 22.2, 21.96, 21.8, 21.48, 21.48], "matrix": "Oregon-2", "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 1.5312557220458984, "power": [26.68, 27.84, 28.48, 29.92, 30.0], "power_after": [21.16, 21.32, 21.16, 21.16, 21.16, 20.88, 20.92, 20.76, 20.96, 21.2], "task clock (msec)": 64.81, "page faults": 3244, "cycles": 82069432, "instructions": 78292700, "branch mispredictions": 319703, "branches": 19996903, "ITLB accesses": 26988315, "ITLB misses": 5988, "DTLB misses": 14570, "DTLB accesses": 36879854, "L1I cache accesses": 30465174, "L1I cache misses": 293085, "L1D cache misses": 487330, "L1D cache accesses": 31932249, "LL cache misses": 545501, "LL cache accesses": 558084, "L2D TLB accesses": 204746, "L2D TLB misses": 25302, "L2D cache misses": 314594, "L2D cache accesses": 1828047, "instructions per cycle": 0.9539812582107307, "branch miss rate": 0.01598762568383714, "ITLB miss rate": 0.00022187379982781437, "DTLB miss rate": 0.0003950666399058955, "L2D TLB miss rate": 0.12357750578765886, "L1I cache miss rate": 0.009620329101025322, "L1D cache miss rate": 0.015261374167538278, "L2D cache miss rate": 0.17209294947011755, "LL cache miss rate": 0.9774532149282187}
--- a/pytorch/output/altra_10_30_Oregon-2_1000.output
+++ b/pytorch/output/altra_10_30_Oregon-2_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394980 queued and waiting for resources
 srun: job 3394980 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.9231, 0.7723, 0.0509,  ..., 0.0839, 0.6982, 0.3459])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 1.5677142143249512 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
             64.81 msec task-clock:u                     #    0.013 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,244      page-faults:u                    #   50.056 K/sec                     
        82,069,432      cycles:u                         #    1.266 GHz                         (59.04%)
        78,292,700      instructions:u                   #    0.95  insn per cycle              (76.75%)
   <not supported>      branches:u                                                            
           341,509      branch-misses:u                                                         (90.97%)
        33,032,555      L1-dcache-loads:u                #  509.704 M/sec                     
           478,674      L1-dcache-load-misses:u          #    1.45% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,508,310      L1-icache-loads:u                #  486.184 M/sec                     
           297,528      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        49,358,091      dTLB-loads:u                     #  761.613 M/sec                       (27.83%)
            88,514      dTLB-load-misses:u               #    0.18% of all dTLB cache accesses  (14.82%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       5.016393105 seconds time elapsed
      16.759527000 seconds user
      31.429551000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.8423, 0.9339, 0.8037,  ..., 0.5953, 0.0649, 0.1559])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 1.516484022140503 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
           319,703      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,996,903      BR_RETIRED:u                                                          
       4.945699041 seconds time elapsed
      16.431978000 seconds user
      29.752452000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.8058, 0.2922, 0.1227,  ..., 0.2176, 0.9496, 0.8838])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 1.6458909511566162 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
        26,988,315      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,988      ITLB_WALK:u                                                           
            14,570      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,879,854      L1D_TLB:u                                                             
       5.011871473 seconds time elapsed
      16.529942000 seconds user
      30.438432000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.7728, 0.1182, 0.3337,  ..., 0.2555, 0.2523, 0.5746])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 1.529954433441162 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
        30,465,174      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           293,085      L1I_CACHE_REFILL:u                                                    
           487,330      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,932,249      L1D_CACHE:u                                                           
       4.954100105 seconds time elapsed
      16.282966000 seconds user
      28.926724000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.5613, 0.3211, 0.1739,  ..., 0.5461, 0.1391, 0.8387])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 1.5726752281188965 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
           545,501      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           558,084      LL_CACHE_RD:u                                                         
           204,746      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,302      L2D_TLB_REFILL:u                                                      
           314,594      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,828,047      L2D_CACHE:u                                                           
       4.866549675 seconds time elapsed
      16.609257000 seconds user
      31.381282000 seconds sys
--- a/pytorch/output/altra_10_30_as-caida_1000.json
+++ b/pytorch/output/altra_10_30_as-caida_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.6, 21.64, 21.88, 22.08, 22.2, 22.32, 22.36, 22.04, 22.0, 21.96], "matrix": "as-caida", "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 2.6254467964172363, "power": [30.92, 29.2, 29.52, 29.72, 29.72, 31.72], "power_after": [21.04, 21.28, 21.04, 21.16, 21.16, 20.96, 21.04, 20.88, 20.56, 20.84], "task clock (msec)": 61.4, "page faults": 3507, "cycles": 78967021, "instructions": 94334531, "branch mispredictions": 325893, "branches": 19069753, "ITLB accesses": 27181279, "ITLB misses": 5995, "DTLB misses": 17412, "DTLB accesses": 37016930, "L1I cache accesses": 31535482, "L1I cache misses": 292676, "L1D cache misses": 471752, "L1D cache accesses": 33119145, "LL cache misses": 540894, "LL cache accesses": 554700, "L2D TLB accesses": 191772, "L2D TLB misses": 23711, "L2D cache misses": 306195, "L2D cache accesses": 1755986, "instructions per cycle": 1.1946066827061894, "branch miss rate": 0.017089523917797993, "ITLB miss rate": 0.00022055621444450792, "DTLB miss rate": 0.00047037936425305935, "L2D TLB miss rate": 0.12364161608576851, "L1I cache miss rate": 0.009280847522799873, "L1D cache miss rate": 0.01424408752097918, "L2D cache miss rate": 0.17437211913990203, "LL cache miss rate": 0.975110870740941}
--- a/pytorch/output/altra_10_30_as-caida_1000.output
+++ b/pytorch/output/altra_10_30_as-caida_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394983 queued and waiting for resources
 srun: job 3394983 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.4886, 0.3652, 0.5691,  ..., 0.6466, 0.4355, 0.8397])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 2.6297245025634766 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
             61.40 msec task-clock:u                     #    0.010 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,507      page-faults:u                    #   57.117 K/sec                     
        78,967,021      cycles:u                         #    1.286 GHz                         (61.13%)
        94,334,531      instructions:u                   #    1.19  insn per cycle              (95.16%)
   <not supported>      branches:u                                                            
           365,239      branch-misses:u                                                       
        33,334,312      L1-dcache-loads:u                #  542.906 M/sec                     
           457,950      L1-dcache-load-misses:u          #    1.37% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,725,851      L1-icache-loads:u                #  516.709 M/sec                     
           297,720      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        25,188,580      dTLB-loads:u                     #  410.239 M/sec                       (5.16%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.049042045 seconds time elapsed
      17.649315000 seconds user
      29.335859000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.8344, 0.2588, 0.2246,  ..., 0.5607, 0.8141, 0.9893])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 2.6495532989501953 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
           325,893      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,069,753      BR_RETIRED:u                                                          
       6.023780447 seconds time elapsed
      17.654658000 seconds user
      28.848805000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.0814, 0.1132, 0.8515,  ..., 0.8987, 0.5912, 0.5002])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 2.5444185733795166 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
        27,181,279      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,995      ITLB_WALK:u                                                           
            17,412      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,016,930      L1D_TLB:u                                                             
       5.790360666 seconds time elapsed
      17.919315000 seconds user
      30.569858000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.0439, 0.1884, 0.3342,  ..., 0.2027, 0.5532, 0.7245])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 2.620804786682129 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
        31,535,482      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           292,676      L1I_CACHE_REFILL:u                                                    
           471,752      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,119,145      L1D_CACHE:u                                                           
       6.002311801 seconds time elapsed
      17.427887000 seconds user
      30.063688000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.1495, 0.5856, 0.8600,  ..., 0.2101, 0.6229, 0.2019])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 2.561279296875 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
           540,894      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           554,700      LL_CACHE_RD:u                                                         
           191,772      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,711      L2D_TLB_REFILL:u                                                      
           306,195      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,755,986      L2D_CACHE:u                                                           
       5.946428572 seconds time elapsed
      17.396567000 seconds user
      32.141235000 seconds sys
--- a/pytorch/output/altra_10_30_dc2_1000.json
+++ b/pytorch/output/altra_10_30_dc2_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [83.04, 78.44, 65.92, 53.76, 38.68, 38.68, 25.68, 22.6, 22.52, 22.32], "matrix": "dc2", "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 14.128849267959595, "power": [89.84, 89.4, 82.8, 71.32, 57.72, 51.92, 53.0, 63.8, 78.24, 78.24, 90.2, 90.36, 90.08, 88.64, 88.64, 87.64, 87.68, 87.24], "power_after": [21.4, 21.2, 21.08, 21.08, 21.28, 21.04, 20.92, 21.12, 21.08, 21.0], "task clock (msec)": 58.45, "page faults": 3471, "cycles": 76691414, "instructions": 89547095, "branch mispredictions": 329725, "branches": 19946857, "ITLB accesses": 27648951, "ITLB misses": 6857, "DTLB misses": 18047, "DTLB accesses": 37225736, "L1I cache accesses": 32434686, "L1I cache misses": 293072, "L1D cache misses": 483557, "L1D cache accesses": 34059722, "LL cache misses": 561480, "LL cache accesses": 578369, "L2D TLB accesses": 192306, "L2D TLB misses": 25364, "L2D cache misses": 317121, "L2D cache accesses": 1812330, "instructions per cycle": 1.16762868656979, "branch miss rate": 0.01653017314958442, "ITLB miss rate": 0.00024800217556174194, "DTLB miss rate": 0.00048479901109275584, "L2D TLB miss rate": 0.13189396066685385, "L1I cache miss rate": 0.00903575881696527, "L1D cache miss rate": 0.014197326683993487, "L2D cache miss rate": 0.17497972223601663, "LL cache miss rate": 0.9707989190292011}
--- a/pytorch/output/altra_10_30_dc2_1000.output
+++ b/pytorch/output/altra_10_30_dc2_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394982 queued and waiting for resources
 srun: job 3394982 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.0986, 0.6504, 0.0132,  ..., 0.6525, 0.3337, 0.7557])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 18.46260714530945 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
             58.45 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,471      page-faults:u                    #   59.382 K/sec                     
        76,691,414      cycles:u                         #    1.312 GHz                         (41.20%)
        89,547,095      instructions:u                   #    1.17  insn per cycle              (73.16%)
   <not supported>      branches:u                                                            
           382,362      branch-misses:u                                                         (96.21%)
        33,271,433      L1-dcache-loads:u                #  569.211 M/sec                     
           488,730      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,926,596      L1-icache-loads:u                #  546.204 M/sec                     
           304,792      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        36,392,791      dTLB-loads:u                     #  622.612 M/sec                       (31.21%)
                 0      dTLB-load-misses:u                                                      (5.35%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      22.126601025 seconds time elapsed
     103.642372000 seconds user
    1434.131491000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.5605, 0.9374, 0.4444,  ..., 0.5937, 0.3099, 0.2252])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 13.607120752334595 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
           329,725      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,946,857      BR_RETIRED:u                                                          
      17.131143957 seconds time elapsed
      96.945305000 seconds user
    1045.242697000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.8954, 0.9777, 0.8042,  ..., 0.2069, 0.7063, 0.8479])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 17.22396969795227 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
        27,648,951      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,857      ITLB_WALK:u                                                           
            18,047      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,225,736      L1D_TLB:u                                                             
      20.911480243 seconds time elapsed
     107.392462000 seconds user
    1329.272154000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.9293, 0.9606, 0.8914,  ..., 0.2407, 0.2843, 0.5174])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 13.233965873718262 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
        32,434,686      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           293,072      L1I_CACHE_REFILL:u                                                    
           483,557      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,059,722      L1D_CACHE:u                                                           
      16.956477005 seconds time elapsed
      88.393687000 seconds user
    1037.101858000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.8850, 0.9552, 0.7029,  ..., 0.3357, 0.0248, 0.5395])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 13.873224973678589 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
           561,480      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           578,369      LL_CACHE_RD:u                                                         
           192,306      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,364      L2D_TLB_REFILL:u                                                      
           317,121      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,812,330      L2D_CACHE:u                                                           
      17.467787426 seconds time elapsed
      92.463054000 seconds user
    1072.584062000 seconds sys
--- a/pytorch/output/altra_10_30_de2010_1000.json
+++ b/pytorch/output/altra_10_30_de2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.56, 28.04, 23.8, 23.08, 22.12, 21.16, 21.16, 21.0, 20.96, 20.72], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.713265895843506, "power": [33.24, 30.84, 29.96, 27.68, 25.8, 25.8, 31.16], "power_after": [20.6, 20.48, 20.24, 20.32, 20.2, 20.36, 20.4, 20.4, 20.36, 20.36], "task clock (msec)": 48.96, "page faults": 3285, "cycles": 48563060, "instructions": 73465190, "branch mispredictions": 326361, "branches": 19599354, "ITLB accesses": 26666488, "ITLB misses": 6643, "DTLB misses": 17347, "DTLB accesses": 35986736, "L1I cache accesses": 32502068, "L1I cache misses": 302739, "L1D cache misses": 480619, "L1D cache accesses": 34031072, "LL cache misses": 552815, "LL cache accesses": 567373, "L2D TLB accesses": 188248, "L2D TLB misses": 23165, "L2D cache misses": 308211, "L2D cache accesses": 1787647, "instructions per cycle": 1.5127792606149613, "branch miss rate": 0.016651620252381788, "ITLB miss rate": 0.0002491141690649327, "DTLB miss rate": 0.0004820387155978803, "L2D TLB miss rate": 0.12305575623645404, "L1I cache miss rate": 0.00931445346800702, "L1D cache miss rate": 0.014122946229845479, "L2D cache miss rate": 0.17241155552522394, "LL cache miss rate": 0.9743413944618443}
--- a/pytorch/output/altra_10_30_de2010_1000.output
+++ b/pytorch/output/altra_10_30_de2010_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394985 queued and waiting for resources
 srun: job 3394985 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.6055, 0.8789, 0.0482,  ..., 0.0736, 0.1316, 0.6744])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.6956887245178223 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
             48.96 msec task-clock:u                     #    0.008 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,285      page-faults:u                    #   67.090 K/sec                     
        48,563,060      cycles:u                         #    0.992 GHz                         (59.76%)
        73,465,190      instructions:u                   #    1.51  insn per cycle              (78.23%)
   <not supported>      branches:u                                                            
           369,314      branch-misses:u                                                         (98.16%)
        31,769,641      L1-dcache-loads:u                #  648.836 M/sec                     
           479,594      L1-dcache-load-misses:u          #    1.51% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,338,929      L1-icache-loads:u                #  619.616 M/sec                     
           282,162      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        55,516,925      dTLB-loads:u                     #    1.134 G/sec                       (23.54%)
            12,345      dTLB-load-misses:u               #    0.02% of all dTLB cache accesses  (3.47%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.017085179 seconds time elapsed
      17.484355000 seconds user
      28.678064000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.2815, 0.8196, 0.3706,  ..., 0.1328, 0.4062, 0.9113])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.7908551692962646 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
           326,361      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,599,354      BR_RETIRED:u                                                          
       6.215591535 seconds time elapsed
      18.097112000 seconds user
      27.831633000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.9002, 0.0843, 0.5558,  ..., 0.3931, 0.8070, 0.7414])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.819589376449585 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
        26,666,488      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,643      ITLB_WALK:u                                                           
            17,347      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,986,736      L1D_TLB:u                                                             
       6.243883495 seconds time elapsed
      17.783312000 seconds user
      31.714619000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.9109, 0.6392, 0.7899,  ..., 0.0945, 0.3298, 0.6865])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.747800827026367 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
        32,502,068      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,739      L1I_CACHE_REFILL:u                                                    
           480,619      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,031,072      L1D_CACHE:u                                                           
       6.126767063 seconds time elapsed
      17.702029000 seconds user
      29.137072000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.7083, 0.6766, 0.7649,  ..., 0.3027, 0.9885, 0.8086])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.795116901397705 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
           552,815      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           567,373      LL_CACHE_RD:u                                                         
           188,248      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,165      L2D_TLB_REFILL:u                                                      
           308,211      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,787,647      L2D_CACHE:u                                                           
       6.041792624 seconds time elapsed
      17.791735000 seconds user
      29.790006000 seconds sys
--- a/pytorch/output/altra_10_30_email-Enron_1000.json
+++ b/pytorch/output/altra_10_30_email-Enron_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.96, 27.92, 27.24, 23.0, 22.28, 22.28, 21.6, 20.8, 20.68, 20.76], "matrix": "email-Enron", "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 12.818164587020874, "power": [84.24, 82.72, 82.72, 72.0, 60.2, 51.88, 52.4, 59.36, 72.08, 83.88, 86.48, 84.28, 82.28, 81.12, 80.96, 80.96, 81.16], "power_after": [20.92, 20.92, 20.92, 20.92, 21.0, 20.96, 20.88, 20.84, 20.88, 20.68], "task clock (msec)": 48.76, "page faults": 3281, "cycles": 45495589, "instructions": 79104832, "branch mispredictions": 335574, "branches": 20121415, "ITLB accesses": 26011880, "ITLB misses": 5842, "DTLB misses": 16448, "DTLB accesses": 35000292, "L1I cache accesses": 32193112, "L1I cache misses": 310304, "L1D cache misses": 495806, "L1D cache accesses": 33829187, "LL cache misses": 546628, "LL cache accesses": 570044, "L2D TLB accesses": 196794, "L2D TLB misses": 24071, "L2D cache misses": 316028, "L2D cache accesses": 1836018, "instructions per cycle": 1.7387362981496954, "branch miss rate": 0.016677455338006797, "ITLB miss rate": 0.00022458968748125855, "DTLB miss rate": 0.000469938936509444, "L2D TLB miss rate": 0.1223157210077543, "L1I cache miss rate": 0.009638832058236556, "L1D cache miss rate": 0.014656160669779029, "L2D cache miss rate": 0.1721268527868463, "LL cache miss rate": 0.9589224691427328}
--- a/pytorch/output/altra_10_30_email-Enron_1000.output
+++ b/pytorch/output/altra_10_30_email-Enron_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394986 queued and waiting for resources
 srun: job 3394986 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.9906, 0.9401, 0.5661,  ..., 0.4491, 0.7550, 0.2452])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 12.80848503112793 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
             48.76 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,281      page-faults:u                    #   67.289 K/sec                     
        45,495,589      cycles:u                         #    0.933 GHz                         (57.79%)
        79,104,832      instructions:u                   #    1.74  insn per cycle              (81.70%)
   <not supported>      branches:u                                                            
           372,161      branch-misses:u                                                       
        32,089,348      L1-dcache-loads:u                #  658.113 M/sec                     
           467,576      L1-dcache-load-misses:u          #    1.46% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,688,995      L1-icache-loads:u                #  629.393 M/sec                     
           289,698      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        47,006,355      dTLB-loads:u                     #  964.042 M/sec                       (22.12%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      16.331438990 seconds time elapsed
      76.869141000 seconds user
     999.179638000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.7565, 0.5273, 0.1038,  ..., 0.9432, 0.1309, 0.5542])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 26.91536283493042 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
           335,574      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,121,415      BR_RETIRED:u                                                          
      30.559245388 seconds time elapsed
     126.799314000 seconds user
    2081.777635000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.2321, 0.0702, 0.2538,  ..., 0.6254, 0.6308, 0.5317])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 14.841739892959595 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
        26,011,880      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,842      ITLB_WALK:u                                                           
            16,448      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,000,292      L1D_TLB:u                                                             
      18.443612527 seconds time elapsed
      80.694133000 seconds user
    1159.740575000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.7091, 0.9447, 0.0959,  ..., 0.0090, 0.7012, 0.6025])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 10.863199234008789 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
        32,193,112      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           310,304      L1I_CACHE_REFILL:u                                                    
           495,806      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,829,187      L1D_CACHE:u                                                           
      14.426841778 seconds time elapsed
      70.728541000 seconds user
     853.184507000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.8267, 0.6185, 0.8015,  ..., 0.8593, 0.4881, 0.8599])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 12.076026678085327 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
           546,628      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           570,044      LL_CACHE_RD:u                                                         
           196,794      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,071      L2D_TLB_REFILL:u                                                      
           316,028      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,836,018      L2D_CACHE:u                                                           
      15.581045199 seconds time elapsed
      77.345591000 seconds user
     942.987439000 seconds sys
--- a/pytorch/output/altra_10_30_p2p-Gnutella04_1000.json
+++ b/pytorch/output/altra_10_30_p2p-Gnutella04_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.36, 16.8, 16.76, 16.6, 16.48, 16.44, 16.28, 16.28, 16.16], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 1.0642461776733398, "power": [26.6, 27.52, 27.52, 31.16, 28.48], "power_after": [16.28, 16.4, 16.32, 16.12, 16.24, 16.0, 16.0, 16.24, 16.52, 17.04], "task clock (msec)": 50.59, "page faults": 3303, "cycles": 51318459, "instructions": 74705078, "branch mispredictions": 328853, "branches": 19620312, "ITLB accesses": 27939682, "ITLB misses": 5470, "DTLB misses": 17679, "DTLB accesses": 37425602, "L1I cache accesses": 30276633, "L1I cache misses": 291467, "L1D cache misses": 479061, "L1D cache accesses": 31689326, "LL cache misses": 529426, "LL cache accesses": 550033, "L2D TLB accesses": 171913, "L2D TLB misses": 20624, "L2D cache misses": 296662, "L2D cache accesses": 1714211, "instructions per cycle": 1.455715535028049, "branch miss rate": 0.01676084457780284, "ITLB miss rate": 0.0001957788925443031, "DTLB miss rate": 0.00047237717111404113, "L2D TLB miss rate": 0.11996765805959991, "L1I cache miss rate": 0.009626797008769106, "L1D cache miss rate": 0.015117424712661923, "L2D cache miss rate": 0.17306037588138215, "LL cache miss rate": 0.9625349751742168}
--- a/pytorch/output/altra_10_30_p2p-Gnutella04_1000.output
+++ b/pytorch/output/altra_10_30_p2p-Gnutella04_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394992 queued and waiting for resources
 srun: job 3394992 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.1181, 0.8387, 0.0554,  ..., 0.8107, 0.4393, 0.9489])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.061662197113037 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
             50.59 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,303      page-faults:u                    #   65.291 K/sec                     
        51,318,459      cycles:u                         #    1.014 GHz                         (59.34%)
        74,705,078      instructions:u                   #    1.46  insn per cycle              (83.02%)
   <not supported>      branches:u                                                            
           366,825      branch-misses:u                                                       
        31,809,194      L1-dcache-loads:u                #  628.781 M/sec                     
           466,198      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,390,161      L1-icache-loads:u                #  600.731 M/sec                     
           296,270      L1-icache-load-misses:u          #    0.97% of all L1-icache accesses 
        61,518,375      dTLB-loads:u                     #    1.216 G/sec                       (17.94%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.302241563 seconds time elapsed
      16.122298000 seconds user
      29.141140000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.7249, 0.8723, 0.3843,  ..., 0.2264, 0.4891, 0.9107])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.0079431533813477 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
           328,853      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,620,312      BR_RETIRED:u                                                          
       4.241400567 seconds time elapsed
      15.325937000 seconds user
      28.223386000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.7608, 0.2449, 0.5322,  ..., 0.5547, 0.8659, 0.8437])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.1017234325408936 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
        27,939,682      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,470      ITLB_WALK:u                                                           
            17,679      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,425,602      L1D_TLB:u                                                             
       4.296820500 seconds time elapsed
      15.875162000 seconds user
      28.803412000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.9980, 0.9991, 0.6749,  ..., 0.4225, 0.7297, 0.3717])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.0812580585479736 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
        30,276,633      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           291,467      L1I_CACHE_REFILL:u                                                    
           479,061      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,689,326      L1D_CACHE:u                                                           
       4.500137840 seconds time elapsed
      15.794710000 seconds user
      27.773851000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.8707, 0.5871, 0.5970,  ..., 0.8826, 0.4673, 0.4994])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 0.9900743961334229 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
           529,426      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           550,033      LL_CACHE_RD:u                                                         
           171,913      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            20,624      L2D_TLB_REFILL:u                                                      
           296,662      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,714,211      L2D_CACHE:u                                                           
       4.284402033 seconds time elapsed
      15.584671000 seconds user
      27.523772000 seconds sys
--- a/pytorch/output/altra_10_30_p2p-Gnutella24_1000.json
+++ b/pytorch/output/altra_10_30_p2p-Gnutella24_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.12, 16.12, 16.36, 16.56, 16.52, 17.04, 16.76, 16.64, 16.92], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.6947758197784424, "power": [25.2, 25.2, 26.6, 26.28, 26.48], "power_after": [16.4, 16.6, 16.6, 16.64, 16.8, 16.48, 16.44, 16.16, 16.12, 16.2], "task clock (msec)": 66.78, "page faults": 3520, "cycles": 28858055, "instructions": 64429843, "branch mispredictions": 331167, "branches": 19518210, "ITLB accesses": 26964483, "ITLB misses": 4666, "DTLB misses": 14001, "DTLB accesses": 36143905, "L1I cache accesses": 31901160, "L1I cache misses": 302516, "L1D cache misses": 475663, "L1D cache accesses": 33507563, "LL cache misses": 558546, "LL cache accesses": 578676, "L2D TLB accesses": 187549, "L2D TLB misses": 22990, "L2D cache misses": 321826, "L2D cache accesses": 1816571, "instructions per cycle": 2.2326467601506756, "branch miss rate": 0.016967078435983628, "ITLB miss rate": 0.00017304244253449992, "DTLB miss rate": 0.00038736821602425086, "L2D TLB miss rate": 0.12258129875392564, "L1I cache miss rate": 0.009482915354802146, "L1D cache miss rate": 0.01419569068630864, "L2D cache miss rate": 0.1771612560147663, "LL cache miss rate": 0.9652136947099932}
--- a/pytorch/output/altra_10_30_p2p-Gnutella24_1000.output
+++ b/pytorch/output/altra_10_30_p2p-Gnutella24_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394989 queued and waiting for resources
 srun: job 3394989 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.2470, 0.4231, 0.1036,  ..., 0.7937, 0.3241, 0.7116])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6974337100982666 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
             66.78 msec task-clock:u                     #    0.013 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,520      page-faults:u                    #   52.713 K/sec                     
        28,858,055      cycles:u                         #    0.432 GHz                         (26.93%)
        64,429,843      instructions:u                   #    2.23  insn per cycle              (67.63%)
   <not supported>      branches:u                                                            
           296,857      branch-misses:u                                                         (84.08%)
        33,646,348      L1-dcache-loads:u                #  503.866 M/sec                     
           493,998      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,070,415      L1-icache-loads:u                #  480.266 M/sec                     
           305,993      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        46,903,081      dTLB-loads:u                     #  702.391 M/sec                       (46.16%)
           114,272      dTLB-load-misses:u               #    0.24% of all dTLB cache accesses  (32.45%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       5.106933083 seconds time elapsed
      16.391614000 seconds user
      28.913912000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.2307, 0.4662, 0.3789,  ..., 0.0144, 0.6300, 0.7829])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6379659175872803 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
           331,167      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,518,210      BR_RETIRED:u                                                          
       5.017894585 seconds time elapsed
      16.446505000 seconds user
      31.004338000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.7309, 0.0314, 0.4424,  ..., 0.7434, 0.2124, 0.1432])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.7232718467712402 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
        26,964,483      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             4,666      ITLB_WALK:u                                                           
            14,001      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,143,905      L1D_TLB:u                                                             
       5.053286721 seconds time elapsed
      16.447780000 seconds user
      28.580949000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.5695, 0.5025, 0.1946,  ..., 0.7428, 0.9634, 0.4327])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.644775629043579 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
        31,901,160      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,516      L1I_CACHE_REFILL:u                                                    
           475,663      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,507,563      L1D_CACHE:u                                                           
       4.978338941 seconds time elapsed
      16.455298000 seconds user
      30.249373000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.0969, 0.1950, 0.8456,  ..., 0.3315, 0.1512, 0.3182])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.752812385559082 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
           558,546      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           578,676      LL_CACHE_RD:u                                                         
           187,549      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            22,990      L2D_TLB_REFILL:u                                                      
           321,826      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,816,571      L2D_CACHE:u                                                           
       4.952297819 seconds time elapsed
      16.648691000 seconds user
      27.005944000 seconds sys
--- a/pytorch/output/altra_10_30_p2p-Gnutella25_1000.json
+++ b/pytorch/output/altra_10_30_p2p-Gnutella25_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.0, 16.4, 16.4, 16.28, 16.48, 16.6, 16.48, 16.56, 16.88, 16.92], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.4688231945037842, "power": [23.04, 29.0, 30.24, 27.96, 28.04], "power_after": [16.52, 16.68, 16.88, 17.12, 17.08, 17.04, 16.84, 16.72, 16.84, 16.84], "task clock (msec)": 48.61, "page faults": 3308, "cycles": 60072179, "instructions": 70991785, "branch mispredictions": 331765, "branches": 19906014, "ITLB accesses": 28194337, "ITLB misses": 5083, "DTLB misses": 17916, "DTLB accesses": 37944713, "L1I cache accesses": 31162212, "L1I cache misses": 270684, "L1D cache misses": 465467, "L1D cache accesses": 32857500, "LL cache misses": 541118, "LL cache accesses": 564199, "L2D TLB accesses": 194022, "L2D TLB misses": 23932, "L2D cache misses": 311476, "L2D cache accesses": 1783574, "instructions per cycle": 1.1817747613250387, "branch miss rate": 0.016666571218125335, "ITLB miss rate": 0.00018028443087702328, "DTLB miss rate": 0.00047216064066685654, "L2D TLB miss rate": 0.12334683695663379, "L1I cache miss rate": 0.008686289663904475, "L1D cache miss rate": 0.014166232975728525, "L2D cache miss rate": 0.17463587157022922, "LL cache miss rate": 0.9590906754531646}
--- a/pytorch/output/altra_10_30_p2p-Gnutella25_1000.output
+++ b/pytorch/output/altra_10_30_p2p-Gnutella25_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394994 queued and waiting for resources
 srun: job 3394994 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.1465, 0.4354, 0.7334,  ..., 0.2837, 0.5913, 0.9525])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4786670207977295 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
             48.61 msec task-clock:u                     #    0.010 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,308      page-faults:u                    #   68.054 K/sec                     
        60,072,179      cycles:u                         #    1.236 GHz                         (53.26%)
        70,991,785      instructions:u                   #    1.18  insn per cycle              (71.54%)
   <not supported>      branches:u                                                            
           371,197      branch-misses:u                                                       
        32,964,378      L1-dcache-loads:u                #  678.165 M/sec                     
           465,448      L1-dcache-load-misses:u          #    1.41% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,435,424      L1-icache-loads:u                #  646.710 M/sec                     
           293,561      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        56,761,270      dTLB-loads:u                     #    1.168 G/sec                       (30.54%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.700046411 seconds time elapsed
      16.235801000 seconds user
      28.396327000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.7780, 0.3388, 0.1540,  ..., 0.2989, 0.3682, 0.9160])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4235138893127441 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
           331,765      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,906,014      BR_RETIRED:u                                                          
       4.757340585 seconds time elapsed
      16.412311000 seconds user
      29.238029000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.4944, 0.8057, 0.8211,  ..., 0.5137, 0.3388, 0.6316])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4664146900177002 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
        28,194,337      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,083      ITLB_WALK:u                                                           
            17,916      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,944,713      L1D_TLB:u                                                             
       4.844329421 seconds time elapsed
      16.081022000 seconds user
      28.021902000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.0963, 0.5806, 0.0397,  ..., 0.1604, 0.5700, 0.8103])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.3717434406280518 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
        31,162,212      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           270,684      L1I_CACHE_REFILL:u                                                    
           465,467      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,857,500      L1D_CACHE:u                                                           
       4.598461782 seconds time elapsed
      15.609727000 seconds user
      30.606837000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.9137, 0.5009, 0.7507,  ..., 0.6623, 0.8760, 0.2991])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4291880130767822 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
           541,118      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           564,199      LL_CACHE_RD:u                                                         
           194,022      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,932      L2D_TLB_REFILL:u                                                      
           311,476      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,783,574      L2D_CACHE:u                                                           
       4.792239951 seconds time elapsed
      15.902307000 seconds user
      28.747620000 seconds sys
--- a/pytorch/output/altra_10_30_p2p-Gnutella30_1000.json
+++ b/pytorch/output/altra_10_30_p2p-Gnutella30_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.96, 22.0, 22.16, 22.16, 21.84, 22.08, 22.4, 22.08, 22.0, 21.48], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 3.504030466079712, "power": [54.2, 64.16, 67.64, 67.64, 65.92, 58.96, 59.92], "power_after": [20.72, 20.76, 20.76, 20.8, 20.8, 20.88, 20.92, 21.04, 21.04, 21.12], "task clock (msec)": 56.52, "page faults": 3194, "cycles": 58074747, "instructions": 90036443, "branch mispredictions": 327895, "branches": 20553601, "ITLB accesses": 26120611, "ITLB misses": 7531, "DTLB misses": 19097, "DTLB accesses": 35744928, "L1I cache accesses": 31819981, "L1I cache misses": 284493, "L1D cache misses": 486709, "L1D cache accesses": 33545755, "LL cache misses": 544742, "LL cache accesses": 558323, "L2D TLB accesses": 190574, "L2D TLB misses": 23746, "L2D cache misses": 305844, "L2D cache accesses": 1736964, "instructions per cycle": 1.5503544595725918, "branch miss rate": 0.015953165579111903, "ITLB miss rate": 0.00028831637973552763, "DTLB miss rate": 0.0005342576155140109, "L2D TLB miss rate": 0.12460251660772194, "L1I cache miss rate": 0.008940703012990485, "L1D cache miss rate": 0.014508810429218243, "L2D cache miss rate": 0.17607964241055082, "LL cache miss rate": 0.9756753707083534}
--- a/pytorch/output/altra_10_30_p2p-Gnutella30_1000.output
+++ b/pytorch/output/altra_10_30_p2p-Gnutella30_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394991 queued and waiting for resources
 srun: job 3394991 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.3046, 0.0725, 0.4580,  ..., 0.0593, 0.5121, 0.2116])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 3.6646029949188232 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
             56.52 msec task-clock:u                     #    0.008 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,194      page-faults:u                    #   56.515 K/sec                     
        58,074,747      cycles:u                         #    1.028 GHz                         (51.20%)
        90,036,443      instructions:u                   #    1.55  insn per cycle              (89.06%)
   <not supported>      branches:u                                                            
           363,262      branch-misses:u                                                       
        33,111,438      L1-dcache-loads:u                #  585.875 M/sec                     
           454,665      L1-dcache-load-misses:u          #    1.37% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,646,314      L1-icache-loads:u                #  559.951 M/sec                     
           281,443      L1-icache-load-misses:u          #    0.89% of all L1-icache accesses 
        43,495,524      dTLB-loads:u                     #  769.611 M/sec                       (11.87%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       7.033463989 seconds time elapsed
      34.670765000 seconds user
     307.031553000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.9700, 0.1728, 0.2199,  ..., 0.6107, 0.3357, 0.2661])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 2.3380045890808105 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
           327,895      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,553,601      BR_RETIRED:u                                                          
       5.895917276 seconds time elapsed
      31.121063000 seconds user
     208.127447000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.9533, 0.7568, 0.8141,  ..., 0.8395, 0.5617, 0.7830])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 4.476518869400024 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
        26,120,611      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             7,531      ITLB_WALK:u                                                           
            19,097      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,744,928      L1D_TLB:u                                                             
       8.109622410 seconds time elapsed
      38.467161000 seconds user
     370.437915000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.6886, 0.7814, 0.9957,  ..., 0.8460, 0.1015, 0.8097])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 2.856834888458252 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
        31,819,981      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           284,493      L1I_CACHE_REFILL:u                                                    
           486,709      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,545,755      L1D_CACHE:u                                                           
       6.374371632 seconds time elapsed
      30.817943000 seconds user
     247.363843000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.8464, 0.0437, 0.1230,  ..., 0.6221, 0.9268, 0.5436])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 4.838747978210449 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
           544,742      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           558,323      LL_CACHE_RD:u                                                         
           190,574      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,746      L2D_TLB_REFILL:u                                                      
           305,844      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,736,964      L2D_CACHE:u                                                           
       8.386896120 seconds time elapsed
      39.861141000 seconds user
     395.959334000 seconds sys
--- a/pytorch/output/altra_10_30_ri2010_1000.json
+++ b/pytorch/output/altra_10_30_ri2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.2, 31.56, 31.56, 30.84, 24.52, 23.2, 21.32, 20.76, 20.84, 20.84], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 3.077709913253784, "power": [27.76, 28.28, 28.44, 28.28, 25.16, 30.44, 30.6], "power_after": [21.08, 20.88, 20.68, 20.68, 20.6, 20.56, 20.68, 20.8, 20.96, 21.24], "task clock (msec)": 64.49, "page faults": 3473, "cycles": 42783607, "instructions": 84598454, "branch mispredictions": 331326, "branches": 20438455, "ITLB accesses": 26869742, "ITLB misses": 6302, "DTLB misses": 14926, "DTLB accesses": 36876841, "L1I cache accesses": 31664385, "L1I cache misses": 301678, "L1D cache misses": 493536, "L1D cache accesses": 33219437, "LL cache misses": 552180, "LL cache accesses": 564990, "L2D TLB accesses": 167824, "L2D TLB misses": 19594, "L2D cache misses": 304114, "L2D cache accesses": 1716370, "instructions per cycle": 1.977356747877756, "branch miss rate": 0.01621091222404042, "ITLB miss rate": 0.00023453890997539165, "DTLB miss rate": 0.00040475267390718204, "L2D TLB miss rate": 0.11675326532557918, "L1I cache miss rate": 0.009527360155581737, "L1D cache miss rate": 0.014856844202386693, "L2D cache miss rate": 0.17718440662561102, "LL cache miss rate": 0.9773270323368555}
--- a/pytorch/output/altra_10_30_ri2010_1000.output
+++ b/pytorch/output/altra_10_30_ri2010_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394984 queued and waiting for resources
 srun: job 3394984 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.5906, 0.9651, 0.2033,  ..., 0.2175, 0.4484, 0.0412])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 3.107008934020996 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
             64.49 msec task-clock:u                     #    0.010 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,473      page-faults:u                    #   53.852 K/sec                     
        42,783,607      cycles:u                         #    0.663 GHz                         (37.27%)
        84,598,454      instructions:u                   #    1.98  insn per cycle              (73.53%)
   <not supported>      branches:u                                                            
           353,558      branch-misses:u                                                         (89.57%)
        33,192,964      L1-dcache-loads:u                #  514.689 M/sec                     
           466,217      L1-dcache-load-misses:u          #    1.40% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,727,502      L1-icache-loads:u                #  491.965 M/sec                     
           292,570      L1-icache-load-misses:u          #    0.92% of all L1-icache accesses 
        38,623,737      dTLB-loads:u                     #  598.898 M/sec                       (34.88%)
           124,174      dTLB-load-misses:u               #    0.32% of all dTLB cache accesses  (14.74%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.612563197 seconds time elapsed
      18.114584000 seconds user
      29.808542000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.6092, 0.5511, 0.6052,  ..., 0.8002, 0.0295, 0.2972])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9385879039764404 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
           331,326      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,438,455      BR_RETIRED:u                                                          
       6.446731410 seconds time elapsed
      17.939571000 seconds user
      33.272929000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.3348, 0.2974, 0.2569,  ..., 0.2397, 0.1965, 0.5651])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.972891330718994 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
        26,869,742      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,302      ITLB_WALK:u                                                           
            14,926      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,876,841      L1D_TLB:u                                                             
       6.376775396 seconds time elapsed
      17.836418000 seconds user
      29.830135000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.7889, 0.7395, 0.6553,  ..., 0.3938, 0.2478, 0.7923])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9658284187316895 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
        31,664,385      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           301,678      L1I_CACHE_REFILL:u                                                    
           493,536      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,219,437      L1D_CACHE:u                                                           
       6.559158078 seconds time elapsed
      19.008146000 seconds user
      38.233666000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.1256, 0.1417, 0.9800,  ..., 0.2509, 0.8121, 0.6210])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9228267669677734 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
           552,180      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           564,990      LL_CACHE_RD:u                                                         
           167,824      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            19,594      L2D_TLB_REFILL:u                                                      
           304,114      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,716,370      L2D_CACHE:u                                                           
       6.135787277 seconds time elapsed
      18.029630000 seconds user
      28.723217000 seconds sys
--- a/pytorch/output/altra_10_30_soc-sign-Slashdot090216_1000.json
+++ b/pytorch/output/altra_10_30_soc-sign-Slashdot090216_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [29.88, 23.64, 23.08, 21.84, 21.4, 21.2, 21.0, 21.0, 21.16, 21.0], "matrix": "soc-sign-Slashdot090216", "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 19.113287687301636, "power": [81.08, 81.56, 71.96, 60.52, 47.16, 48.4, 53.84, 53.84, 67.4, 82.64, 90.8, 89.16, 87.96, 85.76, 84.64, 84.04, 83.64, 84.68, 84.88, 84.88, 84.64, 84.04, 83.6], "power_after": [20.72, 20.6, 20.68, 20.88, 21.2, 21.28, 21.28, 21.48, 21.56, 21.36], "task clock (msec)": 67.66, "page faults": 3317, "cycles": 41915850, "instructions": 84471787, "branch mispredictions": 344452, "branches": 20610765, "ITLB accesses": 27276117, "ITLB misses": 6358, "DTLB misses": 17361, "DTLB accesses": 36565837, "L1I cache accesses": 32022662, "L1I cache misses": 293044, "L1D cache misses": 458939, "L1D cache accesses": 33505164, "LL cache misses": 553814, "LL cache accesses": 567372, "L2D TLB accesses": 199301, "L2D TLB misses": 25193, "L2D cache misses": 313278, "L2D cache accesses": 1796299, "instructions per cycle": 2.015270762730566, "branch miss rate": 0.016712237512775483, "ITLB miss rate": 0.00023309769495416082, "DTLB miss rate": 0.0004747874361524939, "L2D TLB miss rate": 0.12640679173712124, "L1I cache miss rate": 0.009151144274014446, "L1D cache miss rate": 0.01369756017311242, "L2D cache miss rate": 0.17440192306514674, "LL cache miss rate": 0.97610386131145}
--- a/pytorch/output/altra_10_30_soc-sign-Slashdot090216_1000.output
+++ b/pytorch/output/altra_10_30_soc-sign-Slashdot090216_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394981 queued and waiting for resources
 srun: job 3394981 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.6780, 0.5234, 0.1205,  ..., 0.2995, 0.6275, 0.1399])
 Matrix: soc-sign-Slashdot090216
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 30.653191089630127 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
             67.66 msec task-clock:u                     #    0.002 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,317      page-faults:u                    #   49.022 K/sec                     
        41,915,850      cycles:u                         #    0.619 GHz                         (57.88%)
        84,471,787      instructions:u                   #    2.02  insn per cycle              (88.19%)
   <not supported>      branches:u                                                            
           375,016      branch-misses:u                                                       
        32,438,527      L1-dcache-loads:u                #  479.407 M/sec                     
           499,618      L1-dcache-load-misses:u          #    1.54% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,998,693      L1-icache-loads:u                #  458.127 M/sec                     
           306,445      L1-icache-load-misses:u          #    0.99% of all L1-icache accesses 
        34,294,934      dTLB-loads:u                     #  506.842 M/sec                       (18.86%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      34.340632995 seconds time elapsed
     149.743244000 seconds user
    2355.852109000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.9875, 0.2031, 0.7260,  ..., 0.5908, 0.1575, 0.7971])
 Matrix: soc-sign-Slashdot090216
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 13.671181440353394 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
           344,452      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,610,765      BR_RETIRED:u                                                          
      17.331425967 seconds time elapsed
      83.136180000 seconds user
    1069.027469000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.2046, 0.3645, 0.7960,  ..., 0.6490, 0.4098, 0.5342])
 Matrix: soc-sign-Slashdot090216
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 19.569235801696777 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
        27,276,117      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,358      ITLB_WALK:u                                                           
            17,361      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,565,837      L1D_TLB:u                                                             
      23.323243037 seconds time elapsed
     108.830923000 seconds user
    1521.834565000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.4164, 0.2188, 0.5460,  ..., 0.1057, 0.5277, 0.0624])
 Matrix: soc-sign-Slashdot090216
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 26.337355375289917 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
        32,022,662      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           293,044      L1I_CACHE_REFILL:u                                                    
           458,939      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,505,164      L1D_CACHE:u                                                           
      30.017812847 seconds time elapsed
     131.976276000 seconds user
    2029.636174000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 545669, 545669,
                            545671]),
       col_indices=tensor([    1,     2,     3,  ..., 81869, 81699, 81863]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(81871, 81871),
       nnz=545671, layout=torch.sparse_csr)
 tensor([0.7679, 0.9196, 0.3474,  ..., 0.5624, 0.0163, 0.8596])
 Matrix: soc-sign-Slashdot090216
 Shape: torch.Size([81871, 81871])
 NNZ: 545671
 Density: 8.140867447881048e-05
 Time: 29.926054000854492 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
           553,814      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           567,372      LL_CACHE_RD:u                                                         
           199,301      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,193      L2D_TLB_REFILL:u                                                      
           313,278      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,796,299      L2D_CACHE:u                                                           
      33.553779692 seconds time elapsed
     154.498461000 seconds user
    2293.574463000 seconds sys
--- a/pytorch/output/altra_10_30_soc-sign-Slashdot090221_1000.json
+++ b/pytorch/output/altra_10_30_soc-sign-Slashdot090221_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.92, 21.84, 20.96, 20.24, 20.28, 20.16, 19.96, 19.72, 19.88, 19.76], "matrix": "soc-sign-Slashdot090221", "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 18.79910135269165, "power": [80.48, 80.08, 69.04, 69.04, 55.0, 46.8, 49.16, 56.2, 70.84, 82.84, 86.52, 84.28, 82.56, 81.2, 80.28, 80.28, 80.04, 80.16, 80.8, 81.0, 81.92, 83.04, 82.88], "power_after": [21.0, 20.96, 21.12, 20.76, 20.72, 20.56, 20.52, 20.64, 20.88, 21.04], "task clock (msec)": 58.57, "page faults": 3259, "cycles": 74509373, "instructions": 88672751, "branch mispredictions": 342121, "branches": 20436338, "ITLB accesses": 27189335, "ITLB misses": 6437, "DTLB misses": 18156, "DTLB accesses": 36676625, "L1I cache accesses": 30721032, "L1I cache misses": 302777, "L1D cache misses": 469833, "L1D cache accesses": 32109077, "LL cache misses": 551850, "LL cache accesses": 565355, "L2D TLB accesses": 200417, "L2D TLB misses": 25536, "L2D cache misses": 304133, "L2D cache accesses": 1801849, "instructions per cycle": 1.190088540941017, "branch miss rate": 0.016740817263836603, "ITLB miss rate": 0.0002367472393127673, "DTLB miss rate": 0.0004950291909356436, "L2D TLB miss rate": 0.12741434109880898, "L1I cache miss rate": 0.009855691045795596, "L1D cache miss rate": 0.014632404413244267, "L2D cache miss rate": 0.16878939356183564, "LL cache miss rate": 0.9761123541845389}
--- a/pytorch/output/altra_10_30_soc-sign-Slashdot090221_1000.output
+++ b/pytorch/output/altra_10_30_soc-sign-Slashdot090221_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394979 queued and waiting for resources
 srun: job 3394979 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
                            549202]),
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
 tensor([0.4201, 0.7748, 0.6565,  ..., 0.0517, 0.6958, 0.5341])
 Matrix: soc-sign-Slashdot090221
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
 Time: 27.35153603553772 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
             58.57 msec task-clock:u                     #    0.002 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,259      page-faults:u                    #   55.640 K/sec                     
        74,509,373      cycles:u                         #    1.272 GHz                         (58.00%)
        88,672,751      instructions:u                   #    1.19  insn per cycle              (90.97%)
   <not supported>      branches:u                                                            
           361,568      branch-misses:u                                                       
        31,594,797      L1-dcache-loads:u                #  539.410 M/sec                     
           460,467      L1-dcache-load-misses:u          #    1.46% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,148,838      L1-icache-loads:u                #  514.724 M/sec                     
           282,768      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        19,757,856      dTLB-loads:u                     #  337.321 M/sec                       (11.69%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      31.087250856 seconds time elapsed
     142.716222000 seconds user
    2102.420776000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
                            549202]),
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
 tensor([0.7637, 0.5328, 0.8286,  ..., 0.7084, 0.8903, 0.1707])
 Matrix: soc-sign-Slashdot090221
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
 Time: 17.188836336135864 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
           342,121      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,436,338      BR_RETIRED:u                                                          
      20.753346873 seconds time elapsed
      98.605331000 seconds user
    1332.291974000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
                            549202]),
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
 tensor([0.9017, 0.8505, 0.0023,  ..., 0.4182, 0.6895, 0.5023])
 Matrix: soc-sign-Slashdot090221
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
 Time: 16.22375249862671 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
        27,189,335      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,437      ITLB_WALK:u                                                           
            18,156      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,676,625      L1D_TLB:u                                                             
      19.748749363 seconds time elapsed
     103.049578000 seconds user
    1249.814927000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
                            549202]),
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
 tensor([0.4805, 0.2325, 0.2103,  ..., 0.1710, 0.7638, 0.9368])
 Matrix: soc-sign-Slashdot090221
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
 Time: 15.453373908996582 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
        30,721,032      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,777      L1I_CACHE_REFILL:u                                                    
           469,833      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,109,077      L1D_CACHE:u                                                           
      19.090250444 seconds time elapsed
      94.904880000 seconds user
    1195.102767000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     29,    124,  ..., 549200, 549200,
                            549202]),
       col_indices=tensor([    1,     2,     3,  ..., 82142, 81974, 82136]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(82144, 82144),
       nnz=549202, layout=torch.sparse_csr)
 tensor([0.8430, 0.9439, 0.4260,  ..., 0.8172, 0.4243, 0.3834])
 Matrix: soc-sign-Slashdot090221
 Shape: torch.Size([82144, 82144])
 NNZ: 549202
 Density: 8.13917555860553e-05
 Time: 29.316507816314697 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
           551,850      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           565,355      LL_CACHE_RD:u                                                         
           200,417      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,536      L2D_TLB_REFILL:u                                                      
           304,133      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,801,849      L2D_CACHE:u                                                           
      32.859276963 seconds time elapsed
     148.969816000 seconds user
    2252.321936000 seconds sys
--- a/pytorch/output/altra_10_30_soc-sign-epinions_1000.json
+++ b/pytorch/output/altra_10_30_soc-sign-epinions_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.32, 20.52, 20.52, 20.56, 20.6, 20.4, 20.76, 20.6, 20.36, 20.4], "matrix": "soc-sign-epinions", "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 22.52380871772766, "power": [81.24, 81.16, 74.84, 62.04, 51.6, 50.56, 52.4, 52.4, 68.24, 80.56, 91.44, 91.36, 90.28, 88.32, 86.4, 85.16, 83.64, 82.36, 82.96, 82.84, 82.84, 82.56, 82.44, 82.08, 83.64, 84.4], "power_after": [20.8, 20.88, 20.8, 20.92, 20.88, 20.88, 20.8, 20.84, 20.84, 20.6], "task clock (msec)": 63.9, "page faults": 3446, "cycles": 55931043, "instructions": 77907356, "branch mispredictions": 332778, "branches": 20000746, "ITLB accesses": 27000304, "ITLB misses": 6713, "DTLB misses": 18689, "DTLB accesses": 36395663, "L1I cache accesses": 32396405, "L1I cache misses": 292629, "L1D cache misses": 473799, "L1D cache accesses": 34061981, "LL cache misses": 542765, "LL cache accesses": 557193, "L2D TLB accesses": 203626, "L2D TLB misses": 24363, "L2D cache misses": 303397, "L2D cache accesses": 1772084, "instructions per cycle": 1.3929179901043505, "branch miss rate": 0.01663827939217867, "ITLB miss rate": 0.00024862683027568875, "DTLB miss rate": 0.0005134952480464499, "L2D TLB miss rate": 0.11964582126054629, "L1I cache miss rate": 0.009032761505481858, "L1D cache miss rate": 0.01390990735389113, "L2D cache miss rate": 0.171209152613533, "LL cache miss rate": 0.9741059202107708}
--- a/pytorch/output/altra_10_30_soc-sign-epinions_1000.output
+++ b/pytorch/output/altra_10_30_soc-sign-epinions_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394990 queued and waiting for resources
 srun: job 3394990 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.3914, 0.2076, 0.6733,  ..., 0.4758, 0.6360, 0.6316])
 Matrix: soc-sign-epinions
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 20.04187798500061 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
             63.90 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,446      page-faults:u                    #   53.927 K/sec                     
        55,931,043      cycles:u                         #    0.875 GHz                         (85.43%)
        77,907,356      instructions:u                   #    1.39  insn per cycle            
   <not supported>      branches:u                                                            
           357,739      branch-misses:u                                                       
        33,000,188      L1-dcache-loads:u                #  516.421 M/sec                     
           466,824      L1-dcache-load-misses:u          #    1.41% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,503,048      L1-icache-loads:u                #  492.992 M/sec                     
           301,112      L1-icache-load-misses:u          #    0.96% of all L1-icache accesses 
        34,740,872      dTLB-loads:u                     #  543.661 M/sec                       (18.37%)
            32,355      dTLB-load-misses:u               #    0.09% of all dTLB cache accesses  (12.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      23.478083368 seconds time elapsed
     119.232326000 seconds user
    1541.081607000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.3970, 0.5643, 0.0036,  ..., 0.0338, 0.0807, 0.3885])
 Matrix: soc-sign-epinions
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 16.115705490112305 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
           332,778      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,000,746      BR_RETIRED:u                                                          
      19.765627973 seconds time elapsed
     103.591961000 seconds user
    1250.845091000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.0049, 0.4550, 0.3166,  ..., 0.3734, 0.8337, 0.5156])
 Matrix: soc-sign-epinions
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 18.55180263519287 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
        27,000,304      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,713      ITLB_WALK:u                                                           
            18,689      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,395,663      L1D_TLB:u                                                             
      22.333459337 seconds time elapsed
     109.075160000 seconds user
    1441.055730000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.0560, 0.8530, 0.8946,  ..., 0.4591, 0.5391, 0.2898])
 Matrix: soc-sign-epinions
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 25.587534427642822 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
        32,396,405      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           292,629      L1I_CACHE_REFILL:u                                                    
           473,799      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,061,981      L1D_CACHE:u                                                           
      29.367381835 seconds time elapsed
     142.233743000 seconds user
    1962.747683000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 841371, 841371,
                            841372]),
       col_indices=tensor([     1, 128552,      3,  ..., 131824, 131826,
                             7714]),
       values=tensor([-1., -1.,  1.,  ...,  1.,  1.,  1.]),
       size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
 tensor([0.7002, 0.7829, 0.1511,  ..., 0.3651, 0.2391, 0.7788])
 Matrix: soc-sign-epinions
 Shape: torch.Size([131828, 131828])
 NNZ: 841372
 Density: 4.841419648464106e-05
 Time: 23.656178951263428 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
           542,765      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           557,193      LL_CACHE_RD:u                                                         
           203,626      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,363      L2D_TLB_REFILL:u                                                      
           303,397      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,772,084      L2D_CACHE:u                                                           
      27.453055481 seconds time elapsed
     128.709934000 seconds user
    1831.887905000 seconds sys
--- a/pytorch/output/altra_10_30_sx-mathoverflow_1000.json
+++ b/pytorch/output/altra_10_30_sx-mathoverflow_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.28, 16.44, 16.68, 16.68, 16.84, 17.04, 16.84, 16.84, 16.72, 16.72], "matrix": "sx-mathoverflow", "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 5.405760288238525, "power": [25.64, 20.44, 21.24, 22.16, 22.28, 27.04, 26.92, 26.28, 25.32], "power_after": [16.32, 16.44, 16.4, 16.4, 16.6, 16.48, 16.56, 16.6, 16.32, 16.44], "task clock (msec)": 50.36, "page faults": 3296, "cycles": 56049457, "instructions": 72333565, "branch mispredictions": 325529, "branches": 19463406, "ITLB accesses": 27374917, "ITLB misses": 5203, "DTLB misses": 16771, "DTLB accesses": 36373182, "L1I cache accesses": 31839975, "L1I cache misses": 274158, "L1D cache misses": 471992, "L1D cache accesses": 33638817, "LL cache misses": 538067, "LL cache accesses": 557981, "L2D TLB accesses": 170169, "L2D TLB misses": 21987, "L2D cache misses": 301746, "L2D cache accesses": 1735872, "instructions per cycle": 1.2905310572411077, "branch miss rate": 0.016725181604905125, "ITLB miss rate": 0.00019006450320927, "DTLB miss rate": 0.00046108146381034247, "L2D TLB miss rate": 0.12920684731061474, "L1I cache miss rate": 0.00861049671050307, "L1D cache miss rate": 0.014031171191305569, "L2D cache miss rate": 0.1738296372082734, "LL cache miss rate": 0.9643106127269566}
--- a/pytorch/output/altra_10_30_sx-mathoverflow_1000.output
+++ b/pytorch/output/altra_10_30_sx-mathoverflow_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394987 queued and waiting for resources
 srun: job 3394987 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.8864, 0.5637, 0.9805,  ..., 0.0234, 0.9487, 0.4860])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 5.484489917755127 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
             50.36 msec task-clock:u                     #    0.006 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,296      page-faults:u                    #   65.452 K/sec                     
        56,049,457      cycles:u                         #    1.113 GHz                         (49.66%)
        72,333,565      instructions:u                   #    1.29  insn per cycle              (66.35%)
   <not supported>      branches:u                                                            
           369,218      branch-misses:u                                                         (86.12%)
        33,730,437      L1-dcache-loads:u                #  669.814 M/sec                       (93.88%)
           459,922      L1-dcache-load-misses:u          #    1.36% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,827,672      L1-icache-loads:u                #  632.030 M/sec                     
           295,060      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        54,366,618      dTLB-loads:u                     #    1.080 G/sec                       (35.64%)
            84,768      dTLB-load-misses:u               #    0.16% of all dTLB cache accesses  (25.48%)
        12,107,953      iTLB-loads:u                     #  240.438 M/sec                       (10.11%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       8.968532171 seconds time elapsed
      20.749643000 seconds user
      28.745486000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.5549, 0.0336, 0.9472,  ..., 0.2657, 0.3394, 0.6185])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 5.532417297363281 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
           325,529      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,463,406      BR_RETIRED:u                                                          
       8.912497962 seconds time elapsed
      20.214519000 seconds user
      31.566513000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.3330, 0.8843, 0.5150,  ..., 0.7292, 0.0873, 0.4184])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 5.457342863082886 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
        27,374,917      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,203      ITLB_WALK:u                                                           
            16,771      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,373,182      L1D_TLB:u                                                             
       8.730534933 seconds time elapsed
      20.156482000 seconds user
      31.426118000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.5864, 0.4449, 0.4042,  ..., 0.1651, 0.7793, 0.8302])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 5.449937582015991 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
        31,839,975      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           274,158      L1I_CACHE_REFILL:u                                                    
           471,992      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,638,817      L1D_CACHE:u                                                           
       8.845491835 seconds time elapsed
      20.577696000 seconds user
      35.105662000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.8880, 0.4700, 0.5542,  ..., 0.8505, 0.9123, 0.5742])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 5.400304794311523 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
           538,067      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           557,981      LL_CACHE_RD:u                                                         
           170,169      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            21,987      L2D_TLB_REFILL:u                                                      
           301,746      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,735,872      L2D_CACHE:u                                                           
       8.606800178 seconds time elapsed
      21.064990000 seconds user
      34.158762000 seconds sys
--- a/pytorch/output/altra_10_30_ut2010_1000.json
+++ b/pytorch/output/altra_10_30_ut2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.36, 20.4, 20.68, 20.64, 20.92, 20.92, 20.88, 20.68, 20.68, 20.6], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 11.10523509979248, "power": [90.68, 90.68, 88.24, 72.2, 59.48, 52.0, 54.72, 64.28, 79.24, 94.08, 96.24, 93.72, 92.36, 92.36, 90.08], "power_after": [21.24, 21.28, 20.96, 21.16, 20.92, 21.04, 21.32, 21.56, 21.16, 21.24], "task clock (msec)": 52.22, "page faults": 3288, "cycles": 67463873, "instructions": 73042754, "branch mispredictions": 344635, "branches": 20775821, "ITLB accesses": 27488750, "ITLB misses": 6494, "DTLB misses": 18293, "DTLB accesses": 36697113, "L1I cache accesses": 31066176, "L1I cache misses": 298652, "L1D cache misses": 473808, "L1D cache accesses": 32572985, "LL cache misses": 547428, "LL cache accesses": 566356, "L2D TLB accesses": 162858, "L2D TLB misses": 19852, "L2D cache misses": 304056, "L2D cache accesses": 1713420, "instructions per cycle": 1.0826943481291091, "branch miss rate": 0.01658827345499367, "ITLB miss rate": 0.00023624209904051657, "DTLB miss rate": 0.0004984860798177775, "L2D TLB miss rate": 0.12189760404769799, "L1I cache miss rate": 0.009613413636747567, "L1D cache miss rate": 0.014546041758223879, "L2D cache miss rate": 0.17745561508561825, "LL cache miss rate": 0.9665793246650517}
--- a/pytorch/output/altra_10_30_ut2010_1000.output
+++ b/pytorch/output/altra_10_30_ut2010_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394993 queued and waiting for resources
 srun: job 3394993 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.6983, 0.2845, 0.5984,  ..., 0.1182, 0.9468, 0.3161])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 8.604448795318604 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
             52.22 msec task-clock:u                     #    0.004 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,288      page-faults:u                    #   62.965 K/sec                     
        67,463,873      cycles:u                         #    1.292 GHz                         (52.95%)
        73,042,754      instructions:u                   #    1.08  insn per cycle              (71.78%)
   <not supported>      branches:u                                                            
           376,297      branch-misses:u                                                         (87.57%)
        34,189,906      L1-dcache-loads:u                #  654.731 M/sec                       (97.72%)
           471,636      L1-dcache-load-misses:u          #    1.38% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,870,328      L1-icache-loads:u                #  610.312 M/sec                     
           297,680      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        57,623,823      dTLB-loads:u                     #    1.103 G/sec                       (30.16%)
            75,454      dTLB-load-misses:u               #    0.13% of all dTLB cache accesses  (24.31%)
                 0      iTLB-loads:u                     #    0.000 /sec                        (3.96%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      12.112100803 seconds time elapsed
      66.253313000 seconds user
     675.855469000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.0260, 0.8569, 0.4315,  ..., 0.5243, 0.8018, 0.1763])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 8.702903270721436 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
           344,635      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,775,821      BR_RETIRED:u                                                          
      12.383096073 seconds time elapsed
      64.544546000 seconds user
     688.477174000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.7940, 0.1585, 0.6879,  ..., 0.4017, 0.1738, 0.9713])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 7.38647985458374 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
        27,488,750      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,494      ITLB_WALK:u                                                           
            18,293      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,697,113      L1D_TLB:u                                                             
      10.936742446 seconds time elapsed
      63.993242000 seconds user
     580.515047000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.2725, 0.6578, 0.8180,  ..., 0.0148, 0.5094, 0.1155])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 12.719107389450073 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
        31,066,176      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           298,652      L1I_CACHE_REFILL:u                                                    
           473,808      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,572,985      L1D_CACHE:u                                                           
      16.299576479 seconds time elapsed
      86.072431000 seconds user
     987.199923000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.1156, 0.5715, 0.3099,  ..., 0.3964, 0.9672, 0.5694])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 12.682909727096558 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
           547,428      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           566,356      LL_CACHE_RD:u                                                         
           162,858      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            19,852      L2D_TLB_REFILL:u                                                      
           304,056      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,713,420      L2D_CACHE:u                                                           
      16.221517033 seconds time elapsed
      79.927661000 seconds user
     988.333919000 seconds sys
--- a/pytorch/output/altra_10_30_vt2010_1000.json
+++ b/pytorch/output/altra_10_30_vt2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.88, 20.76, 20.76, 20.96, 20.92, 20.88, 20.72, 20.4, 20.4, 20.24], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.6774682998657227, "power": [34.12, 31.52, 30.36, 27.2, 27.16, 30.64, 31.0, 31.32], "power_after": [20.44, 20.52, 20.68, 20.72, 20.68, 20.72, 20.88, 20.8, 20.88, 20.52], "task clock (msec)": 48.59, "page faults": 3274, "cycles": 55030923, "instructions": 78222423, "branch mispredictions": 323004, "branches": 19091130, "ITLB accesses": 27178617, "ITLB misses": 6398, "DTLB misses": 19770, "DTLB accesses": 36355567, "L1I cache accesses": 31341858, "L1I cache misses": 291951, "L1D cache misses": 468242, "L1D cache accesses": 32805413, "LL cache misses": 520057, "LL cache accesses": 541186, "L2D TLB accesses": 191068, "L2D TLB misses": 22725, "L2D cache misses": 288895, "L2D cache accesses": 1728320, "instructions per cycle": 1.4214266949511278, "branch miss rate": 0.01691906136514706, "ITLB miss rate": 0.00023540564996371965, "DTLB miss rate": 0.0005437956723381593, "L2D TLB miss rate": 0.11893671363074926, "L1I cache miss rate": 0.009315050817982775, "L1D cache miss rate": 0.014273315199537345, "L2D cache miss rate": 0.16715365210146269, "LL cache miss rate": 0.9609579700879181}
--- a/pytorch/output/altra_10_30_vt2010_1000.output
+++ b/pytorch/output/altra_10_30_vt2010_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3394988 queued and waiting for resources
 srun: job 3394988 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.2022, 0.3400, 0.2561,  ..., 0.8370, 0.0285, 0.6506])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.74875545501709 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
             48.59 msec task-clock:u                     #    0.007 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,274      page-faults:u                    #   67.376 K/sec                     
        55,030,923      cycles:u                         #    1.132 GHz                         (65.54%)
        78,222,423      instructions:u                   #    1.42  insn per cycle              (83.60%)
   <not supported>      branches:u                                                            
           369,917      branch-misses:u                                                       
        32,435,815      L1-dcache-loads:u                #  667.500 M/sec                     
           467,963      L1-dcache-load-misses:u          #    1.44% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,013,287      L1-icache-loads:u                #  638.226 M/sec                     
           289,982      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        60,644,978      dTLB-loads:u                     #    1.248 G/sec                       (17.29%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.978143797 seconds time elapsed
      18.401752000 seconds user
      28.060858000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.3381, 0.0423, 0.5363,  ..., 0.0429, 0.4077, 0.4744])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.7925527095794678 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
           323,004      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,091,130      BR_RETIRED:u                                                          
       7.233250772 seconds time elapsed
      19.111768000 seconds user
      32.178633000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.7962, 0.6492, 0.2778,  ..., 0.5407, 0.1159, 0.3587])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.668635129928589 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
        27,178,617      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,398      ITLB_WALK:u                                                           
            19,770      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,355,567      L1D_TLB:u                                                             
       6.925944164 seconds time elapsed
      18.970654000 seconds user
      30.786317000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.8340, 0.3434, 0.3449,  ..., 0.9828, 0.6683, 0.0312])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.623232126235962 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
        31,341,858      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           291,951      L1I_CACHE_REFILL:u                                                    
           468,242      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,805,413      L1D_CACHE:u                                                           
       6.941260499 seconds time elapsed
      18.410270000 seconds user
      27.908787000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.2754, 0.3661, 0.9484,  ..., 0.7285, 0.5354, 0.4116])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.7337992191314697 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
           520,057      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           541,186      LL_CACHE_RD:u                                                         
           191,068      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            22,725      L2D_TLB_REFILL:u                                                      
           288,895      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,728,320      L2D_CACHE:u                                                           
       7.164825085 seconds time elapsed
      18.193885000 seconds user
      30.023194000 seconds sys
--- a/pytorch/output_HPC.json
+++ b/pytorch/output_HPC.json
--- a/pytorch/output_HPC/altra_10_30_ASIC_680k_1000.json
+++ b/pytorch/output_HPC/altra_10_30_ASIC_680k_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [80.64, 75.2, 61.4, 49.84, 38.12, 38.12, 24.16, 22.68, 22.36, 22.2], "matrix": "ASIC_680k", "shape": [682862, 682862], "nnz": 3871773, "% density": 8.303171256088674e-06, "time_s": 41.51614689826965, "power": [92.4, 92.44, 84.28, 73.04, 59.64, 53.28, 56.32, 65.88, 80.28, 93.76, 96.16, 94.44, 94.44, 94.2, 93.92, 92.48, 92.16, 91.84, 92.08, 91.84, 91.68, 90.68, 90.88, 90.28, 90.28, 92.44, 92.52, 92.84, 90.0, 89.64, 88.16, 87.28, 88.12, 88.24, 88.08, 85.72, 85.12, 85.12, 81.72, 82.52, 83.84, 86.32, 88.8, 91.0, 90.2], "power_after": [21.92, 21.88, 21.92, 21.88, 21.88, 21.72, 21.72, 21.72, 21.72, 21.44], "task clock (msec)": 55.74, "page faults": 3266, "cycles": 51085608, "instructions": 88049969, "branch mispredictions": 332704, "branches": 20219525, "ITLB accesses": 27856157, "ITLB misses": 6496, "DTLB misses": 17046, "DTLB accesses": 37522360, "L1I cache accesses": 31475230, "L1I cache misses": 277921, "L1D cache misses": 462005, "L1D cache accesses": 33126938, "LL cache misses": 558923, "LL cache accesses": 571263, "L2D TLB accesses": 190627, "L2D TLB misses": 24234, "L2D cache misses": 314815, "L2D cache accesses": 1760110, "instructions per cycle": 1.7235768046452535, "branch miss rate": 0.01645459030318467, "ITLB miss rate": 0.00023319799640704206, "DTLB miss rate": 0.0004542891225392006, "L2D TLB miss rate": 0.12712784652751186, "L1I cache miss rate": 0.008829832220447635, "L1D cache miss rate": 0.013946504805243395, "L2D cache miss rate": 0.17886098027964162, "LL cache miss rate": 0.978398741035215}
--- a/pytorch/output_HPC/altra_10_30_ASIC_680k_1000.output
+++ b/pytorch/output_HPC/altra_10_30_ASIC_680k_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395287 queued and waiting for resources
 srun: job 3395287 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       4,  ..., 3871767,
                            3871770, 3871773]),
       col_indices=tensor([     0,  11698,  11699,  ..., 169456, 645874,
                           682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05,  ...,
                       0.0000e+00,  0.0000e+00,  7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
 tensor([0.9283, 0.0381, 0.0668,  ..., 0.8379, 0.4193, 0.2544])
 Matrix: ASIC_680k
 Shape: torch.Size([682862, 682862])
 NNZ: 3871773
 Density: 8.303171256088674e-06
 Time: 29.317893266677856 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
             55.74 msec task-clock:u                     #    0.002 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,266      page-faults:u                    #   58.589 K/sec                     
        51,085,608      cycles:u                         #    0.916 GHz                         (47.05%)
        88,049,969      instructions:u                   #    1.72  insn per cycle              (92.14%)
   <not supported>      branches:u                                                            
           360,079      branch-misses:u                                                       
        31,381,953      L1-dcache-loads:u                #  562.963 M/sec                     
           471,072      L1-dcache-load-misses:u          #    1.50% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        29,944,756      L1-icache-loads:u                #  537.181 M/sec                     
           283,203      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        20,217,238      dTLB-loads:u                     #  362.679 M/sec                       (11.38%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      33.488240295 seconds time elapsed
     222.678572000 seconds user
    2205.889153000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       4,  ..., 3871767,
                            3871770, 3871773]),
       col_indices=tensor([     0,  11698,  11699,  ..., 169456, 645874,
                           682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05,  ...,
                       0.0000e+00,  0.0000e+00,  7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
 tensor([0.3482, 0.5546, 0.8398,  ..., 0.6137, 0.0654, 0.9075])
 Matrix: ASIC_680k
 Shape: torch.Size([682862, 682862])
 NNZ: 3871773
 Density: 8.303171256088674e-06
 Time: 38.4066903591156 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
           332,704      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,219,525      BR_RETIRED:u                                                          
      42.582064532 seconds time elapsed
     238.965431000 seconds user
    2914.615754000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       4,  ..., 3871767,
                            3871770, 3871773]),
       col_indices=tensor([     0,  11698,  11699,  ..., 169456, 645874,
                           682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05,  ...,
                       0.0000e+00,  0.0000e+00,  7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
 tensor([0.2581, 0.2884, 0.9465,  ..., 0.4833, 0.3421, 0.4862])
 Matrix: ASIC_680k
 Shape: torch.Size([682862, 682862])
 NNZ: 3871773
 Density: 8.303171256088674e-06
 Time: 34.74818539619446 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
        27,856,157      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,496      ITLB_WALK:u                                                           
            17,046      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,522,360      L1D_TLB:u                                                             
      39.019872270 seconds time elapsed
     239.678206000 seconds user
    2622.552757000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       4,  ..., 3871767,
                            3871770, 3871773]),
       col_indices=tensor([     0,  11698,  11699,  ..., 169456, 645874,
                           682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05,  ...,
                       0.0000e+00,  0.0000e+00,  7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
 tensor([0.8603, 0.0423, 0.3724,  ..., 0.4873, 0.6469, 0.9634])
 Matrix: ASIC_680k
 Shape: torch.Size([682862, 682862])
 NNZ: 3871773
 Density: 8.303171256088674e-06
 Time: 33.05097770690918 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
        31,475,230      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           277,921      L1I_CACHE_REFILL:u                                                    
           462,005      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,126,938      L1D_CACHE:u                                                           
      37.399374202 seconds time elapsed
     239.238852000 seconds user
    2492.385966000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       4,  ..., 3871767,
                            3871770, 3871773]),
       col_indices=tensor([     0,  11698,  11699,  ..., 169456, 645874,
                           682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05,  ...,
                       0.0000e+00,  0.0000e+00,  7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
 tensor([0.1993, 0.2167, 0.6338,  ..., 0.0614, 0.0230, 0.4851])
 Matrix: ASIC_680k
 Shape: torch.Size([682862, 682862])
 NNZ: 3871773
 Density: 8.303171256088674e-06
 Time: 32.37103772163391 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
           558,923      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           571,263      LL_CACHE_RD:u                                                         
           190,627      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,234      L2D_TLB_REFILL:u                                                      
           314,815      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,760,110      L2D_CACHE:u                                                           
      36.644016288 seconds time elapsed
     233.933818000 seconds user
    2439.284669000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_de2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_de2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.08, 21.88, 21.88, 21.88, 21.56, 21.64, 21.84, 21.88, 21.72, 21.92], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.7533018589019775, "power": [29.48, 30.24, 27.96, 28.4, 26.84, 30.6, 30.92], "power_after": [20.84, 21.24, 21.2, 21.24, 21.28, 20.88, 20.68, 20.56, 20.52, 20.56], "task clock (msec)": 61.38, "page faults": 3315, "cycles": 65013274, "instructions": 87442627, "branch mispredictions": 328392, "branches": 19496396, "ITLB accesses": 28311619, "ITLB misses": 6963, "DTLB misses": 17888, "DTLB accesses": 38223408, "L1I cache accesses": 30063404, "L1I cache misses": 272797, "L1D cache misses": 468341, "L1D cache accesses": 31519623, "LL cache misses": 538689, "LL cache accesses": 552789, "L2D TLB accesses": 192995, "L2D TLB misses": 23339, "L2D cache misses": 300578, "L2D cache accesses": 1764035, "instructions per cycle": 1.344996515634638, "branch miss rate": 0.016843728451145536, "ITLB miss rate": 0.0002459414277933028, "DTLB miss rate": 0.00046798548156668814, "L2D TLB miss rate": 0.12093059405684085, "L1I cache miss rate": 0.009074055619250568, "L1D cache miss rate": 0.01485871198395996, "L2D cache miss rate": 0.17039231081015965, "LL cache miss rate": 0.9744929801425137}
--- a/pytorch/output_HPC/altra_10_30_de2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_de2010_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395278 queued and waiting for resources
 srun: job 3395278 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.3547, 0.6554, 0.2142,  ..., 0.8854, 0.1041, 0.2243])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.74495267868042 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
             61.38 msec task-clock:u                     #    0.010 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,315      page-faults:u                    #   54.008 K/sec                     
        65,013,274      cycles:u                         #    1.059 GHz                         (90.47%)
        87,442,627      instructions:u                   #    1.34  insn per cycle            
   <not supported>      branches:u                                                            
           369,052      branch-misses:u                                                       
        31,570,549      L1-dcache-loads:u                #  514.350 M/sec                     
           477,402      L1-dcache-load-misses:u          #    1.51% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,354,192      L1-icache-loads:u                #  494.533 M/sec                     
           294,845      L1-icache-load-misses:u          #    0.97% of all L1-icache accesses 
                 0      dTLB-loads:u                     #    0.000 /sec                        (3.92%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.232986287 seconds time elapsed
      17.354331000 seconds user
      29.036034000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.3177, 0.9122, 0.6465,  ..., 0.5489, 0.2254, 0.7965])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.7603256702423096 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
           328,392      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,496,396      BR_RETIRED:u                                                          
       6.149991615 seconds time elapsed
      17.630426000 seconds user
      30.586756000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.7815, 0.6240, 0.3715,  ..., 0.5116, 0.5969, 0.4241])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.7978765964508057 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
        28,311,619      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,963      ITLB_WALK:u                                                           
            17,888      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        38,223,408      L1D_TLB:u                                                             
       6.151843492 seconds time elapsed
      17.202045000 seconds user
      28.014218000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.9638, 0.0929, 0.0479,  ..., 0.1500, 0.3117, 0.9664])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.684640884399414 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
        30,063,404      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           272,797      L1I_CACHE_REFILL:u                                                    
           468,341      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,519,623      L1D_CACHE:u                                                           
       5.874324363 seconds time elapsed
      17.629166000 seconds user
      29.998701000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,     13,     21,  ..., 116047, 116051,
                            116056]),
       col_indices=tensor([  250,   251,   757,  ..., 23334, 23553, 24050]),
       values=tensor([ 14900.,  33341.,  20255.,  ..., 164227.,  52413.,
                       16949.]), size=(24115, 24115), nnz=116056,
       layout=torch.sparse_csr)
 tensor([0.3936, 0.9167, 0.4396,  ..., 0.1628, 0.6361, 0.1875])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
 Time: 2.747934103012085 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
           538,689      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           552,789      LL_CACHE_RD:u                                                         
           192,995      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,339      L2D_TLB_REFILL:u                                                      
           300,578      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,764,035      L2D_CACHE:u                                                           
       6.102012809 seconds time elapsed
      18.001082000 seconds user
      27.986033000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_fl2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_fl2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.72, 20.8, 20.96, 21.08, 21.4, 21.48, 21.48, 21.36, 21.08, 21.04], "matrix": "fl2010", "shape": [484481, 484481], "nnz": 2346294, "% density": 9.99606174861054e-06, "time_s": 14.43001127243042, "power": [93.04, 93.04, 89.16, 77.68, 62.92, 55.12, 53.84, 64.72, 77.04, 89.56, 94.4, 94.76, 93.52, 93.52, 96.04, 97.12, 96.44, 93.88, 93.72], "power_after": [21.08, 21.28, 21.28, 21.36, 21.08, 21.24, 21.08, 20.8, 21.04, 20.88], "task clock (msec)": 61.6, "page faults": 3276, "cycles": 41408849, "instructions": 49118917, "branch mispredictions": 331330, "branches": 19331189, "ITLB accesses": 27367982, "ITLB misses": 6160, "DTLB misses": 17157, "DTLB accesses": 36828216, "L1I cache accesses": 30147304, "L1I cache misses": 280082, "L1D cache misses": 454022, "L1D cache accesses": 31595140, "LL cache misses": 536056, "LL cache accesses": 550006, "L2D TLB accesses": 185998, "L2D TLB misses": 23735, "L2D cache misses": 296648, "L2D cache accesses": 1723525, "instructions per cycle": 1.1861937287848787, "branch miss rate": 0.017139659645353425, "ITLB miss rate": 0.00022508053388810325, "DTLB miss rate": 0.00046586562867992305, "L2D TLB miss rate": 0.12760889902041958, "L1I cache miss rate": 0.009290449321770198, "L1D cache miss rate": 0.014369994878959232, "L2D cache miss rate": 0.172117027603313, "LL cache miss rate": 0.97463664032756}
--- a/pytorch/output_HPC/altra_10_30_fl2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_fl2010_1000.output
@ -1,169 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395283 queued and waiting for resources
 srun: job 3395283 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2346288,
                            2346292, 2346294]),
       col_indices=tensor([  1513,   5311,    947,  ..., 484460, 482463,
                           484022]),
       values=tensor([28364., 12497., 11567.,  ...,  8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
 tensor([2.0367e-04, 1.7661e-01, 2.1772e-01,  ..., 1.8646e-01, 2.2210e-01,
        4.2364e-02])
 Matrix: fl2010
 Shape: torch.Size([484481, 484481])
 NNZ: 2346294
 Density: 9.99606174861054e-06
 Time: 16.31556534767151 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
             61.60 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,276      page-faults:u                    #   53.185 K/sec                     
        41,408,849      cycles:u                         #    0.672 GHz                         (41.57%)
        49,118,917      instructions:u                   #    1.19  insn per cycle              (67.74%)
   <not supported>      branches:u                                                            
           344,653      branch-misses:u                                                         (91.69%)
        31,501,274      L1-dcache-loads:u                #  511.418 M/sec                     
           477,740      L1-dcache-load-misses:u          #    1.52% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,099,667      L1-icache-loads:u                #  488.663 M/sec                     
           285,734      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        41,879,387      dTLB-loads:u                     #  679.904 M/sec                       (54.00%)
            99,044      dTLB-load-misses:u               #    0.24% of all dTLB cache accesses  (13.61%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      20.288512544 seconds time elapsed
     134.447078000 seconds user
    1247.121046000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2346288,
                            2346292, 2346294]),
       col_indices=tensor([  1513,   5311,    947,  ..., 484460, 482463,
                           484022]),
       values=tensor([28364., 12497., 11567.,  ...,  8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
 tensor([0.9700, 0.5813, 0.6566,  ..., 0.4126, 0.7652, 0.9833])
 Matrix: fl2010
 Shape: torch.Size([484481, 484481])
 NNZ: 2346294
 Density: 9.99606174861054e-06
 Time: 16.561575651168823 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
           331,330      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,331,189      BR_RETIRED:u                                                          
      20.603578845 seconds time elapsed
     136.555709000 seconds user
    1264.382740000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2346288,
                            2346292, 2346294]),
       col_indices=tensor([  1513,   5311,    947,  ..., 484460, 482463,
                           484022]),
       values=tensor([28364., 12497., 11567.,  ...,  8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
 tensor([0.1770, 0.8270, 0.4236,  ..., 0.0091, 0.2300, 0.5084])
 Matrix: fl2010
 Shape: torch.Size([484481, 484481])
 NNZ: 2346294
 Density: 9.99606174861054e-06
 Time: 17.374610424041748 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
        27,367,982      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,160      ITLB_WALK:u                                                           
            17,157      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,828,216      L1D_TLB:u                                                             
      21.377378255 seconds time elapsed
     140.848520000 seconds user
    1326.124469000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2346288,
                            2346292, 2346294]),
       col_indices=tensor([  1513,   5311,    947,  ..., 484460, 482463,
                           484022]),
       values=tensor([28364., 12497., 11567.,  ...,  8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
 tensor([0.1268, 0.8786, 0.9762,  ..., 0.0649, 0.4474, 0.9707])
 Matrix: fl2010
 Shape: torch.Size([484481, 484481])
 NNZ: 2346294
 Density: 9.99606174861054e-06
 Time: 16.753613471984863 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
        30,147,304      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           280,082      L1I_CACHE_REFILL:u                                                    
           454,022      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,595,140      L1D_CACHE:u                                                           
      20.706929400 seconds time elapsed
     139.881127000 seconds user
    1278.527504000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2346288,
                            2346292, 2346294]),
       col_indices=tensor([  1513,   5311,    947,  ..., 484460, 482463,
                           484022]),
       values=tensor([28364., 12497., 11567.,  ...,  8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
 tensor([0.1394, 0.8842, 0.4362,  ..., 0.8265, 0.1643, 0.9034])
 Matrix: fl2010
 Shape: torch.Size([484481, 484481])
 NNZ: 2346294
 Density: 9.99606174861054e-06
 Time: 14.484151124954224 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
           536,056      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           550,006      LL_CACHE_RD:u                                                         
           185,998      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,735      L2D_TLB_REFILL:u                                                      
           296,648      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,723,525      L2D_CACHE:u                                                           
      18.443039315 seconds time elapsed
     135.498625000 seconds user
    1101.745145000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_ga2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_ga2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [51.04, 38.64, 22.84, 22.24, 21.88, 21.88, 21.6, 21.4, 21.24, 21.28], "matrix": "ga2010", "shape": [291086, 291086], "nnz": 1418056, "% density": 1.6735964475229304e-05, "time_s": 15.249999523162842, "power": [88.88, 89.52, 78.6, 64.88, 52.64, 52.64, 54.76, 60.16, 71.44, 86.84, 90.72, 89.6, 90.56, 90.36, 91.68, 91.84, 93.4, 93.4, 92.72], "power_after": [21.68, 21.4, 21.28, 21.04, 21.04, 20.96, 20.92, 20.76, 20.8, 20.96], "task clock (msec)": 72.45, "page faults": 3289, "cycles": 24836161, "instructions": 74134706, "branch mispredictions": 325643, "branches": 19697746, "ITLB accesses": 27767290, "ITLB misses": 5832, "DTLB misses": 18134, "DTLB accesses": 37063060, "L1I cache accesses": 32135376, "L1I cache misses": 302429, "L1D cache misses": 484427, "L1D cache accesses": 33639686, "LL cache misses": 548380, "LL cache accesses": 561312, "L2D TLB accesses": 186006, "L2D TLB misses": 25022, "L2D cache misses": 304539, "L2D cache accesses": 1750107, "instructions per cycle": 2.9849502908279586, "branch miss rate": 0.01653199305138771, "ITLB miss rate": 0.00021003129941740803, "DTLB miss rate": 0.0004892742261432272, "L2D TLB miss rate": 0.13452254228358226, "L1I cache miss rate": 0.009411092622659838, "L1D cache miss rate": 0.014400461407398393, "L2D cache miss rate": 0.17401164614506429, "LL cache miss rate": 0.976961119662505}
--- a/pytorch/output_HPC/altra_10_30_ga2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_ga2010_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395281 queued and waiting for resources
 srun: job 3395281 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.8043, 0.7164, 0.5687,  ..., 0.1275, 0.5142, 0.8456])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 13.566045045852661 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
             72.45 msec task-clock:u                     #    0.004 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,289      page-faults:u                    #   45.396 K/sec                     
        24,836,161      cycles:u                         #    0.343 GHz                         (23.15%)
        74,134,706      instructions:u                   #    2.98  insn per cycle              (85.49%)
   <not supported>      branches:u                                                            
           381,828      branch-misses:u                                                       
        33,748,654      L1-dcache-loads:u                #  465.814 M/sec                     
           497,166      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,271,900      L1-icache-loads:u                #  445.431 M/sec                     
           311,814      L1-icache-load-misses:u          #    0.97% of all L1-icache accesses 
        43,431,516      dTLB-loads:u                     #  599.461 M/sec                       (27.81%)
            33,416      dTLB-load-misses:u               #    0.08% of all dTLB cache accesses  (4.55%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      17.276157893 seconds time elapsed
     100.320029000 seconds user
    1057.703228000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.6290, 0.2236, 0.0669,  ..., 0.6531, 0.4280, 0.4384])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 17.094524145126343 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
           325,643      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,697,746      BR_RETIRED:u                                                          
      20.849795214 seconds time elapsed
     115.280665000 seconds user
    1318.654953000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.1008, 0.2309, 0.3749,  ..., 0.1568, 0.8852, 0.8182])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 15.106332063674927 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
        27,767,290      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,832      ITLB_WALK:u                                                           
            18,134      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,063,060      L1D_TLB:u                                                             
      18.753509375 seconds time elapsed
     112.958759000 seconds user
    1167.457916000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.8347, 0.6624, 0.6196,  ..., 0.2250, 0.0157, 0.1843])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 13.73094367980957 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
        32,135,376      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,429      L1I_CACHE_REFILL:u                                                    
           484,427      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,639,686      L1D_CACHE:u                                                           
      17.400567824 seconds time elapsed
     110.027662000 seconds user
    1054.271122000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.8369, 0.3399, 0.1689,  ..., 0.2081, 0.0714, 0.7388])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 15.809288501739502 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
           548,380      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           561,312      LL_CACHE_RD:u                                                         
           186,006      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,022      L2D_TLB_REFILL:u                                                      
           304,539      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,750,107      L2D_CACHE:u                                                           
      19.626934574 seconds time elapsed
     116.733174000 seconds user
    1214.439657000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.json
+++ b/pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.04, 21.32, 21.32, 21.32, 21.12, 21.12, 21.0, 20.68, 20.72, 20.56], "matrix": "mac_econ_fwd500", "shape": [206500, 206500], "nnz": 1273389, "% density": 2.9862143765866013e-05, "time_s": 15.046087741851807, "power": [91.88, 91.12, 83.92, 72.88, 57.76, 51.24, 53.12, 62.84, 78.32, 91.64, 95.8, 95.8, 94.08, 92.48, 91.6, 89.88, 87.36, 87.84, 87.32], "power_after": [20.92, 21.04, 21.12, 20.92, 20.92, 20.88, 20.88, 20.92, 21.04, 20.96], "task clock (msec)": 62.46, "page faults": 3243, "cycles": 57150420, "instructions": 94155455, "branch mispredictions": 320781, "branches": 19491698, "ITLB accesses": 27433101, "ITLB misses": 7382, "DTLB misses": 19213, "DTLB accesses": 37123052, "L1I cache accesses": 32027284, "L1I cache misses": 290368, "L1D cache misses": 471338, "L1D cache accesses": 33366668, "LL cache misses": 571063, "LL cache accesses": 583554, "L2D TLB accesses": 196434, "L2D TLB misses": 25171, "L2D cache misses": 329198, "L2D cache accesses": 1814040, "instructions per cycle": 1.6475024155553013, "branch miss rate": 0.016457314288370363, "ITLB miss rate": 0.0002690909788142434, "DTLB miss rate": 0.0005175490420345827, "L2D TLB miss rate": 0.1281397314110592, "L1I cache miss rate": 0.009066269871650684, "L1D cache miss rate": 0.014126013421537926, "L2D cache miss rate": 0.1814722938854711, "LL cache miss rate": 0.9785949543658342}
--- a/pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.output
+++ b/pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395279 queued and waiting for resources
 srun: job 3395279 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       8,  ..., 1273376,
                            1273379, 1273389]),
       col_indices=tensor([     3,     30,     44,  ..., 206363, 206408,
                           206459]),
       values=tensor([-3.7877e-03, -1.5420e-01,  9.5305e-04,  ...,
                       1.2290e-01,  2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
 tensor([0.5388, 0.2921, 0.7349,  ..., 0.6379, 0.9676, 0.6389])
 Matrix: mac_econ_fwd500
 Shape: torch.Size([206500, 206500])
 NNZ: 1273389
 Density: 2.9862143765866013e-05
 Time: 21.700236320495605 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
             62.46 msec task-clock:u                     #    0.002 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,243      page-faults:u                    #   51.921 K/sec                     
        57,150,420      cycles:u                         #    0.915 GHz                         (90.14%)
        94,155,455      instructions:u                   #    1.65  insn per cycle            
   <not supported>      branches:u                                                            
           373,032      branch-misses:u                                                       
        33,654,742      L1-dcache-loads:u                #  538.817 M/sec                     
           479,068      L1-dcache-load-misses:u          #    1.42% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,149,866      L1-icache-loads:u                #  514.724 M/sec                     
           293,643      L1-icache-load-misses:u          #    0.91% of all L1-icache accesses 
                 0      dTLB-loads:u                     #    0.000 /sec                        (5.14%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      25.310174677 seconds time elapsed
     125.287203000 seconds user
    1680.798909000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       8,  ..., 1273376,
                            1273379, 1273389]),
       col_indices=tensor([     3,     30,     44,  ..., 206363, 206408,
                           206459]),
       values=tensor([-3.7877e-03, -1.5420e-01,  9.5305e-04,  ...,
                       1.2290e-01,  2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
 tensor([0.6433, 0.3677, 0.3308,  ..., 0.5364, 0.2509, 0.4204])
 Matrix: mac_econ_fwd500
 Shape: torch.Size([206500, 206500])
 NNZ: 1273389
 Density: 2.9862143765866013e-05
 Time: 16.171404361724854 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
           320,781      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,491,698      BR_RETIRED:u                                                          
      19.988421837 seconds time elapsed
     112.429117000 seconds user
    1245.246161000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       8,  ..., 1273376,
                            1273379, 1273389]),
       col_indices=tensor([     3,     30,     44,  ..., 206363, 206408,
                           206459]),
       values=tensor([-3.7877e-03, -1.5420e-01,  9.5305e-04,  ...,
                       1.2290e-01,  2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
 tensor([0.9344, 0.9844, 0.2313,  ..., 0.8634, 0.6912, 0.9693])
 Matrix: mac_econ_fwd500
 Shape: torch.Size([206500, 206500])
 NNZ: 1273389
 Density: 2.9862143765866013e-05
 Time: 11.788637161254883 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
        27,433,101      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             7,382      ITLB_WALK:u                                                           
            19,213      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,123,052      L1D_TLB:u                                                             
      15.542834153 seconds time elapsed
      99.681401000 seconds user
     906.856853000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       8,  ..., 1273376,
                            1273379, 1273389]),
       col_indices=tensor([     3,     30,     44,  ..., 206363, 206408,
                           206459]),
       values=tensor([-3.7877e-03, -1.5420e-01,  9.5305e-04,  ...,
                       1.2290e-01,  2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
 tensor([0.2037, 0.6417, 0.9786,  ..., 0.8187, 0.4933, 0.1289])
 Matrix: mac_econ_fwd500
 Shape: torch.Size([206500, 206500])
 NNZ: 1273389
 Density: 2.9862143765866013e-05
 Time: 13.596147060394287 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
        32,027,284      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           290,368      L1I_CACHE_REFILL:u                                                    
           471,338      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,366,668      L1D_CACHE:u                                                           
      17.325855116 seconds time elapsed
     101.368582000 seconds user
    1053.826259000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,       8,  ..., 1273376,
                            1273379, 1273389]),
       col_indices=tensor([     3,     30,     44,  ..., 206363, 206408,
                           206459]),
       values=tensor([-3.7877e-03, -1.5420e-01,  9.5305e-04,  ...,
                       1.2290e-01,  2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
 tensor([0.2072, 0.8681, 0.4768,  ..., 0.4873, 0.8997, 0.8601])
 Matrix: mac_econ_fwd500
 Shape: torch.Size([206500, 206500])
 NNZ: 1273389
 Density: 2.9862143765866013e-05
 Time: 14.157796382904053 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
           571,063      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           583,554      LL_CACHE_RD:u                                                         
           196,434      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            25,171      L2D_TLB_REFILL:u                                                      
           329,198      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,814,040      L2D_CACHE:u                                                           
      17.958287837 seconds time elapsed
     104.145071000 seconds user
    1089.962121000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_mc2depi_1000.json
+++ b/pytorch/output_HPC/altra_10_30_mc2depi_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [94.16, 91.68, 78.92, 60.88, 46.72, 28.36, 22.08, 21.64, 21.64, 21.64], "matrix": "mc2depi", "shape": [525825, 525825], "nnz": 2100225, "% density": 7.595972132902821e-06, "time_s": 11.03979206085205, "power": [95.44, 94.0, 88.76, 72.12, 59.48, 51.92, 53.88, 68.6, 83.2, 97.76, 98.4, 97.12, 97.12, 95.28, 94.12], "power_after": [21.48, 21.44, 21.28, 21.24, 21.16, 21.08, 21.24, 21.24, 21.24, 21.16], "task clock (msec)": 56.14, "page faults": 3289, "cycles": 47515158, "instructions": 72388154, "branch mispredictions": 327042, "branches": 19309026, "ITLB accesses": 26093030, "ITLB misses": 6189, "DTLB misses": 17253, "DTLB accesses": 35168741, "L1I cache accesses": 30539322, "L1I cache misses": 285404, "L1D cache misses": 465747, "L1D cache accesses": 31932803, "LL cache misses": 530261, "LL cache accesses": 551030, "L2D TLB accesses": 183570, "L2D TLB misses": 23883, "L2D cache misses": 297006, "L2D cache accesses": 1721848, "instructions per cycle": 1.5234749719236964, "branch miss rate": 0.01693726032581861, "ITLB miss rate": 0.0002371897782664566, "DTLB miss rate": 0.0004905776979619486, "L2D TLB miss rate": 0.13010295799967314, "L1I cache miss rate": 0.009345459601231487, "L1D cache miss rate": 0.014585221347465175, "L2D cache miss rate": 0.1724925777420539, "LL cache miss rate": 0.9623087672177558}
--- a/pytorch/output_HPC/altra_10_30_mc2depi_1000.output
+++ b/pytorch/output_HPC/altra_10_30_mc2depi_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395280 queued and waiting for resources
 srun: job 3395280 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.7162, 0.9445, 0.3087,  ..., 0.2863, 0.2977, 0.0994])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 14.228392839431763 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
             56.14 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,289      page-faults:u                    #   58.584 K/sec                     
        47,515,158      cycles:u                         #    0.846 GHz                         (55.54%)
        72,388,154      instructions:u                   #    1.52  insn per cycle              (79.69%)
   <not supported>      branches:u                                                            
           369,139      branch-misses:u                                                       
        32,820,508      L1-dcache-loads:u                #  584.601 M/sec                     
           483,558      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,317,848      L1-icache-loads:u                #  557.836 M/sec                     
           288,398      L1-icache-load-misses:u          #    0.92% of all L1-icache accesses 
        39,511,659      dTLB-loads:u                     #  703.784 M/sec                       (36.64%)
                 0      dTLB-load-misses:u                                                      (3.47%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      18.186987302 seconds time elapsed
     124.639912000 seconds user
    1088.590740000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.4954, 0.2907, 0.0979,  ..., 0.0742, 0.4519, 0.0278])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 11.948119163513184 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
           327,042      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,309,026      BR_RETIRED:u                                                          
      15.715674756 seconds time elapsed
     115.898749000 seconds user
     910.018676000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.1402, 0.9048, 0.8859,  ..., 0.9542, 0.3509, 0.0695])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 14.170094966888428 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
        26,093,030      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,189      ITLB_WALK:u                                                           
            17,253      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,168,741      L1D_TLB:u                                                             
      18.132605509 seconds time elapsed
     121.020111000 seconds user
    1090.508165000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.1192, 0.6084, 0.4643,  ..., 0.3445, 0.4658, 0.7085])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 13.925398826599121 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
        30,539,322      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           285,404      L1I_CACHE_REFILL:u                                                    
           465,747      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,932,803      L1D_CACHE:u                                                           
      17.812911214 seconds time elapsed
     119.918777000 seconds user
    1067.928403000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.2075, 0.7442, 0.4477,  ..., 0.0794, 0.0859, 0.8652])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 12.866743564605713 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
           530,261      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           551,030      LL_CACHE_RD:u                                                         
           183,570      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,883      L2D_TLB_REFILL:u                                                      
           297,006      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,721,848      L2D_CACHE:u                                                           
      16.812811712 seconds time elapsed
     117.780323000 seconds user
     986.834040000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.json
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.08, 25.12, 24.68, 23.68, 22.84, 21.96, 21.08, 20.96, 20.8, 20.96], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.9992897510528564, "power": [29.48, 30.52, 31.88, 31.24, 34.32], "power_after": [20.4, 20.6, 20.64, 20.76, 20.92, 20.84, 20.88, 20.88, 20.88, 20.84], "task clock (msec)": 52.68, "page faults": 3272, "cycles": 63019732, "instructions": 73518898, "branch mispredictions": 333423, "branches": 19435905, "ITLB accesses": 27447537, "ITLB misses": 6417, "DTLB misses": 18300, "DTLB accesses": 37569384, "L1I cache accesses": 30830481, "L1I cache misses": 290545, "L1D cache misses": 473875, "L1D cache accesses": 32284772, "LL cache misses": 529403, "LL cache accesses": 549794, "L2D TLB accesses": 198306, "L2D TLB misses": 24497, "L2D cache misses": 298519, "L2D cache accesses": 1772795, "instructions per cycle": 1.1666012480027683, "branch miss rate": 0.017155002558409294, "ITLB miss rate": 0.00023379146915805232, "DTLB miss rate": 0.000487098750408045, "L2D TLB miss rate": 0.12353131019737174, "L1I cache miss rate": 0.009423952873132274, "L1D cache miss rate": 0.014677972636758903, "L2D cache miss rate": 0.16838890001381998, "LL cache miss rate": 0.9629115632400499}
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.output
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395271 queued and waiting for resources
 srun: job 3395271 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.3559, 0.4732, 0.3024,  ..., 0.9176, 0.7712, 0.4949])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.0082497596740723 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
             52.68 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,272      page-faults:u                    #   62.105 K/sec                     
        63,019,732      cycles:u                         #    1.196 GHz                         (70.67%)
        73,518,898      instructions:u                   #    1.17  insn per cycle              (85.80%)
   <not supported>      branches:u                                                            
           359,236      branch-misses:u                                                         (99.44%)
        31,459,751      L1-dcache-loads:u                #  597.131 M/sec                     
           460,969      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        29,975,208      L1-icache-loads:u                #  568.954 M/sec                     
           281,710      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        59,589,523      dTLB-loads:u                     #    1.131 G/sec                       (17.10%)
                 0      dTLB-load-misses:u                                                      (1.27%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.456867719 seconds time elapsed
      16.389568000 seconds user
      29.247355000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.0123, 0.4107, 0.7785,  ..., 0.7964, 0.7541, 0.4153])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.030029058456421 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
           333,423      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,435,905      BR_RETIRED:u                                                          
       4.359656946 seconds time elapsed
      16.490532000 seconds user
      28.366462000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.1898, 0.0740, 0.4564,  ..., 0.7987, 0.1017, 0.5949])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.004878044128418 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
        27,447,537      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,417      ITLB_WALK:u                                                           
            18,300      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,569,384      L1D_TLB:u                                                             
       4.355627133 seconds time elapsed
      15.883078000 seconds user
      27.120829000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.1682, 0.9350, 0.9210,  ..., 0.3758, 0.2263, 0.1068])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.0207850933074951 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
        30,830,481      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           290,545      L1I_CACHE_REFILL:u                                                    
           473,875      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,284,772      L1D_CACHE:u                                                           
       4.427088851 seconds time elapsed
      15.711555000 seconds user
      29.627091000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    20,  ..., 39994, 39994, 39994]),
       col_indices=tensor([    1,     2,     3,  ...,  9711, 10875, 10876]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(10879, 10879),
       nnz=39994, layout=torch.sparse_csr)
 tensor([0.9351, 0.3836, 0.0822,  ..., 0.9798, 0.3726, 0.7394])
 Matrix: p2p-Gnutella04
 Shape: torch.Size([10879, 10879])
 NNZ: 39994
 Density: 0.0003379223282393842
 Time: 1.041510820388794 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
           529,403      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           549,794      LL_CACHE_RD:u                                                         
           198,306      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,497      L2D_TLB_REFILL:u                                                      
           298,519      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,772,795      L2D_CACHE:u                                                           
       4.454107604 seconds time elapsed
      16.577921000 seconds user
      29.390427000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.json
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.72, 30.6, 28.68, 26.48, 22.44, 21.4, 21.28, 21.08, 21.32, 21.6], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.718301773071289, "power": [31.52, 32.48, 33.64, 33.88, 33.44, 31.52], "power_after": [20.96, 20.84, 20.92, 20.8, 20.76, 20.76, 20.76, 20.68, 20.72, 20.92], "task clock (msec)": 67.08, "page faults": 3303, "cycles": 61261862, "instructions": 83757591, "branch mispredictions": 329248, "branches": 19953212, "ITLB accesses": 27084694, "ITLB misses": 7107, "DTLB misses": 17529, "DTLB accesses": 36684333, "L1I cache accesses": 32158234, "L1I cache misses": 286484, "L1D cache misses": 474161, "L1D cache accesses": 33730073, "LL cache misses": 550064, "LL cache accesses": 565245, "L2D TLB accesses": 191046, "L2D TLB misses": 23775, "L2D cache misses": 307419, "L2D cache accesses": 1772169, "instructions per cycle": 1.3672060930828385, "branch miss rate": 0.016501002445120115, "ITLB miss rate": 0.0002623991247602797, "DTLB miss rate": 0.0004778334118818516, "L2D TLB miss rate": 0.12444646838981188, "L1I cache miss rate": 0.008908573773049851, "L1D cache miss rate": 0.014057514788064645, "L2D cache miss rate": 0.1734704760099065, "LL cache miss rate": 0.973142619572044}
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.output
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395289 queued and waiting for resources
 srun: job 3395289 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.3210, 0.3418, 0.9584,  ..., 0.8929, 0.9807, 0.5532])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6565663814544678 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
             67.08 msec task-clock:u                     #    0.013 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,303      page-faults:u                    #   49.241 K/sec                     
        61,261,862      cycles:u                         #    0.913 GHz                         (49.19%)
        83,757,591      instructions:u                   #    1.37  insn per cycle              (88.30%)
   <not supported>      branches:u                                                            
           364,692      branch-misses:u                                                       
        31,954,743      L1-dcache-loads:u                #  476.379 M/sec                     
           490,953      L1-dcache-load-misses:u          #    1.54% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,490,915      L1-icache-loads:u                #  454.556 M/sec                     
           291,964      L1-icache-load-misses:u          #    0.96% of all L1-icache accesses 
        32,131,046      dTLB-loads:u                     #  479.007 M/sec                       (19.20%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       5.107407925 seconds time elapsed
      16.045361000 seconds user
      30.574855000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.4851, 0.2524, 0.2134,  ..., 0.5976, 0.0089, 0.2284])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6902527809143066 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
           329,248      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,953,212      BR_RETIRED:u                                                          
       4.990707186 seconds time elapsed
      16.713526000 seconds user
      27.761595000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.1844, 0.9003, 0.0155,  ..., 0.5184, 0.1445, 0.3588])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6478993892669678 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
        27,084,694      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             7,107      ITLB_WALK:u                                                           
            17,529      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,684,333      L1D_TLB:u                                                             
       5.010572757 seconds time elapsed
      16.570396000 seconds user
      27.387405000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.2313, 0.8375, 0.3065,  ..., 0.2374, 0.2281, 0.2100])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.637598991394043 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
        32,158,234      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           286,484      L1I_CACHE_REFILL:u                                                    
           474,161      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,730,073      L1D_CACHE:u                                                           
       4.963121627 seconds time elapsed
      16.730431000 seconds user
      29.869416000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.5006, 0.8470, 0.3527,  ..., 0.3901, 0.3581, 0.1154])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 1.6584653854370117 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
           550,064      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           565,245      LL_CACHE_RD:u                                                         
           191,046      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,775      L2D_TLB_REFILL:u                                                      
           307,419      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,772,169      L2D_CACHE:u                                                           
       5.019317303 seconds time elapsed
      16.518292000 seconds user
      30.069880000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.json
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [86.48, 72.16, 59.36, 41.84, 28.44, 22.96, 22.92, 22.92, 23.04, 23.24], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.431199073791504, "power": [35.16, 36.2, 36.72, 37.52, 37.52], "power_after": [21.32, 21.2, 21.2, 21.28, 21.52, 21.44, 21.92, 21.68, 21.6, 21.36], "task clock (msec)": 59.85, "page faults": 3318, "cycles": 76505130, "instructions": 72343215, "branch mispredictions": 322338, "branches": 19784096, "ITLB accesses": 27270404, "ITLB misses": 6607, "DTLB misses": 17981, "DTLB accesses": 36751047, "L1I cache accesses": 30620441, "L1I cache misses": 302139, "L1D cache misses": 471011, "L1D cache accesses": 32141810, "LL cache misses": 531907, "LL cache accesses": 545159, "L2D TLB accesses": 188244, "L2D TLB misses": 23034, "L2D cache misses": 293848, "L2D cache accesses": 1757551, "instructions per cycle": 0.945599530384433, "branch miss rate": 0.016292783860329025, "ITLB miss rate": 0.00024227730546272803, "DTLB miss rate": 0.0004892649725054092, "L2D TLB miss rate": 0.12236246573595971, "L1I cache miss rate": 0.009867232153841285, "L1D cache miss rate": 0.014654152955294054, "L2D cache miss rate": 0.1671917344077071, "LL cache miss rate": 0.9756914955086498}
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.output
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395288 queued and waiting for resources
 srun: job 3395288 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.9962, 0.2550, 0.9564,  ..., 0.7113, 0.6635, 0.3831])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4832944869995117 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
             59.85 msec task-clock:u                     #    0.012 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,318      page-faults:u                    #   55.439 K/sec                     
        76,505,130      cycles:u                         #    1.278 GHz                         (43.11%)
        72,343,215      instructions:u                   #    0.95  insn per cycle              (62.06%)
   <not supported>      branches:u                                                            
           371,337      branch-misses:u                                                         (77.63%)
        33,969,604      L1-dcache-loads:u                #  567.579 M/sec                       (88.85%)
           472,023      L1-dcache-load-misses:u          #    1.39% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,728,689      L1-icache-loads:u                #  530.137 M/sec                     
           299,356      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        50,921,898      dTLB-loads:u                     #  850.825 M/sec                       (39.93%)
            90,542      dTLB-load-misses:u               #    0.18% of all dTLB cache accesses  (36.53%)
        11,563,883      iTLB-loads:u                     #  193.214 M/sec                       (20.26%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       4.953668960 seconds time elapsed
      16.652653000 seconds user
      30.408692000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.9968, 0.7101, 0.9319,  ..., 0.2871, 0.7386, 0.8934])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.3799591064453125 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
           322,338      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,784,096      BR_RETIRED:u                                                          
       4.633544255 seconds time elapsed
      16.572749000 seconds user
      26.228349000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.3551, 0.8297, 0.9950,  ..., 0.9625, 0.7129, 0.2173])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.400240182876587 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
        27,270,404      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,607      ITLB_WALK:u                                                           
            17,981      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,751,047      L1D_TLB:u                                                             
       4.696092090 seconds time elapsed
      15.781810000 seconds user
      28.383624000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.3600, 0.0388, 0.5262,  ..., 0.5849, 0.3707, 0.1514])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4545772075653076 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
        30,620,441      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,139      L1I_CACHE_REFILL:u                                                    
           471,011      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,141,810      L1D_CACHE:u                                                           
       4.897499310 seconds time elapsed
      16.207163000 seconds user
      32.246890000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.1220, 0.8435, 0.7035,  ..., 0.2109, 0.0289, 0.0715])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.4200170040130615 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
           531,907      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           545,159      LL_CACHE_RD:u                                                         
           188,244      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,034      L2D_TLB_REFILL:u                                                      
           293,848      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,757,551      L2D_CACHE:u                                                           
       4.683262937 seconds time elapsed
      16.111909000 seconds user
      29.660483000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.json
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.44, 16.44, 16.44, 16.84, 16.72, 16.6, 16.72, 16.84, 16.68, 16.84], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 2.896674871444702, "power": [56.32, 68.24, 71.76, 59.48, 47.6, 48.76, 52.6], "power_after": [16.92, 17.0, 16.96, 16.8, 16.48, 16.52, 16.52, 16.52, 16.24, 16.36], "task clock (msec)": 56.47, "page faults": 3222, "cycles": 69105836, "instructions": 89065155, "branch mispredictions": 333669, "branches": 20078755, "ITLB accesses": 26015038, "ITLB misses": 5212, "DTLB misses": 17039, "DTLB accesses": 35296010, "L1I cache accesses": 31837486, "L1I cache misses": 293353, "L1D cache misses": 462358, "L1D cache accesses": 33478540, "LL cache misses": 546516, "LL cache accesses": 559865, "L2D TLB accesses": 190400, "L2D TLB misses": 23787, "L2D cache misses": 307032, "L2D cache accesses": 1768186, "instructions per cycle": 1.288822480926213, "branch miss rate": 0.016618012421586895, "ITLB miss rate": 0.00020034566161310238, "DTLB miss rate": 0.00048274578344691083, "L2D TLB miss rate": 0.12493172268907562, "L1I cache miss rate": 0.009214075508348869, "L1D cache miss rate": 0.013810578358554464, "L2D cache miss rate": 0.17364236567872385, "LL cache miss rate": 0.9761567520741607}
--- a/pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.output
+++ b/pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.output
@ -1,158 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395282 queued and waiting for resources
 srun: job 3395282 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.0302, 0.1334, 0.4142,  ..., 0.9516, 0.6030, 0.3883])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 2.790724277496338 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
             56.47 msec task-clock:u                     #    0.009 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,222      page-faults:u                    #   57.061 K/sec                     
        69,105,836      cycles:u                         #    1.224 GHz                         (53.55%)
        89,065,155      instructions:u                   #    1.29  insn per cycle              (92.79%)
   <not supported>      branches:u                                                            
           367,525      branch-misses:u                                                       
        32,122,654      L1-dcache-loads:u                #  568.886 M/sec                     
           467,921      L1-dcache-load-misses:u          #    1.46% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,765,438      L1-icache-loads:u                #  544.850 M/sec                     
           289,327      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        24,642,710      dTLB-loads:u                     #  436.418 M/sec                       (11.11%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.334250152 seconds time elapsed
      32.099712000 seconds user
     240.206702000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.6147, 0.4171, 0.2258,  ..., 0.0253, 0.8932, 0.8040])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 2.092158079147339 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
           333,669      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,078,755      BR_RETIRED:u                                                          
       5.557038624 seconds time elapsed
      29.074016000 seconds user
     186.372846000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.0146, 0.2151, 0.1948,  ..., 0.7633, 0.4329, 0.7106])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 3.1269772052764893 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
        26,015,038      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,212      ITLB_WALK:u                                                           
            17,039      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,296,010      L1D_TLB:u                                                             
       6.550798214 seconds time elapsed
      36.334689000 seconds user
     263.614426000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.1810, 0.5208, 0.0542,  ..., 0.6108, 0.4905, 0.8918])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 1.9065814018249512 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
        31,837,486      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           293,353      L1I_CACHE_REFILL:u                                                    
           462,358      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,478,540      L1D_CACHE:u                                                           
       5.319975004 seconds time elapsed
      26.918342000 seconds user
     175.603919000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,    10,    10,  ..., 88328, 88328, 88328]),
       col_indices=tensor([    1,     2,     3,  ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
 tensor([0.8456, 0.8302, 0.2078,  ..., 0.8155, 0.5148, 0.5853])
 Matrix: p2p-Gnutella30
 Shape: torch.Size([36682, 36682])
 NNZ: 88328
 Density: 6.564359899804003e-05
 Time: 3.8523874282836914 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
           546,516      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           559,865      LL_CACHE_RD:u                                                         
           190,400      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,787      L2D_TLB_REFILL:u                                                      
           307,032      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,768,186      L2D_CACHE:u                                                           
       7.266305868 seconds time elapsed
      37.085321000 seconds user
     320.780766000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_ri2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_ri2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.6, 16.64, 17.04, 17.08, 16.92, 17.24, 16.88, 16.36, 16.4, 16.4], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 2.970583200454712, "power": [23.04, 23.28, 23.76, 24.12, 21.4, 26.28, 26.36], "power_after": [16.16, 16.16, 16.52, 16.48, 16.52, 16.44, 16.36, 16.48, 16.76, 16.6], "task clock (msec)": 52.61, "page faults": 3292, "cycles": 42915672, "instructions": 71002596, "branch mispredictions": 344300, "branches": 20224759, "ITLB accesses": 26039851, "ITLB misses": 5035, "DTLB misses": 16402, "DTLB accesses": 34820806, "L1I cache accesses": 31878105, "L1I cache misses": 299057, "L1D cache misses": 471869, "L1D cache accesses": 33450518, "LL cache misses": 530093, "LL cache accesses": 551126, "L2D TLB accesses": 188315, "L2D TLB misses": 22856, "L2D cache misses": 299885, "L2D cache accesses": 1763155, "instructions per cycle": 1.6544677664607, "branch miss rate": 0.01702368863826758, "ITLB miss rate": 0.00019335748119296073, "DTLB miss rate": 0.0004710402165877493, "L2D TLB miss rate": 0.12137110692191275, "L1I cache miss rate": 0.009381266546427399, "L1D cache miss rate": 0.014106478111938357, "L2D cache miss rate": 0.1700843090936418, "LL cache miss rate": 0.9618363132931489}
--- a/pytorch/output_HPC/altra_10_30_ri2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_ri2010_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395268 queued and waiting for resources
 srun: job 3395268 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.4029, 0.5373, 0.8376,  ..., 0.9299, 0.3127, 0.4778])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9858975410461426 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
             52.61 msec task-clock:u                     #    0.008 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,292      page-faults:u                    #   62.576 K/sec                     
        42,915,672      cycles:u                         #    0.816 GHz                         (55.04%)
        71,002,596      instructions:u                   #    1.65  insn per cycle              (81.89%)
   <not supported>      branches:u                                                            
           369,793      branch-misses:u                                                       
        33,163,106      L1-dcache-loads:u                #  630.381 M/sec                     
           471,533      L1-dcache-load-misses:u          #    1.42% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,640,002      L1-icache-loads:u                #  601.429 M/sec                     
           297,919      L1-icache-load-misses:u          #    0.94% of all L1-icache accesses 
        48,642,108      dTLB-loads:u                     #  924.614 M/sec                       (29.77%)
                 0      dTLB-load-misses:u                                                      (5.06%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       6.215745697 seconds time elapsed
      17.600216000 seconds user
      30.777524000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.8706, 0.3724, 0.8779,  ..., 0.4299, 0.0920, 0.4238])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9231789112091064 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
           344,300      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,224,759      BR_RETIRED:u                                                          
       6.297708483 seconds time elapsed
      17.546068000 seconds user
      26.920857000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.2988, 0.0160, 0.4360,  ..., 0.7543, 0.0919, 0.2321])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9701316356658936 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
        26,039,851      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,035      ITLB_WALK:u                                                           
            16,402      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        34,820,806      L1D_TLB:u                                                             
       6.227977259 seconds time elapsed
      17.937381000 seconds user
      30.196552000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.5797, 0.8992, 0.8317,  ..., 0.0283, 0.7124, 0.2690])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.968733072280884 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
        31,878,105      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           299,057      L1I_CACHE_REFILL:u                                                    
           471,869      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,450,518      L1D_CACHE:u                                                           
       6.278062824 seconds time elapsed
      17.822878000 seconds user
      27.932170000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      8,  ..., 125742, 125747,
                            125750]),
       col_indices=tensor([   25,    56,   662,  ..., 21738, 22279, 23882]),
       values=tensor([17171., 37318.,  5284.,  ..., 25993., 24918.,   803.]),
       size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
 tensor([0.0630, 0.5194, 0.8720,  ..., 0.9537, 0.3959, 0.5550])
 Matrix: ri2010
 Shape: torch.Size([25181, 25181])
 NNZ: 125750
 Density: 0.00019831796057928155
 Time: 2.9069995880126953 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
           530,093      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           551,126      LL_CACHE_RD:u                                                         
           188,315      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            22,856      L2D_TLB_REFILL:u                                                      
           299,885      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,763,155      L2D_CACHE:u                                                           
       6.075529293 seconds time elapsed
      17.073983000 seconds user
      27.811966000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_rma10_1000.json
+++ b/pytorch/output_HPC/altra_10_30_rma10_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.36, 30.64, 31.12, 24.52, 24.16, 23.12, 22.08, 21.28, 21.16, 20.88], "matrix": "rma10", "shape": [46835, 46835], "nnz": 2374001, "% density": 0.0010822805369125833, "time_s": 68.86891412734985, "power": [81.8, 81.32, 75.08, 63.48, 51.92, 51.96, 51.8, 65.0, 65.0, 75.12, 82.68, 82.32, 82.08, 82.76, 82.8, 83.6, 83.36, 83.08, 82.88, 83.0, 83.32, 83.32, 83.36, 84.64, 84.56, 84.24, 83.52, 83.4, 83.36, 83.36, 83.72, 84.16, 83.24, 82.76, 82.76, 82.96, 82.36, 82.24, 81.64, 81.6, 81.4, 81.6, 81.88, 82.32, 83.04, 83.48, 83.48, 84.32, 84.04, 84.32, 83.16, 82.44, 81.96, 81.4, 81.8, 82.08, 81.8, 81.84, 82.04, 82.04, 82.08, 82.44, 82.6, 82.84, 83.8, 84.24, 84.6, 85.4, 85.6, 86.0, 85.72, 85.36], "power_after": [21.96, 21.88, 21.96, 21.96, 22.0, 21.68, 21.44, 21.16, 21.04, 20.92], "task clock (msec)": 58.3, "page faults": 3281, "cycles": 81319364, "instructions": 90830397, "branch mispredictions": 342237, "branches": 20641135, "ITLB accesses": 27974213, "ITLB misses": 6660, "DTLB misses": 18441, "DTLB accesses": 37780346, "L1I cache accesses": 31166891, "L1I cache misses": 291301, "L1D cache misses": 477186, "L1D cache accesses": 32682323, "LL cache misses": 538552, "LL cache accesses": 552543, "L2D TLB accesses": 202351, "L2D TLB misses": 24178, "L2D cache misses": 298051, "L2D cache accesses": 1775481, "instructions per cycle": 1.1169590185186398, "branch miss rate": 0.01658033824205888, "ITLB miss rate": 0.00023807640272132053, "DTLB miss rate": 0.00048811093471722044, "L2D TLB miss rate": 0.11948544855226809, "L1I cache miss rate": 0.00934648887500521, "L1D cache miss rate": 0.014600736918241704, "L2D cache miss rate": 0.1678705657790762, "LL cache miss rate": 0.9746788937693537}
--- a/pytorch/output_HPC/altra_10_30_rma10_1000.output
+++ b/pytorch/output_HPC/altra_10_30_rma10_1000.output
@ -1,168 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395286 queued and waiting for resources
 srun: job 3395286 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,      17,      34,  ..., 2373939,
                            2373970, 2374001]),
       col_indices=tensor([    0,     1,     2,  ..., 46831, 46833, 46834]),
       values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04,  ...,
                       8.3378e+01,  2.5138e+00,  1.2184e+03]),
       size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
 tensor([0.4937, 0.5946, 0.4240,  ..., 0.9888, 0.5278, 0.9155])
 Matrix: rma10
 Shape: torch.Size([46835, 46835])
 NNZ: 2374001
 Density: 0.0010822805369125833
 Time: 52.320035219192505 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
             58.30 msec task-clock:u                     #    0.001 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,281      page-faults:u                    #   56.279 K/sec                     
        81,319,364      cycles:u                         #    1.395 GHz                         (62.38%)
        90,830,397      instructions:u                   #    1.12  insn per cycle              (94.62%)
   <not supported>      branches:u                                                            
           358,947      branch-misses:u                                                       
        32,561,141      L1-dcache-loads:u                #  558.523 M/sec                     
           477,147      L1-dcache-load-misses:u          #    1.47% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,044,361      L1-icache-loads:u                #  532.506 M/sec                     
           286,125      L1-icache-load-misses:u          #    0.92% of all L1-icache accesses 
        29,678,379      dTLB-loads:u                     #  509.075 M/sec                       (5.72%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      56.145511940 seconds time elapsed
     269.541895000 seconds user
    3993.928150000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,      17,      34,  ..., 2373939,
                            2373970, 2374001]),
       col_indices=tensor([    0,     1,     2,  ..., 46831, 46833, 46834]),
       values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04,  ...,
                       8.3378e+01,  2.5138e+00,  1.2184e+03]),
       size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
 tensor([0.2401, 0.9608, 0.9686,  ..., 0.2643, 0.1097, 0.0695])
 Matrix: rma10
 Shape: torch.Size([46835, 46835])
 NNZ: 2374001
 Density: 0.0010822805369125833
 Time: 65.29214668273926 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
           342,237      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,641,135      BR_RETIRED:u                                                          
      69.131216008 seconds time elapsed
     324.908899000 seconds user
    4969.165543000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,      17,      34,  ..., 2373939,
                            2373970, 2374001]),
       col_indices=tensor([    0,     1,     2,  ..., 46831, 46833, 46834]),
       values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04,  ...,
                       8.3378e+01,  2.5138e+00,  1.2184e+03]),
       size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
 tensor([0.5237, 0.3525, 0.2809,  ..., 0.8641, 0.3894, 0.4198])
 Matrix: rma10
 Shape: torch.Size([46835, 46835])
 NNZ: 2374001
 Density: 0.0010822805369125833
 Time: 66.05637407302856 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
        27,974,213      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,660      ITLB_WALK:u                                                           
            18,441      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        37,780,346      L1D_TLB:u                                                             
      69.880637029 seconds time elapsed
     320.759259000 seconds user
    5037.255757000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,      17,      34,  ..., 2373939,
                            2373970, 2374001]),
       col_indices=tensor([    0,     1,     2,  ..., 46831, 46833, 46834]),
       values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04,  ...,
                       8.3378e+01,  2.5138e+00,  1.2184e+03]),
       size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
 tensor([0.8185, 0.4278, 0.7553,  ..., 0.5022, 0.1058, 0.0783])
 Matrix: rma10
 Shape: torch.Size([46835, 46835])
 NNZ: 2374001
 Density: 0.0010822805369125833
 Time: 63.55399775505066 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
        31,166,891      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           291,301      L1I_CACHE_REFILL:u                                                    
           477,186      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,682,323      L1D_CACHE:u                                                           
      67.517251505 seconds time elapsed
     319.301754000 seconds user
    4839.755901000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,      17,      34,  ..., 2373939,
                            2373970, 2374001]),
       col_indices=tensor([    0,     1,     2,  ..., 46831, 46833, 46834]),
       values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04,  ...,
                       8.3378e+01,  2.5138e+00,  1.2184e+03]),
       size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
 tensor([0.8358, 0.0086, 0.1779,  ..., 0.6354, 0.7134, 0.5745])
 Matrix: rma10
 Shape: torch.Size([46835, 46835])
 NNZ: 2374001
 Density: 0.0010822805369125833
 Time: 63.55393171310425 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
           538,552      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           552,543      LL_CACHE_RD:u                                                         
           202,351      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,178      L2D_TLB_REFILL:u                                                      
           298,051      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,775,481      L2D_CACHE:u                                                           
      67.538674790 seconds time elapsed
     321.810383000 seconds user
    4836.154538000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_tn2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_tn2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.56, 23.12, 22.32, 22.28, 22.28, 21.96, 21.76, 21.72, 21.88, 21.84], "matrix": "tn2010", "shape": [240116, 240116], "nnz": 1193966, "% density": 2.070855328296721e-05, "time_s": 16.282614707946777, "power": [85.48, 85.84, 79.28, 70.16, 55.52, 49.48, 49.48, 60.48, 76.32, 88.88, 91.0, 91.0, 90.68, 88.32, 86.92, 86.4, 88.08, 86.8, 87.32, 87.8], "power_after": [21.68, 21.48, 21.44, 21.36, 21.52, 21.4, 21.4, 21.32, 21.2, 21.04], "task clock (msec)": 68.11, "page faults": 3486, "cycles": 70427921, "instructions": 85638293, "branch mispredictions": 333780, "branches": 19402540, "ITLB accesses": 26935483, "ITLB misses": 5639, "DTLB misses": 16688, "DTLB accesses": 36421540, "L1I cache accesses": 33029213, "L1I cache misses": 302558, "L1D cache misses": 481598, "L1D cache accesses": 34668833, "LL cache misses": 551659, "LL cache accesses": 564579, "L2D TLB accesses": 188346, "L2D TLB misses": 24479, "L2D cache misses": 311796, "L2D cache accesses": 1767924, "instructions per cycle": 1.215970765344614, "branch miss rate": 0.017202902300420462, "ITLB miss rate": 0.0002093521025778524, "DTLB miss rate": 0.00045819040051573877, "L2D TLB miss rate": 0.12996824992301403, "L1I cache miss rate": 0.00916031514284037, "L1D cache miss rate": 0.013891381922200843, "L2D cache miss rate": 0.17636278482559206, "LL cache miss rate": 0.9771156915152707}
--- a/pytorch/output_HPC/altra_10_30_tn2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_tn2010_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395275 queued and waiting for resources
 srun: job 3395275 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.2511, 0.1104, 0.8257,  ..., 0.4006, 0.1534, 0.0009])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 12.89618182182312 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
             68.11 msec task-clock:u                     #    0.004 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,486      page-faults:u                    #   51.182 K/sec                     
        70,427,921      cycles:u                         #    1.034 GHz                         (46.81%)
        85,638,293      instructions:u                   #    1.22  insn per cycle              (74.19%)
   <not supported>      branches:u                                                            
           356,748      branch-misses:u                                                         (89.74%)
        34,044,117      L1-dcache-loads:u                #  499.843 M/sec                     
           481,076      L1-dcache-load-misses:u          #    1.41% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        32,553,977      L1-icache-loads:u                #  477.965 M/sec                     
           309,127      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        41,245,978      dTLB-loads:u                     #  605.583 M/sec                       (33.60%)
           127,770      dTLB-load-misses:u               #    0.31% of all dTLB cache accesses  (15.43%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      16.626373547 seconds time elapsed
     101.073288000 seconds user
     996.348020000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.0138, 0.1394, 0.6273,  ..., 0.8681, 0.0444, 0.2705])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 14.216531038284302 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
           333,780      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,402,540      BR_RETIRED:u                                                          
      17.985093703 seconds time elapsed
     106.904608000 seconds user
    1091.172933000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.6279, 0.1696, 0.6937,  ..., 0.4267, 0.4847, 0.6447])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 12.462992429733276 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
        26,935,483      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             5,639      ITLB_WALK:u                                                           
            16,688      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,421,540      L1D_TLB:u                                                             
      15.984498303 seconds time elapsed
      95.195897000 seconds user
     962.237122000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.4060, 0.4915, 0.8557,  ..., 0.9902, 0.0548, 0.2450])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 9.298198223114014 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
        33,029,213      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           302,558      L1I_CACHE_REFILL:u                                                    
           481,598      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        34,668,833      L1D_CACHE:u                                                           
      12.985459942 seconds time elapsed
      78.950722000 seconds user
     727.126874000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.0166, 0.6910, 0.0311,  ..., 0.6156, 0.5689, 0.9849])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 12.012693405151367 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
           551,659      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           564,579      LL_CACHE_RD:u                                                         
           188,346      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,479      L2D_TLB_REFILL:u                                                      
           311,796      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,767,924      L2D_CACHE:u                                                           
      15.749851583 seconds time elapsed
      98.008506000 seconds user
     926.127594000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_ut2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_ut2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [93.52, 87.76, 71.6, 58.32, 39.6, 26.24, 26.24, 22.16, 22.24, 22.24], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 8.478580713272095, "power": [89.68, 88.92, 80.84, 68.96, 56.64, 54.52, 55.88, 70.44, 85.36, 85.36, 98.2, 96.52], "power_after": [21.24, 21.32, 21.16, 21.44, 21.68, 21.76, 21.72, 22.0, 21.72, 21.72], "task clock (msec)": 53.84, "page faults": 3291, "cycles": 66389970, "instructions": 74935543, "branch mispredictions": 330515, "branches": 19475058, "ITLB accesses": 26125490, "ITLB misses": 6431, "DTLB misses": 13728, "DTLB accesses": 35274185, "L1I cache accesses": 30428652, "L1I cache misses": 288897, "L1D cache misses": 475615, "L1D cache accesses": 31855716, "LL cache misses": 553829, "LL cache accesses": 574192, "L2D TLB accesses": 181148, "L2D TLB misses": 23202, "L2D cache misses": 307806, "L2D cache accesses": 1767037, "instructions per cycle": 1.1287178319255153, "branch miss rate": 0.016971194642911976, "ITLB miss rate": 0.00024615806248992844, "DTLB miss rate": 0.0003891797925309968, "L2D TLB miss rate": 0.12808311435952924, "L1I cache miss rate": 0.009494242465949527, "L1D cache miss rate": 0.014930287550278261, "L2D cache miss rate": 0.17419329646181717, "LL cache miss rate": 0.9645362526820297}
--- a/pytorch/output_HPC/altra_10_30_ut2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_ut2010_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395284 queued and waiting for resources
 srun: job 3395284 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.1487, 0.4275, 0.9471,  ..., 0.3851, 0.0801, 0.4295])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 8.772023677825928 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
             53.84 msec task-clock:u                     #    0.004 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,291      page-faults:u                    #   61.127 K/sec                     
        66,389,970      cycles:u                         #    1.233 GHz                         (67.37%)
        74,935,543      instructions:u                   #    1.13  insn per cycle              (83.30%)
   <not supported>      branches:u                                                            
           365,846      branch-misses:u                                                       
        31,684,169      L1-dcache-loads:u                #  588.504 M/sec                     
           462,583      L1-dcache-load-misses:u          #    1.46% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,260,337      L1-icache-loads:u                #  562.058 M/sec                     
           288,196      L1-icache-load-misses:u          #    0.95% of all L1-icache accesses 
        57,721,334      dTLB-loads:u                     #    1.072 G/sec                       (18.54%)
     <not counted>      dTLB-load-misses:u                                                      (0.00%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      12.179628060 seconds time elapsed
      68.068275000 seconds user
     690.223452000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.9553, 0.9401, 0.7135,  ..., 0.8664, 0.5986, 0.8459])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 8.94040060043335 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
           330,515      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,475,058      BR_RETIRED:u                                                          
      12.428594105 seconds time elapsed
      67.011228000 seconds user
     709.528404000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.6289, 0.8171, 0.1590,  ..., 0.7515, 0.5400, 0.3693])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 14.403366804122925 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
        26,125,490      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,431      ITLB_WALK:u                                                           
            13,728      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        35,274,185      L1D_TLB:u                                                             
      18.084508405 seconds time elapsed
      95.162133000 seconds user
    1117.716009000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.8824, 0.0692, 0.7225,  ..., 0.8736, 0.6854, 0.7514])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 9.64679503440857 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
        30,428,652      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           288,897      L1I_CACHE_REFILL:u                                                    
           475,615      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        31,855,716      L1D_CACHE:u                                                           
      13.170070008 seconds time elapsed
      68.362809000 seconds user
     761.360459000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.9552, 0.0509, 0.7738,  ..., 0.7722, 0.4417, 0.7772])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 12.372079133987427 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
           553,829      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           574,192      LL_CACHE_RD:u                                                         
           181,148      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,202      L2D_TLB_REFILL:u                                                      
           307,806      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,767,037      L2D_CACHE:u                                                           
      15.923392394 seconds time elapsed
      83.307253000 seconds user
     958.949992000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_va2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_va2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [32.08, 31.8, 28.68, 27.6, 22.96, 22.08, 21.0, 20.84, 20.68, 20.72], "matrix": "va2010", "shape": [285762, 285762], "nnz": 1402128, "% density": 1.717033263003816e-05, "time_s": 14.632386922836304, "power": [85.16, 83.48, 76.96, 67.44, 54.04, 51.4, 54.24, 66.76, 83.2, 96.44, 96.44, 95.84, 94.24, 92.36, 91.2, 89.32, 87.48, 88.68, 88.24], "power_after": [21.12, 21.0, 21.16, 21.4, 21.32, 21.36, 21.36, 21.12, 20.76, 20.84], "task clock (msec)": 57.32, "page faults": 3280, "cycles": 39497791, "instructions": 64385555, "branch mispredictions": 332792, "branches": 19983954, "ITLB accesses": 27156853, "ITLB misses": 6466, "DTLB misses": 18244, "DTLB accesses": 36466301, "L1I cache accesses": 30929971, "L1I cache misses": 291811, "L1D cache misses": 473063, "L1D cache accesses": 32462905, "LL cache misses": 544953, "LL cache accesses": 565172, "L2D TLB accesses": 183225, "L2D TLB misses": 23924, "L2D cache misses": 301362, "L2D cache accesses": 1756590, "instructions per cycle": 1.6301052127193645, "branch miss rate": 0.01665296067034582, "ITLB miss rate": 0.00023809828038616994, "DTLB miss rate": 0.000500297521264907, "L2D TLB miss rate": 0.13057170145995362, "L1I cache miss rate": 0.009434570759862659, "L1D cache miss rate": 0.014572417348354991, "L2D cache miss rate": 0.17156080815671274, "LL cache miss rate": 0.964225050073252}
--- a/pytorch/output_HPC/altra_10_30_va2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_va2010_1000.output
@ -1,173 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395277 queued and waiting for resources
 srun: job 3395277 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.2920, 0.3583, 0.0598,  ..., 0.2208, 0.1741, 0.4955])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 14.792448997497559 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
             57.32 msec task-clock:u                     #    0.003 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,280      page-faults:u                    #   57.220 K/sec                     
        39,497,791      cycles:u                         #    0.689 GHz                         (54.25%)
        64,385,555      instructions:u                   #    1.63  insn per cycle              (81.24%)
   <not supported>      branches:u                                                            
           362,674      branch-misses:u                                                       
        33,532,520      L1-dcache-loads:u                #  584.977 M/sec                     
           481,355      L1-dcache-load-misses:u          #    1.44% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        31,924,348      L1-icache-loads:u                #  556.922 M/sec                     
           296,637      L1-icache-load-misses:u          #    0.93% of all L1-icache accesses 
        43,420,143      dTLB-loads:u                     #  757.467 M/sec                       (40.22%)
            30,923      dTLB-load-misses:u               #    0.07% of all dTLB cache accesses  (19.05%)
     <not counted>      iTLB-loads:u                                                            (0.00%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
      18.678937115 seconds time elapsed
     112.979167000 seconds user
    1135.785668000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.7703, 0.7481, 0.5351,  ..., 0.4663, 0.6089, 0.3679])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 14.130552530288696 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
           332,792      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        19,983,954      BR_RETIRED:u                                                          
      17.923156218 seconds time elapsed
     107.999690000 seconds user
    1091.659165000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.8850, 0.1406, 0.0617,  ..., 0.4325, 0.2725, 0.9292])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 13.32977032661438 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
        27,156,853      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,466      ITLB_WALK:u                                                           
            18,244      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        36,466,301      L1D_TLB:u                                                             
      17.186572497 seconds time elapsed
     104.940187000 seconds user
    1032.527271000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.6289, 0.0403, 0.9207,  ..., 0.0183, 0.4807, 0.7504])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 13.460915803909302 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
        30,929,971      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           291,811      L1I_CACHE_REFILL:u                                                    
           473,063      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        32,462,905      L1D_CACHE:u                                                           
      17.219448483 seconds time elapsed
     100.274467000 seconds user
    1045.271682000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.6412, 0.1151, 0.5075,  ..., 0.9251, 0.9288, 0.3560])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 15.992860555648804 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
           544,953      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           565,172      LL_CACHE_RD:u                                                         
           183,225      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            23,924      L2D_TLB_REFILL:u                                                      
           301,362      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,756,590      L2D_CACHE:u                                                           
      19.884223259 seconds time elapsed
     113.211516000 seconds user
    1230.525804000 seconds sys
--- a/pytorch/output_HPC/altra_10_30_vt2010_1000.json
+++ b/pytorch/output_HPC/altra_10_30_vt2010_1000.json
@ -1 +0,0 @@
 {"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [77.2, 64.12, 64.12, 48.92, 36.2, 21.72, 21.88, 22.36, 22.36, 22.44], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.5892834663391113, "power": [33.44, 30.68, 31.08, 26.96, 26.88, 32.48, 32.24], "power_after": [21.24, 21.24, 21.36, 21.36, 21.2, 21.04, 20.84, 20.72, 20.72, 20.56], "task clock (msec)": 55.26, "page faults": 3297, "cycles": 49276491, "instructions": 64763517, "branch mispredictions": 340611, "branches": 20355849, "ITLB accesses": 27946393, "ITLB misses": 6805, "DTLB misses": 17877, "DTLB accesses": 38226912, "L1I cache accesses": 31946141, "L1I cache misses": 295259, "L1D cache misses": 468136, "L1D cache accesses": 33395666, "LL cache misses": 527109, "LL cache accesses": 540409, "L2D TLB accesses": 192519, "L2D TLB misses": 24204, "L2D cache misses": 290933, "L2D cache accesses": 1743452, "instructions per cycle": 1.3142883286880147, "branch miss rate": 0.016732831924622747, "ITLB miss rate": 0.00024350190738389746, "DTLB miss rate": 0.0004676548291423592, "L2D TLB miss rate": 0.1257226559456469, "L1I cache miss rate": 0.009242399574959616, "L1D cache miss rate": 0.014017866869311724, "L2D cache miss rate": 0.16687181522634406, "LL cache miss rate": 0.9753890109158063}
--- a/pytorch/output_HPC/altra_10_30_vt2010_1000.output
+++ b/pytorch/output_HPC/altra_10_30_vt2010_1000.output
@ -1,163 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3395285 queued and waiting for resources
 srun: job 3395285 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.1179, 0.2288, 0.5357,  ..., 0.4845, 0.6375, 0.4513])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.628732681274414 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
             55.26 msec task-clock:u                     #    0.008 CPUs utilized             
                 0      context-switches:u               #    0.000 /sec                      
                 0      cpu-migrations:u                 #    0.000 /sec                      
             3,297      page-faults:u                    #   59.661 K/sec                     
        49,276,491      cycles:u                         #    0.892 GHz                         (31.65%)
        64,763,517      instructions:u                   #    1.31  insn per cycle              (57.73%)
   <not supported>      branches:u                                                            
           357,693      branch-misses:u                                                         (76.18%)
        32,426,852      L1-dcache-loads:u                #  586.784 M/sec                       (88.36%)
           469,495      L1-dcache-load-misses:u          #    1.45% of all L1-dcache accesses 
   <not supported>      LLC-loads:u                                                           
   <not supported>      LLC-load-misses:u                                                     
        30,941,957      L1-icache-loads:u                #  559.914 M/sec                     
           279,512      L1-icache-load-misses:u          #    0.90% of all L1-icache accesses 
        47,128,547      dTLB-loads:u                     #  852.821 M/sec                       (46.73%)
           108,931      dTLB-load-misses:u               #    0.23% of all dTLB cache accesses  (32.30%)
        14,189,608      iTLB-loads:u                     #  256.770 M/sec                       (19.86%)
     <not counted>      iTLB-load-misses:u                                                      (0.00%)
       7.117399121 seconds time elapsed
      18.404618000 seconds user
      29.532104000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.7544, 0.0071, 0.0491,  ..., 0.7236, 0.5537, 0.4901])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.6322426795959473 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
           340,611      BR_MIS_PRED_RETIRED:u            #      0.0 per branch  branch_misprediction_ratio
        20,355,849      BR_RETIRED:u                                                          
       7.112879848 seconds time elapsed
      18.362004000 seconds user
      29.398677000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.7651, 0.6605, 0.7128,  ..., 0.7434, 0.6656, 0.3987])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.7933311462402344 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
        27,946,393      L1I_TLB:u                        #      0.0 per TLB access  itlb_walk_ratio
             6,805      ITLB_WALK:u                                                           
            17,877      DTLB_WALK:u                      #      0.0 per TLB access  dtlb_walk_ratio
        38,226,912      L1D_TLB:u                                                             
       7.235266934 seconds time elapsed
      18.566568000 seconds user
      29.759130000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.3319, 0.1241, 0.4830,  ..., 0.5188, 0.8684, 0.1488])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.662006378173828 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
        31,946,141      L1I_CACHE:u                      #      0.0 per cache access  l1i_cache_miss_ratio
           295,259      L1I_CACHE_REFILL:u                                                    
           468,136      L1D_CACHE_REFILL:u               #      0.0 per cache access  l1d_cache_miss_ratio
        33,395,666      L1D_CACHE:u                                                           
       7.187008251 seconds time elapsed
      18.275672000 seconds user
      30.724065000 seconds sys
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      4,      7,  ..., 155588, 155592,
                            155598]),
       col_indices=tensor([  131,   561,   996,  ..., 32237, 32238, 32570]),
       values=tensor([79040.,  7820., 15136.,  ...,  2828., 17986.,  2482.]),
       size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
 tensor([0.0055, 0.7774, 0.9046,  ..., 0.5143, 0.0678, 0.4725])
 Matrix: vt2010
 Shape: torch.Size([32580, 32580])
 NNZ: 155598
 Density: 0.00014658915806621921
 Time: 3.616023063659668 seconds
 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
           527,109      LL_CACHE_MISS_RD:u               #      1.0 per cache access  ll_cache_read_miss_ratio
           540,409      LL_CACHE_RD:u                                                         
           192,519      L2D_TLB:u                        #      0.1 per TLB access  l2_tlb_miss_ratio
            24,204      L2D_TLB_REFILL:u                                                      
           290,933      L2D_CACHE_REFILL:u               #      0.2 per cache access  l2_cache_miss_ratio
         1,743,452      L2D_CACHE:u                                                           
       7.030605378 seconds time elapsed
      18.274323000 seconds user
      28.779020000 seconds sys
--- a/pytorch/output_cpu/altra_10_10_ASIC_680k_10000.json
+++ b/pytorch/output_cpu/altra_10_10_ASIC_680k_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ASIC_680k", "MATRIX_SHAPE": [682862, 682862], "MATRIX_SIZE": 466300511044, "MATRIX_NNZ": 3871773, "MATRIX_DENSITY": 8.303171256088674e-06, "TIME_S": 11.77456283569336, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.36, 20.44, 20.48, 20.72, 20.8, 21.0, 21.32, 21.32, 21.28, 21.08], "POWER": [92.0, 91.8, 78.72, 66.68, 51.2, 46.6, 53.36, 53.36, 70.48, 90.16, 100.04, 103.68, 98.2, 95.64, 97.16, 101.4], "JOULES": 938.4206715393068, "POWER_AFTER": [20.96, 20.76, 20.76, 21.08, 21.24, 21.16, 21.28, 21.2, 21.0, 21.08]}
--- a/pytorch/output_cpu/altra_10_10_Oregon-2_10000.json
+++ b/pytorch/output_cpu/altra_10_10_Oregon-2_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "Oregon-2", "MATRIX_SHAPE": [11806, 11806], "MATRIX_SIZE": 139381636, "MATRIX_NNZ": 65460, "MATRIX_DENSITY": 0.0004696458003979807, "TIME_S": 0.9880795478820801, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.04, 21.12, 21.2, 21.12, 21.04, 20.96, 20.92, 20.88, 21.16, 21.08], "POWER": [25.92, 42.32, 42.32, 45.44, 45.4], "JOULES": 44.85881147384644, "POWER_AFTER": [20.72, 20.72, 20.84, 20.84, 20.84, 20.96, 20.92, 20.6, 20.68, 20.84]}
--- a/pytorch/output_cpu/altra_10_10_Oregon-2_10000.output
+++ b/pytorch/output_cpu/altra_10_10_Oregon-2_10000.output
@ -1,23 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3471014 queued and waiting for resources
 srun: job 3471014 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,   583,   584,  ..., 65459, 65460, 65460]),
       col_indices=tensor([   2,   23,   27,  ..., 3324,  958,  841]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(11806, 11806),
       nnz=65460, layout=torch.sparse_csr)
 tensor([0.2158, 0.5422, 0.9585,  ..., 0.6377, 0.8158, 0.5743])
 Matrix: Oregon-2
 Shape: torch.Size([11806, 11806])
 Size: 139381636
 NNZ: 65460
 Density: 0.0004696458003979807
 Time: 0.9880795478820801 seconds
--- a/pytorch/output_cpu/altra_10_10_as-caida_10000.json
+++ b/pytorch/output_cpu/altra_10_10_as-caida_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "as-caida", "MATRIX_SHAPE": [31379, 31379], "MATRIX_SIZE": 984641641, "MATRIX_NNZ": 106762, "MATRIX_DENSITY": 0.00010842726485909405, "TIME_S": 1.066300630569458, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.64, 20.48, 20.68, 20.64, 20.32, 20.32, 20.4, 20.2, 20.52, 20.52], "POWER": [26.32, 39.88, 50.16, 50.64, 50.24], "JOULES": 53.97094367980957, "POWER_AFTER": [20.28, 20.4, 20.2, 20.32, 20.32, 20.4, 20.48, 20.28, 20.28, 20.44]}
--- a/pytorch/output_cpu/altra_10_10_as-caida_10000.output
+++ b/pytorch/output_cpu/altra_10_10_as-caida_10000.output
@ -1,24 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470988 queued and waiting for resources
 srun: job 3470988 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      0,      0,  ..., 106761, 106761,
                            106762]),
       col_indices=tensor([  106,   329,  1040,  ...,   155,   160, 12170]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(31379, 31379),
       nnz=106762, layout=torch.sparse_csr)
 tensor([0.8877, 0.6518, 0.0601,  ..., 0.0372, 0.4806, 0.8853])
 Matrix: as-caida
 Shape: torch.Size([31379, 31379])
 Size: 984641641
 NNZ: 106762
 Density: 0.00010842726485909405
 Time: 1.066300630569458 seconds
--- a/pytorch/output_cpu/altra_10_10_dc2_10000.json
+++ b/pytorch/output_cpu/altra_10_10_dc2_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "dc2", "MATRIX_SHAPE": [116835, 116835], "MATRIX_SIZE": 13650417225, "MATRIX_NNZ": 766396, "MATRIX_DENSITY": 5.614451099680581e-05, "TIME_S": 3.0164122581481934, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.44, 20.72, 20.72, 21.0, 20.84, 21.08, 20.88, 20.8, 20.8, 20.88], "POWER": [64.4, 79.8, 83.24, 75.76, 58.2, 58.2, 56.64, 60.64, 75.88, 93.68], "JOULES": 194.69750034332276, "POWER_AFTER": [21.12, 21.0, 21.12, 20.88, 20.88, 20.84, 20.96, 20.92, 20.88, 20.8]}
--- a/pytorch/output_cpu/altra_10_10_dc2_10000.output
+++ b/pytorch/output_cpu/altra_10_10_dc2_10000.output
@ -1,26 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470982 queued and waiting for resources
 srun: job 3470982 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,      2,  ..., 766390, 766394,
                            766396]),
       col_indices=tensor([     0,      1,      2,  ..., 116833,     89,
                           116834]),
       values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00,  ...,
                       1.0331e+01, -1.0000e-03,  1.0000e-03]),
       size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
 tensor([0.3305, 0.9342, 0.6954,  ..., 0.1999, 0.9064, 0.6304])
 Matrix: dc2
 Shape: torch.Size([116835, 116835])
 Size: 13650417225
 NNZ: 766396
 Density: 5.614451099680581e-05
 Time: 3.0164122581481934 seconds
--- a/pytorch/output_cpu/altra_10_10_de2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_de2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "de2010", "MATRIX_SHAPE": [24115, 24115], "MATRIX_SIZE": 581533225, "MATRIX_NNZ": 116056, "MATRIX_DENSITY": 0.0001995689928120616, "TIME_S": 1.1378686428070068, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.0, 20.88, 21.04, 20.8, 20.8, 20.44, 20.64, 20.48, 20.28, 20.16], "POWER": [22.84, 39.8, 49.48, 50.32, 50.28], "JOULES": 57.25203536033631, "POWER_AFTER": [20.68, 20.44, 20.68, 20.68, 20.56, 20.88, 20.92, 20.88, 21.0, 20.96]}
--- a/pytorch/output_cpu/altra_10_10_email-Enron_10000.json
+++ b/pytorch/output_cpu/altra_10_10_email-Enron_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "email-Enron", "MATRIX_SHAPE": [36692, 36692], "MATRIX_SIZE": 1346302864, "MATRIX_NNZ": 367662, "MATRIX_DENSITY": 0.0002730901120626302, "TIME_S": 1.3314027786254883, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.8, 20.64, 20.6, 20.6, 20.48, 20.8, 20.72, 20.72, 20.92, 20.92], "POWER": [28.4, 43.96, 54.4, 55.28, 55.08], "JOULES": 73.5336650466919, "POWER_AFTER": [20.88, 20.8, 20.8, 20.8, 20.64, 20.64, 20.64, 20.48, 20.52, 20.72]}
--- a/pytorch/output_cpu/altra_10_10_email-Enron_10000.output
+++ b/pytorch/output_cpu/altra_10_10_email-Enron_10000.output
@ -1,24 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470985 queued and waiting for resources
 srun: job 3470985 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      1,     71,  ..., 367660, 367661,
                            367662]),
       col_indices=tensor([    1,     0,     2,  ..., 36690, 36689,  8203]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(36692, 36692),
       nnz=367662, layout=torch.sparse_csr)
 tensor([0.7107, 0.7540, 0.8321,  ..., 0.9503, 0.7781, 0.9277])
 Matrix: email-Enron
 Shape: torch.Size([36692, 36692])
 Size: 1346302864
 NNZ: 367662
 Density: 0.0002730901120626302
 Time: 1.3314027786254883 seconds
--- a/pytorch/output_cpu/altra_10_10_fl2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_fl2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "fl2010", "MATRIX_SHAPE": [484481, 484481], "MATRIX_SIZE": 234721839361, "MATRIX_NNZ": 2346294, "MATRIX_DENSITY": 9.99606174861054e-06, "TIME_S": 2.924255609512329, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.8, 20.88, 20.72, 20.64, 20.56, 20.92, 20.92, 21.0, 20.96, 20.84], "POWER": [73.32, 93.24, 93.64, 82.2, 61.36, 61.36, 58.0], "JOULES": 176.3268253517151, "POWER_AFTER": [20.76, 20.56, 20.76, 20.72, 20.76, 20.76, 20.76, 20.88, 20.68, 20.68]}
--- a/pytorch/output_cpu/altra_10_10_ga2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_ga2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ga2010", "MATRIX_SHAPE": [291086, 291086], "MATRIX_SIZE": 84731059396, "MATRIX_NNZ": 1418056, "MATRIX_DENSITY": 1.6735964475229304e-05, "TIME_S": 2.341104745864868, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.32, 20.28, 20.24, 20.44, 20.52, 20.8, 20.64, 20.68, 20.6, 20.36], "POWER": [33.84, 53.08, 66.2, 66.52, 67.36, 59.0], "JOULES": 154.00518000602722, "POWER_AFTER": [20.28, 20.32, 20.52, 20.6, 20.6, 20.84, 21.12, 20.96, 20.76, 20.8]}
--- a/pytorch/output_cpu/altra_10_10_ga2010_10000.output
+++ b/pytorch/output_cpu/altra_10_10_ga2010_10000.output
@ -1,25 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470989 queued and waiting for resources
 srun: job 3470989 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       3,      10,  ..., 1418047,
                            1418054, 1418056]),
       col_indices=tensor([  1566,   1871,   1997,  ..., 291064, 289820,
                           290176]),
       values=tensor([18760., 17851., 18847.,  ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
 tensor([0.0746, 0.8150, 0.2560,  ..., 0.7929, 0.2552, 0.7733])
 Matrix: ga2010
 Shape: torch.Size([291086, 291086])
 Size: 84731059396
 NNZ: 1418056
 Density: 1.6735964475229304e-05
 Time: 2.341104745864868 seconds
--- a/pytorch/output_cpu/altra_10_10_mac_econ_fwd500_10000.json
+++ b/pytorch/output_cpu/altra_10_10_mac_econ_fwd500_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "mac_econ_fwd500", "MATRIX_SHAPE": [206500, 206500], "MATRIX_SIZE": 42642250000, "MATRIX_NNZ": 1273389, "MATRIX_DENSITY": 2.9862143765866013e-05, "TIME_S": 1.6093401908874512, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.76, 20.72, 20.28, 20.2, 20.24, 20.56, 20.72, 21.12, 21.24, 21.0], "POWER": [48.6, 65.2, 65.2, 61.84, 62.88, 59.36], "JOULES": 99.0504337310791, "POWER_AFTER": [20.76, 20.4, 20.64, 20.68, 20.68, 20.56, 20.48, 20.68, 20.64, 20.88]}
--- a/pytorch/output_cpu/altra_10_10_mc2depi_10000.json
+++ b/pytorch/output_cpu/altra_10_10_mc2depi_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "mc2depi", "MATRIX_SHAPE": [525825, 525825], "MATRIX_SIZE": 276491930625, "MATRIX_NNZ": 2100225, "MATRIX_DENSITY": 7.595972132902821e-06, "TIME_S": 2.123237371444702, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.68, 20.68, 20.68, 20.64, 20.72, 20.6, 20.84, 20.76, 20.92, 20.96], "POWER": [52.52, 76.2, 82.92, 85.4, 72.28, 58.76], "JOULES": 164.92142794609072, "POWER_AFTER": [20.68, 20.72, 20.84, 20.88, 20.84, 21.16, 21.04, 21.16, 20.88, 20.88]}
--- a/pytorch/output_cpu/altra_10_10_mc2depi_10000.output
+++ b/pytorch/output_cpu/altra_10_10_mc2depi_10000.output
@ -1,25 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470981 queued and waiting for resources
 srun: job 3470981 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       5,  ..., 2100220,
                            2100223, 2100225]),
       col_indices=tensor([     0,      1,      1,  ..., 525824, 525821,
                           525824]),
       values=tensor([-2025.,  2025., -2026.,  ...,  2025.,  1024., -1024.]),
       size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
 tensor([0.8254, 0.0543, 0.1764,  ..., 0.7650, 0.8254, 0.6404])
 Matrix: mc2depi
 Shape: torch.Size([525825, 525825])
 Size: 276491930625
 NNZ: 2100225
 Density: 7.595972132902821e-06
 Time: 2.123237371444702 seconds
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella04_10000.json
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella04_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella04", "MATRIX_SHAPE": [10879, 10879], "MATRIX_SIZE": 118352641, "MATRIX_NNZ": 39994, "MATRIX_DENSITY": 0.0003379223282393842, "TIME_S": 0.9692902565002441, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.6, 20.48, 20.64, 20.64, 20.64, 20.56, 20.52, 20.44, 20.24, 20.12], "POWER": [25.92, 43.16, 50.56, 48.4, 49.28], "JOULES": 47.76662384033203, "POWER_AFTER": [20.4, 20.52, 20.44, 20.64, 20.72, 20.64, 20.8, 20.6, 20.6, 20.64]}
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella24_10000.json
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella24_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella24", "MATRIX_SHAPE": [26518, 26518], "MATRIX_SIZE": 703204324, "MATRIX_NNZ": 65369, "MATRIX_DENSITY": 9.295875717624285e-05, "TIME_S": 0.9848971366882324, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [16.32, 16.36, 16.36, 16.32, 16.56, 16.64, 16.72, 16.92, 16.76, 16.96], "POWER": [22.56, 40.8, 42.16, 42.16, 39.84], "JOULES": 39.23830192565919, "POWER_AFTER": [16.56, 16.44, 16.44, 16.68, 16.72, 16.72, 16.76, 16.68, 16.68, 16.92]}
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella24_10000.output
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella24_10000.output
@ -1,23 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3471012 queued and waiting for resources
 srun: job 3471012 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 65369, 65369, 65369]),
       col_indices=tensor([    1,     2,     3,  ..., 15065,  9401, 26517]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(26518, 26518),
       nnz=65369, layout=torch.sparse_csr)
 tensor([0.6126, 0.7089, 0.2938,  ..., 0.5143, 0.3903, 0.8766])
 Matrix: p2p-Gnutella24
 Shape: torch.Size([26518, 26518])
 Size: 703204324
 NNZ: 65369
 Density: 9.295875717624285e-05
 Time: 0.9848971366882324 seconds
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella25_10000.json
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella25_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella25", "MATRIX_SHAPE": [22687, 22687], "MATRIX_SIZE": 514699969, "MATRIX_NNZ": 54705, "MATRIX_DENSITY": 0.00010628522108964806, "TIME_S": 1.064000129699707, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.4, 20.68, 20.76, 20.6, 20.64, 20.48, 20.36, 20.48, 20.52, 20.52], "POWER": [33.4, 49.92, 52.44, 52.44, 51.68], "JOULES": 55.747526702880855, "POWER_AFTER": [20.96, 20.76, 20.96, 21.08, 20.64, 20.84, 20.84, 20.56, 20.28, 20.48]}
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella25_10000.output
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella25_10000.output
@ -1,23 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470999 queued and waiting for resources
 srun: job 3470999 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([    0,     9,     9,  ..., 54704, 54704, 54705]),
       col_indices=tensor([    1,     2,     3,  ..., 17949, 22685,   144]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]), size=(22687, 22687),
       nnz=54705, layout=torch.sparse_csr)
 tensor([0.1096, 0.4722, 0.2402,  ..., 0.8482, 0.4609, 0.1028])
 Matrix: p2p-Gnutella25
 Shape: torch.Size([22687, 22687])
 Size: 514699969
 NNZ: 54705
 Density: 0.00010628522108964806
 Time: 1.064000129699707 seconds
--- a/pytorch/output_cpu/altra_10_10_p2p-Gnutella30_10000.json
+++ b/pytorch/output_cpu/altra_10_10_p2p-Gnutella30_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella30", "MATRIX_SHAPE": [36682, 36682], "MATRIX_SIZE": 1345569124, "MATRIX_NNZ": 88328, "MATRIX_DENSITY": 6.564359899804003e-05, "TIME_S": 1.022092580795288, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.44, 20.56, 20.76, 20.6, 20.64, 21.08, 20.76, 20.32, 20.32, 20.44], "POWER": [25.64, 36.88, 51.72, 49.6, 50.84], "JOULES": 50.723186807632445, "POWER_AFTER": [20.56, 20.68, 20.6, 20.88, 21.08, 20.76, 20.76, 20.92, 20.32, 20.24]}
--- a/pytorch/output_cpu/altra_10_10_ri2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_ri2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ri2010", "MATRIX_SHAPE": [25181, 25181], "MATRIX_SIZE": 634082761, "MATRIX_NNZ": 125750, "MATRIX_DENSITY": 0.00019831796057928155, "TIME_S": 0.7675364017486572, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.64, 20.64, 20.64, 20.64, 20.8, 20.8, 20.8, 20.96, 20.92, 20.84], "POWER": [26.52, 43.16, 47.12, 46.0, 47.48], "JOULES": 36.442628355026244, "POWER_AFTER": [20.48, 20.44, 20.6, 20.64, 20.6, 20.68, 20.6, 20.8, 20.6, 20.6]}
--- a/pytorch/output_cpu/altra_10_10_rma10_10000.json
+++ b/pytorch/output_cpu/altra_10_10_rma10_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "rma10", "MATRIX_SHAPE": [46835, 46835], "MATRIX_SIZE": 2193517225, "MATRIX_NNZ": 2374001, "MATRIX_DENSITY": 0.0010822805369125833, "TIME_S": 2.688584089279175, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.24, 20.24, 20.4, 20.44, 20.76, 20.76, 20.68, 20.72, 20.56, 20.44], "POWER": [53.84, 65.36, 65.36, 65.6, 62.2, 50.6], "JOULES": 162.64235491752623, "POWER_AFTER": [20.28, 20.4, 20.48, 20.44, 20.4, 20.48, 20.52, 20.44, 20.44, 20.44]}
--- a/pytorch/output_cpu/altra_10_10_soc-sign-Slashdot090216_10000.json
+++ b/pytorch/output_cpu/altra_10_10_soc-sign-Slashdot090216_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-Slashdot090216", "MATRIX_SHAPE": [81871, 81871], "MATRIX_SIZE": 6702860641, "MATRIX_NNZ": 545671, "MATRIX_DENSITY": 8.140867447881048e-05, "TIME_S": 1.4809374809265137, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.16, 20.96, 20.92, 20.92, 20.76, 20.72, 21.04, 21.04, 21.08, 20.84], "POWER": [38.4, 56.52, 60.12, 59.64, 58.44], "JOULES": 87.74598638534546, "POWER_AFTER": [20.56, 20.56, 20.68, 20.52, 21.16, 21.16, 21.28, 21.0, 21.12, 20.84]}
--- a/pytorch/output_cpu/altra_10_10_soc-sign-Slashdot090221_10000.json
+++ b/pytorch/output_cpu/altra_10_10_soc-sign-Slashdot090221_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-Slashdot090221", "MATRIX_SHAPE": [82144, 82144], "MATRIX_SIZE": 6747636736, "MATRIX_NNZ": 549202, "MATRIX_DENSITY": 8.13917555860553e-05, "TIME_S": 1.608903408050537, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.68, 20.68, 20.64, 20.28, 20.32, 20.44, 20.44, 20.44, 20.44, 20.52], "POWER": [57.2, 57.2, 72.76, 72.52, 70.32, 58.68], "JOULES": 106.05045198440551, "POWER_AFTER": [20.96, 20.76, 20.84, 20.92, 20.92, 20.96, 21.12, 21.24, 21.16, 21.04]}
--- a/pytorch/output_cpu/altra_10_10_soc-sign-epinions_10000.json
+++ b/pytorch/output_cpu/altra_10_10_soc-sign-epinions_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-epinions", "MATRIX_SHAPE": [131828, 131828], "MATRIX_SIZE": 17378621584, "MATRIX_NNZ": 841372, "MATRIX_DENSITY": 4.841419648464106e-05, "TIME_S": 4.555854320526123, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [16.4, 16.36, 16.48, 16.68, 16.32, 16.32, 16.56, 16.56, 16.64, 16.64], "POWER": [51.6, 68.68, 77.56, 77.4, 61.4, 55.08, 54.44, 65.6], "JOULES": 284.7840434265137, "POWER_AFTER": [16.92, 16.88, 17.04, 16.92, 16.84, 16.92, 16.88, 16.8, 17.12, 17.12]}
--- a/pytorch/output_cpu/altra_10_10_sx-mathoverflow_10000.json
+++ b/pytorch/output_cpu/altra_10_10_sx-mathoverflow_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "sx-mathoverflow", "MATRIX_SHAPE": [24818, 24818], "MATRIX_SIZE": 615933124, "MATRIX_NNZ": 239978, "MATRIX_DENSITY": 0.00038961697406616504, "TIME_S": 1.0039293766021729, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.88, 21.0, 21.0, 20.92, 20.92, 20.8, 20.6, 20.6, 20.76, 20.92], "POWER": [29.76, 49.24, 50.6, 47.84, 47.84], "JOULES": 48.02798137664795, "POWER_AFTER": [20.96, 20.8, 20.92, 21.68, 22.4, 23.04, 23.76, 23.12, 22.6, 21.8]}
--- a/pytorch/output_cpu/altra_10_10_sx-mathoverflow_10000.output
+++ b/pytorch/output_cpu/altra_10_10_sx-mathoverflow_10000.output
@ -1,24 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470984 queued and waiting for resources
 srun: job 3470984 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,    317,    416,  ..., 239976, 239977,
                            239978]),
       col_indices=tensor([    0,     1,     2,  ...,  1483,  2179, 24817]),
       values=tensor([151.,  17.,   6.,  ...,   1.,   1.,   1.]),
       size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
 tensor([0.8169, 0.9455, 0.2378,  ..., 0.7183, 0.8285, 0.9774])
 Matrix: sx-mathoverflow
 Shape: torch.Size([24818, 24818])
 Size: 615933124
 NNZ: 239978
 Density: 0.00038961697406616504
 Time: 1.0039293766021729 seconds
--- a/pytorch/output_cpu/altra_10_10_tn2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_tn2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "tn2010", "MATRIX_SHAPE": [240116, 240116], "MATRIX_SIZE": 57655693456, "MATRIX_NNZ": 1193966, "MATRIX_DENSITY": 2.070855328296721e-05, "TIME_S": 2.2318568229675293, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.52, 20.52, 20.68, 20.6, 20.76, 20.84, 20.52, 20.44, 20.48, 20.4], "POWER": [47.04, 68.12, 70.92, 71.88, 71.88, 61.28], "JOULES": 157.9681861114502, "POWER_AFTER": [21.04, 20.76, 20.8, 20.72, 20.76, 20.84, 20.92, 21.04, 20.8, 20.8]}
--- a/pytorch/output_cpu/altra_10_10_tn2010_10000.output
+++ b/pytorch/output_cpu/altra_10_10_tn2010_10000.output
@ -1,26 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3470986 queued and waiting for resources
 srun: job 3470986 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       4,      20,  ..., 1193961,
                            1193963, 1193966]),
       col_indices=tensor([  1152,   1272,   1961,  ..., 238254, 239142,
                           240113]),
       values=tensor([  5728.,   2871., 418449.,  ...,  10058.,  33324.,
                       34928.]), size=(240116, 240116), nnz=1193966,
       layout=torch.sparse_csr)
 tensor([0.2593, 0.6684, 0.1857,  ..., 0.6282, 0.3314, 0.7454])
 Matrix: tn2010
 Shape: torch.Size([240116, 240116])
 Size: 57655693456
 NNZ: 1193966
 Density: 2.070855328296721e-05
 Time: 2.2318568229675293 seconds
--- a/pytorch/output_cpu/altra_10_10_ut2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_ut2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ut2010", "MATRIX_SHAPE": [115406, 115406], "MATRIX_SIZE": 13318544836, "MATRIX_NNZ": 572066, "MATRIX_DENSITY": 4.295259032005559e-05, "TIME_S": 1.5120632648468018, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [23.36, 22.84, 22.36, 21.92, 21.48, 21.48, 21.72, 22.08, 22.64, 23.28], "POWER": [43.48, 59.4, 65.28, 65.16, 62.16], "JOULES": 96.98985254287719, "POWER_AFTER": [22.56, 22.8, 22.24, 21.84, 21.4, 21.32, 20.96, 21.28, 21.36, 21.08]}
--- a/pytorch/output_cpu/altra_10_10_ut2010_10000.output
+++ b/pytorch/output_cpu/altra_10_10_ut2010_10000.output
@ -1,26 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3471001 queued and waiting for resources
 srun: job 3471001 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([     0,      3,      9,  ..., 572056, 572061,
                            572066]),
       col_indices=tensor([   453,   1291,   1979,  ..., 113521, 114509,
                           114602]),
       values=tensor([160642.,  31335., 282373.,  ...,  88393.,  99485.,
                       18651.]), size=(115406, 115406), nnz=572066,
       layout=torch.sparse_csr)
 tensor([0.9240, 0.3751, 0.9849,  ..., 0.9377, 0.9441, 0.6765])
 Matrix: ut2010
 Shape: torch.Size([115406, 115406])
 Size: 13318544836
 NNZ: 572066
 Density: 4.295259032005559e-05
 Time: 1.5120632648468018 seconds
--- a/pytorch/output_cpu/altra_10_10_va2010_10000.json
+++ b/pytorch/output_cpu/altra_10_10_va2010_10000.json
@ -1 +0,0 @@
 {"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "va2010", "MATRIX_SHAPE": [285762, 285762], "MATRIX_SIZE": 81659920644, "MATRIX_NNZ": 1402128, "MATRIX_DENSITY": 1.717033263003816e-05, "TIME_S": 2.1484014987945557, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.76, 20.72, 20.76, 20.88, 20.88, 20.96, 20.96, 20.96, 20.8, 20.6], "POWER": [65.16, 84.16, 87.88, 82.08, 64.16, 59.44], "JOULES": 155.0609850883484, "POWER_AFTER": [20.52, 20.52, 20.72, 20.56, 20.64, 20.64, 20.72, 20.92, 21.16, 21.32]}
--- a/pytorch/output_cpu/altra_10_10_va2010_10000.output
+++ b/pytorch/output_cpu/altra_10_10_va2010_10000.output
@ -1,26 +0,0 @@
 srun: Job time limit was unset; set to partition default of 60 minutes
 srun: ################################################################################
 srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs.     #
 srun: # All submission nodes and all other compute nodes have x86_64 architecture    #
 srun: # CPUs. Programs, environments, or other software that was built on x86_64     #
 srun: # nodes may need to be rebuilt to properly execute on these nodes.             #
 srun: ################################################################################
 srun: job 3471004 queued and waiting for resources
 srun: job 3471004 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([      0,       2,       8,  ..., 1402119,
                            1402123, 1402128]),
       col_indices=tensor([  2006,   2464,   1166,  ..., 285581, 285634,
                           285760]),
       values=tensor([125334.,   3558.,   1192.,  ...,  10148.,   1763.,
                        9832.]), size=(285762, 285762), nnz=1402128,
       layout=torch.sparse_csr)
 tensor([0.5972, 0.8492, 0.1772,  ..., 0.7912, 0.0415, 0.8296])
 Matrix: va2010
 Shape: torch.Size([285762, 285762])
 Size: 81659920644
 NNZ: 1402128
 Density: 1.717033263003816e-05
 Time: 2.1484014987945557 seconds
--- a/Show More
+++ b/Show More
		`@ -1 +0,0 @@`
			{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.36, 22.88, 22.36, 22.72, 22.52, 22.2, 21.96, 21.8, 21.48, 21.48], "matrix": "Oregon-2", "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 1.5312557220458984, "power": [26.68, 27.84, 28.48, 29.92, 30.0], "power_after": [21.16, 21.32, 21.16, 21.16, 21.16, 20.88, 20.92, 20.76, 20.96, 21.2], "task clock (msec)": 64.81, "page faults": 3244, "cycles": 82069432, "instructions": 78292700, "branch mispredictions": 319703, "branches": 19996903, "ITLB accesses": 26988315, "ITLB misses": 5988, "DTLB misses": 14570, "DTLB accesses": 36879854, "L1I cache accesses": 30465174, "L1I cache misses": 293085, "L1D cache misses": 487330, "L1D cache accesses": 31932249, "LL cache misses": 545501, "LL cache accesses": 558084, "L2D TLB accesses": 204746, "L2D TLB misses": 25302, "L2D cache misses": 314594, "L2D cache accesses": 1828047, "instructions per cycle": 0.9539812582107307, "branch miss rate": 0.01598762568383714, "ITLB miss rate": 0.00022187379982781437, "DTLB miss rate": 0.0003950666399058955, "L2D TLB miss rate": 0.12357750578765886, "L1I cache miss rate": 0.009620329101025322, "L1D cache miss rate": 0.015261374167538278, "L2D cache miss rate": 0.17209294947011755, "LL cache miss rate": 0.9774532149282187}
		`@ -1 +0,0 @@`
			{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ASIC_680k", "MATRIX_SHAPE": [682862, 682862], "MATRIX_SIZE": 466300511044, "MATRIX_NNZ": 3871773, "MATRIX_DENSITY": 8.303171256088674e-06, "TIME_S": 11.77456283569336, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.36, 20.44, 20.48, 20.72, 20.8, 21.0, 21.32, 21.32, 21.28, 21.08], "POWER": [92.0, 91.8, 78.72, 66.68, 51.2, 46.6, 53.36, 53.36, 70.48, 90.16, 100.04, 103.68, 98.2, 95.64, 97.16, 101.4], "JOULES": 938.4206715393068, "POWER_AFTER": [20.96, 20.76, 20.76, 21.08, 21.24, 21.16, 21.28, 21.2, 21.0, 21.08]}