Output!
This commit is contained in:
parent
66b0699fa8
commit
b402503c31
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.36, 22.88, 22.36, 22.72, 22.52, 22.2, 21.96, 21.8, 21.48, 21.48], "matrix": "Oregon-2", "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 1.5312557220458984, "power": [26.68, 27.84, 28.48, 29.92, 30.0], "power_after": [21.16, 21.32, 21.16, 21.16, 21.16, 20.88, 20.92, 20.76, 20.96, 21.2], "task clock (msec)": 64.81, "page faults": 3244, "cycles": 82069432, "instructions": 78292700, "branch mispredictions": 319703, "branches": 19996903, "ITLB accesses": 26988315, "ITLB misses": 5988, "DTLB misses": 14570, "DTLB accesses": 36879854, "L1I cache accesses": 30465174, "L1I cache misses": 293085, "L1D cache misses": 487330, "L1D cache accesses": 31932249, "LL cache misses": 545501, "LL cache accesses": 558084, "L2D TLB accesses": 204746, "L2D TLB misses": 25302, "L2D cache misses": 314594, "L2D cache accesses": 1828047, "instructions per cycle": 0.9539812582107307, "branch miss rate": 0.01598762568383714, "ITLB miss rate": 0.00022187379982781437, "DTLB miss rate": 0.0003950666399058955, "L2D TLB miss rate": 0.12357750578765886, "L1I cache miss rate": 0.009620329101025322, "L1D cache miss rate": 0.015261374167538278, "L2D cache miss rate": 0.17209294947011755, "LL cache miss rate": 0.9774532149282187}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394980 queued and waiting for resources
|
|
||||||
srun: job 3394980 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9231, 0.7723, 0.0509, ..., 0.0839, 0.6982, 0.3459])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 1.5677142143249512 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
|
||||||
|
|
||||||
64.81 msec task-clock:u # 0.013 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,244 page-faults:u # 50.056 K/sec
|
|
||||||
82,069,432 cycles:u # 1.266 GHz (59.04%)
|
|
||||||
78,292,700 instructions:u # 0.95 insn per cycle (76.75%)
|
|
||||||
<not supported> branches:u
|
|
||||||
341,509 branch-misses:u (90.97%)
|
|
||||||
33,032,555 L1-dcache-loads:u # 509.704 M/sec
|
|
||||||
478,674 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,508,310 L1-icache-loads:u # 486.184 M/sec
|
|
||||||
297,528 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
49,358,091 dTLB-loads:u # 761.613 M/sec (27.83%)
|
|
||||||
88,514 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (14.82%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
5.016393105 seconds time elapsed
|
|
||||||
|
|
||||||
16.759527000 seconds user
|
|
||||||
31.429551000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8423, 0.9339, 0.8037, ..., 0.5953, 0.0649, 0.1559])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 1.516484022140503 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
|
||||||
|
|
||||||
319,703 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,996,903 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.945699041 seconds time elapsed
|
|
||||||
|
|
||||||
16.431978000 seconds user
|
|
||||||
29.752452000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8058, 0.2922, 0.1227, ..., 0.2176, 0.9496, 0.8838])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 1.6458909511566162 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
|
||||||
|
|
||||||
26,988,315 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,988 ITLB_WALK:u
|
|
||||||
14,570 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,879,854 L1D_TLB:u
|
|
||||||
|
|
||||||
5.011871473 seconds time elapsed
|
|
||||||
|
|
||||||
16.529942000 seconds user
|
|
||||||
30.438432000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7728, 0.1182, 0.3337, ..., 0.2555, 0.2523, 0.5746])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 1.529954433441162 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
|
||||||
|
|
||||||
30,465,174 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
293,085 L1I_CACHE_REFILL:u
|
|
||||||
487,330 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,932,249 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.954100105 seconds time elapsed
|
|
||||||
|
|
||||||
16.282966000 seconds user
|
|
||||||
28.926724000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5613, 0.3211, 0.1739, ..., 0.5461, 0.1391, 0.8387])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 1.5726752281188965 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
|
||||||
|
|
||||||
545,501 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
558,084 LL_CACHE_RD:u
|
|
||||||
204,746 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,302 L2D_TLB_REFILL:u
|
|
||||||
314,594 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,828,047 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.866549675 seconds time elapsed
|
|
||||||
|
|
||||||
16.609257000 seconds user
|
|
||||||
31.381282000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.6, 21.64, 21.88, 22.08, 22.2, 22.32, 22.36, 22.04, 22.0, 21.96], "matrix": "as-caida", "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 2.6254467964172363, "power": [30.92, 29.2, 29.52, 29.72, 29.72, 31.72], "power_after": [21.04, 21.28, 21.04, 21.16, 21.16, 20.96, 21.04, 20.88, 20.56, 20.84], "task clock (msec)": 61.4, "page faults": 3507, "cycles": 78967021, "instructions": 94334531, "branch mispredictions": 325893, "branches": 19069753, "ITLB accesses": 27181279, "ITLB misses": 5995, "DTLB misses": 17412, "DTLB accesses": 37016930, "L1I cache accesses": 31535482, "L1I cache misses": 292676, "L1D cache misses": 471752, "L1D cache accesses": 33119145, "LL cache misses": 540894, "LL cache accesses": 554700, "L2D TLB accesses": 191772, "L2D TLB misses": 23711, "L2D cache misses": 306195, "L2D cache accesses": 1755986, "instructions per cycle": 1.1946066827061894, "branch miss rate": 0.017089523917797993, "ITLB miss rate": 0.00022055621444450792, "DTLB miss rate": 0.00047037936425305935, "L2D TLB miss rate": 0.12364161608576851, "L1I cache miss rate": 0.009280847522799873, "L1D cache miss rate": 0.01424408752097918, "L2D cache miss rate": 0.17437211913990203, "LL cache miss rate": 0.975110870740941}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394983 queued and waiting for resources
|
|
||||||
srun: job 3394983 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4886, 0.3652, 0.5691, ..., 0.6466, 0.4355, 0.8397])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 2.6297245025634766 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
|
||||||
|
|
||||||
61.40 msec task-clock:u # 0.010 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,507 page-faults:u # 57.117 K/sec
|
|
||||||
78,967,021 cycles:u # 1.286 GHz (61.13%)
|
|
||||||
94,334,531 instructions:u # 1.19 insn per cycle (95.16%)
|
|
||||||
<not supported> branches:u
|
|
||||||
365,239 branch-misses:u
|
|
||||||
33,334,312 L1-dcache-loads:u # 542.906 M/sec
|
|
||||||
457,950 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,725,851 L1-icache-loads:u # 516.709 M/sec
|
|
||||||
297,720 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
25,188,580 dTLB-loads:u # 410.239 M/sec (5.16%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.049042045 seconds time elapsed
|
|
||||||
|
|
||||||
17.649315000 seconds user
|
|
||||||
29.335859000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8344, 0.2588, 0.2246, ..., 0.5607, 0.8141, 0.9893])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 2.6495532989501953 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
|
||||||
|
|
||||||
325,893 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,069,753 BR_RETIRED:u
|
|
||||||
|
|
||||||
6.023780447 seconds time elapsed
|
|
||||||
|
|
||||||
17.654658000 seconds user
|
|
||||||
28.848805000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0814, 0.1132, 0.8515, ..., 0.8987, 0.5912, 0.5002])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 2.5444185733795166 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
|
||||||
|
|
||||||
27,181,279 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,995 ITLB_WALK:u
|
|
||||||
17,412 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,016,930 L1D_TLB:u
|
|
||||||
|
|
||||||
5.790360666 seconds time elapsed
|
|
||||||
|
|
||||||
17.919315000 seconds user
|
|
||||||
30.569858000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0439, 0.1884, 0.3342, ..., 0.2027, 0.5532, 0.7245])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 2.620804786682129 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
|
||||||
|
|
||||||
31,535,482 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
292,676 L1I_CACHE_REFILL:u
|
|
||||||
471,752 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,119,145 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.002311801 seconds time elapsed
|
|
||||||
|
|
||||||
17.427887000 seconds user
|
|
||||||
30.063688000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1495, 0.5856, 0.8600, ..., 0.2101, 0.6229, 0.2019])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 2.561279296875 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
|
||||||
|
|
||||||
540,894 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
554,700 LL_CACHE_RD:u
|
|
||||||
191,772 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,711 L2D_TLB_REFILL:u
|
|
||||||
306,195 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,755,986 L2D_CACHE:u
|
|
||||||
|
|
||||||
5.946428572 seconds time elapsed
|
|
||||||
|
|
||||||
17.396567000 seconds user
|
|
||||||
32.141235000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [83.04, 78.44, 65.92, 53.76, 38.68, 38.68, 25.68, 22.6, 22.52, 22.32], "matrix": "dc2", "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 14.128849267959595, "power": [89.84, 89.4, 82.8, 71.32, 57.72, 51.92, 53.0, 63.8, 78.24, 78.24, 90.2, 90.36, 90.08, 88.64, 88.64, 87.64, 87.68, 87.24], "power_after": [21.4, 21.2, 21.08, 21.08, 21.28, 21.04, 20.92, 21.12, 21.08, 21.0], "task clock (msec)": 58.45, "page faults": 3471, "cycles": 76691414, "instructions": 89547095, "branch mispredictions": 329725, "branches": 19946857, "ITLB accesses": 27648951, "ITLB misses": 6857, "DTLB misses": 18047, "DTLB accesses": 37225736, "L1I cache accesses": 32434686, "L1I cache misses": 293072, "L1D cache misses": 483557, "L1D cache accesses": 34059722, "LL cache misses": 561480, "LL cache accesses": 578369, "L2D TLB accesses": 192306, "L2D TLB misses": 25364, "L2D cache misses": 317121, "L2D cache accesses": 1812330, "instructions per cycle": 1.16762868656979, "branch miss rate": 0.01653017314958442, "ITLB miss rate": 0.00024800217556174194, "DTLB miss rate": 0.00048479901109275584, "L2D TLB miss rate": 0.13189396066685385, "L1I cache miss rate": 0.00903575881696527, "L1D cache miss rate": 0.014197326683993487, "L2D cache miss rate": 0.17497972223601663, "LL cache miss rate": 0.9707989190292011}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394982 queued and waiting for resources
|
|
||||||
srun: job 3394982 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0986, 0.6504, 0.0132, ..., 0.6525, 0.3337, 0.7557])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 18.46260714530945 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
|
||||||
|
|
||||||
58.45 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,471 page-faults:u # 59.382 K/sec
|
|
||||||
76,691,414 cycles:u # 1.312 GHz (41.20%)
|
|
||||||
89,547,095 instructions:u # 1.17 insn per cycle (73.16%)
|
|
||||||
<not supported> branches:u
|
|
||||||
382,362 branch-misses:u (96.21%)
|
|
||||||
33,271,433 L1-dcache-loads:u # 569.211 M/sec
|
|
||||||
488,730 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,926,596 L1-icache-loads:u # 546.204 M/sec
|
|
||||||
304,792 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
36,392,791 dTLB-loads:u # 622.612 M/sec (31.21%)
|
|
||||||
0 dTLB-load-misses:u (5.35%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
22.126601025 seconds time elapsed
|
|
||||||
|
|
||||||
103.642372000 seconds user
|
|
||||||
1434.131491000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5605, 0.9374, 0.4444, ..., 0.5937, 0.3099, 0.2252])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 13.607120752334595 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
|
||||||
|
|
||||||
329,725 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,946,857 BR_RETIRED:u
|
|
||||||
|
|
||||||
17.131143957 seconds time elapsed
|
|
||||||
|
|
||||||
96.945305000 seconds user
|
|
||||||
1045.242697000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8954, 0.9777, 0.8042, ..., 0.2069, 0.7063, 0.8479])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 17.22396969795227 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
|
||||||
|
|
||||||
27,648,951 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,857 ITLB_WALK:u
|
|
||||||
18,047 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,225,736 L1D_TLB:u
|
|
||||||
|
|
||||||
20.911480243 seconds time elapsed
|
|
||||||
|
|
||||||
107.392462000 seconds user
|
|
||||||
1329.272154000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9293, 0.9606, 0.8914, ..., 0.2407, 0.2843, 0.5174])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 13.233965873718262 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
|
||||||
|
|
||||||
32,434,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
293,072 L1I_CACHE_REFILL:u
|
|
||||||
483,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
34,059,722 L1D_CACHE:u
|
|
||||||
|
|
||||||
16.956477005 seconds time elapsed
|
|
||||||
|
|
||||||
88.393687000 seconds user
|
|
||||||
1037.101858000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8850, 0.9552, 0.7029, ..., 0.3357, 0.0248, 0.5395])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 13.873224973678589 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
|
||||||
|
|
||||||
561,480 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
578,369 LL_CACHE_RD:u
|
|
||||||
192,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,364 L2D_TLB_REFILL:u
|
|
||||||
317,121 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,812,330 L2D_CACHE:u
|
|
||||||
|
|
||||||
17.467787426 seconds time elapsed
|
|
||||||
|
|
||||||
92.463054000 seconds user
|
|
||||||
1072.584062000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.56, 28.04, 23.8, 23.08, 22.12, 21.16, 21.16, 21.0, 20.96, 20.72], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.713265895843506, "power": [33.24, 30.84, 29.96, 27.68, 25.8, 25.8, 31.16], "power_after": [20.6, 20.48, 20.24, 20.32, 20.2, 20.36, 20.4, 20.4, 20.36, 20.36], "task clock (msec)": 48.96, "page faults": 3285, "cycles": 48563060, "instructions": 73465190, "branch mispredictions": 326361, "branches": 19599354, "ITLB accesses": 26666488, "ITLB misses": 6643, "DTLB misses": 17347, "DTLB accesses": 35986736, "L1I cache accesses": 32502068, "L1I cache misses": 302739, "L1D cache misses": 480619, "L1D cache accesses": 34031072, "LL cache misses": 552815, "LL cache accesses": 567373, "L2D TLB accesses": 188248, "L2D TLB misses": 23165, "L2D cache misses": 308211, "L2D cache accesses": 1787647, "instructions per cycle": 1.5127792606149613, "branch miss rate": 0.016651620252381788, "ITLB miss rate": 0.0002491141690649327, "DTLB miss rate": 0.0004820387155978803, "L2D TLB miss rate": 0.12305575623645404, "L1I cache miss rate": 0.00931445346800702, "L1D cache miss rate": 0.014122946229845479, "L2D cache miss rate": 0.17241155552522394, "LL cache miss rate": 0.9743413944618443}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394985 queued and waiting for resources
|
|
||||||
srun: job 3394985 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6055, 0.8789, 0.0482, ..., 0.0736, 0.1316, 0.6744])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.6956887245178223 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
48.96 msec task-clock:u # 0.008 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,285 page-faults:u # 67.090 K/sec
|
|
||||||
48,563,060 cycles:u # 0.992 GHz (59.76%)
|
|
||||||
73,465,190 instructions:u # 1.51 insn per cycle (78.23%)
|
|
||||||
<not supported> branches:u
|
|
||||||
369,314 branch-misses:u (98.16%)
|
|
||||||
31,769,641 L1-dcache-loads:u # 648.836 M/sec
|
|
||||||
479,594 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,338,929 L1-icache-loads:u # 619.616 M/sec
|
|
||||||
282,162 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
|
||||||
55,516,925 dTLB-loads:u # 1.134 G/sec (23.54%)
|
|
||||||
12,345 dTLB-load-misses:u # 0.02% of all dTLB cache accesses (3.47%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.017085179 seconds time elapsed
|
|
||||||
|
|
||||||
17.484355000 seconds user
|
|
||||||
28.678064000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.2815, 0.8196, 0.3706, ..., 0.1328, 0.4062, 0.9113])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.7908551692962646 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
326,361 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,599,354 BR_RETIRED:u
|
|
||||||
|
|
||||||
6.215591535 seconds time elapsed
|
|
||||||
|
|
||||||
18.097112000 seconds user
|
|
||||||
27.831633000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9002, 0.0843, 0.5558, ..., 0.3931, 0.8070, 0.7414])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.819589376449585 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
26,666,488 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,643 ITLB_WALK:u
|
|
||||||
17,347 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,986,736 L1D_TLB:u
|
|
||||||
|
|
||||||
6.243883495 seconds time elapsed
|
|
||||||
|
|
||||||
17.783312000 seconds user
|
|
||||||
31.714619000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9109, 0.6392, 0.7899, ..., 0.0945, 0.3298, 0.6865])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.747800827026367 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
32,502,068 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,739 L1I_CACHE_REFILL:u
|
|
||||||
480,619 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
34,031,072 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.126767063 seconds time elapsed
|
|
||||||
|
|
||||||
17.702029000 seconds user
|
|
||||||
29.137072000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.7083, 0.6766, 0.7649, ..., 0.3027, 0.9885, 0.8086])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.795116901397705 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
552,815 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
567,373 LL_CACHE_RD:u
|
|
||||||
188,248 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,165 L2D_TLB_REFILL:u
|
|
||||||
308,211 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,787,647 L2D_CACHE:u
|
|
||||||
|
|
||||||
6.041792624 seconds time elapsed
|
|
||||||
|
|
||||||
17.791735000 seconds user
|
|
||||||
29.790006000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.96, 27.92, 27.24, 23.0, 22.28, 22.28, 21.6, 20.8, 20.68, 20.76], "matrix": "email-Enron", "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 12.818164587020874, "power": [84.24, 82.72, 82.72, 72.0, 60.2, 51.88, 52.4, 59.36, 72.08, 83.88, 86.48, 84.28, 82.28, 81.12, 80.96, 80.96, 81.16], "power_after": [20.92, 20.92, 20.92, 20.92, 21.0, 20.96, 20.88, 20.84, 20.88, 20.68], "task clock (msec)": 48.76, "page faults": 3281, "cycles": 45495589, "instructions": 79104832, "branch mispredictions": 335574, "branches": 20121415, "ITLB accesses": 26011880, "ITLB misses": 5842, "DTLB misses": 16448, "DTLB accesses": 35000292, "L1I cache accesses": 32193112, "L1I cache misses": 310304, "L1D cache misses": 495806, "L1D cache accesses": 33829187, "LL cache misses": 546628, "LL cache accesses": 570044, "L2D TLB accesses": 196794, "L2D TLB misses": 24071, "L2D cache misses": 316028, "L2D cache accesses": 1836018, "instructions per cycle": 1.7387362981496954, "branch miss rate": 0.016677455338006797, "ITLB miss rate": 0.00022458968748125855, "DTLB miss rate": 0.000469938936509444, "L2D TLB miss rate": 0.1223157210077543, "L1I cache miss rate": 0.009638832058236556, "L1D cache miss rate": 0.014656160669779029, "L2D cache miss rate": 0.1721268527868463, "LL cache miss rate": 0.9589224691427328}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394986 queued and waiting for resources
|
|
||||||
srun: job 3394986 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9906, 0.9401, 0.5661, ..., 0.4491, 0.7550, 0.2452])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 12.80848503112793 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
|
|
||||||
|
|
||||||
48.76 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,281 page-faults:u # 67.289 K/sec
|
|
||||||
45,495,589 cycles:u # 0.933 GHz (57.79%)
|
|
||||||
79,104,832 instructions:u # 1.74 insn per cycle (81.70%)
|
|
||||||
<not supported> branches:u
|
|
||||||
372,161 branch-misses:u
|
|
||||||
32,089,348 L1-dcache-loads:u # 658.113 M/sec
|
|
||||||
467,576 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,688,995 L1-icache-loads:u # 629.393 M/sec
|
|
||||||
289,698 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
47,006,355 dTLB-loads:u # 964.042 M/sec (22.12%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
16.331438990 seconds time elapsed
|
|
||||||
|
|
||||||
76.869141000 seconds user
|
|
||||||
999.179638000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7565, 0.5273, 0.1038, ..., 0.9432, 0.1309, 0.5542])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 26.91536283493042 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
|
|
||||||
|
|
||||||
335,574 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,121,415 BR_RETIRED:u
|
|
||||||
|
|
||||||
30.559245388 seconds time elapsed
|
|
||||||
|
|
||||||
126.799314000 seconds user
|
|
||||||
2081.777635000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2321, 0.0702, 0.2538, ..., 0.6254, 0.6308, 0.5317])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 14.841739892959595 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
|
|
||||||
|
|
||||||
26,011,880 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,842 ITLB_WALK:u
|
|
||||||
16,448 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,000,292 L1D_TLB:u
|
|
||||||
|
|
||||||
18.443612527 seconds time elapsed
|
|
||||||
|
|
||||||
80.694133000 seconds user
|
|
||||||
1159.740575000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7091, 0.9447, 0.0959, ..., 0.0090, 0.7012, 0.6025])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 10.863199234008789 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
|
|
||||||
|
|
||||||
32,193,112 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
310,304 L1I_CACHE_REFILL:u
|
|
||||||
495,806 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,829,187 L1D_CACHE:u
|
|
||||||
|
|
||||||
14.426841778 seconds time elapsed
|
|
||||||
|
|
||||||
70.728541000 seconds user
|
|
||||||
853.184507000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8267, 0.6185, 0.8015, ..., 0.8593, 0.4881, 0.8599])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 12.076026678085327 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
|
|
||||||
|
|
||||||
546,628 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
570,044 LL_CACHE_RD:u
|
|
||||||
196,794 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,071 L2D_TLB_REFILL:u
|
|
||||||
316,028 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,836,018 L2D_CACHE:u
|
|
||||||
|
|
||||||
15.581045199 seconds time elapsed
|
|
||||||
|
|
||||||
77.345591000 seconds user
|
|
||||||
942.987439000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.36, 16.8, 16.76, 16.6, 16.48, 16.44, 16.28, 16.28, 16.16], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 1.0642461776733398, "power": [26.6, 27.52, 27.52, 31.16, 28.48], "power_after": [16.28, 16.4, 16.32, 16.12, 16.24, 16.0, 16.0, 16.24, 16.52, 17.04], "task clock (msec)": 50.59, "page faults": 3303, "cycles": 51318459, "instructions": 74705078, "branch mispredictions": 328853, "branches": 19620312, "ITLB accesses": 27939682, "ITLB misses": 5470, "DTLB misses": 17679, "DTLB accesses": 37425602, "L1I cache accesses": 30276633, "L1I cache misses": 291467, "L1D cache misses": 479061, "L1D cache accesses": 31689326, "LL cache misses": 529426, "LL cache accesses": 550033, "L2D TLB accesses": 171913, "L2D TLB misses": 20624, "L2D cache misses": 296662, "L2D cache accesses": 1714211, "instructions per cycle": 1.455715535028049, "branch miss rate": 0.01676084457780284, "ITLB miss rate": 0.0001957788925443031, "DTLB miss rate": 0.00047237717111404113, "L2D TLB miss rate": 0.11996765805959991, "L1I cache miss rate": 0.009626797008769106, "L1D cache miss rate": 0.015117424712661923, "L2D cache miss rate": 0.17306037588138215, "LL cache miss rate": 0.9625349751742168}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394992 queued and waiting for resources
|
|
||||||
srun: job 3394992 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1181, 0.8387, 0.0554, ..., 0.8107, 0.4393, 0.9489])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.061662197113037 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
50.59 msec task-clock:u # 0.012 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,303 page-faults:u # 65.291 K/sec
|
|
||||||
51,318,459 cycles:u # 1.014 GHz (59.34%)
|
|
||||||
74,705,078 instructions:u # 1.46 insn per cycle (83.02%)
|
|
||||||
<not supported> branches:u
|
|
||||||
366,825 branch-misses:u
|
|
||||||
31,809,194 L1-dcache-loads:u # 628.781 M/sec
|
|
||||||
466,198 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,390,161 L1-icache-loads:u # 600.731 M/sec
|
|
||||||
296,270 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
|
|
||||||
61,518,375 dTLB-loads:u # 1.216 G/sec (17.94%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
4.302241563 seconds time elapsed
|
|
||||||
|
|
||||||
16.122298000 seconds user
|
|
||||||
29.141140000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7249, 0.8723, 0.3843, ..., 0.2264, 0.4891, 0.9107])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.0079431533813477 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
328,853 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,620,312 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.241400567 seconds time elapsed
|
|
||||||
|
|
||||||
15.325937000 seconds user
|
|
||||||
28.223386000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7608, 0.2449, 0.5322, ..., 0.5547, 0.8659, 0.8437])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.1017234325408936 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
27,939,682 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,470 ITLB_WALK:u
|
|
||||||
17,679 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,425,602 L1D_TLB:u
|
|
||||||
|
|
||||||
4.296820500 seconds time elapsed
|
|
||||||
|
|
||||||
15.875162000 seconds user
|
|
||||||
28.803412000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9980, 0.9991, 0.6749, ..., 0.4225, 0.7297, 0.3717])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.0812580585479736 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
30,276,633 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
291,467 L1I_CACHE_REFILL:u
|
|
||||||
479,061 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,689,326 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.500137840 seconds time elapsed
|
|
||||||
|
|
||||||
15.794710000 seconds user
|
|
||||||
27.773851000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8707, 0.5871, 0.5970, ..., 0.8826, 0.4673, 0.4994])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 0.9900743961334229 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
529,426 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
550,033 LL_CACHE_RD:u
|
|
||||||
171,913 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
20,624 L2D_TLB_REFILL:u
|
|
||||||
296,662 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,714,211 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.284402033 seconds time elapsed
|
|
||||||
|
|
||||||
15.584671000 seconds user
|
|
||||||
27.523772000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.12, 16.12, 16.36, 16.56, 16.52, 17.04, 16.76, 16.64, 16.92], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.6947758197784424, "power": [25.2, 25.2, 26.6, 26.28, 26.48], "power_after": [16.4, 16.6, 16.6, 16.64, 16.8, 16.48, 16.44, 16.16, 16.12, 16.2], "task clock (msec)": 66.78, "page faults": 3520, "cycles": 28858055, "instructions": 64429843, "branch mispredictions": 331167, "branches": 19518210, "ITLB accesses": 26964483, "ITLB misses": 4666, "DTLB misses": 14001, "DTLB accesses": 36143905, "L1I cache accesses": 31901160, "L1I cache misses": 302516, "L1D cache misses": 475663, "L1D cache accesses": 33507563, "LL cache misses": 558546, "LL cache accesses": 578676, "L2D TLB accesses": 187549, "L2D TLB misses": 22990, "L2D cache misses": 321826, "L2D cache accesses": 1816571, "instructions per cycle": 2.2326467601506756, "branch miss rate": 0.016967078435983628, "ITLB miss rate": 0.00017304244253449992, "DTLB miss rate": 0.00038736821602425086, "L2D TLB miss rate": 0.12258129875392564, "L1I cache miss rate": 0.009482915354802146, "L1D cache miss rate": 0.01419569068630864, "L2D cache miss rate": 0.1771612560147663, "LL cache miss rate": 0.9652136947099932}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394989 queued and waiting for resources
|
|
||||||
srun: job 3394989 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2470, 0.4231, 0.1036, ..., 0.7937, 0.3241, 0.7116])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6974337100982666 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
66.78 msec task-clock:u # 0.013 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,520 page-faults:u # 52.713 K/sec
|
|
||||||
28,858,055 cycles:u # 0.432 GHz (26.93%)
|
|
||||||
64,429,843 instructions:u # 2.23 insn per cycle (67.63%)
|
|
||||||
<not supported> branches:u
|
|
||||||
296,857 branch-misses:u (84.08%)
|
|
||||||
33,646,348 L1-dcache-loads:u # 503.866 M/sec
|
|
||||||
493,998 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
32,070,415 L1-icache-loads:u # 480.266 M/sec
|
|
||||||
305,993 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
46,903,081 dTLB-loads:u # 702.391 M/sec (46.16%)
|
|
||||||
114,272 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (32.45%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
5.106933083 seconds time elapsed
|
|
||||||
|
|
||||||
16.391614000 seconds user
|
|
||||||
28.913912000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2307, 0.4662, 0.3789, ..., 0.0144, 0.6300, 0.7829])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6379659175872803 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
331,167 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,518,210 BR_RETIRED:u
|
|
||||||
|
|
||||||
5.017894585 seconds time elapsed
|
|
||||||
|
|
||||||
16.446505000 seconds user
|
|
||||||
31.004338000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7309, 0.0314, 0.4424, ..., 0.7434, 0.2124, 0.1432])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.7232718467712402 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
26,964,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
4,666 ITLB_WALK:u
|
|
||||||
14,001 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,143,905 L1D_TLB:u
|
|
||||||
|
|
||||||
5.053286721 seconds time elapsed
|
|
||||||
|
|
||||||
16.447780000 seconds user
|
|
||||||
28.580949000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5695, 0.5025, 0.1946, ..., 0.7428, 0.9634, 0.4327])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.644775629043579 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
31,901,160 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,516 L1I_CACHE_REFILL:u
|
|
||||||
475,663 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,507,563 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.978338941 seconds time elapsed
|
|
||||||
|
|
||||||
16.455298000 seconds user
|
|
||||||
30.249373000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0969, 0.1950, 0.8456, ..., 0.3315, 0.1512, 0.3182])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.752812385559082 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
558,546 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
578,676 LL_CACHE_RD:u
|
|
||||||
187,549 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
22,990 L2D_TLB_REFILL:u
|
|
||||||
321,826 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,816,571 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.952297819 seconds time elapsed
|
|
||||||
|
|
||||||
16.648691000 seconds user
|
|
||||||
27.005944000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.0, 16.4, 16.4, 16.28, 16.48, 16.6, 16.48, 16.56, 16.88, 16.92], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.4688231945037842, "power": [23.04, 29.0, 30.24, 27.96, 28.04], "power_after": [16.52, 16.68, 16.88, 17.12, 17.08, 17.04, 16.84, 16.72, 16.84, 16.84], "task clock (msec)": 48.61, "page faults": 3308, "cycles": 60072179, "instructions": 70991785, "branch mispredictions": 331765, "branches": 19906014, "ITLB accesses": 28194337, "ITLB misses": 5083, "DTLB misses": 17916, "DTLB accesses": 37944713, "L1I cache accesses": 31162212, "L1I cache misses": 270684, "L1D cache misses": 465467, "L1D cache accesses": 32857500, "LL cache misses": 541118, "LL cache accesses": 564199, "L2D TLB accesses": 194022, "L2D TLB misses": 23932, "L2D cache misses": 311476, "L2D cache accesses": 1783574, "instructions per cycle": 1.1817747613250387, "branch miss rate": 0.016666571218125335, "ITLB miss rate": 0.00018028443087702328, "DTLB miss rate": 0.00047216064066685654, "L2D TLB miss rate": 0.12334683695663379, "L1I cache miss rate": 0.008686289663904475, "L1D cache miss rate": 0.014166232975728525, "L2D cache miss rate": 0.17463587157022922, "LL cache miss rate": 0.9590906754531646}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394994 queued and waiting for resources
|
|
||||||
srun: job 3394994 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1465, 0.4354, 0.7334, ..., 0.2837, 0.5913, 0.9525])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4786670207977295 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
48.61 msec task-clock:u # 0.010 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,308 page-faults:u # 68.054 K/sec
|
|
||||||
60,072,179 cycles:u # 1.236 GHz (53.26%)
|
|
||||||
70,991,785 instructions:u # 1.18 insn per cycle (71.54%)
|
|
||||||
<not supported> branches:u
|
|
||||||
371,197 branch-misses:u
|
|
||||||
32,964,378 L1-dcache-loads:u # 678.165 M/sec
|
|
||||||
465,448 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,435,424 L1-icache-loads:u # 646.710 M/sec
|
|
||||||
293,561 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
|
||||||
56,761,270 dTLB-loads:u # 1.168 G/sec (30.54%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
4.700046411 seconds time elapsed
|
|
||||||
|
|
||||||
16.235801000 seconds user
|
|
||||||
28.396327000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7780, 0.3388, 0.1540, ..., 0.2989, 0.3682, 0.9160])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4235138893127441 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
331,765 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,906,014 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.757340585 seconds time elapsed
|
|
||||||
|
|
||||||
16.412311000 seconds user
|
|
||||||
29.238029000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4944, 0.8057, 0.8211, ..., 0.5137, 0.3388, 0.6316])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4664146900177002 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
28,194,337 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,083 ITLB_WALK:u
|
|
||||||
17,916 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,944,713 L1D_TLB:u
|
|
||||||
|
|
||||||
4.844329421 seconds time elapsed
|
|
||||||
|
|
||||||
16.081022000 seconds user
|
|
||||||
28.021902000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0963, 0.5806, 0.0397, ..., 0.1604, 0.5700, 0.8103])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.3717434406280518 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
31,162,212 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
270,684 L1I_CACHE_REFILL:u
|
|
||||||
465,467 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,857,500 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.598461782 seconds time elapsed
|
|
||||||
|
|
||||||
15.609727000 seconds user
|
|
||||||
30.606837000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9137, 0.5009, 0.7507, ..., 0.6623, 0.8760, 0.2991])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4291880130767822 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
541,118 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
564,199 LL_CACHE_RD:u
|
|
||||||
194,022 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,932 L2D_TLB_REFILL:u
|
|
||||||
311,476 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,783,574 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.792239951 seconds time elapsed
|
|
||||||
|
|
||||||
15.902307000 seconds user
|
|
||||||
28.747620000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.96, 22.0, 22.16, 22.16, 21.84, 22.08, 22.4, 22.08, 22.0, 21.48], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 3.504030466079712, "power": [54.2, 64.16, 67.64, 67.64, 65.92, 58.96, 59.92], "power_after": [20.72, 20.76, 20.76, 20.8, 20.8, 20.88, 20.92, 21.04, 21.04, 21.12], "task clock (msec)": 56.52, "page faults": 3194, "cycles": 58074747, "instructions": 90036443, "branch mispredictions": 327895, "branches": 20553601, "ITLB accesses": 26120611, "ITLB misses": 7531, "DTLB misses": 19097, "DTLB accesses": 35744928, "L1I cache accesses": 31819981, "L1I cache misses": 284493, "L1D cache misses": 486709, "L1D cache accesses": 33545755, "LL cache misses": 544742, "LL cache accesses": 558323, "L2D TLB accesses": 190574, "L2D TLB misses": 23746, "L2D cache misses": 305844, "L2D cache accesses": 1736964, "instructions per cycle": 1.5503544595725918, "branch miss rate": 0.015953165579111903, "ITLB miss rate": 0.00028831637973552763, "DTLB miss rate": 0.0005342576155140109, "L2D TLB miss rate": 0.12460251660772194, "L1I cache miss rate": 0.008940703012990485, "L1D cache miss rate": 0.014508810429218243, "L2D cache miss rate": 0.17607964241055082, "LL cache miss rate": 0.9756753707083534}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394991 queued and waiting for resources
|
|
||||||
srun: job 3394991 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3046, 0.0725, 0.4580, ..., 0.0593, 0.5121, 0.2116])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 3.6646029949188232 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
56.52 msec task-clock:u # 0.008 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,194 page-faults:u # 56.515 K/sec
|
|
||||||
58,074,747 cycles:u # 1.028 GHz (51.20%)
|
|
||||||
90,036,443 instructions:u # 1.55 insn per cycle (89.06%)
|
|
||||||
<not supported> branches:u
|
|
||||||
363,262 branch-misses:u
|
|
||||||
33,111,438 L1-dcache-loads:u # 585.875 M/sec
|
|
||||||
454,665 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,646,314 L1-icache-loads:u # 559.951 M/sec
|
|
||||||
281,443 L1-icache-load-misses:u # 0.89% of all L1-icache accesses
|
|
||||||
43,495,524 dTLB-loads:u # 769.611 M/sec (11.87%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
7.033463989 seconds time elapsed
|
|
||||||
|
|
||||||
34.670765000 seconds user
|
|
||||||
307.031553000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9700, 0.1728, 0.2199, ..., 0.6107, 0.3357, 0.2661])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 2.3380045890808105 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
327,895 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,553,601 BR_RETIRED:u
|
|
||||||
|
|
||||||
5.895917276 seconds time elapsed
|
|
||||||
|
|
||||||
31.121063000 seconds user
|
|
||||||
208.127447000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9533, 0.7568, 0.8141, ..., 0.8395, 0.5617, 0.7830])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 4.476518869400024 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
26,120,611 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
7,531 ITLB_WALK:u
|
|
||||||
19,097 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,744,928 L1D_TLB:u
|
|
||||||
|
|
||||||
8.109622410 seconds time elapsed
|
|
||||||
|
|
||||||
38.467161000 seconds user
|
|
||||||
370.437915000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6886, 0.7814, 0.9957, ..., 0.8460, 0.1015, 0.8097])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 2.856834888458252 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
31,819,981 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
284,493 L1I_CACHE_REFILL:u
|
|
||||||
486,709 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,545,755 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.374371632 seconds time elapsed
|
|
||||||
|
|
||||||
30.817943000 seconds user
|
|
||||||
247.363843000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8464, 0.0437, 0.1230, ..., 0.6221, 0.9268, 0.5436])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 4.838747978210449 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
544,742 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
558,323 LL_CACHE_RD:u
|
|
||||||
190,574 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,746 L2D_TLB_REFILL:u
|
|
||||||
305,844 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,736,964 L2D_CACHE:u
|
|
||||||
|
|
||||||
8.386896120 seconds time elapsed
|
|
||||||
|
|
||||||
39.861141000 seconds user
|
|
||||||
395.959334000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.2, 31.56, 31.56, 30.84, 24.52, 23.2, 21.32, 20.76, 20.84, 20.84], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 3.077709913253784, "power": [27.76, 28.28, 28.44, 28.28, 25.16, 30.44, 30.6], "power_after": [21.08, 20.88, 20.68, 20.68, 20.6, 20.56, 20.68, 20.8, 20.96, 21.24], "task clock (msec)": 64.49, "page faults": 3473, "cycles": 42783607, "instructions": 84598454, "branch mispredictions": 331326, "branches": 20438455, "ITLB accesses": 26869742, "ITLB misses": 6302, "DTLB misses": 14926, "DTLB accesses": 36876841, "L1I cache accesses": 31664385, "L1I cache misses": 301678, "L1D cache misses": 493536, "L1D cache accesses": 33219437, "LL cache misses": 552180, "LL cache accesses": 564990, "L2D TLB accesses": 167824, "L2D TLB misses": 19594, "L2D cache misses": 304114, "L2D cache accesses": 1716370, "instructions per cycle": 1.977356747877756, "branch miss rate": 0.01621091222404042, "ITLB miss rate": 0.00023453890997539165, "DTLB miss rate": 0.00040475267390718204, "L2D TLB miss rate": 0.11675326532557918, "L1I cache miss rate": 0.009527360155581737, "L1D cache miss rate": 0.014856844202386693, "L2D cache miss rate": 0.17718440662561102, "LL cache miss rate": 0.9773270323368555}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394984 queued and waiting for resources
|
|
||||||
srun: job 3394984 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5906, 0.9651, 0.2033, ..., 0.2175, 0.4484, 0.0412])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 3.107008934020996 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
64.49 msec task-clock:u # 0.010 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,473 page-faults:u # 53.852 K/sec
|
|
||||||
42,783,607 cycles:u # 0.663 GHz (37.27%)
|
|
||||||
84,598,454 instructions:u # 1.98 insn per cycle (73.53%)
|
|
||||||
<not supported> branches:u
|
|
||||||
353,558 branch-misses:u (89.57%)
|
|
||||||
33,192,964 L1-dcache-loads:u # 514.689 M/sec
|
|
||||||
466,217 L1-dcache-load-misses:u # 1.40% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,727,502 L1-icache-loads:u # 491.965 M/sec
|
|
||||||
292,570 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
|
||||||
38,623,737 dTLB-loads:u # 598.898 M/sec (34.88%)
|
|
||||||
124,174 dTLB-load-misses:u # 0.32% of all dTLB cache accesses (14.74%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.612563197 seconds time elapsed
|
|
||||||
|
|
||||||
18.114584000 seconds user
|
|
||||||
29.808542000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6092, 0.5511, 0.6052, ..., 0.8002, 0.0295, 0.2972])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9385879039764404 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
331,326 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,438,455 BR_RETIRED:u
|
|
||||||
|
|
||||||
6.446731410 seconds time elapsed
|
|
||||||
|
|
||||||
17.939571000 seconds user
|
|
||||||
33.272929000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3348, 0.2974, 0.2569, ..., 0.2397, 0.1965, 0.5651])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.972891330718994 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
26,869,742 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,302 ITLB_WALK:u
|
|
||||||
14,926 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,876,841 L1D_TLB:u
|
|
||||||
|
|
||||||
6.376775396 seconds time elapsed
|
|
||||||
|
|
||||||
17.836418000 seconds user
|
|
||||||
29.830135000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7889, 0.7395, 0.6553, ..., 0.3938, 0.2478, 0.7923])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9658284187316895 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
31,664,385 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
301,678 L1I_CACHE_REFILL:u
|
|
||||||
493,536 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,219,437 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.559158078 seconds time elapsed
|
|
||||||
|
|
||||||
19.008146000 seconds user
|
|
||||||
38.233666000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1256, 0.1417, 0.9800, ..., 0.2509, 0.8121, 0.6210])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9228267669677734 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
552,180 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
564,990 LL_CACHE_RD:u
|
|
||||||
167,824 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
19,594 L2D_TLB_REFILL:u
|
|
||||||
304,114 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,716,370 L2D_CACHE:u
|
|
||||||
|
|
||||||
6.135787277 seconds time elapsed
|
|
||||||
|
|
||||||
18.029630000 seconds user
|
|
||||||
28.723217000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [29.88, 23.64, 23.08, 21.84, 21.4, 21.2, 21.0, 21.0, 21.16, 21.0], "matrix": "soc-sign-Slashdot090216", "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 19.113287687301636, "power": [81.08, 81.56, 71.96, 60.52, 47.16, 48.4, 53.84, 53.84, 67.4, 82.64, 90.8, 89.16, 87.96, 85.76, 84.64, 84.04, 83.64, 84.68, 84.88, 84.88, 84.64, 84.04, 83.6], "power_after": [20.72, 20.6, 20.68, 20.88, 21.2, 21.28, 21.28, 21.48, 21.56, 21.36], "task clock (msec)": 67.66, "page faults": 3317, "cycles": 41915850, "instructions": 84471787, "branch mispredictions": 344452, "branches": 20610765, "ITLB accesses": 27276117, "ITLB misses": 6358, "DTLB misses": 17361, "DTLB accesses": 36565837, "L1I cache accesses": 32022662, "L1I cache misses": 293044, "L1D cache misses": 458939, "L1D cache accesses": 33505164, "LL cache misses": 553814, "LL cache accesses": 567372, "L2D TLB accesses": 199301, "L2D TLB misses": 25193, "L2D cache misses": 313278, "L2D cache accesses": 1796299, "instructions per cycle": 2.015270762730566, "branch miss rate": 0.016712237512775483, "ITLB miss rate": 0.00023309769495416082, "DTLB miss rate": 0.0004747874361524939, "L2D TLB miss rate": 0.12640679173712124, "L1I cache miss rate": 0.009151144274014446, "L1D cache miss rate": 0.01369756017311242, "L2D cache miss rate": 0.17440192306514674, "LL cache miss rate": 0.97610386131145}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394981 queued and waiting for resources
|
|
||||||
srun: job 3394981 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|
||||||
545671]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6780, 0.5234, 0.1205, ..., 0.2995, 0.6275, 0.1399])
|
|
||||||
Matrix: soc-sign-Slashdot090216
|
|
||||||
Shape: torch.Size([81871, 81871])
|
|
||||||
NNZ: 545671
|
|
||||||
Density: 8.140867447881048e-05
|
|
||||||
Time: 30.653191089630127 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
|
||||||
|
|
||||||
67.66 msec task-clock:u # 0.002 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,317 page-faults:u # 49.022 K/sec
|
|
||||||
41,915,850 cycles:u # 0.619 GHz (57.88%)
|
|
||||||
84,471,787 instructions:u # 2.02 insn per cycle (88.19%)
|
|
||||||
<not supported> branches:u
|
|
||||||
375,016 branch-misses:u
|
|
||||||
32,438,527 L1-dcache-loads:u # 479.407 M/sec
|
|
||||||
499,618 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,998,693 L1-icache-loads:u # 458.127 M/sec
|
|
||||||
306,445 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
|
|
||||||
34,294,934 dTLB-loads:u # 506.842 M/sec (18.86%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
34.340632995 seconds time elapsed
|
|
||||||
|
|
||||||
149.743244000 seconds user
|
|
||||||
2355.852109000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|
||||||
545671]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9875, 0.2031, 0.7260, ..., 0.5908, 0.1575, 0.7971])
|
|
||||||
Matrix: soc-sign-Slashdot090216
|
|
||||||
Shape: torch.Size([81871, 81871])
|
|
||||||
NNZ: 545671
|
|
||||||
Density: 8.140867447881048e-05
|
|
||||||
Time: 13.671181440353394 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
|
||||||
|
|
||||||
344,452 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,610,765 BR_RETIRED:u
|
|
||||||
|
|
||||||
17.331425967 seconds time elapsed
|
|
||||||
|
|
||||||
83.136180000 seconds user
|
|
||||||
1069.027469000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|
||||||
545671]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2046, 0.3645, 0.7960, ..., 0.6490, 0.4098, 0.5342])
|
|
||||||
Matrix: soc-sign-Slashdot090216
|
|
||||||
Shape: torch.Size([81871, 81871])
|
|
||||||
NNZ: 545671
|
|
||||||
Density: 8.140867447881048e-05
|
|
||||||
Time: 19.569235801696777 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
|
||||||
|
|
||||||
27,276,117 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,358 ITLB_WALK:u
|
|
||||||
17,361 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,565,837 L1D_TLB:u
|
|
||||||
|
|
||||||
23.323243037 seconds time elapsed
|
|
||||||
|
|
||||||
108.830923000 seconds user
|
|
||||||
1521.834565000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|
||||||
545671]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4164, 0.2188, 0.5460, ..., 0.1057, 0.5277, 0.0624])
|
|
||||||
Matrix: soc-sign-Slashdot090216
|
|
||||||
Shape: torch.Size([81871, 81871])
|
|
||||||
NNZ: 545671
|
|
||||||
Density: 8.140867447881048e-05
|
|
||||||
Time: 26.337355375289917 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
|
||||||
|
|
||||||
32,022,662 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
293,044 L1I_CACHE_REFILL:u
|
|
||||||
458,939 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,505,164 L1D_CACHE:u
|
|
||||||
|
|
||||||
30.017812847 seconds time elapsed
|
|
||||||
|
|
||||||
131.976276000 seconds user
|
|
||||||
2029.636174000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|
||||||
545671]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7679, 0.9196, 0.3474, ..., 0.5624, 0.0163, 0.8596])
|
|
||||||
Matrix: soc-sign-Slashdot090216
|
|
||||||
Shape: torch.Size([81871, 81871])
|
|
||||||
NNZ: 545671
|
|
||||||
Density: 8.140867447881048e-05
|
|
||||||
Time: 29.926054000854492 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
|
||||||
|
|
||||||
553,814 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
567,372 LL_CACHE_RD:u
|
|
||||||
199,301 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,193 L2D_TLB_REFILL:u
|
|
||||||
313,278 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,796,299 L2D_CACHE:u
|
|
||||||
|
|
||||||
33.553779692 seconds time elapsed
|
|
||||||
|
|
||||||
154.498461000 seconds user
|
|
||||||
2293.574463000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.92, 21.84, 20.96, 20.24, 20.28, 20.16, 19.96, 19.72, 19.88, 19.76], "matrix": "soc-sign-Slashdot090221", "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 18.79910135269165, "power": [80.48, 80.08, 69.04, 69.04, 55.0, 46.8, 49.16, 56.2, 70.84, 82.84, 86.52, 84.28, 82.56, 81.2, 80.28, 80.28, 80.04, 80.16, 80.8, 81.0, 81.92, 83.04, 82.88], "power_after": [21.0, 20.96, 21.12, 20.76, 20.72, 20.56, 20.52, 20.64, 20.88, 21.04], "task clock (msec)": 58.57, "page faults": 3259, "cycles": 74509373, "instructions": 88672751, "branch mispredictions": 342121, "branches": 20436338, "ITLB accesses": 27189335, "ITLB misses": 6437, "DTLB misses": 18156, "DTLB accesses": 36676625, "L1I cache accesses": 30721032, "L1I cache misses": 302777, "L1D cache misses": 469833, "L1D cache accesses": 32109077, "LL cache misses": 551850, "LL cache accesses": 565355, "L2D TLB accesses": 200417, "L2D TLB misses": 25536, "L2D cache misses": 304133, "L2D cache accesses": 1801849, "instructions per cycle": 1.190088540941017, "branch miss rate": 0.016740817263836603, "ITLB miss rate": 0.0002367472393127673, "DTLB miss rate": 0.0004950291909356436, "L2D TLB miss rate": 0.12741434109880898, "L1I cache miss rate": 0.009855691045795596, "L1D cache miss rate": 0.014632404413244267, "L2D cache miss rate": 0.16878939356183564, "LL cache miss rate": 0.9761123541845389}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394979 queued and waiting for resources
|
|
||||||
srun: job 3394979 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|
||||||
549202]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4201, 0.7748, 0.6565, ..., 0.0517, 0.6958, 0.5341])
|
|
||||||
Matrix: soc-sign-Slashdot090221
|
|
||||||
Shape: torch.Size([82144, 82144])
|
|
||||||
NNZ: 549202
|
|
||||||
Density: 8.13917555860553e-05
|
|
||||||
Time: 27.35153603553772 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
|
||||||
|
|
||||||
58.57 msec task-clock:u # 0.002 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,259 page-faults:u # 55.640 K/sec
|
|
||||||
74,509,373 cycles:u # 1.272 GHz (58.00%)
|
|
||||||
88,672,751 instructions:u # 1.19 insn per cycle (90.97%)
|
|
||||||
<not supported> branches:u
|
|
||||||
361,568 branch-misses:u
|
|
||||||
31,594,797 L1-dcache-loads:u # 539.410 M/sec
|
|
||||||
460,467 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,148,838 L1-icache-loads:u # 514.724 M/sec
|
|
||||||
282,768 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
19,757,856 dTLB-loads:u # 337.321 M/sec (11.69%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
31.087250856 seconds time elapsed
|
|
||||||
|
|
||||||
142.716222000 seconds user
|
|
||||||
2102.420776000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|
||||||
549202]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7637, 0.5328, 0.8286, ..., 0.7084, 0.8903, 0.1707])
|
|
||||||
Matrix: soc-sign-Slashdot090221
|
|
||||||
Shape: torch.Size([82144, 82144])
|
|
||||||
NNZ: 549202
|
|
||||||
Density: 8.13917555860553e-05
|
|
||||||
Time: 17.188836336135864 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
|
||||||
|
|
||||||
342,121 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,436,338 BR_RETIRED:u
|
|
||||||
|
|
||||||
20.753346873 seconds time elapsed
|
|
||||||
|
|
||||||
98.605331000 seconds user
|
|
||||||
1332.291974000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|
||||||
549202]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9017, 0.8505, 0.0023, ..., 0.4182, 0.6895, 0.5023])
|
|
||||||
Matrix: soc-sign-Slashdot090221
|
|
||||||
Shape: torch.Size([82144, 82144])
|
|
||||||
NNZ: 549202
|
|
||||||
Density: 8.13917555860553e-05
|
|
||||||
Time: 16.22375249862671 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
|
||||||
|
|
||||||
27,189,335 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,437 ITLB_WALK:u
|
|
||||||
18,156 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,676,625 L1D_TLB:u
|
|
||||||
|
|
||||||
19.748749363 seconds time elapsed
|
|
||||||
|
|
||||||
103.049578000 seconds user
|
|
||||||
1249.814927000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|
||||||
549202]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4805, 0.2325, 0.2103, ..., 0.1710, 0.7638, 0.9368])
|
|
||||||
Matrix: soc-sign-Slashdot090221
|
|
||||||
Shape: torch.Size([82144, 82144])
|
|
||||||
NNZ: 549202
|
|
||||||
Density: 8.13917555860553e-05
|
|
||||||
Time: 15.453373908996582 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
|
||||||
|
|
||||||
30,721,032 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,777 L1I_CACHE_REFILL:u
|
|
||||||
469,833 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,109,077 L1D_CACHE:u
|
|
||||||
|
|
||||||
19.090250444 seconds time elapsed
|
|
||||||
|
|
||||||
94.904880000 seconds user
|
|
||||||
1195.102767000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|
||||||
549202]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8430, 0.9439, 0.4260, ..., 0.8172, 0.4243, 0.3834])
|
|
||||||
Matrix: soc-sign-Slashdot090221
|
|
||||||
Shape: torch.Size([82144, 82144])
|
|
||||||
NNZ: 549202
|
|
||||||
Density: 8.13917555860553e-05
|
|
||||||
Time: 29.316507816314697 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
|
||||||
|
|
||||||
551,850 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
565,355 LL_CACHE_RD:u
|
|
||||||
200,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,536 L2D_TLB_REFILL:u
|
|
||||||
304,133 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,801,849 L2D_CACHE:u
|
|
||||||
|
|
||||||
32.859276963 seconds time elapsed
|
|
||||||
|
|
||||||
148.969816000 seconds user
|
|
||||||
2252.321936000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.32, 20.52, 20.52, 20.56, 20.6, 20.4, 20.76, 20.6, 20.36, 20.4], "matrix": "soc-sign-epinions", "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 22.52380871772766, "power": [81.24, 81.16, 74.84, 62.04, 51.6, 50.56, 52.4, 52.4, 68.24, 80.56, 91.44, 91.36, 90.28, 88.32, 86.4, 85.16, 83.64, 82.36, 82.96, 82.84, 82.84, 82.56, 82.44, 82.08, 83.64, 84.4], "power_after": [20.8, 20.88, 20.8, 20.92, 20.88, 20.88, 20.8, 20.84, 20.84, 20.6], "task clock (msec)": 63.9, "page faults": 3446, "cycles": 55931043, "instructions": 77907356, "branch mispredictions": 332778, "branches": 20000746, "ITLB accesses": 27000304, "ITLB misses": 6713, "DTLB misses": 18689, "DTLB accesses": 36395663, "L1I cache accesses": 32396405, "L1I cache misses": 292629, "L1D cache misses": 473799, "L1D cache accesses": 34061981, "LL cache misses": 542765, "LL cache accesses": 557193, "L2D TLB accesses": 203626, "L2D TLB misses": 24363, "L2D cache misses": 303397, "L2D cache accesses": 1772084, "instructions per cycle": 1.3929179901043505, "branch miss rate": 0.01663827939217867, "ITLB miss rate": 0.00024862683027568875, "DTLB miss rate": 0.0005134952480464499, "L2D TLB miss rate": 0.11964582126054629, "L1I cache miss rate": 0.009032761505481858, "L1D cache miss rate": 0.01390990735389113, "L2D cache miss rate": 0.171209152613533, "LL cache miss rate": 0.9741059202107708}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394990 queued and waiting for resources
|
|
||||||
srun: job 3394990 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|
||||||
841372]),
|
|
||||||
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
|
|
||||||
7714]),
|
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3914, 0.2076, 0.6733, ..., 0.4758, 0.6360, 0.6316])
|
|
||||||
Matrix: soc-sign-epinions
|
|
||||||
Shape: torch.Size([131828, 131828])
|
|
||||||
NNZ: 841372
|
|
||||||
Density: 4.841419648464106e-05
|
|
||||||
Time: 20.04187798500061 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
|
||||||
|
|
||||||
63.90 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,446 page-faults:u # 53.927 K/sec
|
|
||||||
55,931,043 cycles:u # 0.875 GHz (85.43%)
|
|
||||||
77,907,356 instructions:u # 1.39 insn per cycle
|
|
||||||
<not supported> branches:u
|
|
||||||
357,739 branch-misses:u
|
|
||||||
33,000,188 L1-dcache-loads:u # 516.421 M/sec
|
|
||||||
466,824 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,503,048 L1-icache-loads:u # 492.992 M/sec
|
|
||||||
301,112 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
|
||||||
34,740,872 dTLB-loads:u # 543.661 M/sec (18.37%)
|
|
||||||
32,355 dTLB-load-misses:u # 0.09% of all dTLB cache accesses (12.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
23.478083368 seconds time elapsed
|
|
||||||
|
|
||||||
119.232326000 seconds user
|
|
||||||
1541.081607000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|
||||||
841372]),
|
|
||||||
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
|
|
||||||
7714]),
|
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3970, 0.5643, 0.0036, ..., 0.0338, 0.0807, 0.3885])
|
|
||||||
Matrix: soc-sign-epinions
|
|
||||||
Shape: torch.Size([131828, 131828])
|
|
||||||
NNZ: 841372
|
|
||||||
Density: 4.841419648464106e-05
|
|
||||||
Time: 16.115705490112305 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
|
||||||
|
|
||||||
332,778 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,000,746 BR_RETIRED:u
|
|
||||||
|
|
||||||
19.765627973 seconds time elapsed
|
|
||||||
|
|
||||||
103.591961000 seconds user
|
|
||||||
1250.845091000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|
||||||
841372]),
|
|
||||||
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
|
|
||||||
7714]),
|
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0049, 0.4550, 0.3166, ..., 0.3734, 0.8337, 0.5156])
|
|
||||||
Matrix: soc-sign-epinions
|
|
||||||
Shape: torch.Size([131828, 131828])
|
|
||||||
NNZ: 841372
|
|
||||||
Density: 4.841419648464106e-05
|
|
||||||
Time: 18.55180263519287 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
|
||||||
|
|
||||||
27,000,304 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,713 ITLB_WALK:u
|
|
||||||
18,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,395,663 L1D_TLB:u
|
|
||||||
|
|
||||||
22.333459337 seconds time elapsed
|
|
||||||
|
|
||||||
109.075160000 seconds user
|
|
||||||
1441.055730000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|
||||||
841372]),
|
|
||||||
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
|
|
||||||
7714]),
|
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0560, 0.8530, 0.8946, ..., 0.4591, 0.5391, 0.2898])
|
|
||||||
Matrix: soc-sign-epinions
|
|
||||||
Shape: torch.Size([131828, 131828])
|
|
||||||
NNZ: 841372
|
|
||||||
Density: 4.841419648464106e-05
|
|
||||||
Time: 25.587534427642822 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
|
||||||
|
|
||||||
32,396,405 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
292,629 L1I_CACHE_REFILL:u
|
|
||||||
473,799 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
34,061,981 L1D_CACHE:u
|
|
||||||
|
|
||||||
29.367381835 seconds time elapsed
|
|
||||||
|
|
||||||
142.233743000 seconds user
|
|
||||||
1962.747683000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|
||||||
841372]),
|
|
||||||
col_indices=tensor([ 1, 128552, 3, ..., 131824, 131826,
|
|
||||||
7714]),
|
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7002, 0.7829, 0.1511, ..., 0.3651, 0.2391, 0.7788])
|
|
||||||
Matrix: soc-sign-epinions
|
|
||||||
Shape: torch.Size([131828, 131828])
|
|
||||||
NNZ: 841372
|
|
||||||
Density: 4.841419648464106e-05
|
|
||||||
Time: 23.656178951263428 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
|
||||||
|
|
||||||
542,765 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
557,193 LL_CACHE_RD:u
|
|
||||||
203,626 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,363 L2D_TLB_REFILL:u
|
|
||||||
303,397 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,772,084 L2D_CACHE:u
|
|
||||||
|
|
||||||
27.453055481 seconds time elapsed
|
|
||||||
|
|
||||||
128.709934000 seconds user
|
|
||||||
1831.887905000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.28, 16.44, 16.68, 16.68, 16.84, 17.04, 16.84, 16.84, 16.72, 16.72], "matrix": "sx-mathoverflow", "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 5.405760288238525, "power": [25.64, 20.44, 21.24, 22.16, 22.28, 27.04, 26.92, 26.28, 25.32], "power_after": [16.32, 16.44, 16.4, 16.4, 16.6, 16.48, 16.56, 16.6, 16.32, 16.44], "task clock (msec)": 50.36, "page faults": 3296, "cycles": 56049457, "instructions": 72333565, "branch mispredictions": 325529, "branches": 19463406, "ITLB accesses": 27374917, "ITLB misses": 5203, "DTLB misses": 16771, "DTLB accesses": 36373182, "L1I cache accesses": 31839975, "L1I cache misses": 274158, "L1D cache misses": 471992, "L1D cache accesses": 33638817, "LL cache misses": 538067, "LL cache accesses": 557981, "L2D TLB accesses": 170169, "L2D TLB misses": 21987, "L2D cache misses": 301746, "L2D cache accesses": 1735872, "instructions per cycle": 1.2905310572411077, "branch miss rate": 0.016725181604905125, "ITLB miss rate": 0.00019006450320927, "DTLB miss rate": 0.00046108146381034247, "L2D TLB miss rate": 0.12920684731061474, "L1I cache miss rate": 0.00861049671050307, "L1D cache miss rate": 0.014031171191305569, "L2D cache miss rate": 0.1738296372082734, "LL cache miss rate": 0.9643106127269566}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394987 queued and waiting for resources
|
|
||||||
srun: job 3394987 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8864, 0.5637, 0.9805, ..., 0.0234, 0.9487, 0.4860])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 5.484489917755127 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
|
||||||
|
|
||||||
50.36 msec task-clock:u # 0.006 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,296 page-faults:u # 65.452 K/sec
|
|
||||||
56,049,457 cycles:u # 1.113 GHz (49.66%)
|
|
||||||
72,333,565 instructions:u # 1.29 insn per cycle (66.35%)
|
|
||||||
<not supported> branches:u
|
|
||||||
369,218 branch-misses:u (86.12%)
|
|
||||||
33,730,437 L1-dcache-loads:u # 669.814 M/sec (93.88%)
|
|
||||||
459,922 L1-dcache-load-misses:u # 1.36% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,827,672 L1-icache-loads:u # 632.030 M/sec
|
|
||||||
295,060 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
|
||||||
54,366,618 dTLB-loads:u # 1.080 G/sec (35.64%)
|
|
||||||
84,768 dTLB-load-misses:u # 0.16% of all dTLB cache accesses (25.48%)
|
|
||||||
12,107,953 iTLB-loads:u # 240.438 M/sec (10.11%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
8.968532171 seconds time elapsed
|
|
||||||
|
|
||||||
20.749643000 seconds user
|
|
||||||
28.745486000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5549, 0.0336, 0.9472, ..., 0.2657, 0.3394, 0.6185])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 5.532417297363281 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
|
||||||
|
|
||||||
325,529 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,463,406 BR_RETIRED:u
|
|
||||||
|
|
||||||
8.912497962 seconds time elapsed
|
|
||||||
|
|
||||||
20.214519000 seconds user
|
|
||||||
31.566513000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3330, 0.8843, 0.5150, ..., 0.7292, 0.0873, 0.4184])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 5.457342863082886 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
|
||||||
|
|
||||||
27,374,917 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,203 ITLB_WALK:u
|
|
||||||
16,771 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,373,182 L1D_TLB:u
|
|
||||||
|
|
||||||
8.730534933 seconds time elapsed
|
|
||||||
|
|
||||||
20.156482000 seconds user
|
|
||||||
31.426118000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5864, 0.4449, 0.4042, ..., 0.1651, 0.7793, 0.8302])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 5.449937582015991 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
|
||||||
|
|
||||||
31,839,975 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
274,158 L1I_CACHE_REFILL:u
|
|
||||||
471,992 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,638,817 L1D_CACHE:u
|
|
||||||
|
|
||||||
8.845491835 seconds time elapsed
|
|
||||||
|
|
||||||
20.577696000 seconds user
|
|
||||||
35.105662000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8880, 0.4700, 0.5542, ..., 0.8505, 0.9123, 0.5742])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 5.400304794311523 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
|
||||||
|
|
||||||
538,067 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
557,981 LL_CACHE_RD:u
|
|
||||||
170,169 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
21,987 L2D_TLB_REFILL:u
|
|
||||||
301,746 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,735,872 L2D_CACHE:u
|
|
||||||
|
|
||||||
8.606800178 seconds time elapsed
|
|
||||||
|
|
||||||
21.064990000 seconds user
|
|
||||||
34.158762000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.36, 20.4, 20.68, 20.64, 20.92, 20.92, 20.88, 20.68, 20.68, 20.6], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 11.10523509979248, "power": [90.68, 90.68, 88.24, 72.2, 59.48, 52.0, 54.72, 64.28, 79.24, 94.08, 96.24, 93.72, 92.36, 92.36, 90.08], "power_after": [21.24, 21.28, 20.96, 21.16, 20.92, 21.04, 21.32, 21.56, 21.16, 21.24], "task clock (msec)": 52.22, "page faults": 3288, "cycles": 67463873, "instructions": 73042754, "branch mispredictions": 344635, "branches": 20775821, "ITLB accesses": 27488750, "ITLB misses": 6494, "DTLB misses": 18293, "DTLB accesses": 36697113, "L1I cache accesses": 31066176, "L1I cache misses": 298652, "L1D cache misses": 473808, "L1D cache accesses": 32572985, "LL cache misses": 547428, "LL cache accesses": 566356, "L2D TLB accesses": 162858, "L2D TLB misses": 19852, "L2D cache misses": 304056, "L2D cache accesses": 1713420, "instructions per cycle": 1.0826943481291091, "branch miss rate": 0.01658827345499367, "ITLB miss rate": 0.00023624209904051657, "DTLB miss rate": 0.0004984860798177775, "L2D TLB miss rate": 0.12189760404769799, "L1I cache miss rate": 0.009613413636747567, "L1D cache miss rate": 0.014546041758223879, "L2D cache miss rate": 0.17745561508561825, "LL cache miss rate": 0.9665793246650517}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394993 queued and waiting for resources
|
|
||||||
srun: job 3394993 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6983, 0.2845, 0.5984, ..., 0.1182, 0.9468, 0.3161])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 8.604448795318604 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
52.22 msec task-clock:u # 0.004 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,288 page-faults:u # 62.965 K/sec
|
|
||||||
67,463,873 cycles:u # 1.292 GHz (52.95%)
|
|
||||||
73,042,754 instructions:u # 1.08 insn per cycle (71.78%)
|
|
||||||
<not supported> branches:u
|
|
||||||
376,297 branch-misses:u (87.57%)
|
|
||||||
34,189,906 L1-dcache-loads:u # 654.731 M/sec (97.72%)
|
|
||||||
471,636 L1-dcache-load-misses:u # 1.38% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,870,328 L1-icache-loads:u # 610.312 M/sec
|
|
||||||
297,680 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
|
||||||
57,623,823 dTLB-loads:u # 1.103 G/sec (30.16%)
|
|
||||||
75,454 dTLB-load-misses:u # 0.13% of all dTLB cache accesses (24.31%)
|
|
||||||
0 iTLB-loads:u # 0.000 /sec (3.96%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
12.112100803 seconds time elapsed
|
|
||||||
|
|
||||||
66.253313000 seconds user
|
|
||||||
675.855469000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.0260, 0.8569, 0.4315, ..., 0.5243, 0.8018, 0.1763])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 8.702903270721436 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
344,635 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,775,821 BR_RETIRED:u
|
|
||||||
|
|
||||||
12.383096073 seconds time elapsed
|
|
||||||
|
|
||||||
64.544546000 seconds user
|
|
||||||
688.477174000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.7940, 0.1585, 0.6879, ..., 0.4017, 0.1738, 0.9713])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 7.38647985458374 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
27,488,750 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,494 ITLB_WALK:u
|
|
||||||
18,293 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,697,113 L1D_TLB:u
|
|
||||||
|
|
||||||
10.936742446 seconds time elapsed
|
|
||||||
|
|
||||||
63.993242000 seconds user
|
|
||||||
580.515047000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.2725, 0.6578, 0.8180, ..., 0.0148, 0.5094, 0.1155])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 12.719107389450073 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
31,066,176 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
298,652 L1I_CACHE_REFILL:u
|
|
||||||
473,808 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,572,985 L1D_CACHE:u
|
|
||||||
|
|
||||||
16.299576479 seconds time elapsed
|
|
||||||
|
|
||||||
86.072431000 seconds user
|
|
||||||
987.199923000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.1156, 0.5715, 0.3099, ..., 0.3964, 0.9672, 0.5694])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 12.682909727096558 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
547,428 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
566,356 LL_CACHE_RD:u
|
|
||||||
162,858 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
19,852 L2D_TLB_REFILL:u
|
|
||||||
304,056 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,713,420 L2D_CACHE:u
|
|
||||||
|
|
||||||
16.221517033 seconds time elapsed
|
|
||||||
|
|
||||||
79.927661000 seconds user
|
|
||||||
988.333919000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.88, 20.76, 20.76, 20.96, 20.92, 20.88, 20.72, 20.4, 20.4, 20.24], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.6774682998657227, "power": [34.12, 31.52, 30.36, 27.2, 27.16, 30.64, 31.0, 31.32], "power_after": [20.44, 20.52, 20.68, 20.72, 20.68, 20.72, 20.88, 20.8, 20.88, 20.52], "task clock (msec)": 48.59, "page faults": 3274, "cycles": 55030923, "instructions": 78222423, "branch mispredictions": 323004, "branches": 19091130, "ITLB accesses": 27178617, "ITLB misses": 6398, "DTLB misses": 19770, "DTLB accesses": 36355567, "L1I cache accesses": 31341858, "L1I cache misses": 291951, "L1D cache misses": 468242, "L1D cache accesses": 32805413, "LL cache misses": 520057, "LL cache accesses": 541186, "L2D TLB accesses": 191068, "L2D TLB misses": 22725, "L2D cache misses": 288895, "L2D cache accesses": 1728320, "instructions per cycle": 1.4214266949511278, "branch miss rate": 0.01691906136514706, "ITLB miss rate": 0.00023540564996371965, "DTLB miss rate": 0.0005437956723381593, "L2D TLB miss rate": 0.11893671363074926, "L1I cache miss rate": 0.009315050817982775, "L1D cache miss rate": 0.014273315199537345, "L2D cache miss rate": 0.16715365210146269, "LL cache miss rate": 0.9609579700879181}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3394988 queued and waiting for resources
|
|
||||||
srun: job 3394988 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2022, 0.3400, 0.2561, ..., 0.8370, 0.0285, 0.6506])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.74875545501709 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
48.59 msec task-clock:u # 0.007 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,274 page-faults:u # 67.376 K/sec
|
|
||||||
55,030,923 cycles:u # 1.132 GHz (65.54%)
|
|
||||||
78,222,423 instructions:u # 1.42 insn per cycle (83.60%)
|
|
||||||
<not supported> branches:u
|
|
||||||
369,917 branch-misses:u
|
|
||||||
32,435,815 L1-dcache-loads:u # 667.500 M/sec
|
|
||||||
467,963 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,013,287 L1-icache-loads:u # 638.226 M/sec
|
|
||||||
289,982 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
60,644,978 dTLB-loads:u # 1.248 G/sec (17.29%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.978143797 seconds time elapsed
|
|
||||||
|
|
||||||
18.401752000 seconds user
|
|
||||||
28.060858000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3381, 0.0423, 0.5363, ..., 0.0429, 0.4077, 0.4744])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.7925527095794678 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
323,004 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,091,130 BR_RETIRED:u
|
|
||||||
|
|
||||||
7.233250772 seconds time elapsed
|
|
||||||
|
|
||||||
19.111768000 seconds user
|
|
||||||
32.178633000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7962, 0.6492, 0.2778, ..., 0.5407, 0.1159, 0.3587])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.668635129928589 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
27,178,617 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,398 ITLB_WALK:u
|
|
||||||
19,770 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,355,567 L1D_TLB:u
|
|
||||||
|
|
||||||
6.925944164 seconds time elapsed
|
|
||||||
|
|
||||||
18.970654000 seconds user
|
|
||||||
30.786317000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8340, 0.3434, 0.3449, ..., 0.9828, 0.6683, 0.0312])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.623232126235962 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
31,341,858 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
291,951 L1I_CACHE_REFILL:u
|
|
||||||
468,242 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,805,413 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.941260499 seconds time elapsed
|
|
||||||
|
|
||||||
18.410270000 seconds user
|
|
||||||
27.908787000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2754, 0.3661, 0.9484, ..., 0.7285, 0.5354, 0.4116])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.7337992191314697 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
520,057 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
541,186 LL_CACHE_RD:u
|
|
||||||
191,068 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
22,725 L2D_TLB_REFILL:u
|
|
||||||
288,895 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,728,320 L2D_CACHE:u
|
|
||||||
|
|
||||||
7.164825085 seconds time elapsed
|
|
||||||
|
|
||||||
18.193885000 seconds user
|
|
||||||
30.023194000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [80.64, 75.2, 61.4, 49.84, 38.12, 38.12, 24.16, 22.68, 22.36, 22.2], "matrix": "ASIC_680k", "shape": [682862, 682862], "nnz": 3871773, "% density": 8.303171256088674e-06, "time_s": 41.51614689826965, "power": [92.4, 92.44, 84.28, 73.04, 59.64, 53.28, 56.32, 65.88, 80.28, 93.76, 96.16, 94.44, 94.44, 94.2, 93.92, 92.48, 92.16, 91.84, 92.08, 91.84, 91.68, 90.68, 90.88, 90.28, 90.28, 92.44, 92.52, 92.84, 90.0, 89.64, 88.16, 87.28, 88.12, 88.24, 88.08, 85.72, 85.12, 85.12, 81.72, 82.52, 83.84, 86.32, 88.8, 91.0, 90.2], "power_after": [21.92, 21.88, 21.92, 21.88, 21.88, 21.72, 21.72, 21.72, 21.72, 21.44], "task clock (msec)": 55.74, "page faults": 3266, "cycles": 51085608, "instructions": 88049969, "branch mispredictions": 332704, "branches": 20219525, "ITLB accesses": 27856157, "ITLB misses": 6496, "DTLB misses": 17046, "DTLB accesses": 37522360, "L1I cache accesses": 31475230, "L1I cache misses": 277921, "L1D cache misses": 462005, "L1D cache accesses": 33126938, "LL cache misses": 558923, "LL cache accesses": 571263, "L2D TLB accesses": 190627, "L2D TLB misses": 24234, "L2D cache misses": 314815, "L2D cache accesses": 1760110, "instructions per cycle": 1.7235768046452535, "branch miss rate": 0.01645459030318467, "ITLB miss rate": 0.00023319799640704206, "DTLB miss rate": 0.0004542891225392006, "L2D TLB miss rate": 0.12712784652751186, "L1I cache miss rate": 0.008829832220447635, "L1D cache miss rate": 0.013946504805243395, "L2D cache miss rate": 0.17886098027964162, "LL cache miss rate": 0.978398741035215}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395287 queued and waiting for resources
|
|
||||||
srun: job 3395287 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
|
|
||||||
3871770, 3871773]),
|
|
||||||
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
|
|
||||||
682861]),
|
|
||||||
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
|
|
||||||
0.0000e+00, 0.0000e+00, 7.9289e-02]),
|
|
||||||
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9283, 0.0381, 0.0668, ..., 0.8379, 0.4193, 0.2544])
|
|
||||||
Matrix: ASIC_680k
|
|
||||||
Shape: torch.Size([682862, 682862])
|
|
||||||
NNZ: 3871773
|
|
||||||
Density: 8.303171256088674e-06
|
|
||||||
Time: 29.317893266677856 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
|
|
||||||
|
|
||||||
55.74 msec task-clock:u # 0.002 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,266 page-faults:u # 58.589 K/sec
|
|
||||||
51,085,608 cycles:u # 0.916 GHz (47.05%)
|
|
||||||
88,049,969 instructions:u # 1.72 insn per cycle (92.14%)
|
|
||||||
<not supported> branches:u
|
|
||||||
360,079 branch-misses:u
|
|
||||||
31,381,953 L1-dcache-loads:u # 562.963 M/sec
|
|
||||||
471,072 L1-dcache-load-misses:u # 1.50% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
29,944,756 L1-icache-loads:u # 537.181 M/sec
|
|
||||||
283,203 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
20,217,238 dTLB-loads:u # 362.679 M/sec (11.38%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
33.488240295 seconds time elapsed
|
|
||||||
|
|
||||||
222.678572000 seconds user
|
|
||||||
2205.889153000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
|
|
||||||
3871770, 3871773]),
|
|
||||||
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
|
|
||||||
682861]),
|
|
||||||
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
|
|
||||||
0.0000e+00, 0.0000e+00, 7.9289e-02]),
|
|
||||||
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3482, 0.5546, 0.8398, ..., 0.6137, 0.0654, 0.9075])
|
|
||||||
Matrix: ASIC_680k
|
|
||||||
Shape: torch.Size([682862, 682862])
|
|
||||||
NNZ: 3871773
|
|
||||||
Density: 8.303171256088674e-06
|
|
||||||
Time: 38.4066903591156 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
|
|
||||||
|
|
||||||
332,704 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,219,525 BR_RETIRED:u
|
|
||||||
|
|
||||||
42.582064532 seconds time elapsed
|
|
||||||
|
|
||||||
238.965431000 seconds user
|
|
||||||
2914.615754000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
|
|
||||||
3871770, 3871773]),
|
|
||||||
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
|
|
||||||
682861]),
|
|
||||||
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
|
|
||||||
0.0000e+00, 0.0000e+00, 7.9289e-02]),
|
|
||||||
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2581, 0.2884, 0.9465, ..., 0.4833, 0.3421, 0.4862])
|
|
||||||
Matrix: ASIC_680k
|
|
||||||
Shape: torch.Size([682862, 682862])
|
|
||||||
NNZ: 3871773
|
|
||||||
Density: 8.303171256088674e-06
|
|
||||||
Time: 34.74818539619446 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
|
|
||||||
|
|
||||||
27,856,157 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,496 ITLB_WALK:u
|
|
||||||
17,046 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,522,360 L1D_TLB:u
|
|
||||||
|
|
||||||
39.019872270 seconds time elapsed
|
|
||||||
|
|
||||||
239.678206000 seconds user
|
|
||||||
2622.552757000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
|
|
||||||
3871770, 3871773]),
|
|
||||||
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
|
|
||||||
682861]),
|
|
||||||
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
|
|
||||||
0.0000e+00, 0.0000e+00, 7.9289e-02]),
|
|
||||||
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8603, 0.0423, 0.3724, ..., 0.4873, 0.6469, 0.9634])
|
|
||||||
Matrix: ASIC_680k
|
|
||||||
Shape: torch.Size([682862, 682862])
|
|
||||||
NNZ: 3871773
|
|
||||||
Density: 8.303171256088674e-06
|
|
||||||
Time: 33.05097770690918 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
|
|
||||||
|
|
||||||
31,475,230 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
277,921 L1I_CACHE_REFILL:u
|
|
||||||
462,005 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,126,938 L1D_CACHE:u
|
|
||||||
|
|
||||||
37.399374202 seconds time elapsed
|
|
||||||
|
|
||||||
239.238852000 seconds user
|
|
||||||
2492.385966000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
|
|
||||||
3871770, 3871773]),
|
|
||||||
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
|
|
||||||
682861]),
|
|
||||||
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
|
|
||||||
0.0000e+00, 0.0000e+00, 7.9289e-02]),
|
|
||||||
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1993, 0.2167, 0.6338, ..., 0.0614, 0.0230, 0.4851])
|
|
||||||
Matrix: ASIC_680k
|
|
||||||
Shape: torch.Size([682862, 682862])
|
|
||||||
NNZ: 3871773
|
|
||||||
Density: 8.303171256088674e-06
|
|
||||||
Time: 32.37103772163391 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
|
|
||||||
|
|
||||||
558,923 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
571,263 LL_CACHE_RD:u
|
|
||||||
190,627 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,234 L2D_TLB_REFILL:u
|
|
||||||
314,815 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,760,110 L2D_CACHE:u
|
|
||||||
|
|
||||||
36.644016288 seconds time elapsed
|
|
||||||
|
|
||||||
233.933818000 seconds user
|
|
||||||
2439.284669000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.08, 21.88, 21.88, 21.88, 21.56, 21.64, 21.84, 21.88, 21.72, 21.92], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.7533018589019775, "power": [29.48, 30.24, 27.96, 28.4, 26.84, 30.6, 30.92], "power_after": [20.84, 21.24, 21.2, 21.24, 21.28, 20.88, 20.68, 20.56, 20.52, 20.56], "task clock (msec)": 61.38, "page faults": 3315, "cycles": 65013274, "instructions": 87442627, "branch mispredictions": 328392, "branches": 19496396, "ITLB accesses": 28311619, "ITLB misses": 6963, "DTLB misses": 17888, "DTLB accesses": 38223408, "L1I cache accesses": 30063404, "L1I cache misses": 272797, "L1D cache misses": 468341, "L1D cache accesses": 31519623, "LL cache misses": 538689, "LL cache accesses": 552789, "L2D TLB accesses": 192995, "L2D TLB misses": 23339, "L2D cache misses": 300578, "L2D cache accesses": 1764035, "instructions per cycle": 1.344996515634638, "branch miss rate": 0.016843728451145536, "ITLB miss rate": 0.0002459414277933028, "DTLB miss rate": 0.00046798548156668814, "L2D TLB miss rate": 0.12093059405684085, "L1I cache miss rate": 0.009074055619250568, "L1D cache miss rate": 0.01485871198395996, "L2D cache miss rate": 0.17039231081015965, "LL cache miss rate": 0.9744929801425137}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395278 queued and waiting for resources
|
|
||||||
srun: job 3395278 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.3547, 0.6554, 0.2142, ..., 0.8854, 0.1041, 0.2243])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.74495267868042 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
61.38 msec task-clock:u # 0.010 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,315 page-faults:u # 54.008 K/sec
|
|
||||||
65,013,274 cycles:u # 1.059 GHz (90.47%)
|
|
||||||
87,442,627 instructions:u # 1.34 insn per cycle
|
|
||||||
<not supported> branches:u
|
|
||||||
369,052 branch-misses:u
|
|
||||||
31,570,549 L1-dcache-loads:u # 514.350 M/sec
|
|
||||||
477,402 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,354,192 L1-icache-loads:u # 494.533 M/sec
|
|
||||||
294,845 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
|
|
||||||
0 dTLB-loads:u # 0.000 /sec (3.92%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.232986287 seconds time elapsed
|
|
||||||
|
|
||||||
17.354331000 seconds user
|
|
||||||
29.036034000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.3177, 0.9122, 0.6465, ..., 0.5489, 0.2254, 0.7965])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.7603256702423096 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
328,392 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,496,396 BR_RETIRED:u
|
|
||||||
|
|
||||||
6.149991615 seconds time elapsed
|
|
||||||
|
|
||||||
17.630426000 seconds user
|
|
||||||
30.586756000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.7815, 0.6240, 0.3715, ..., 0.5116, 0.5969, 0.4241])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.7978765964508057 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
28,311,619 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,963 ITLB_WALK:u
|
|
||||||
17,888 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
38,223,408 L1D_TLB:u
|
|
||||||
|
|
||||||
6.151843492 seconds time elapsed
|
|
||||||
|
|
||||||
17.202045000 seconds user
|
|
||||||
28.014218000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9638, 0.0929, 0.0479, ..., 0.1500, 0.3117, 0.9664])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.684640884399414 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
30,063,404 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
272,797 L1I_CACHE_REFILL:u
|
|
||||||
468,341 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,519,623 L1D_CACHE:u
|
|
||||||
|
|
||||||
5.874324363 seconds time elapsed
|
|
||||||
|
|
||||||
17.629166000 seconds user
|
|
||||||
29.998701000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
|
||||||
116056]),
|
|
||||||
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
|
||||||
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
|
||||||
16949.]), size=(24115, 24115), nnz=116056,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.3936, 0.9167, 0.4396, ..., 0.1628, 0.6361, 0.1875])
|
|
||||||
Matrix: de2010
|
|
||||||
Shape: torch.Size([24115, 24115])
|
|
||||||
NNZ: 116056
|
|
||||||
Density: 0.0001995689928120616
|
|
||||||
Time: 2.747934103012085 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
|
||||||
|
|
||||||
538,689 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
552,789 LL_CACHE_RD:u
|
|
||||||
192,995 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,339 L2D_TLB_REFILL:u
|
|
||||||
300,578 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,764,035 L2D_CACHE:u
|
|
||||||
|
|
||||||
6.102012809 seconds time elapsed
|
|
||||||
|
|
||||||
18.001082000 seconds user
|
|
||||||
27.986033000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.72, 20.8, 20.96, 21.08, 21.4, 21.48, 21.48, 21.36, 21.08, 21.04], "matrix": "fl2010", "shape": [484481, 484481], "nnz": 2346294, "% density": 9.99606174861054e-06, "time_s": 14.43001127243042, "power": [93.04, 93.04, 89.16, 77.68, 62.92, 55.12, 53.84, 64.72, 77.04, 89.56, 94.4, 94.76, 93.52, 93.52, 96.04, 97.12, 96.44, 93.88, 93.72], "power_after": [21.08, 21.28, 21.28, 21.36, 21.08, 21.24, 21.08, 20.8, 21.04, 20.88], "task clock (msec)": 61.6, "page faults": 3276, "cycles": 41408849, "instructions": 49118917, "branch mispredictions": 331330, "branches": 19331189, "ITLB accesses": 27367982, "ITLB misses": 6160, "DTLB misses": 17157, "DTLB accesses": 36828216, "L1I cache accesses": 30147304, "L1I cache misses": 280082, "L1D cache misses": 454022, "L1D cache accesses": 31595140, "LL cache misses": 536056, "LL cache accesses": 550006, "L2D TLB accesses": 185998, "L2D TLB misses": 23735, "L2D cache misses": 296648, "L2D cache accesses": 1723525, "instructions per cycle": 1.1861937287848787, "branch miss rate": 0.017139659645353425, "ITLB miss rate": 0.00022508053388810325, "DTLB miss rate": 0.00046586562867992305, "L2D TLB miss rate": 0.12760889902041958, "L1I cache miss rate": 0.009290449321770198, "L1D cache miss rate": 0.014369994878959232, "L2D cache miss rate": 0.172117027603313, "LL cache miss rate": 0.97463664032756}
|
|
@ -1,169 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395283 queued and waiting for resources
|
|
||||||
srun: job 3395283 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
|
|
||||||
2346292, 2346294]),
|
|
||||||
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
|
|
||||||
484022]),
|
|
||||||
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
|
|
||||||
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
|
|
||||||
tensor([2.0367e-04, 1.7661e-01, 2.1772e-01, ..., 1.8646e-01, 2.2210e-01,
|
|
||||||
4.2364e-02])
|
|
||||||
Matrix: fl2010
|
|
||||||
Shape: torch.Size([484481, 484481])
|
|
||||||
NNZ: 2346294
|
|
||||||
Density: 9.99606174861054e-06
|
|
||||||
Time: 16.31556534767151 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
|
|
||||||
|
|
||||||
61.60 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,276 page-faults:u # 53.185 K/sec
|
|
||||||
41,408,849 cycles:u # 0.672 GHz (41.57%)
|
|
||||||
49,118,917 instructions:u # 1.19 insn per cycle (67.74%)
|
|
||||||
<not supported> branches:u
|
|
||||||
344,653 branch-misses:u (91.69%)
|
|
||||||
31,501,274 L1-dcache-loads:u # 511.418 M/sec
|
|
||||||
477,740 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,099,667 L1-icache-loads:u # 488.663 M/sec
|
|
||||||
285,734 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
41,879,387 dTLB-loads:u # 679.904 M/sec (54.00%)
|
|
||||||
99,044 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (13.61%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
20.288512544 seconds time elapsed
|
|
||||||
|
|
||||||
134.447078000 seconds user
|
|
||||||
1247.121046000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
|
|
||||||
2346292, 2346294]),
|
|
||||||
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
|
|
||||||
484022]),
|
|
||||||
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
|
|
||||||
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9700, 0.5813, 0.6566, ..., 0.4126, 0.7652, 0.9833])
|
|
||||||
Matrix: fl2010
|
|
||||||
Shape: torch.Size([484481, 484481])
|
|
||||||
NNZ: 2346294
|
|
||||||
Density: 9.99606174861054e-06
|
|
||||||
Time: 16.561575651168823 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
|
|
||||||
|
|
||||||
331,330 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,331,189 BR_RETIRED:u
|
|
||||||
|
|
||||||
20.603578845 seconds time elapsed
|
|
||||||
|
|
||||||
136.555709000 seconds user
|
|
||||||
1264.382740000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
|
|
||||||
2346292, 2346294]),
|
|
||||||
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
|
|
||||||
484022]),
|
|
||||||
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
|
|
||||||
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1770, 0.8270, 0.4236, ..., 0.0091, 0.2300, 0.5084])
|
|
||||||
Matrix: fl2010
|
|
||||||
Shape: torch.Size([484481, 484481])
|
|
||||||
NNZ: 2346294
|
|
||||||
Density: 9.99606174861054e-06
|
|
||||||
Time: 17.374610424041748 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
|
|
||||||
|
|
||||||
27,367,982 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,160 ITLB_WALK:u
|
|
||||||
17,157 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,828,216 L1D_TLB:u
|
|
||||||
|
|
||||||
21.377378255 seconds time elapsed
|
|
||||||
|
|
||||||
140.848520000 seconds user
|
|
||||||
1326.124469000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
|
|
||||||
2346292, 2346294]),
|
|
||||||
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
|
|
||||||
484022]),
|
|
||||||
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
|
|
||||||
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1268, 0.8786, 0.9762, ..., 0.0649, 0.4474, 0.9707])
|
|
||||||
Matrix: fl2010
|
|
||||||
Shape: torch.Size([484481, 484481])
|
|
||||||
NNZ: 2346294
|
|
||||||
Density: 9.99606174861054e-06
|
|
||||||
Time: 16.753613471984863 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
|
|
||||||
|
|
||||||
30,147,304 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
280,082 L1I_CACHE_REFILL:u
|
|
||||||
454,022 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,595,140 L1D_CACHE:u
|
|
||||||
|
|
||||||
20.706929400 seconds time elapsed
|
|
||||||
|
|
||||||
139.881127000 seconds user
|
|
||||||
1278.527504000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
|
|
||||||
2346292, 2346294]),
|
|
||||||
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
|
|
||||||
484022]),
|
|
||||||
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
|
|
||||||
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1394, 0.8842, 0.4362, ..., 0.8265, 0.1643, 0.9034])
|
|
||||||
Matrix: fl2010
|
|
||||||
Shape: torch.Size([484481, 484481])
|
|
||||||
NNZ: 2346294
|
|
||||||
Density: 9.99606174861054e-06
|
|
||||||
Time: 14.484151124954224 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
|
|
||||||
|
|
||||||
536,056 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
550,006 LL_CACHE_RD:u
|
|
||||||
185,998 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,735 L2D_TLB_REFILL:u
|
|
||||||
296,648 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,723,525 L2D_CACHE:u
|
|
||||||
|
|
||||||
18.443039315 seconds time elapsed
|
|
||||||
|
|
||||||
135.498625000 seconds user
|
|
||||||
1101.745145000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [51.04, 38.64, 22.84, 22.24, 21.88, 21.88, 21.6, 21.4, 21.24, 21.28], "matrix": "ga2010", "shape": [291086, 291086], "nnz": 1418056, "% density": 1.6735964475229304e-05, "time_s": 15.249999523162842, "power": [88.88, 89.52, 78.6, 64.88, 52.64, 52.64, 54.76, 60.16, 71.44, 86.84, 90.72, 89.6, 90.56, 90.36, 91.68, 91.84, 93.4, 93.4, 92.72], "power_after": [21.68, 21.4, 21.28, 21.04, 21.04, 20.96, 20.92, 20.76, 20.8, 20.96], "task clock (msec)": 72.45, "page faults": 3289, "cycles": 24836161, "instructions": 74134706, "branch mispredictions": 325643, "branches": 19697746, "ITLB accesses": 27767290, "ITLB misses": 5832, "DTLB misses": 18134, "DTLB accesses": 37063060, "L1I cache accesses": 32135376, "L1I cache misses": 302429, "L1D cache misses": 484427, "L1D cache accesses": 33639686, "LL cache misses": 548380, "LL cache accesses": 561312, "L2D TLB accesses": 186006, "L2D TLB misses": 25022, "L2D cache misses": 304539, "L2D cache accesses": 1750107, "instructions per cycle": 2.9849502908279586, "branch miss rate": 0.01653199305138771, "ITLB miss rate": 0.00021003129941740803, "DTLB miss rate": 0.0004892742261432272, "L2D TLB miss rate": 0.13452254228358226, "L1I cache miss rate": 0.009411092622659838, "L1D cache miss rate": 0.014400461407398393, "L2D cache miss rate": 0.17401164614506429, "LL cache miss rate": 0.976961119662505}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395281 queued and waiting for resources
|
|
||||||
srun: job 3395281 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8043, 0.7164, 0.5687, ..., 0.1275, 0.5142, 0.8456])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 13.566045045852661 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
|
|
||||||
|
|
||||||
72.45 msec task-clock:u # 0.004 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,289 page-faults:u # 45.396 K/sec
|
|
||||||
24,836,161 cycles:u # 0.343 GHz (23.15%)
|
|
||||||
74,134,706 instructions:u # 2.98 insn per cycle (85.49%)
|
|
||||||
<not supported> branches:u
|
|
||||||
381,828 branch-misses:u
|
|
||||||
33,748,654 L1-dcache-loads:u # 465.814 M/sec
|
|
||||||
497,166 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
32,271,900 L1-icache-loads:u # 445.431 M/sec
|
|
||||||
311,814 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
|
|
||||||
43,431,516 dTLB-loads:u # 599.461 M/sec (27.81%)
|
|
||||||
33,416 dTLB-load-misses:u # 0.08% of all dTLB cache accesses (4.55%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
17.276157893 seconds time elapsed
|
|
||||||
|
|
||||||
100.320029000 seconds user
|
|
||||||
1057.703228000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6290, 0.2236, 0.0669, ..., 0.6531, 0.4280, 0.4384])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 17.094524145126343 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
|
|
||||||
|
|
||||||
325,643 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,697,746 BR_RETIRED:u
|
|
||||||
|
|
||||||
20.849795214 seconds time elapsed
|
|
||||||
|
|
||||||
115.280665000 seconds user
|
|
||||||
1318.654953000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1008, 0.2309, 0.3749, ..., 0.1568, 0.8852, 0.8182])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 15.106332063674927 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
|
|
||||||
|
|
||||||
27,767,290 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,832 ITLB_WALK:u
|
|
||||||
18,134 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,063,060 L1D_TLB:u
|
|
||||||
|
|
||||||
18.753509375 seconds time elapsed
|
|
||||||
|
|
||||||
112.958759000 seconds user
|
|
||||||
1167.457916000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8347, 0.6624, 0.6196, ..., 0.2250, 0.0157, 0.1843])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 13.73094367980957 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
|
|
||||||
|
|
||||||
32,135,376 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,429 L1I_CACHE_REFILL:u
|
|
||||||
484,427 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,639,686 L1D_CACHE:u
|
|
||||||
|
|
||||||
17.400567824 seconds time elapsed
|
|
||||||
|
|
||||||
110.027662000 seconds user
|
|
||||||
1054.271122000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8369, 0.3399, 0.1689, ..., 0.2081, 0.0714, 0.7388])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 15.809288501739502 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
|
|
||||||
|
|
||||||
548,380 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
561,312 LL_CACHE_RD:u
|
|
||||||
186,006 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,022 L2D_TLB_REFILL:u
|
|
||||||
304,539 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,750,107 L2D_CACHE:u
|
|
||||||
|
|
||||||
19.626934574 seconds time elapsed
|
|
||||||
|
|
||||||
116.733174000 seconds user
|
|
||||||
1214.439657000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.04, 21.32, 21.32, 21.32, 21.12, 21.12, 21.0, 20.68, 20.72, 20.56], "matrix": "mac_econ_fwd500", "shape": [206500, 206500], "nnz": 1273389, "% density": 2.9862143765866013e-05, "time_s": 15.046087741851807, "power": [91.88, 91.12, 83.92, 72.88, 57.76, 51.24, 53.12, 62.84, 78.32, 91.64, 95.8, 95.8, 94.08, 92.48, 91.6, 89.88, 87.36, 87.84, 87.32], "power_after": [20.92, 21.04, 21.12, 20.92, 20.92, 20.88, 20.88, 20.92, 21.04, 20.96], "task clock (msec)": 62.46, "page faults": 3243, "cycles": 57150420, "instructions": 94155455, "branch mispredictions": 320781, "branches": 19491698, "ITLB accesses": 27433101, "ITLB misses": 7382, "DTLB misses": 19213, "DTLB accesses": 37123052, "L1I cache accesses": 32027284, "L1I cache misses": 290368, "L1D cache misses": 471338, "L1D cache accesses": 33366668, "LL cache misses": 571063, "LL cache accesses": 583554, "L2D TLB accesses": 196434, "L2D TLB misses": 25171, "L2D cache misses": 329198, "L2D cache accesses": 1814040, "instructions per cycle": 1.6475024155553013, "branch miss rate": 0.016457314288370363, "ITLB miss rate": 0.0002690909788142434, "DTLB miss rate": 0.0005175490420345827, "L2D TLB miss rate": 0.1281397314110592, "L1I cache miss rate": 0.009066269871650684, "L1D cache miss rate": 0.014126013421537926, "L2D cache miss rate": 0.1814722938854711, "LL cache miss rate": 0.9785949543658342}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395279 queued and waiting for resources
|
|
||||||
srun: job 3395279 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
|
|
||||||
1273379, 1273389]),
|
|
||||||
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
|
|
||||||
206459]),
|
|
||||||
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
|
|
||||||
1.2290e-01, 2.2235e-01, -1.0000e+00]),
|
|
||||||
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5388, 0.2921, 0.7349, ..., 0.6379, 0.9676, 0.6389])
|
|
||||||
Matrix: mac_econ_fwd500
|
|
||||||
Shape: torch.Size([206500, 206500])
|
|
||||||
NNZ: 1273389
|
|
||||||
Density: 2.9862143765866013e-05
|
|
||||||
Time: 21.700236320495605 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
|
|
||||||
|
|
||||||
62.46 msec task-clock:u # 0.002 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,243 page-faults:u # 51.921 K/sec
|
|
||||||
57,150,420 cycles:u # 0.915 GHz (90.14%)
|
|
||||||
94,155,455 instructions:u # 1.65 insn per cycle
|
|
||||||
<not supported> branches:u
|
|
||||||
373,032 branch-misses:u
|
|
||||||
33,654,742 L1-dcache-loads:u # 538.817 M/sec
|
|
||||||
479,068 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
32,149,866 L1-icache-loads:u # 514.724 M/sec
|
|
||||||
293,643 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
|
|
||||||
0 dTLB-loads:u # 0.000 /sec (5.14%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
25.310174677 seconds time elapsed
|
|
||||||
|
|
||||||
125.287203000 seconds user
|
|
||||||
1680.798909000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
|
|
||||||
1273379, 1273389]),
|
|
||||||
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
|
|
||||||
206459]),
|
|
||||||
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
|
|
||||||
1.2290e-01, 2.2235e-01, -1.0000e+00]),
|
|
||||||
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6433, 0.3677, 0.3308, ..., 0.5364, 0.2509, 0.4204])
|
|
||||||
Matrix: mac_econ_fwd500
|
|
||||||
Shape: torch.Size([206500, 206500])
|
|
||||||
NNZ: 1273389
|
|
||||||
Density: 2.9862143765866013e-05
|
|
||||||
Time: 16.171404361724854 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
|
|
||||||
|
|
||||||
320,781 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,491,698 BR_RETIRED:u
|
|
||||||
|
|
||||||
19.988421837 seconds time elapsed
|
|
||||||
|
|
||||||
112.429117000 seconds user
|
|
||||||
1245.246161000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
|
|
||||||
1273379, 1273389]),
|
|
||||||
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
|
|
||||||
206459]),
|
|
||||||
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
|
|
||||||
1.2290e-01, 2.2235e-01, -1.0000e+00]),
|
|
||||||
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9344, 0.9844, 0.2313, ..., 0.8634, 0.6912, 0.9693])
|
|
||||||
Matrix: mac_econ_fwd500
|
|
||||||
Shape: torch.Size([206500, 206500])
|
|
||||||
NNZ: 1273389
|
|
||||||
Density: 2.9862143765866013e-05
|
|
||||||
Time: 11.788637161254883 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
|
|
||||||
|
|
||||||
27,433,101 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
7,382 ITLB_WALK:u
|
|
||||||
19,213 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,123,052 L1D_TLB:u
|
|
||||||
|
|
||||||
15.542834153 seconds time elapsed
|
|
||||||
|
|
||||||
99.681401000 seconds user
|
|
||||||
906.856853000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
|
|
||||||
1273379, 1273389]),
|
|
||||||
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
|
|
||||||
206459]),
|
|
||||||
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
|
|
||||||
1.2290e-01, 2.2235e-01, -1.0000e+00]),
|
|
||||||
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2037, 0.6417, 0.9786, ..., 0.8187, 0.4933, 0.1289])
|
|
||||||
Matrix: mac_econ_fwd500
|
|
||||||
Shape: torch.Size([206500, 206500])
|
|
||||||
NNZ: 1273389
|
|
||||||
Density: 2.9862143765866013e-05
|
|
||||||
Time: 13.596147060394287 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
|
|
||||||
|
|
||||||
32,027,284 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
290,368 L1I_CACHE_REFILL:u
|
|
||||||
471,338 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,366,668 L1D_CACHE:u
|
|
||||||
|
|
||||||
17.325855116 seconds time elapsed
|
|
||||||
|
|
||||||
101.368582000 seconds user
|
|
||||||
1053.826259000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
|
|
||||||
1273379, 1273389]),
|
|
||||||
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
|
|
||||||
206459]),
|
|
||||||
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
|
|
||||||
1.2290e-01, 2.2235e-01, -1.0000e+00]),
|
|
||||||
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2072, 0.8681, 0.4768, ..., 0.4873, 0.8997, 0.8601])
|
|
||||||
Matrix: mac_econ_fwd500
|
|
||||||
Shape: torch.Size([206500, 206500])
|
|
||||||
NNZ: 1273389
|
|
||||||
Density: 2.9862143765866013e-05
|
|
||||||
Time: 14.157796382904053 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
|
|
||||||
|
|
||||||
571,063 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
583,554 LL_CACHE_RD:u
|
|
||||||
196,434 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
25,171 L2D_TLB_REFILL:u
|
|
||||||
329,198 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,814,040 L2D_CACHE:u
|
|
||||||
|
|
||||||
17.958287837 seconds time elapsed
|
|
||||||
|
|
||||||
104.145071000 seconds user
|
|
||||||
1089.962121000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [94.16, 91.68, 78.92, 60.88, 46.72, 28.36, 22.08, 21.64, 21.64, 21.64], "matrix": "mc2depi", "shape": [525825, 525825], "nnz": 2100225, "% density": 7.595972132902821e-06, "time_s": 11.03979206085205, "power": [95.44, 94.0, 88.76, 72.12, 59.48, 51.92, 53.88, 68.6, 83.2, 97.76, 98.4, 97.12, 97.12, 95.28, 94.12], "power_after": [21.48, 21.44, 21.28, 21.24, 21.16, 21.08, 21.24, 21.24, 21.24, 21.16], "task clock (msec)": 56.14, "page faults": 3289, "cycles": 47515158, "instructions": 72388154, "branch mispredictions": 327042, "branches": 19309026, "ITLB accesses": 26093030, "ITLB misses": 6189, "DTLB misses": 17253, "DTLB accesses": 35168741, "L1I cache accesses": 30539322, "L1I cache misses": 285404, "L1D cache misses": 465747, "L1D cache accesses": 31932803, "LL cache misses": 530261, "LL cache accesses": 551030, "L2D TLB accesses": 183570, "L2D TLB misses": 23883, "L2D cache misses": 297006, "L2D cache accesses": 1721848, "instructions per cycle": 1.5234749719236964, "branch miss rate": 0.01693726032581861, "ITLB miss rate": 0.0002371897782664566, "DTLB miss rate": 0.0004905776979619486, "L2D TLB miss rate": 0.13010295799967314, "L1I cache miss rate": 0.009345459601231487, "L1D cache miss rate": 0.014585221347465175, "L2D cache miss rate": 0.1724925777420539, "LL cache miss rate": 0.9623087672177558}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395280 queued and waiting for resources
|
|
||||||
srun: job 3395280 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7162, 0.9445, 0.3087, ..., 0.2863, 0.2977, 0.0994])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 14.228392839431763 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
|
||||||
|
|
||||||
56.14 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,289 page-faults:u # 58.584 K/sec
|
|
||||||
47,515,158 cycles:u # 0.846 GHz (55.54%)
|
|
||||||
72,388,154 instructions:u # 1.52 insn per cycle (79.69%)
|
|
||||||
<not supported> branches:u
|
|
||||||
369,139 branch-misses:u
|
|
||||||
32,820,508 L1-dcache-loads:u # 584.601 M/sec
|
|
||||||
483,558 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,317,848 L1-icache-loads:u # 557.836 M/sec
|
|
||||||
288,398 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
|
||||||
39,511,659 dTLB-loads:u # 703.784 M/sec (36.64%)
|
|
||||||
0 dTLB-load-misses:u (3.47%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
18.186987302 seconds time elapsed
|
|
||||||
|
|
||||||
124.639912000 seconds user
|
|
||||||
1088.590740000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4954, 0.2907, 0.0979, ..., 0.0742, 0.4519, 0.0278])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 11.948119163513184 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
|
||||||
|
|
||||||
327,042 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,309,026 BR_RETIRED:u
|
|
||||||
|
|
||||||
15.715674756 seconds time elapsed
|
|
||||||
|
|
||||||
115.898749000 seconds user
|
|
||||||
910.018676000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1402, 0.9048, 0.8859, ..., 0.9542, 0.3509, 0.0695])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 14.170094966888428 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
|
||||||
|
|
||||||
26,093,030 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,189 ITLB_WALK:u
|
|
||||||
17,253 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,168,741 L1D_TLB:u
|
|
||||||
|
|
||||||
18.132605509 seconds time elapsed
|
|
||||||
|
|
||||||
121.020111000 seconds user
|
|
||||||
1090.508165000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1192, 0.6084, 0.4643, ..., 0.3445, 0.4658, 0.7085])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 13.925398826599121 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
|
||||||
|
|
||||||
30,539,322 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
285,404 L1I_CACHE_REFILL:u
|
|
||||||
465,747 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,932,803 L1D_CACHE:u
|
|
||||||
|
|
||||||
17.812911214 seconds time elapsed
|
|
||||||
|
|
||||||
119.918777000 seconds user
|
|
||||||
1067.928403000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2075, 0.7442, 0.4477, ..., 0.0794, 0.0859, 0.8652])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 12.866743564605713 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
|
||||||
|
|
||||||
530,261 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
551,030 LL_CACHE_RD:u
|
|
||||||
183,570 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,883 L2D_TLB_REFILL:u
|
|
||||||
297,006 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,721,848 L2D_CACHE:u
|
|
||||||
|
|
||||||
16.812811712 seconds time elapsed
|
|
||||||
|
|
||||||
117.780323000 seconds user
|
|
||||||
986.834040000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.08, 25.12, 24.68, 23.68, 22.84, 21.96, 21.08, 20.96, 20.8, 20.96], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.9992897510528564, "power": [29.48, 30.52, 31.88, 31.24, 34.32], "power_after": [20.4, 20.6, 20.64, 20.76, 20.92, 20.84, 20.88, 20.88, 20.88, 20.84], "task clock (msec)": 52.68, "page faults": 3272, "cycles": 63019732, "instructions": 73518898, "branch mispredictions": 333423, "branches": 19435905, "ITLB accesses": 27447537, "ITLB misses": 6417, "DTLB misses": 18300, "DTLB accesses": 37569384, "L1I cache accesses": 30830481, "L1I cache misses": 290545, "L1D cache misses": 473875, "L1D cache accesses": 32284772, "LL cache misses": 529403, "LL cache accesses": 549794, "L2D TLB accesses": 198306, "L2D TLB misses": 24497, "L2D cache misses": 298519, "L2D cache accesses": 1772795, "instructions per cycle": 1.1666012480027683, "branch miss rate": 0.017155002558409294, "ITLB miss rate": 0.00023379146915805232, "DTLB miss rate": 0.000487098750408045, "L2D TLB miss rate": 0.12353131019737174, "L1I cache miss rate": 0.009423952873132274, "L1D cache miss rate": 0.014677972636758903, "L2D cache miss rate": 0.16838890001381998, "LL cache miss rate": 0.9629115632400499}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395271 queued and waiting for resources
|
|
||||||
srun: job 3395271 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3559, 0.4732, 0.3024, ..., 0.9176, 0.7712, 0.4949])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.0082497596740723 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
52.68 msec task-clock:u # 0.012 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,272 page-faults:u # 62.105 K/sec
|
|
||||||
63,019,732 cycles:u # 1.196 GHz (70.67%)
|
|
||||||
73,518,898 instructions:u # 1.17 insn per cycle (85.80%)
|
|
||||||
<not supported> branches:u
|
|
||||||
359,236 branch-misses:u (99.44%)
|
|
||||||
31,459,751 L1-dcache-loads:u # 597.131 M/sec
|
|
||||||
460,969 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
29,975,208 L1-icache-loads:u # 568.954 M/sec
|
|
||||||
281,710 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
59,589,523 dTLB-loads:u # 1.131 G/sec (17.10%)
|
|
||||||
0 dTLB-load-misses:u (1.27%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
4.456867719 seconds time elapsed
|
|
||||||
|
|
||||||
16.389568000 seconds user
|
|
||||||
29.247355000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0123, 0.4107, 0.7785, ..., 0.7964, 0.7541, 0.4153])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.030029058456421 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
333,423 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,435,905 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.359656946 seconds time elapsed
|
|
||||||
|
|
||||||
16.490532000 seconds user
|
|
||||||
28.366462000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1898, 0.0740, 0.4564, ..., 0.7987, 0.1017, 0.5949])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.004878044128418 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
27,447,537 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,417 ITLB_WALK:u
|
|
||||||
18,300 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,569,384 L1D_TLB:u
|
|
||||||
|
|
||||||
4.355627133 seconds time elapsed
|
|
||||||
|
|
||||||
15.883078000 seconds user
|
|
||||||
27.120829000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1682, 0.9350, 0.9210, ..., 0.3758, 0.2263, 0.1068])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.0207850933074951 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
30,830,481 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
290,545 L1I_CACHE_REFILL:u
|
|
||||||
473,875 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,284,772 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.427088851 seconds time elapsed
|
|
||||||
|
|
||||||
15.711555000 seconds user
|
|
||||||
29.627091000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9351, 0.3836, 0.0822, ..., 0.9798, 0.3726, 0.7394])
|
|
||||||
Matrix: p2p-Gnutella04
|
|
||||||
Shape: torch.Size([10879, 10879])
|
|
||||||
NNZ: 39994
|
|
||||||
Density: 0.0003379223282393842
|
|
||||||
Time: 1.041510820388794 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
|
||||||
|
|
||||||
529,403 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
549,794 LL_CACHE_RD:u
|
|
||||||
198,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,497 L2D_TLB_REFILL:u
|
|
||||||
298,519 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,772,795 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.454107604 seconds time elapsed
|
|
||||||
|
|
||||||
16.577921000 seconds user
|
|
||||||
29.390427000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.72, 30.6, 28.68, 26.48, 22.44, 21.4, 21.28, 21.08, 21.32, 21.6], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.718301773071289, "power": [31.52, 32.48, 33.64, 33.88, 33.44, 31.52], "power_after": [20.96, 20.84, 20.92, 20.8, 20.76, 20.76, 20.76, 20.68, 20.72, 20.92], "task clock (msec)": 67.08, "page faults": 3303, "cycles": 61261862, "instructions": 83757591, "branch mispredictions": 329248, "branches": 19953212, "ITLB accesses": 27084694, "ITLB misses": 7107, "DTLB misses": 17529, "DTLB accesses": 36684333, "L1I cache accesses": 32158234, "L1I cache misses": 286484, "L1D cache misses": 474161, "L1D cache accesses": 33730073, "LL cache misses": 550064, "LL cache accesses": 565245, "L2D TLB accesses": 191046, "L2D TLB misses": 23775, "L2D cache misses": 307419, "L2D cache accesses": 1772169, "instructions per cycle": 1.3672060930828385, "branch miss rate": 0.016501002445120115, "ITLB miss rate": 0.0002623991247602797, "DTLB miss rate": 0.0004778334118818516, "L2D TLB miss rate": 0.12444646838981188, "L1I cache miss rate": 0.008908573773049851, "L1D cache miss rate": 0.014057514788064645, "L2D cache miss rate": 0.1734704760099065, "LL cache miss rate": 0.973142619572044}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395289 queued and waiting for resources
|
|
||||||
srun: job 3395289 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3210, 0.3418, 0.9584, ..., 0.8929, 0.9807, 0.5532])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6565663814544678 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
67.08 msec task-clock:u # 0.013 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,303 page-faults:u # 49.241 K/sec
|
|
||||||
61,261,862 cycles:u # 0.913 GHz (49.19%)
|
|
||||||
83,757,591 instructions:u # 1.37 insn per cycle (88.30%)
|
|
||||||
<not supported> branches:u
|
|
||||||
364,692 branch-misses:u
|
|
||||||
31,954,743 L1-dcache-loads:u # 476.379 M/sec
|
|
||||||
490,953 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,490,915 L1-icache-loads:u # 454.556 M/sec
|
|
||||||
291,964 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
|
||||||
32,131,046 dTLB-loads:u # 479.007 M/sec (19.20%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
5.107407925 seconds time elapsed
|
|
||||||
|
|
||||||
16.045361000 seconds user
|
|
||||||
30.574855000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4851, 0.2524, 0.2134, ..., 0.5976, 0.0089, 0.2284])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6902527809143066 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
329,248 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,953,212 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.990707186 seconds time elapsed
|
|
||||||
|
|
||||||
16.713526000 seconds user
|
|
||||||
27.761595000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1844, 0.9003, 0.0155, ..., 0.5184, 0.1445, 0.3588])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6478993892669678 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
27,084,694 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
7,107 ITLB_WALK:u
|
|
||||||
17,529 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,684,333 L1D_TLB:u
|
|
||||||
|
|
||||||
5.010572757 seconds time elapsed
|
|
||||||
|
|
||||||
16.570396000 seconds user
|
|
||||||
27.387405000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2313, 0.8375, 0.3065, ..., 0.2374, 0.2281, 0.2100])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.637598991394043 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
32,158,234 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
286,484 L1I_CACHE_REFILL:u
|
|
||||||
474,161 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,730,073 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.963121627 seconds time elapsed
|
|
||||||
|
|
||||||
16.730431000 seconds user
|
|
||||||
29.869416000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5006, 0.8470, 0.3527, ..., 0.3901, 0.3581, 0.1154])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 1.6584653854370117 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
|
||||||
|
|
||||||
550,064 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
565,245 LL_CACHE_RD:u
|
|
||||||
191,046 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,775 L2D_TLB_REFILL:u
|
|
||||||
307,419 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,772,169 L2D_CACHE:u
|
|
||||||
|
|
||||||
5.019317303 seconds time elapsed
|
|
||||||
|
|
||||||
16.518292000 seconds user
|
|
||||||
30.069880000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [86.48, 72.16, 59.36, 41.84, 28.44, 22.96, 22.92, 22.92, 23.04, 23.24], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.431199073791504, "power": [35.16, 36.2, 36.72, 37.52, 37.52], "power_after": [21.32, 21.2, 21.2, 21.28, 21.52, 21.44, 21.92, 21.68, 21.6, 21.36], "task clock (msec)": 59.85, "page faults": 3318, "cycles": 76505130, "instructions": 72343215, "branch mispredictions": 322338, "branches": 19784096, "ITLB accesses": 27270404, "ITLB misses": 6607, "DTLB misses": 17981, "DTLB accesses": 36751047, "L1I cache accesses": 30620441, "L1I cache misses": 302139, "L1D cache misses": 471011, "L1D cache accesses": 32141810, "LL cache misses": 531907, "LL cache accesses": 545159, "L2D TLB accesses": 188244, "L2D TLB misses": 23034, "L2D cache misses": 293848, "L2D cache accesses": 1757551, "instructions per cycle": 0.945599530384433, "branch miss rate": 0.016292783860329025, "ITLB miss rate": 0.00024227730546272803, "DTLB miss rate": 0.0004892649725054092, "L2D TLB miss rate": 0.12236246573595971, "L1I cache miss rate": 0.009867232153841285, "L1D cache miss rate": 0.014654152955294054, "L2D cache miss rate": 0.1671917344077071, "LL cache miss rate": 0.9756914955086498}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395288 queued and waiting for resources
|
|
||||||
srun: job 3395288 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9962, 0.2550, 0.9564, ..., 0.7113, 0.6635, 0.3831])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4832944869995117 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
59.85 msec task-clock:u # 0.012 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,318 page-faults:u # 55.439 K/sec
|
|
||||||
76,505,130 cycles:u # 1.278 GHz (43.11%)
|
|
||||||
72,343,215 instructions:u # 0.95 insn per cycle (62.06%)
|
|
||||||
<not supported> branches:u
|
|
||||||
371,337 branch-misses:u (77.63%)
|
|
||||||
33,969,604 L1-dcache-loads:u # 567.579 M/sec (88.85%)
|
|
||||||
472,023 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,728,689 L1-icache-loads:u # 530.137 M/sec
|
|
||||||
299,356 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
50,921,898 dTLB-loads:u # 850.825 M/sec (39.93%)
|
|
||||||
90,542 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (36.53%)
|
|
||||||
11,563,883 iTLB-loads:u # 193.214 M/sec (20.26%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
4.953668960 seconds time elapsed
|
|
||||||
|
|
||||||
16.652653000 seconds user
|
|
||||||
30.408692000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.9968, 0.7101, 0.9319, ..., 0.2871, 0.7386, 0.8934])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.3799591064453125 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
322,338 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,784,096 BR_RETIRED:u
|
|
||||||
|
|
||||||
4.633544255 seconds time elapsed
|
|
||||||
|
|
||||||
16.572749000 seconds user
|
|
||||||
26.228349000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3551, 0.8297, 0.9950, ..., 0.9625, 0.7129, 0.2173])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.400240182876587 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
27,270,404 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,607 ITLB_WALK:u
|
|
||||||
17,981 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,751,047 L1D_TLB:u
|
|
||||||
|
|
||||||
4.696092090 seconds time elapsed
|
|
||||||
|
|
||||||
15.781810000 seconds user
|
|
||||||
28.383624000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3600, 0.0388, 0.5262, ..., 0.5849, 0.3707, 0.1514])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4545772075653076 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
30,620,441 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,139 L1I_CACHE_REFILL:u
|
|
||||||
471,011 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,141,810 L1D_CACHE:u
|
|
||||||
|
|
||||||
4.897499310 seconds time elapsed
|
|
||||||
|
|
||||||
16.207163000 seconds user
|
|
||||||
32.246890000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1220, 0.8435, 0.7035, ..., 0.2109, 0.0289, 0.0715])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.4200170040130615 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
|
||||||
|
|
||||||
531,907 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
545,159 LL_CACHE_RD:u
|
|
||||||
188,244 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,034 L2D_TLB_REFILL:u
|
|
||||||
293,848 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,757,551 L2D_CACHE:u
|
|
||||||
|
|
||||||
4.683262937 seconds time elapsed
|
|
||||||
|
|
||||||
16.111909000 seconds user
|
|
||||||
29.660483000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.44, 16.44, 16.44, 16.84, 16.72, 16.6, 16.72, 16.84, 16.68, 16.84], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 2.896674871444702, "power": [56.32, 68.24, 71.76, 59.48, 47.6, 48.76, 52.6], "power_after": [16.92, 17.0, 16.96, 16.8, 16.48, 16.52, 16.52, 16.52, 16.24, 16.36], "task clock (msec)": 56.47, "page faults": 3222, "cycles": 69105836, "instructions": 89065155, "branch mispredictions": 333669, "branches": 20078755, "ITLB accesses": 26015038, "ITLB misses": 5212, "DTLB misses": 17039, "DTLB accesses": 35296010, "L1I cache accesses": 31837486, "L1I cache misses": 293353, "L1D cache misses": 462358, "L1D cache accesses": 33478540, "LL cache misses": 546516, "LL cache accesses": 559865, "L2D TLB accesses": 190400, "L2D TLB misses": 23787, "L2D cache misses": 307032, "L2D cache accesses": 1768186, "instructions per cycle": 1.288822480926213, "branch miss rate": 0.016618012421586895, "ITLB miss rate": 0.00020034566161310238, "DTLB miss rate": 0.00048274578344691083, "L2D TLB miss rate": 0.12493172268907562, "L1I cache miss rate": 0.009214075508348869, "L1D cache miss rate": 0.013810578358554464, "L2D cache miss rate": 0.17364236567872385, "LL cache miss rate": 0.9761567520741607}
|
|
@ -1,158 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395282 queued and waiting for resources
|
|
||||||
srun: job 3395282 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0302, 0.1334, 0.4142, ..., 0.9516, 0.6030, 0.3883])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 2.790724277496338 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
56.47 msec task-clock:u # 0.009 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,222 page-faults:u # 57.061 K/sec
|
|
||||||
69,105,836 cycles:u # 1.224 GHz (53.55%)
|
|
||||||
89,065,155 instructions:u # 1.29 insn per cycle (92.79%)
|
|
||||||
<not supported> branches:u
|
|
||||||
367,525 branch-misses:u
|
|
||||||
32,122,654 L1-dcache-loads:u # 568.886 M/sec
|
|
||||||
467,921 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,765,438 L1-icache-loads:u # 544.850 M/sec
|
|
||||||
289,327 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
24,642,710 dTLB-loads:u # 436.418 M/sec (11.11%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.334250152 seconds time elapsed
|
|
||||||
|
|
||||||
32.099712000 seconds user
|
|
||||||
240.206702000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6147, 0.4171, 0.2258, ..., 0.0253, 0.8932, 0.8040])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 2.092158079147339 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
333,669 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,078,755 BR_RETIRED:u
|
|
||||||
|
|
||||||
5.557038624 seconds time elapsed
|
|
||||||
|
|
||||||
29.074016000 seconds user
|
|
||||||
186.372846000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0146, 0.2151, 0.1948, ..., 0.7633, 0.4329, 0.7106])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 3.1269772052764893 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
26,015,038 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,212 ITLB_WALK:u
|
|
||||||
17,039 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,296,010 L1D_TLB:u
|
|
||||||
|
|
||||||
6.550798214 seconds time elapsed
|
|
||||||
|
|
||||||
36.334689000 seconds user
|
|
||||||
263.614426000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1810, 0.5208, 0.0542, ..., 0.6108, 0.4905, 0.8918])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 1.9065814018249512 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
31,837,486 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
293,353 L1I_CACHE_REFILL:u
|
|
||||||
462,358 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,478,540 L1D_CACHE:u
|
|
||||||
|
|
||||||
5.319975004 seconds time elapsed
|
|
||||||
|
|
||||||
26.918342000 seconds user
|
|
||||||
175.603919000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8456, 0.8302, 0.2078, ..., 0.8155, 0.5148, 0.5853])
|
|
||||||
Matrix: p2p-Gnutella30
|
|
||||||
Shape: torch.Size([36682, 36682])
|
|
||||||
NNZ: 88328
|
|
||||||
Density: 6.564359899804003e-05
|
|
||||||
Time: 3.8523874282836914 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
|
||||||
|
|
||||||
546,516 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
559,865 LL_CACHE_RD:u
|
|
||||||
190,400 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,787 L2D_TLB_REFILL:u
|
|
||||||
307,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,768,186 L2D_CACHE:u
|
|
||||||
|
|
||||||
7.266305868 seconds time elapsed
|
|
||||||
|
|
||||||
37.085321000 seconds user
|
|
||||||
320.780766000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.6, 16.64, 17.04, 17.08, 16.92, 17.24, 16.88, 16.36, 16.4, 16.4], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 2.970583200454712, "power": [23.04, 23.28, 23.76, 24.12, 21.4, 26.28, 26.36], "power_after": [16.16, 16.16, 16.52, 16.48, 16.52, 16.44, 16.36, 16.48, 16.76, 16.6], "task clock (msec)": 52.61, "page faults": 3292, "cycles": 42915672, "instructions": 71002596, "branch mispredictions": 344300, "branches": 20224759, "ITLB accesses": 26039851, "ITLB misses": 5035, "DTLB misses": 16402, "DTLB accesses": 34820806, "L1I cache accesses": 31878105, "L1I cache misses": 299057, "L1D cache misses": 471869, "L1D cache accesses": 33450518, "LL cache misses": 530093, "LL cache accesses": 551126, "L2D TLB accesses": 188315, "L2D TLB misses": 22856, "L2D cache misses": 299885, "L2D cache accesses": 1763155, "instructions per cycle": 1.6544677664607, "branch miss rate": 0.01702368863826758, "ITLB miss rate": 0.00019335748119296073, "DTLB miss rate": 0.0004710402165877493, "L2D TLB miss rate": 0.12137110692191275, "L1I cache miss rate": 0.009381266546427399, "L1D cache miss rate": 0.014106478111938357, "L2D cache miss rate": 0.1700843090936418, "LL cache miss rate": 0.9618363132931489}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395268 queued and waiting for resources
|
|
||||||
srun: job 3395268 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4029, 0.5373, 0.8376, ..., 0.9299, 0.3127, 0.4778])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9858975410461426 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
52.61 msec task-clock:u # 0.008 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,292 page-faults:u # 62.576 K/sec
|
|
||||||
42,915,672 cycles:u # 0.816 GHz (55.04%)
|
|
||||||
71,002,596 instructions:u # 1.65 insn per cycle (81.89%)
|
|
||||||
<not supported> branches:u
|
|
||||||
369,793 branch-misses:u
|
|
||||||
33,163,106 L1-dcache-loads:u # 630.381 M/sec
|
|
||||||
471,533 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,640,002 L1-icache-loads:u # 601.429 M/sec
|
|
||||||
297,919 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
|
||||||
48,642,108 dTLB-loads:u # 924.614 M/sec (29.77%)
|
|
||||||
0 dTLB-load-misses:u (5.06%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
6.215745697 seconds time elapsed
|
|
||||||
|
|
||||||
17.600216000 seconds user
|
|
||||||
30.777524000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8706, 0.3724, 0.8779, ..., 0.4299, 0.0920, 0.4238])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9231789112091064 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
344,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,224,759 BR_RETIRED:u
|
|
||||||
|
|
||||||
6.297708483 seconds time elapsed
|
|
||||||
|
|
||||||
17.546068000 seconds user
|
|
||||||
26.920857000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2988, 0.0160, 0.4360, ..., 0.7543, 0.0919, 0.2321])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9701316356658936 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
26,039,851 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,035 ITLB_WALK:u
|
|
||||||
16,402 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
34,820,806 L1D_TLB:u
|
|
||||||
|
|
||||||
6.227977259 seconds time elapsed
|
|
||||||
|
|
||||||
17.937381000 seconds user
|
|
||||||
30.196552000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5797, 0.8992, 0.8317, ..., 0.0283, 0.7124, 0.2690])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.968733072280884 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
31,878,105 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
299,057 L1I_CACHE_REFILL:u
|
|
||||||
471,869 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,450,518 L1D_CACHE:u
|
|
||||||
|
|
||||||
6.278062824 seconds time elapsed
|
|
||||||
|
|
||||||
17.822878000 seconds user
|
|
||||||
27.932170000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
|
||||||
125750]),
|
|
||||||
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
|
||||||
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
|
||||||
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0630, 0.5194, 0.8720, ..., 0.9537, 0.3959, 0.5550])
|
|
||||||
Matrix: ri2010
|
|
||||||
Shape: torch.Size([25181, 25181])
|
|
||||||
NNZ: 125750
|
|
||||||
Density: 0.00019831796057928155
|
|
||||||
Time: 2.9069995880126953 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
|
||||||
|
|
||||||
530,093 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
551,126 LL_CACHE_RD:u
|
|
||||||
188,315 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
22,856 L2D_TLB_REFILL:u
|
|
||||||
299,885 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,763,155 L2D_CACHE:u
|
|
||||||
|
|
||||||
6.075529293 seconds time elapsed
|
|
||||||
|
|
||||||
17.073983000 seconds user
|
|
||||||
27.811966000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.36, 30.64, 31.12, 24.52, 24.16, 23.12, 22.08, 21.28, 21.16, 20.88], "matrix": "rma10", "shape": [46835, 46835], "nnz": 2374001, "% density": 0.0010822805369125833, "time_s": 68.86891412734985, "power": [81.8, 81.32, 75.08, 63.48, 51.92, 51.96, 51.8, 65.0, 65.0, 75.12, 82.68, 82.32, 82.08, 82.76, 82.8, 83.6, 83.36, 83.08, 82.88, 83.0, 83.32, 83.32, 83.36, 84.64, 84.56, 84.24, 83.52, 83.4, 83.36, 83.36, 83.72, 84.16, 83.24, 82.76, 82.76, 82.96, 82.36, 82.24, 81.64, 81.6, 81.4, 81.6, 81.88, 82.32, 83.04, 83.48, 83.48, 84.32, 84.04, 84.32, 83.16, 82.44, 81.96, 81.4, 81.8, 82.08, 81.8, 81.84, 82.04, 82.04, 82.08, 82.44, 82.6, 82.84, 83.8, 84.24, 84.6, 85.4, 85.6, 86.0, 85.72, 85.36], "power_after": [21.96, 21.88, 21.96, 21.96, 22.0, 21.68, 21.44, 21.16, 21.04, 20.92], "task clock (msec)": 58.3, "page faults": 3281, "cycles": 81319364, "instructions": 90830397, "branch mispredictions": 342237, "branches": 20641135, "ITLB accesses": 27974213, "ITLB misses": 6660, "DTLB misses": 18441, "DTLB accesses": 37780346, "L1I cache accesses": 31166891, "L1I cache misses": 291301, "L1D cache misses": 477186, "L1D cache accesses": 32682323, "LL cache misses": 538552, "LL cache accesses": 552543, "L2D TLB accesses": 202351, "L2D TLB misses": 24178, "L2D cache misses": 298051, "L2D cache accesses": 1775481, "instructions per cycle": 1.1169590185186398, "branch miss rate": 0.01658033824205888, "ITLB miss rate": 0.00023807640272132053, "DTLB miss rate": 0.00048811093471722044, "L2D TLB miss rate": 0.11948544855226809, "L1I cache miss rate": 0.00934648887500521, "L1D cache miss rate": 0.014600736918241704, "L2D cache miss rate": 0.1678705657790762, "LL cache miss rate": 0.9746788937693537}
|
|
@ -1,168 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395286 queued and waiting for resources
|
|
||||||
srun: job 3395286 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
|
|
||||||
2373970, 2374001]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
|
|
||||||
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
|
|
||||||
8.3378e+01, 2.5138e+00, 1.2184e+03]),
|
|
||||||
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
|
|
||||||
tensor([0.4937, 0.5946, 0.4240, ..., 0.9888, 0.5278, 0.9155])
|
|
||||||
Matrix: rma10
|
|
||||||
Shape: torch.Size([46835, 46835])
|
|
||||||
NNZ: 2374001
|
|
||||||
Density: 0.0010822805369125833
|
|
||||||
Time: 52.320035219192505 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
|
|
||||||
|
|
||||||
58.30 msec task-clock:u # 0.001 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,281 page-faults:u # 56.279 K/sec
|
|
||||||
81,319,364 cycles:u # 1.395 GHz (62.38%)
|
|
||||||
90,830,397 instructions:u # 1.12 insn per cycle (94.62%)
|
|
||||||
<not supported> branches:u
|
|
||||||
358,947 branch-misses:u
|
|
||||||
32,561,141 L1-dcache-loads:u # 558.523 M/sec
|
|
||||||
477,147 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,044,361 L1-icache-loads:u # 532.506 M/sec
|
|
||||||
286,125 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
|
||||||
29,678,379 dTLB-loads:u # 509.075 M/sec (5.72%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
56.145511940 seconds time elapsed
|
|
||||||
|
|
||||||
269.541895000 seconds user
|
|
||||||
3993.928150000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
|
|
||||||
2373970, 2374001]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
|
|
||||||
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
|
|
||||||
8.3378e+01, 2.5138e+00, 1.2184e+03]),
|
|
||||||
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2401, 0.9608, 0.9686, ..., 0.2643, 0.1097, 0.0695])
|
|
||||||
Matrix: rma10
|
|
||||||
Shape: torch.Size([46835, 46835])
|
|
||||||
NNZ: 2374001
|
|
||||||
Density: 0.0010822805369125833
|
|
||||||
Time: 65.29214668273926 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
|
|
||||||
|
|
||||||
342,237 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,641,135 BR_RETIRED:u
|
|
||||||
|
|
||||||
69.131216008 seconds time elapsed
|
|
||||||
|
|
||||||
324.908899000 seconds user
|
|
||||||
4969.165543000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
|
|
||||||
2373970, 2374001]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
|
|
||||||
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
|
|
||||||
8.3378e+01, 2.5138e+00, 1.2184e+03]),
|
|
||||||
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
|
|
||||||
tensor([0.5237, 0.3525, 0.2809, ..., 0.8641, 0.3894, 0.4198])
|
|
||||||
Matrix: rma10
|
|
||||||
Shape: torch.Size([46835, 46835])
|
|
||||||
NNZ: 2374001
|
|
||||||
Density: 0.0010822805369125833
|
|
||||||
Time: 66.05637407302856 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
|
|
||||||
|
|
||||||
27,974,213 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,660 ITLB_WALK:u
|
|
||||||
18,441 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
37,780,346 L1D_TLB:u
|
|
||||||
|
|
||||||
69.880637029 seconds time elapsed
|
|
||||||
|
|
||||||
320.759259000 seconds user
|
|
||||||
5037.255757000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
|
|
||||||
2373970, 2374001]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
|
|
||||||
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
|
|
||||||
8.3378e+01, 2.5138e+00, 1.2184e+03]),
|
|
||||||
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8185, 0.4278, 0.7553, ..., 0.5022, 0.1058, 0.0783])
|
|
||||||
Matrix: rma10
|
|
||||||
Shape: torch.Size([46835, 46835])
|
|
||||||
NNZ: 2374001
|
|
||||||
Density: 0.0010822805369125833
|
|
||||||
Time: 63.55399775505066 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
|
|
||||||
|
|
||||||
31,166,891 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
291,301 L1I_CACHE_REFILL:u
|
|
||||||
477,186 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,682,323 L1D_CACHE:u
|
|
||||||
|
|
||||||
67.517251505 seconds time elapsed
|
|
||||||
|
|
||||||
319.301754000 seconds user
|
|
||||||
4839.755901000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
|
|
||||||
2373970, 2374001]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
|
|
||||||
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
|
|
||||||
8.3378e+01, 2.5138e+00, 1.2184e+03]),
|
|
||||||
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8358, 0.0086, 0.1779, ..., 0.6354, 0.7134, 0.5745])
|
|
||||||
Matrix: rma10
|
|
||||||
Shape: torch.Size([46835, 46835])
|
|
||||||
NNZ: 2374001
|
|
||||||
Density: 0.0010822805369125833
|
|
||||||
Time: 63.55393171310425 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
|
|
||||||
|
|
||||||
538,552 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
552,543 LL_CACHE_RD:u
|
|
||||||
202,351 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,178 L2D_TLB_REFILL:u
|
|
||||||
298,051 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,775,481 L2D_CACHE:u
|
|
||||||
|
|
||||||
67.538674790 seconds time elapsed
|
|
||||||
|
|
||||||
321.810383000 seconds user
|
|
||||||
4836.154538000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.56, 23.12, 22.32, 22.28, 22.28, 21.96, 21.76, 21.72, 21.88, 21.84], "matrix": "tn2010", "shape": [240116, 240116], "nnz": 1193966, "% density": 2.070855328296721e-05, "time_s": 16.282614707946777, "power": [85.48, 85.84, 79.28, 70.16, 55.52, 49.48, 49.48, 60.48, 76.32, 88.88, 91.0, 91.0, 90.68, 88.32, 86.92, 86.4, 88.08, 86.8, 87.32, 87.8], "power_after": [21.68, 21.48, 21.44, 21.36, 21.52, 21.4, 21.4, 21.32, 21.2, 21.04], "task clock (msec)": 68.11, "page faults": 3486, "cycles": 70427921, "instructions": 85638293, "branch mispredictions": 333780, "branches": 19402540, "ITLB accesses": 26935483, "ITLB misses": 5639, "DTLB misses": 16688, "DTLB accesses": 36421540, "L1I cache accesses": 33029213, "L1I cache misses": 302558, "L1D cache misses": 481598, "L1D cache accesses": 34668833, "LL cache misses": 551659, "LL cache accesses": 564579, "L2D TLB accesses": 188346, "L2D TLB misses": 24479, "L2D cache misses": 311796, "L2D cache accesses": 1767924, "instructions per cycle": 1.215970765344614, "branch miss rate": 0.017202902300420462, "ITLB miss rate": 0.0002093521025778524, "DTLB miss rate": 0.00045819040051573877, "L2D TLB miss rate": 0.12996824992301403, "L1I cache miss rate": 0.00916031514284037, "L1D cache miss rate": 0.013891381922200843, "L2D cache miss rate": 0.17636278482559206, "LL cache miss rate": 0.9771156915152707}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395275 queued and waiting for resources
|
|
||||||
srun: job 3395275 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.2511, 0.1104, 0.8257, ..., 0.4006, 0.1534, 0.0009])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 12.89618182182312 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
|
|
||||||
|
|
||||||
68.11 msec task-clock:u # 0.004 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,486 page-faults:u # 51.182 K/sec
|
|
||||||
70,427,921 cycles:u # 1.034 GHz (46.81%)
|
|
||||||
85,638,293 instructions:u # 1.22 insn per cycle (74.19%)
|
|
||||||
<not supported> branches:u
|
|
||||||
356,748 branch-misses:u (89.74%)
|
|
||||||
34,044,117 L1-dcache-loads:u # 499.843 M/sec
|
|
||||||
481,076 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
32,553,977 L1-icache-loads:u # 477.965 M/sec
|
|
||||||
309,127 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
41,245,978 dTLB-loads:u # 605.583 M/sec (33.60%)
|
|
||||||
127,770 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (15.43%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
16.626373547 seconds time elapsed
|
|
||||||
|
|
||||||
101.073288000 seconds user
|
|
||||||
996.348020000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.0138, 0.1394, 0.6273, ..., 0.8681, 0.0444, 0.2705])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 14.216531038284302 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
|
|
||||||
|
|
||||||
333,780 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,402,540 BR_RETIRED:u
|
|
||||||
|
|
||||||
17.985093703 seconds time elapsed
|
|
||||||
|
|
||||||
106.904608000 seconds user
|
|
||||||
1091.172933000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6279, 0.1696, 0.6937, ..., 0.4267, 0.4847, 0.6447])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 12.462992429733276 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
|
|
||||||
|
|
||||||
26,935,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
5,639 ITLB_WALK:u
|
|
||||||
16,688 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,421,540 L1D_TLB:u
|
|
||||||
|
|
||||||
15.984498303 seconds time elapsed
|
|
||||||
|
|
||||||
95.195897000 seconds user
|
|
||||||
962.237122000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.4060, 0.4915, 0.8557, ..., 0.9902, 0.0548, 0.2450])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 9.298198223114014 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
|
|
||||||
|
|
||||||
33,029,213 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
302,558 L1I_CACHE_REFILL:u
|
|
||||||
481,598 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
34,668,833 L1D_CACHE:u
|
|
||||||
|
|
||||||
12.985459942 seconds time elapsed
|
|
||||||
|
|
||||||
78.950722000 seconds user
|
|
||||||
727.126874000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.0166, 0.6910, 0.0311, ..., 0.6156, 0.5689, 0.9849])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 12.012693405151367 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
|
|
||||||
|
|
||||||
551,659 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
564,579 LL_CACHE_RD:u
|
|
||||||
188,346 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,479 L2D_TLB_REFILL:u
|
|
||||||
311,796 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,767,924 L2D_CACHE:u
|
|
||||||
|
|
||||||
15.749851583 seconds time elapsed
|
|
||||||
|
|
||||||
98.008506000 seconds user
|
|
||||||
926.127594000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [93.52, 87.76, 71.6, 58.32, 39.6, 26.24, 26.24, 22.16, 22.24, 22.24], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 8.478580713272095, "power": [89.68, 88.92, 80.84, 68.96, 56.64, 54.52, 55.88, 70.44, 85.36, 85.36, 98.2, 96.52], "power_after": [21.24, 21.32, 21.16, 21.44, 21.68, 21.76, 21.72, 22.0, 21.72, 21.72], "task clock (msec)": 53.84, "page faults": 3291, "cycles": 66389970, "instructions": 74935543, "branch mispredictions": 330515, "branches": 19475058, "ITLB accesses": 26125490, "ITLB misses": 6431, "DTLB misses": 13728, "DTLB accesses": 35274185, "L1I cache accesses": 30428652, "L1I cache misses": 288897, "L1D cache misses": 475615, "L1D cache accesses": 31855716, "LL cache misses": 553829, "LL cache accesses": 574192, "L2D TLB accesses": 181148, "L2D TLB misses": 23202, "L2D cache misses": 307806, "L2D cache accesses": 1767037, "instructions per cycle": 1.1287178319255153, "branch miss rate": 0.016971194642911976, "ITLB miss rate": 0.00024615806248992844, "DTLB miss rate": 0.0003891797925309968, "L2D TLB miss rate": 0.12808311435952924, "L1I cache miss rate": 0.009494242465949527, "L1D cache miss rate": 0.014930287550278261, "L2D cache miss rate": 0.17419329646181717, "LL cache miss rate": 0.9645362526820297}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395284 queued and waiting for resources
|
|
||||||
srun: job 3395284 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.1487, 0.4275, 0.9471, ..., 0.3851, 0.0801, 0.4295])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 8.772023677825928 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
53.84 msec task-clock:u # 0.004 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,291 page-faults:u # 61.127 K/sec
|
|
||||||
66,389,970 cycles:u # 1.233 GHz (67.37%)
|
|
||||||
74,935,543 instructions:u # 1.13 insn per cycle (83.30%)
|
|
||||||
<not supported> branches:u
|
|
||||||
365,846 branch-misses:u
|
|
||||||
31,684,169 L1-dcache-loads:u # 588.504 M/sec
|
|
||||||
462,583 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,260,337 L1-icache-loads:u # 562.058 M/sec
|
|
||||||
288,196 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
|
||||||
57,721,334 dTLB-loads:u # 1.072 G/sec (18.54%)
|
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
12.179628060 seconds time elapsed
|
|
||||||
|
|
||||||
68.068275000 seconds user
|
|
||||||
690.223452000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9553, 0.9401, 0.7135, ..., 0.8664, 0.5986, 0.8459])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 8.94040060043335 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
330,515 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,475,058 BR_RETIRED:u
|
|
||||||
|
|
||||||
12.428594105 seconds time elapsed
|
|
||||||
|
|
||||||
67.011228000 seconds user
|
|
||||||
709.528404000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6289, 0.8171, 0.1590, ..., 0.7515, 0.5400, 0.3693])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 14.403366804122925 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
26,125,490 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,431 ITLB_WALK:u
|
|
||||||
13,728 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
35,274,185 L1D_TLB:u
|
|
||||||
|
|
||||||
18.084508405 seconds time elapsed
|
|
||||||
|
|
||||||
95.162133000 seconds user
|
|
||||||
1117.716009000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.8824, 0.0692, 0.7225, ..., 0.8736, 0.6854, 0.7514])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 9.64679503440857 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
30,428,652 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
288,897 L1I_CACHE_REFILL:u
|
|
||||||
475,615 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
31,855,716 L1D_CACHE:u
|
|
||||||
|
|
||||||
13.170070008 seconds time elapsed
|
|
||||||
|
|
||||||
68.362809000 seconds user
|
|
||||||
761.360459000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9552, 0.0509, 0.7738, ..., 0.7722, 0.4417, 0.7772])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 12.372079133987427 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
|
||||||
|
|
||||||
553,829 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
574,192 LL_CACHE_RD:u
|
|
||||||
181,148 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,202 L2D_TLB_REFILL:u
|
|
||||||
307,806 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,767,037 L2D_CACHE:u
|
|
||||||
|
|
||||||
15.923392394 seconds time elapsed
|
|
||||||
|
|
||||||
83.307253000 seconds user
|
|
||||||
958.949992000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [32.08, 31.8, 28.68, 27.6, 22.96, 22.08, 21.0, 20.84, 20.68, 20.72], "matrix": "va2010", "shape": [285762, 285762], "nnz": 1402128, "% density": 1.717033263003816e-05, "time_s": 14.632386922836304, "power": [85.16, 83.48, 76.96, 67.44, 54.04, 51.4, 54.24, 66.76, 83.2, 96.44, 96.44, 95.84, 94.24, 92.36, 91.2, 89.32, 87.48, 88.68, 88.24], "power_after": [21.12, 21.0, 21.16, 21.4, 21.32, 21.36, 21.36, 21.12, 20.76, 20.84], "task clock (msec)": 57.32, "page faults": 3280, "cycles": 39497791, "instructions": 64385555, "branch mispredictions": 332792, "branches": 19983954, "ITLB accesses": 27156853, "ITLB misses": 6466, "DTLB misses": 18244, "DTLB accesses": 36466301, "L1I cache accesses": 30929971, "L1I cache misses": 291811, "L1D cache misses": 473063, "L1D cache accesses": 32462905, "LL cache misses": 544953, "LL cache accesses": 565172, "L2D TLB accesses": 183225, "L2D TLB misses": 23924, "L2D cache misses": 301362, "L2D cache accesses": 1756590, "instructions per cycle": 1.6301052127193645, "branch miss rate": 0.01665296067034582, "ITLB miss rate": 0.00023809828038616994, "DTLB miss rate": 0.000500297521264907, "L2D TLB miss rate": 0.13057170145995362, "L1I cache miss rate": 0.009434570759862659, "L1D cache miss rate": 0.014572417348354991, "L2D cache miss rate": 0.17156080815671274, "LL cache miss rate": 0.964225050073252}
|
|
@ -1,173 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395277 queued and waiting for resources
|
|
||||||
srun: job 3395277 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.2920, 0.3583, 0.0598, ..., 0.2208, 0.1741, 0.4955])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 14.792448997497559 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
|
||||||
|
|
||||||
57.32 msec task-clock:u # 0.003 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,280 page-faults:u # 57.220 K/sec
|
|
||||||
39,497,791 cycles:u # 0.689 GHz (54.25%)
|
|
||||||
64,385,555 instructions:u # 1.63 insn per cycle (81.24%)
|
|
||||||
<not supported> branches:u
|
|
||||||
362,674 branch-misses:u
|
|
||||||
33,532,520 L1-dcache-loads:u # 584.977 M/sec
|
|
||||||
481,355 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
31,924,348 L1-icache-loads:u # 556.922 M/sec
|
|
||||||
296,637 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
|
||||||
43,420,143 dTLB-loads:u # 757.467 M/sec (40.22%)
|
|
||||||
30,923 dTLB-load-misses:u # 0.07% of all dTLB cache accesses (19.05%)
|
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
18.678937115 seconds time elapsed
|
|
||||||
|
|
||||||
112.979167000 seconds user
|
|
||||||
1135.785668000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.7703, 0.7481, 0.5351, ..., 0.4663, 0.6089, 0.3679])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 14.130552530288696 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
|
||||||
|
|
||||||
332,792 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
19,983,954 BR_RETIRED:u
|
|
||||||
|
|
||||||
17.923156218 seconds time elapsed
|
|
||||||
|
|
||||||
107.999690000 seconds user
|
|
||||||
1091.659165000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.8850, 0.1406, 0.0617, ..., 0.4325, 0.2725, 0.9292])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 13.32977032661438 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
|
||||||
|
|
||||||
27,156,853 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,466 ITLB_WALK:u
|
|
||||||
18,244 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
36,466,301 L1D_TLB:u
|
|
||||||
|
|
||||||
17.186572497 seconds time elapsed
|
|
||||||
|
|
||||||
104.940187000 seconds user
|
|
||||||
1032.527271000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6289, 0.0403, 0.9207, ..., 0.0183, 0.4807, 0.7504])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 13.460915803909302 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
|
||||||
|
|
||||||
30,929,971 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
291,811 L1I_CACHE_REFILL:u
|
|
||||||
473,063 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
32,462,905 L1D_CACHE:u
|
|
||||||
|
|
||||||
17.219448483 seconds time elapsed
|
|
||||||
|
|
||||||
100.274467000 seconds user
|
|
||||||
1045.271682000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.6412, 0.1151, 0.5075, ..., 0.9251, 0.9288, 0.3560])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 15.992860555648804 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
|
||||||
|
|
||||||
544,953 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
565,172 LL_CACHE_RD:u
|
|
||||||
183,225 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
23,924 L2D_TLB_REFILL:u
|
|
||||||
301,362 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,756,590 L2D_CACHE:u
|
|
||||||
|
|
||||||
19.884223259 seconds time elapsed
|
|
||||||
|
|
||||||
113.211516000 seconds user
|
|
||||||
1230.525804000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [77.2, 64.12, 64.12, 48.92, 36.2, 21.72, 21.88, 22.36, 22.36, 22.44], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.5892834663391113, "power": [33.44, 30.68, 31.08, 26.96, 26.88, 32.48, 32.24], "power_after": [21.24, 21.24, 21.36, 21.36, 21.2, 21.04, 20.84, 20.72, 20.72, 20.56], "task clock (msec)": 55.26, "page faults": 3297, "cycles": 49276491, "instructions": 64763517, "branch mispredictions": 340611, "branches": 20355849, "ITLB accesses": 27946393, "ITLB misses": 6805, "DTLB misses": 17877, "DTLB accesses": 38226912, "L1I cache accesses": 31946141, "L1I cache misses": 295259, "L1D cache misses": 468136, "L1D cache accesses": 33395666, "LL cache misses": 527109, "LL cache accesses": 540409, "L2D TLB accesses": 192519, "L2D TLB misses": 24204, "L2D cache misses": 290933, "L2D cache accesses": 1743452, "instructions per cycle": 1.3142883286880147, "branch miss rate": 0.016732831924622747, "ITLB miss rate": 0.00024350190738389746, "DTLB miss rate": 0.0004676548291423592, "L2D TLB miss rate": 0.1257226559456469, "L1I cache miss rate": 0.009242399574959616, "L1D cache miss rate": 0.014017866869311724, "L2D cache miss rate": 0.16687181522634406, "LL cache miss rate": 0.9753890109158063}
|
|
@ -1,163 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3395285 queued and waiting for resources
|
|
||||||
srun: job 3395285 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1179, 0.2288, 0.5357, ..., 0.4845, 0.6375, 0.4513])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.628732681274414 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
55.26 msec task-clock:u # 0.008 CPUs utilized
|
|
||||||
0 context-switches:u # 0.000 /sec
|
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
|
||||||
3,297 page-faults:u # 59.661 K/sec
|
|
||||||
49,276,491 cycles:u # 0.892 GHz (31.65%)
|
|
||||||
64,763,517 instructions:u # 1.31 insn per cycle (57.73%)
|
|
||||||
<not supported> branches:u
|
|
||||||
357,693 branch-misses:u (76.18%)
|
|
||||||
32,426,852 L1-dcache-loads:u # 586.784 M/sec (88.36%)
|
|
||||||
469,495 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
|
|
||||||
<not supported> LLC-loads:u
|
|
||||||
<not supported> LLC-load-misses:u
|
|
||||||
30,941,957 L1-icache-loads:u # 559.914 M/sec
|
|
||||||
279,512 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
|
|
||||||
47,128,547 dTLB-loads:u # 852.821 M/sec (46.73%)
|
|
||||||
108,931 dTLB-load-misses:u # 0.23% of all dTLB cache accesses (32.30%)
|
|
||||||
14,189,608 iTLB-loads:u # 256.770 M/sec (19.86%)
|
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
|
||||||
|
|
||||||
7.117399121 seconds time elapsed
|
|
||||||
|
|
||||||
18.404618000 seconds user
|
|
||||||
29.532104000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7544, 0.0071, 0.0491, ..., 0.7236, 0.5537, 0.4901])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.6322426795959473 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
340,611 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
|
||||||
20,355,849 BR_RETIRED:u
|
|
||||||
|
|
||||||
7.112879848 seconds time elapsed
|
|
||||||
|
|
||||||
18.362004000 seconds user
|
|
||||||
29.398677000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7651, 0.6605, 0.7128, ..., 0.7434, 0.6656, 0.3987])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.7933311462402344 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
27,946,393 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
|
||||||
6,805 ITLB_WALK:u
|
|
||||||
17,877 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
|
||||||
38,226,912 L1D_TLB:u
|
|
||||||
|
|
||||||
7.235266934 seconds time elapsed
|
|
||||||
|
|
||||||
18.566568000 seconds user
|
|
||||||
29.759130000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3319, 0.1241, 0.4830, ..., 0.5188, 0.8684, 0.1488])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.662006378173828 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
31,946,141 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
|
||||||
295,259 L1I_CACHE_REFILL:u
|
|
||||||
468,136 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
|
||||||
33,395,666 L1D_CACHE:u
|
|
||||||
|
|
||||||
7.187008251 seconds time elapsed
|
|
||||||
|
|
||||||
18.275672000 seconds user
|
|
||||||
30.724065000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|
||||||
155598]),
|
|
||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0055, 0.7774, 0.9046, ..., 0.5143, 0.0678, 0.4725])
|
|
||||||
Matrix: vt2010
|
|
||||||
Shape: torch.Size([32580, 32580])
|
|
||||||
NNZ: 155598
|
|
||||||
Density: 0.00014658915806621921
|
|
||||||
Time: 3.616023063659668 seconds
|
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
|
||||||
|
|
||||||
527,109 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
|
||||||
540,409 LL_CACHE_RD:u
|
|
||||||
192,519 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
|
||||||
24,204 L2D_TLB_REFILL:u
|
|
||||||
290,933 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
|
||||||
1,743,452 L2D_CACHE:u
|
|
||||||
|
|
||||||
7.030605378 seconds time elapsed
|
|
||||||
|
|
||||||
18.274323000 seconds user
|
|
||||||
28.779020000 seconds sys
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ASIC_680k", "MATRIX_SHAPE": [682862, 682862], "MATRIX_SIZE": 466300511044, "MATRIX_NNZ": 3871773, "MATRIX_DENSITY": 8.303171256088674e-06, "TIME_S": 11.77456283569336, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.36, 20.44, 20.48, 20.72, 20.8, 21.0, 21.32, 21.32, 21.28, 21.08], "POWER": [92.0, 91.8, 78.72, 66.68, 51.2, 46.6, 53.36, 53.36, 70.48, 90.16, 100.04, 103.68, 98.2, 95.64, 97.16, 101.4], "JOULES": 938.4206715393068, "POWER_AFTER": [20.96, 20.76, 20.76, 21.08, 21.24, 21.16, 21.28, 21.2, 21.0, 21.08]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "Oregon-2", "MATRIX_SHAPE": [11806, 11806], "MATRIX_SIZE": 139381636, "MATRIX_NNZ": 65460, "MATRIX_DENSITY": 0.0004696458003979807, "TIME_S": 0.9880795478820801, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.04, 21.12, 21.2, 21.12, 21.04, 20.96, 20.92, 20.88, 21.16, 21.08], "POWER": [25.92, 42.32, 42.32, 45.44, 45.4], "JOULES": 44.85881147384644, "POWER_AFTER": [20.72, 20.72, 20.84, 20.84, 20.84, 20.96, 20.92, 20.6, 20.68, 20.84]}
|
|
@ -1,23 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3471014 queued and waiting for resources
|
|
||||||
srun: job 3471014 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
|
||||||
tensor([0.2158, 0.5422, 0.9585, ..., 0.6377, 0.8158, 0.5743])
|
|
||||||
Matrix: Oregon-2
|
|
||||||
Shape: torch.Size([11806, 11806])
|
|
||||||
Size: 139381636
|
|
||||||
NNZ: 65460
|
|
||||||
Density: 0.0004696458003979807
|
|
||||||
Time: 0.9880795478820801 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "as-caida", "MATRIX_SHAPE": [31379, 31379], "MATRIX_SIZE": 984641641, "MATRIX_NNZ": 106762, "MATRIX_DENSITY": 0.00010842726485909405, "TIME_S": 1.066300630569458, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.64, 20.48, 20.68, 20.64, 20.32, 20.32, 20.4, 20.2, 20.52, 20.52], "POWER": [26.32, 39.88, 50.16, 50.64, 50.24], "JOULES": 53.97094367980957, "POWER_AFTER": [20.28, 20.4, 20.2, 20.32, 20.32, 20.4, 20.48, 20.28, 20.28, 20.44]}
|
|
@ -1,24 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470988 queued and waiting for resources
|
|
||||||
srun: job 3470988 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|
||||||
106762]),
|
|
||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8877, 0.6518, 0.0601, ..., 0.0372, 0.4806, 0.8853])
|
|
||||||
Matrix: as-caida
|
|
||||||
Shape: torch.Size([31379, 31379])
|
|
||||||
Size: 984641641
|
|
||||||
NNZ: 106762
|
|
||||||
Density: 0.00010842726485909405
|
|
||||||
Time: 1.066300630569458 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "dc2", "MATRIX_SHAPE": [116835, 116835], "MATRIX_SIZE": 13650417225, "MATRIX_NNZ": 766396, "MATRIX_DENSITY": 5.614451099680581e-05, "TIME_S": 3.0164122581481934, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.44, 20.72, 20.72, 21.0, 20.84, 21.08, 20.88, 20.8, 20.8, 20.88], "POWER": [64.4, 79.8, 83.24, 75.76, 58.2, 58.2, 56.64, 60.64, 75.88, 93.68], "JOULES": 194.69750034332276, "POWER_AFTER": [21.12, 21.0, 21.12, 20.88, 20.88, 20.84, 20.96, 20.92, 20.88, 20.8]}
|
|
@ -1,26 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470982 queued and waiting for resources
|
|
||||||
srun: job 3470982 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|
||||||
766396]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 116833, 89,
|
|
||||||
116834]),
|
|
||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
|
||||||
tensor([0.3305, 0.9342, 0.6954, ..., 0.1999, 0.9064, 0.6304])
|
|
||||||
Matrix: dc2
|
|
||||||
Shape: torch.Size([116835, 116835])
|
|
||||||
Size: 13650417225
|
|
||||||
NNZ: 766396
|
|
||||||
Density: 5.614451099680581e-05
|
|
||||||
Time: 3.0164122581481934 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "de2010", "MATRIX_SHAPE": [24115, 24115], "MATRIX_SIZE": 581533225, "MATRIX_NNZ": 116056, "MATRIX_DENSITY": 0.0001995689928120616, "TIME_S": 1.1378686428070068, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.0, 20.88, 21.04, 20.8, 20.8, 20.44, 20.64, 20.48, 20.28, 20.16], "POWER": [22.84, 39.8, 49.48, 50.32, 50.28], "JOULES": 57.25203536033631, "POWER_AFTER": [20.68, 20.44, 20.68, 20.68, 20.56, 20.88, 20.92, 20.88, 21.0, 20.96]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "email-Enron", "MATRIX_SHAPE": [36692, 36692], "MATRIX_SIZE": 1346302864, "MATRIX_NNZ": 367662, "MATRIX_DENSITY": 0.0002730901120626302, "TIME_S": 1.3314027786254883, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.8, 20.64, 20.6, 20.6, 20.48, 20.8, 20.72, 20.72, 20.92, 20.92], "POWER": [28.4, 43.96, 54.4, 55.28, 55.08], "JOULES": 73.5336650466919, "POWER_AFTER": [20.88, 20.8, 20.8, 20.8, 20.64, 20.64, 20.64, 20.48, 20.52, 20.72]}
|
|
@ -1,24 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470985 queued and waiting for resources
|
|
||||||
srun: job 3470985 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
|
|
||||||
367662]),
|
|
||||||
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
|
|
||||||
nnz=367662, layout=torch.sparse_csr)
|
|
||||||
tensor([0.7107, 0.7540, 0.8321, ..., 0.9503, 0.7781, 0.9277])
|
|
||||||
Matrix: email-Enron
|
|
||||||
Shape: torch.Size([36692, 36692])
|
|
||||||
Size: 1346302864
|
|
||||||
NNZ: 367662
|
|
||||||
Density: 0.0002730901120626302
|
|
||||||
Time: 1.3314027786254883 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "fl2010", "MATRIX_SHAPE": [484481, 484481], "MATRIX_SIZE": 234721839361, "MATRIX_NNZ": 2346294, "MATRIX_DENSITY": 9.99606174861054e-06, "TIME_S": 2.924255609512329, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.8, 20.88, 20.72, 20.64, 20.56, 20.92, 20.92, 21.0, 20.96, 20.84], "POWER": [73.32, 93.24, 93.64, 82.2, 61.36, 61.36, 58.0], "JOULES": 176.3268253517151, "POWER_AFTER": [20.76, 20.56, 20.76, 20.72, 20.76, 20.76, 20.76, 20.88, 20.68, 20.68]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ga2010", "MATRIX_SHAPE": [291086, 291086], "MATRIX_SIZE": 84731059396, "MATRIX_NNZ": 1418056, "MATRIX_DENSITY": 1.6735964475229304e-05, "TIME_S": 2.341104745864868, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.32, 20.28, 20.24, 20.44, 20.52, 20.8, 20.64, 20.68, 20.6, 20.36], "POWER": [33.84, 53.08, 66.2, 66.52, 67.36, 59.0], "JOULES": 154.00518000602722, "POWER_AFTER": [20.28, 20.32, 20.52, 20.6, 20.6, 20.84, 21.12, 20.96, 20.76, 20.8]}
|
|
@ -1,25 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470989 queued and waiting for resources
|
|
||||||
srun: job 3470989 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
|
|
||||||
1418054, 1418056]),
|
|
||||||
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
|
|
||||||
290176]),
|
|
||||||
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
|
|
||||||
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
|
|
||||||
tensor([0.0746, 0.8150, 0.2560, ..., 0.7929, 0.2552, 0.7733])
|
|
||||||
Matrix: ga2010
|
|
||||||
Shape: torch.Size([291086, 291086])
|
|
||||||
Size: 84731059396
|
|
||||||
NNZ: 1418056
|
|
||||||
Density: 1.6735964475229304e-05
|
|
||||||
Time: 2.341104745864868 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "mac_econ_fwd500", "MATRIX_SHAPE": [206500, 206500], "MATRIX_SIZE": 42642250000, "MATRIX_NNZ": 1273389, "MATRIX_DENSITY": 2.9862143765866013e-05, "TIME_S": 1.6093401908874512, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.76, 20.72, 20.28, 20.2, 20.24, 20.56, 20.72, 21.12, 21.24, 21.0], "POWER": [48.6, 65.2, 65.2, 61.84, 62.88, 59.36], "JOULES": 99.0504337310791, "POWER_AFTER": [20.76, 20.4, 20.64, 20.68, 20.68, 20.56, 20.48, 20.68, 20.64, 20.88]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "mc2depi", "MATRIX_SHAPE": [525825, 525825], "MATRIX_SIZE": 276491930625, "MATRIX_NNZ": 2100225, "MATRIX_DENSITY": 7.595972132902821e-06, "TIME_S": 2.123237371444702, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.68, 20.68, 20.68, 20.64, 20.72, 20.6, 20.84, 20.76, 20.92, 20.96], "POWER": [52.52, 76.2, 82.92, 85.4, 72.28, 58.76], "JOULES": 164.92142794609072, "POWER_AFTER": [20.68, 20.72, 20.84, 20.88, 20.84, 21.16, 21.04, 21.16, 20.88, 20.88]}
|
|
@ -1,25 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470981 queued and waiting for resources
|
|
||||||
srun: job 3470981 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
|
||||||
2100223, 2100225]),
|
|
||||||
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
|
||||||
525824]),
|
|
||||||
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
|
||||||
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8254, 0.0543, 0.1764, ..., 0.7650, 0.8254, 0.6404])
|
|
||||||
Matrix: mc2depi
|
|
||||||
Shape: torch.Size([525825, 525825])
|
|
||||||
Size: 276491930625
|
|
||||||
NNZ: 2100225
|
|
||||||
Density: 7.595972132902821e-06
|
|
||||||
Time: 2.123237371444702 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella04", "MATRIX_SHAPE": [10879, 10879], "MATRIX_SIZE": 118352641, "MATRIX_NNZ": 39994, "MATRIX_DENSITY": 0.0003379223282393842, "TIME_S": 0.9692902565002441, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.6, 20.48, 20.64, 20.64, 20.64, 20.56, 20.52, 20.44, 20.24, 20.12], "POWER": [25.92, 43.16, 50.56, 48.4, 49.28], "JOULES": 47.76662384033203, "POWER_AFTER": [20.4, 20.52, 20.44, 20.64, 20.72, 20.64, 20.8, 20.6, 20.6, 20.64]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella24", "MATRIX_SHAPE": [26518, 26518], "MATRIX_SIZE": 703204324, "MATRIX_NNZ": 65369, "MATRIX_DENSITY": 9.295875717624285e-05, "TIME_S": 0.9848971366882324, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [16.32, 16.36, 16.36, 16.32, 16.56, 16.64, 16.72, 16.92, 16.76, 16.96], "POWER": [22.56, 40.8, 42.16, 42.16, 39.84], "JOULES": 39.23830192565919, "POWER_AFTER": [16.56, 16.44, 16.44, 16.68, 16.72, 16.72, 16.76, 16.68, 16.68, 16.92]}
|
|
@ -1,23 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3471012 queued and waiting for resources
|
|
||||||
srun: job 3471012 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
|
||||||
tensor([0.6126, 0.7089, 0.2938, ..., 0.5143, 0.3903, 0.8766])
|
|
||||||
Matrix: p2p-Gnutella24
|
|
||||||
Shape: torch.Size([26518, 26518])
|
|
||||||
Size: 703204324
|
|
||||||
NNZ: 65369
|
|
||||||
Density: 9.295875717624285e-05
|
|
||||||
Time: 0.9848971366882324 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella25", "MATRIX_SHAPE": [22687, 22687], "MATRIX_SIZE": 514699969, "MATRIX_NNZ": 54705, "MATRIX_DENSITY": 0.00010628522108964806, "TIME_S": 1.064000129699707, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.4, 20.68, 20.76, 20.6, 20.64, 20.48, 20.36, 20.48, 20.52, 20.52], "POWER": [33.4, 49.92, 52.44, 52.44, 51.68], "JOULES": 55.747526702880855, "POWER_AFTER": [20.96, 20.76, 20.96, 21.08, 20.64, 20.84, 20.84, 20.56, 20.28, 20.48]}
|
|
@ -1,23 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470999 queued and waiting for resources
|
|
||||||
srun: job 3470999 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
|
||||||
tensor([0.1096, 0.4722, 0.2402, ..., 0.8482, 0.4609, 0.1028])
|
|
||||||
Matrix: p2p-Gnutella25
|
|
||||||
Shape: torch.Size([22687, 22687])
|
|
||||||
Size: 514699969
|
|
||||||
NNZ: 54705
|
|
||||||
Density: 0.00010628522108964806
|
|
||||||
Time: 1.064000129699707 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "p2p-Gnutella30", "MATRIX_SHAPE": [36682, 36682], "MATRIX_SIZE": 1345569124, "MATRIX_NNZ": 88328, "MATRIX_DENSITY": 6.564359899804003e-05, "TIME_S": 1.022092580795288, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.44, 20.56, 20.76, 20.6, 20.64, 21.08, 20.76, 20.32, 20.32, 20.44], "POWER": [25.64, 36.88, 51.72, 49.6, 50.84], "JOULES": 50.723186807632445, "POWER_AFTER": [20.56, 20.68, 20.6, 20.88, 21.08, 20.76, 20.76, 20.92, 20.32, 20.24]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ri2010", "MATRIX_SHAPE": [25181, 25181], "MATRIX_SIZE": 634082761, "MATRIX_NNZ": 125750, "MATRIX_DENSITY": 0.00019831796057928155, "TIME_S": 0.7675364017486572, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.64, 20.64, 20.64, 20.64, 20.8, 20.8, 20.8, 20.96, 20.92, 20.84], "POWER": [26.52, 43.16, 47.12, 46.0, 47.48], "JOULES": 36.442628355026244, "POWER_AFTER": [20.48, 20.44, 20.6, 20.64, 20.6, 20.68, 20.6, 20.8, 20.6, 20.6]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "rma10", "MATRIX_SHAPE": [46835, 46835], "MATRIX_SIZE": 2193517225, "MATRIX_NNZ": 2374001, "MATRIX_DENSITY": 0.0010822805369125833, "TIME_S": 2.688584089279175, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.24, 20.24, 20.4, 20.44, 20.76, 20.76, 20.68, 20.72, 20.56, 20.44], "POWER": [53.84, 65.36, 65.36, 65.6, 62.2, 50.6], "JOULES": 162.64235491752623, "POWER_AFTER": [20.28, 20.4, 20.48, 20.44, 20.4, 20.48, 20.52, 20.44, 20.44, 20.44]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-Slashdot090216", "MATRIX_SHAPE": [81871, 81871], "MATRIX_SIZE": 6702860641, "MATRIX_NNZ": 545671, "MATRIX_DENSITY": 8.140867447881048e-05, "TIME_S": 1.4809374809265137, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [21.16, 20.96, 20.92, 20.92, 20.76, 20.72, 21.04, 21.04, 21.08, 20.84], "POWER": [38.4, 56.52, 60.12, 59.64, 58.44], "JOULES": 87.74598638534546, "POWER_AFTER": [20.56, 20.56, 20.68, 20.52, 21.16, 21.16, 21.28, 21.0, 21.12, 20.84]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-Slashdot090221", "MATRIX_SHAPE": [82144, 82144], "MATRIX_SIZE": 6747636736, "MATRIX_NNZ": 549202, "MATRIX_DENSITY": 8.13917555860553e-05, "TIME_S": 1.608903408050537, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.68, 20.68, 20.64, 20.28, 20.32, 20.44, 20.44, 20.44, 20.44, 20.52], "POWER": [57.2, 57.2, 72.76, 72.52, 70.32, 58.68], "JOULES": 106.05045198440551, "POWER_AFTER": [20.96, 20.76, 20.84, 20.92, 20.92, 20.96, 21.12, 21.24, 21.16, 21.04]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "soc-sign-epinions", "MATRIX_SHAPE": [131828, 131828], "MATRIX_SIZE": 17378621584, "MATRIX_NNZ": 841372, "MATRIX_DENSITY": 4.841419648464106e-05, "TIME_S": 4.555854320526123, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [16.4, 16.36, 16.48, 16.68, 16.32, 16.32, 16.56, 16.56, 16.64, 16.64], "POWER": [51.6, 68.68, 77.56, 77.4, 61.4, 55.08, 54.44, 65.6], "JOULES": 284.7840434265137, "POWER_AFTER": [16.92, 16.88, 17.04, 16.92, 16.84, 16.92, 16.88, 16.8, 17.12, 17.12]}
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "sx-mathoverflow", "MATRIX_SHAPE": [24818, 24818], "MATRIX_SIZE": 615933124, "MATRIX_NNZ": 239978, "MATRIX_DENSITY": 0.00038961697406616504, "TIME_S": 1.0039293766021729, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.88, 21.0, 21.0, 20.92, 20.92, 20.8, 20.6, 20.6, 20.76, 20.92], "POWER": [29.76, 49.24, 50.6, 47.84, 47.84], "JOULES": 48.02798137664795, "POWER_AFTER": [20.96, 20.8, 20.92, 21.68, 22.4, 23.04, 23.76, 23.12, 22.6, 21.8]}
|
|
@ -1,24 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470984 queued and waiting for resources
|
|
||||||
srun: job 3470984 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|
||||||
239978]),
|
|
||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
|
||||||
tensor([0.8169, 0.9455, 0.2378, ..., 0.7183, 0.8285, 0.9774])
|
|
||||||
Matrix: sx-mathoverflow
|
|
||||||
Shape: torch.Size([24818, 24818])
|
|
||||||
Size: 615933124
|
|
||||||
NNZ: 239978
|
|
||||||
Density: 0.00038961697406616504
|
|
||||||
Time: 1.0039293766021729 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "tn2010", "MATRIX_SHAPE": [240116, 240116], "MATRIX_SIZE": 57655693456, "MATRIX_NNZ": 1193966, "MATRIX_DENSITY": 2.070855328296721e-05, "TIME_S": 2.2318568229675293, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.52, 20.52, 20.68, 20.6, 20.76, 20.84, 20.52, 20.44, 20.48, 20.4], "POWER": [47.04, 68.12, 70.92, 71.88, 71.88, 61.28], "JOULES": 157.9681861114502, "POWER_AFTER": [21.04, 20.76, 20.8, 20.72, 20.76, 20.84, 20.92, 21.04, 20.8, 20.8]}
|
|
@ -1,26 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3470986 queued and waiting for resources
|
|
||||||
srun: job 3470986 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
|
|
||||||
1193963, 1193966]),
|
|
||||||
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
|
|
||||||
240113]),
|
|
||||||
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
|
|
||||||
34928.]), size=(240116, 240116), nnz=1193966,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.2593, 0.6684, 0.1857, ..., 0.6282, 0.3314, 0.7454])
|
|
||||||
Matrix: tn2010
|
|
||||||
Shape: torch.Size([240116, 240116])
|
|
||||||
Size: 57655693456
|
|
||||||
NNZ: 1193966
|
|
||||||
Density: 2.070855328296721e-05
|
|
||||||
Time: 2.2318568229675293 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "ut2010", "MATRIX_SHAPE": [115406, 115406], "MATRIX_SIZE": 13318544836, "MATRIX_NNZ": 572066, "MATRIX_DENSITY": 4.295259032005559e-05, "TIME_S": 1.5120632648468018, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [23.36, 22.84, 22.36, 21.92, 21.48, 21.48, 21.72, 22.08, 22.64, 23.28], "POWER": [43.48, 59.4, 65.28, 65.16, 62.16], "JOULES": 96.98985254287719, "POWER_AFTER": [22.56, 22.8, 22.24, 21.84, 21.4, 21.32, 20.96, 21.28, 21.36, 21.08]}
|
|
@ -1,26 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3471001 queued and waiting for resources
|
|
||||||
srun: job 3471001 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|
||||||
572066]),
|
|
||||||
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
|
||||||
114602]),
|
|
||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.9240, 0.3751, 0.9849, ..., 0.9377, 0.9441, 0.6765])
|
|
||||||
Matrix: ut2010
|
|
||||||
Shape: torch.Size([115406, 115406])
|
|
||||||
Size: 13318544836
|
|
||||||
NNZ: 572066
|
|
||||||
Density: 4.295259032005559e-05
|
|
||||||
Time: 1.5120632648468018 seconds
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
{"CPU": "ALTRA", "ITERATIONS": 10000, "MATRIX_FILE": "va2010", "MATRIX_SHAPE": [285762, 285762], "MATRIX_SIZE": 81659920644, "MATRIX_NNZ": 1402128, "MATRIX_DENSITY": 1.717033263003816e-05, "TIME_S": 2.1484014987945557, "BASELINE_TIME_S": 10, "BASELINE_DELAY_S": 10, "POWER_BEFORE": [20.76, 20.72, 20.76, 20.88, 20.88, 20.96, 20.96, 20.96, 20.8, 20.6], "POWER": [65.16, 84.16, 87.88, 82.08, 64.16, 59.44], "JOULES": 155.0609850883484, "POWER_AFTER": [20.52, 20.52, 20.72, 20.56, 20.64, 20.64, 20.72, 20.92, 21.16, 21.32]}
|
|
@ -1,26 +0,0 @@
|
|||||||
srun: Job time limit was unset; set to partition default of 60 minutes
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
|
||||||
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
|
||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
|
||||||
srun: ################################################################################
|
|
||||||
srun: job 3471004 queued and waiting for resources
|
|
||||||
srun: job 3471004 has been allocated resources
|
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:22: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
|
||||||
).to_sparse_csr().type(torch.float)
|
|
||||||
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
|
||||||
1402123, 1402128]),
|
|
||||||
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
|
||||||
285760]),
|
|
||||||
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
|
||||||
9832.]), size=(285762, 285762), nnz=1402128,
|
|
||||||
layout=torch.sparse_csr)
|
|
||||||
tensor([0.5972, 0.8492, 0.1772, ..., 0.7912, 0.0415, 0.8296])
|
|
||||||
Matrix: va2010
|
|
||||||
Shape: torch.Size([285762, 285762])
|
|
||||||
Size: 81659920644
|
|
||||||
NNZ: 1402128
|
|
||||||
Density: 1.717033263003816e-05
|
|
||||||
Time: 2.1484014987945557 seconds
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user