Compare commits

..

2 Commits

Author SHA1 Message Date
9e54411c5a ignored matrices 2024-12-03 08:54:48 -05:00
93690abfee new output 2024-12-03 08:53:39 -05:00
84 changed files with 3744 additions and 971 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
*.swp
*.sif
__pycache__
matrices/

View File

@ -4,6 +4,7 @@ import argparse
import glob
import os
import subprocess
import random
parser = argparse.ArgumentParser()
parser.add_argument('arch')
@ -14,7 +15,7 @@ parser.add_argument('baseline_time_s', type=int)
parser.add_argument('baseline_delay_s', type=int)
parser.add_argument('--perf', action='store_const', const='--perf')
parser.add_argument('--power', action='store_const', const='--power')
parser.add_argument('--distribute', type=bool)
parser.add_argument('--distribute', action='store_true')
args = parser.parse_args()
srun_args_altra = [
@ -42,32 +43,36 @@ def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
run_args_list += [args.perf]
if args.power is not None:
run_args_list += [args.power]
return ['srun'] + srun_args_list + ['run.py'] + run_args_list
return ['srun'] + srun_args_list + ['./run.py'] + run_args_list
processes = list()
for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
if args.arch == 'altra':
if args.distribute == True:
if args.distribute:
i = i % 40
srun_args_altra += [f'--nodelist oasis{i:02}']
srun_args = srun_args_altra + ['--nodelist', f'oasis{i:02}']
else:
srun_args = srun_args_altra
output_filename = '_'.join([
args.arch,
str(args.iterations),
os.path.splitext(os.path.basename(matrix))[0],
str(args.baseline_time_s),
str(args.baseline_delay_s)])
str(args.baseline_delay_s),
os.path.splitext(os.path.basename(matrix))[0],
str(args.iterations)])
json_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.json'
raw_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.output'
with open(json_filepath, 'w') as json_file, open(raw_filepath, 'w') as raw_file:
print(srun(srun_args_altra, args, matrix))
proc = subprocess.run(
print(srun(srun_args, args, matrix))
print(json_filepath)
print(raw_filepath)
processes.append(subprocess.Popen(
srun(srun_args_altra, args, matrix),
stdout=json_file,
stderr=raw_file,
text=True)
#output = proc.communicate()
#print(output[0])
#print(output[1])
stderr=raw_file))
break;
for process in processes:
process.wait()

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.36, 22.88, 22.36, 22.72, 22.52, 22.2, 21.96, 21.8, 21.48, 21.48], "matrix": "Oregon-2", "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 1.5312557220458984, "power": [26.68, 27.84, 28.48, 29.92, 30.0], "power_after": [21.16, 21.32, 21.16, 21.16, 21.16, 20.88, 20.92, 20.76, 20.96, 21.2], "task clock (msec)": 64.81, "page faults": 3244, "cycles": 82069432, "instructions": 78292700, "branch mispredictions": 319703, "branches": 19996903, "ITLB accesses": 26988315, "ITLB misses": 5988, "DTLB misses": 14570, "DTLB accesses": 36879854, "L1I cache accesses": 30465174, "L1I cache misses": 293085, "L1D cache misses": 487330, "L1D cache accesses": 31932249, "LL cache misses": 545501, "LL cache accesses": 558084, "L2D TLB accesses": 204746, "L2D TLB misses": 25302, "L2D cache misses": 314594, "L2D cache accesses": 1828047, "instructions per cycle": 0.9539812582107307, "branch miss rate": 0.01598762568383714, "ITLB miss rate": 0.00022187379982781437, "DTLB miss rate": 0.0003950666399058955, "L2D TLB miss rate": 0.12357750578765886, "L1I cache miss rate": 0.009620329101025322, "L1D cache miss rate": 0.015261374167538278, "L2D cache miss rate": 0.17209294947011755, "LL cache miss rate": 0.9774532149282187}

View File

@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394148 queued and waiting for resources
srun: job 3394148 has been allocated resources
srun: job 3394980 queued and waiting for resources
srun: job 3394980 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.3190, 0.2829, 0.6210, ..., 0.9278, 0.7514, 0.5737])
tensor([0.9231, 0.7723, 0.0509, ..., 0.0839, 0.6982, 0.3459])
Matrix: Oregon-2
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.22389841079711914 seconds
Time: 1.5677142143249512 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
42.01 msec task-clock:u # 0.012 CPUs utilized
64.81 msec task-clock:u # 0.013 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,263 page-faults:u # 77.672 K/sec
47,084,933 cycles:u # 1.121 GHz (65.90%)
77,895,119 instructions:u # 1.65 insn per cycle (85.49%)
3,244 page-faults:u # 50.056 K/sec
82,069,432 cycles:u # 1.266 GHz (59.04%)
78,292,700 instructions:u # 0.95 insn per cycle (76.75%)
<not supported> branches:u
352,740 branch-misses:u
30,958,922 L1-dcache-loads:u # 736.946 M/sec
442,351 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
341,509 branch-misses:u (90.97%)
33,032,555 L1-dcache-loads:u # 509.704 M/sec
478,674 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,506,648 L1-icache-loads:u # 702.376 M/sec
272,063 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
51,646,382 dTLB-loads:u # 1.229 G/sec (15.87%)
<not counted> dTLB-load-misses:u (0.00%)
31,508,310 L1-icache-loads:u # 486.184 M/sec
297,528 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
49,358,091 dTLB-loads:u # 761.613 M/sec (27.83%)
88,514 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (14.82%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.513156571 seconds time elapsed
5.016393105 seconds time elapsed
15.150380000 seconds user
32.922923000 seconds sys
16.759527000 seconds user
31.429551000 seconds sys
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.0741, 0.5476, 0.1060, ..., 0.8459, 0.8270, 0.8313])
tensor([0.8423, 0.9339, 0.8037, ..., 0.5953, 0.0649, 0.1559])
Matrix: Oregon-2
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.20610284805297852 seconds
Time: 1.516484022140503 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
330,923 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,740,519 BR_RETIRED:u
319,703 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,996,903 BR_RETIRED:u
3.639725976 seconds time elapsed
4.945699041 seconds time elapsed
15.493122000 seconds user
27.617441000 seconds sys
16.431978000 seconds user
29.752452000 seconds sys
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.9699, 0.9368, 0.7284, ..., 0.7182, 0.5308, 0.9833])
tensor([0.8058, 0.2922, 0.1227, ..., 0.2176, 0.9496, 0.8838])
Matrix: Oregon-2
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.15960955619812012 seconds
Time: 1.6458909511566162 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
27,761,239 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,471 ITLB_WALK:u
17,268 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,993,265 L1D_TLB:u
26,988,315 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,988 ITLB_WALK:u
14,570 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,879,854 L1D_TLB:u
3.455602215 seconds time elapsed
5.011871473 seconds time elapsed
15.015027000 seconds user
27.930709000 seconds sys
16.529942000 seconds user
30.438432000 seconds sys
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.5851, 0.3425, 0.8120, ..., 0.0829, 0.5823, 0.2256])
tensor([0.7728, 0.1182, 0.3337, ..., 0.2555, 0.2523, 0.5746])
Matrix: Oregon-2
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.15697884559631348 seconds
Time: 1.529954433441162 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
31,834,980 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,333 L1I_CACHE_REFILL:u
466,901 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,528,976 L1D_CACHE:u
30,465,174 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,085 L1I_CACHE_REFILL:u
487,330 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,932,249 L1D_CACHE:u
3.452279902 seconds time elapsed
4.954100105 seconds time elapsed
14.635240000 seconds user
28.262858000 seconds sys
16.282966000 seconds user
28.926724000 seconds sys
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
nnz=65460, layout=torch.sparse_csr)
tensor([0.0772, 0.9112, 0.0293, ..., 0.4016, 0.4357, 0.5368])
tensor([0.5613, 0.3211, 0.1739, ..., 0.5461, 0.1391, 0.8387])
Matrix: Oregon-2
Shape: torch.Size([11806, 11806])
NNZ: 65460
Density: 0.0004696458003979807
Time: 0.20962285995483398 seconds
Time: 1.5726752281188965 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
525,505 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
546,521 LL_CACHE_RD:u
184,884 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,933 L2D_TLB_REFILL:u
292,367 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,706,226 L2D_CACHE:u
545,501 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
558,084 LL_CACHE_RD:u
204,746 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,302 L2D_TLB_REFILL:u
314,594 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,828,047 L2D_CACHE:u
3.566096255 seconds time elapsed
4.866549675 seconds time elapsed
15.763579000 seconds user
28.620423000 seconds sys
16.609257000 seconds user
31.381282000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.6, 21.64, 21.88, 22.08, 22.2, 22.32, 22.36, 22.04, 22.0, 21.96], "matrix": "as-caida", "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 2.6254467964172363, "power": [30.92, 29.2, 29.52, 29.72, 29.72, 31.72], "power_after": [21.04, 21.28, 21.04, 21.16, 21.16, 20.96, 21.04, 20.88, 20.56, 20.84], "task clock (msec)": 61.4, "page faults": 3507, "cycles": 78967021, "instructions": 94334531, "branch mispredictions": 325893, "branches": 19069753, "ITLB accesses": 27181279, "ITLB misses": 5995, "DTLB misses": 17412, "DTLB accesses": 37016930, "L1I cache accesses": 31535482, "L1I cache misses": 292676, "L1D cache misses": 471752, "L1D cache accesses": 33119145, "LL cache misses": 540894, "LL cache accesses": 554700, "L2D TLB accesses": 191772, "L2D TLB misses": 23711, "L2D cache misses": 306195, "L2D cache accesses": 1755986, "instructions per cycle": 1.1946066827061894, "branch miss rate": 0.017089523917797993, "ITLB miss rate": 0.00022055621444450792, "DTLB miss rate": 0.00047037936425305935, "L2D TLB miss rate": 0.12364161608576851, "L1I cache miss rate": 0.009280847522799873, "L1D cache miss rate": 0.01424408752097918, "L2D cache miss rate": 0.17437211913990203, "LL cache miss rate": 0.975110870740941}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394150 queued and waiting for resources
srun: job 3394150 has been allocated resources
srun: job 3394983 queued and waiting for resources
srun: job 3394983 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.7672, 0.5818, 0.6775, ..., 0.1052, 0.2539, 0.4347])
tensor([0.4886, 0.3652, 0.5691, ..., 0.6466, 0.4355, 0.8397])
Matrix: as-caida
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.28373050689697266 seconds
Time: 2.6297245025634766 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
60.78 msec task-clock:u # 0.017 CPUs utilized
61.40 msec task-clock:u # 0.010 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,300 page-faults:u # 54.293 K/sec
66,733,059 cycles:u # 1.098 GHz (58.34%)
87,889,334 instructions:u # 1.32 insn per cycle (93.45%)
3,507 page-faults:u # 57.117 K/sec
78,967,021 cycles:u # 1.286 GHz (61.13%)
94,334,531 instructions:u # 1.19 insn per cycle (95.16%)
<not supported> branches:u
369,909 branch-misses:u
31,872,708 L1-dcache-loads:u # 524.386 M/sec
465,719 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
365,239 branch-misses:u
33,334,312 L1-dcache-loads:u # 542.906 M/sec
457,950 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,443,353 L1-icache-loads:u # 500.870 M/sec
292,371 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
34,702,735 dTLB-loads:u # 570.947 M/sec (6.96%)
31,725,851 L1-icache-loads:u # 516.709 M/sec
297,720 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
25,188,580 dTLB-loads:u # 410.239 M/sec (5.16%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.683429807 seconds time elapsed
6.049042045 seconds time elapsed
15.161162000 seconds user
31.335288000 seconds sys
17.649315000 seconds user
29.335859000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.2708, 0.2455, 0.7615, ..., 0.1172, 0.4072, 0.8970])
tensor([0.8344, 0.2588, 0.2246, ..., 0.5607, 0.8141, 0.9893])
Matrix: as-caida
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.32511067390441895 seconds
Time: 2.6495532989501953 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
326,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,832,700 BR_RETIRED:u
325,893 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,069,753 BR_RETIRED:u
3.755497210 seconds time elapsed
6.023780447 seconds time elapsed
14.681699000 seconds user
29.413955000 seconds sys
17.654658000 seconds user
28.848805000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.9417, 0.0965, 0.8551, ..., 0.6665, 0.0164, 0.5102])
tensor([0.0814, 0.1132, 0.8515, ..., 0.8987, 0.5912, 0.5002])
Matrix: as-caida
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.33124780654907227 seconds
Time: 2.5444185733795166 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
27,233,629 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,868 ITLB_WALK:u
16,893 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,409,508 L1D_TLB:u
27,181,279 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,995 ITLB_WALK:u
17,412 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,016,930 L1D_TLB:u
3.751203540 seconds time elapsed
5.790360666 seconds time elapsed
14.849342000 seconds user
27.706396000 seconds sys
17.919315000 seconds user
30.569858000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.9215, 0.4139, 0.1789, ..., 0.0245, 0.0029, 0.2129])
tensor([0.0439, 0.1884, 0.3342, ..., 0.2027, 0.5532, 0.7245])
Matrix: as-caida
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.3386805057525635 seconds
Time: 2.620804786682129 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
30,924,532 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
288,199 L1I_CACHE_REFILL:u
462,816 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,428,375 L1D_CACHE:u
31,535,482 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
292,676 L1I_CACHE_REFILL:u
471,752 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,119,145 L1D_CACHE:u
3.628443937 seconds time elapsed
6.002311801 seconds time elapsed
15.430937000 seconds user
30.878583000 seconds sys
17.427887000 seconds user
30.063688000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
nnz=106762, layout=torch.sparse_csr)
tensor([0.4983, 0.0268, 0.1695, ..., 0.6987, 0.7224, 0.8577])
tensor([0.1495, 0.5856, 0.8600, ..., 0.2101, 0.6229, 0.2019])
Matrix: as-caida
Shape: torch.Size([31379, 31379])
NNZ: 106762
Density: 0.00010842726485909405
Time: 0.3289623260498047 seconds
Time: 2.561279296875 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
551,997 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
568,528 LL_CACHE_RD:u
193,991 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,353 L2D_TLB_REFILL:u
312,207 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,821,196 L2D_CACHE:u
540,894 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
554,700 LL_CACHE_RD:u
191,772 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,711 L2D_TLB_REFILL:u
306,195 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,755,986 L2D_CACHE:u
3.698790384 seconds time elapsed
5.946428572 seconds time elapsed
15.745189000 seconds user
31.063512000 seconds sys
17.396567000 seconds user
32.141235000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [83.04, 78.44, 65.92, 53.76, 38.68, 38.68, 25.68, 22.6, 22.52, 22.32], "matrix": "dc2", "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 14.128849267959595, "power": [89.84, 89.4, 82.8, 71.32, 57.72, 51.92, 53.0, 63.8, 78.24, 78.24, 90.2, 90.36, 90.08, 88.64, 88.64, 87.64, 87.68, 87.24], "power_after": [21.4, 21.2, 21.08, 21.08, 21.28, 21.04, 20.92, 21.12, 21.08, 21.0], "task clock (msec)": 58.45, "page faults": 3471, "cycles": 76691414, "instructions": 89547095, "branch mispredictions": 329725, "branches": 19946857, "ITLB accesses": 27648951, "ITLB misses": 6857, "DTLB misses": 18047, "DTLB accesses": 37225736, "L1I cache accesses": 32434686, "L1I cache misses": 293072, "L1D cache misses": 483557, "L1D cache accesses": 34059722, "LL cache misses": 561480, "LL cache accesses": 578369, "L2D TLB accesses": 192306, "L2D TLB misses": 25364, "L2D cache misses": 317121, "L2D cache accesses": 1812330, "instructions per cycle": 1.16762868656979, "branch miss rate": 0.01653017314958442, "ITLB miss rate": 0.00024800217556174194, "DTLB miss rate": 0.00048479901109275584, "L2D TLB miss rate": 0.13189396066685385, "L1I cache miss rate": 0.00903575881696527, "L1D cache miss rate": 0.014197326683993487, "L2D cache miss rate": 0.17497972223601663, "LL cache miss rate": 0.9707989190292011}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394149 queued and waiting for resources
srun: job 3394149 has been allocated resources
srun: job 3394982 queued and waiting for resources
srun: job 3394982 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
@ -16,37 +16,38 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.4749, 0.3788, 0.8812, ..., 0.8281, 0.8889, 0.4945])
tensor([0.0986, 0.6504, 0.0132, ..., 0.6525, 0.3337, 0.7557])
Matrix: dc2
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.2480316162109375 seconds
Time: 18.46260714530945 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
50.43 msec task-clock:u # 0.009 CPUs utilized
58.45 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,285 page-faults:u # 65.135 K/sec
54,118,679 cycles:u # 1.073 GHz (60.92%)
77,692,421 instructions:u # 1.44 insn per cycle (82.73%)
3,471 page-faults:u # 59.382 K/sec
76,691,414 cycles:u # 1.312 GHz (41.20%)
89,547,095 instructions:u # 1.17 insn per cycle (73.16%)
<not supported> branches:u
367,999 branch-misses:u
32,182,371 L1-dcache-loads:u # 638.112 M/sec
491,960 L1-dcache-load-misses:u # 1.53% of all L1-dcache accesses
382,362 branch-misses:u (96.21%)
33,271,433 L1-dcache-loads:u # 569.211 M/sec
488,730 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,682,258 L1-icache-loads:u # 608.367 M/sec
300,874 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
55,244,523 dTLB-loads:u # 1.095 G/sec (19.09%)
<not counted> dTLB-load-misses:u (0.00%)
31,926,596 L1-icache-loads:u # 546.204 M/sec
304,792 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
36,392,791 dTLB-loads:u # 622.612 M/sec (31.21%)
0 dTLB-load-misses:u (5.35%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
5.813837947 seconds time elapsed
22.126601025 seconds time elapsed
28.815118000 seconds user
213.749674000 seconds sys
103.642372000 seconds user
1434.131491000 seconds sys
@ -59,21 +60,22 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.9715, 0.3920, 0.0297, ..., 0.1819, 0.5744, 0.8105])
tensor([0.5605, 0.9374, 0.4444, ..., 0.5937, 0.3099, 0.2252])
Matrix: dc2
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.2333595752716064 seconds
Time: 13.607120752334595 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
325,039 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,383,216 BR_RETIRED:u
329,725 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,946,857 BR_RETIRED:u
5.973132269 seconds time elapsed
17.131143957 seconds time elapsed
29.719778000 seconds user
213.706315000 seconds sys
96.945305000 seconds user
1045.242697000 seconds sys
@ -86,23 +88,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.3371, 0.4985, 0.9905, ..., 0.6075, 0.1568, 0.3782])
tensor([0.8954, 0.9777, 0.8042, ..., 0.2069, 0.7063, 0.8479])
Matrix: dc2
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 1.9790923595428467 seconds
Time: 17.22396969795227 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
26,060,519 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,749 ITLB_WALK:u
16,865 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
34,819,729 L1D_TLB:u
27,648,951 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,857 ITLB_WALK:u
18,047 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,225,736 L1D_TLB:u
5.575020445 seconds time elapsed
20.911480243 seconds time elapsed
26.769391000 seconds user
188.138935000 seconds sys
107.392462000 seconds user
1329.272154000 seconds sys
@ -115,23 +118,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.6806, 0.8858, 0.7035, ..., 0.6007, 0.0880, 0.4550])
tensor([0.9293, 0.9606, 0.8914, ..., 0.2407, 0.2843, 0.5174])
Matrix: dc2
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 1.5306556224822998 seconds
Time: 13.233965873718262 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
30,777,115 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,980 L1I_CACHE_REFILL:u
461,522 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,216,597 L1D_CACHE:u
32,434,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,072 L1I_CACHE_REFILL:u
483,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,059,722 L1D_CACHE:u
4.961298684 seconds time elapsed
16.956477005 seconds time elapsed
23.946357000 seconds user
156.598674000 seconds sys
88.393687000 seconds user
1037.101858000 seconds sys
@ -144,25 +148,26 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
1.0331e+01, -1.0000e-03, 1.0000e-03]),
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
tensor([0.3029, 0.1908, 0.9816, ..., 0.0418, 0.8182, 0.5474])
tensor([0.8850, 0.9552, 0.7029, ..., 0.3357, 0.0248, 0.5395])
Matrix: dc2
Shape: torch.Size([116835, 116835])
NNZ: 766396
Density: 5.614451099680581e-05
Time: 2.28926944732666 seconds
Time: 13.873224973678589 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
567,700 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
588,689 LL_CACHE_RD:u
189,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,360 L2D_TLB_REFILL:u
328,306 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,908,607 L2D_CACHE:u
561,480 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
578,369 LL_CACHE_RD:u
192,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,364 L2D_TLB_REFILL:u
317,121 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,812,330 L2D_CACHE:u
5.710829283 seconds time elapsed
17.467787426 seconds time elapsed
28.671301000 seconds user
213.960421000 seconds sys
92.463054000 seconds user
1072.584062000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.56, 28.04, 23.8, 23.08, 22.12, 21.16, 21.16, 21.0, 20.96, 20.72], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.713265895843506, "power": [33.24, 30.84, 29.96, 27.68, 25.8, 25.8, 31.16], "power_after": [20.6, 20.48, 20.24, 20.32, 20.2, 20.36, 20.4, 20.4, 20.36, 20.36], "task clock (msec)": 48.96, "page faults": 3285, "cycles": 48563060, "instructions": 73465190, "branch mispredictions": 326361, "branches": 19599354, "ITLB accesses": 26666488, "ITLB misses": 6643, "DTLB misses": 17347, "DTLB accesses": 35986736, "L1I cache accesses": 32502068, "L1I cache misses": 302739, "L1D cache misses": 480619, "L1D cache accesses": 34031072, "LL cache misses": 552815, "LL cache accesses": 567373, "L2D TLB accesses": 188248, "L2D TLB misses": 23165, "L2D cache misses": 308211, "L2D cache accesses": 1787647, "instructions per cycle": 1.5127792606149613, "branch miss rate": 0.016651620252381788, "ITLB miss rate": 0.0002491141690649327, "DTLB miss rate": 0.0004820387155978803, "L2D TLB miss rate": 0.12305575623645404, "L1I cache miss rate": 0.00931445346800702, "L1D cache miss rate": 0.014122946229845479, "L2D cache miss rate": 0.17241155552522394, "LL cache miss rate": 0.9743413944618443}

View File

@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394985 queued and waiting for resources
srun: job 3394985 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.6055, 0.8789, 0.0482, ..., 0.0736, 0.1316, 0.6744])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 2.6956887245178223 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
48.96 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,285 page-faults:u # 67.090 K/sec
48,563,060 cycles:u # 0.992 GHz (59.76%)
73,465,190 instructions:u # 1.51 insn per cycle (78.23%)
<not supported> branches:u
369,314 branch-misses:u (98.16%)
31,769,641 L1-dcache-loads:u # 648.836 M/sec
479,594 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,338,929 L1-icache-loads:u # 619.616 M/sec
282,162 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
55,516,925 dTLB-loads:u # 1.134 G/sec (23.54%)
12,345 dTLB-load-misses:u # 0.02% of all dTLB cache accesses (3.47%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
6.017085179 seconds time elapsed
17.484355000 seconds user
28.678064000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.2815, 0.8196, 0.3706, ..., 0.1328, 0.4062, 0.9113])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 2.7908551692962646 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
326,361 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,599,354 BR_RETIRED:u
6.215591535 seconds time elapsed
18.097112000 seconds user
27.831633000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.9002, 0.0843, 0.5558, ..., 0.3931, 0.8070, 0.7414])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 2.819589376449585 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
26,666,488 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,643 ITLB_WALK:u
17,347 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,986,736 L1D_TLB:u
6.243883495 seconds time elapsed
17.783312000 seconds user
31.714619000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.9109, 0.6392, 0.7899, ..., 0.0945, 0.3298, 0.6865])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 2.747800827026367 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
32,502,068 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,739 L1I_CACHE_REFILL:u
480,619 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,031,072 L1D_CACHE:u
6.126767063 seconds time elapsed
17.702029000 seconds user
29.137072000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
116056]),
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.7083, 0.6766, 0.7649, ..., 0.3027, 0.9885, 0.8086])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 2.795116901397705 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
552,815 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
567,373 LL_CACHE_RD:u
188,248 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,165 L2D_TLB_REFILL:u
308,211 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,787,647 L2D_CACHE:u
6.041792624 seconds time elapsed
17.791735000 seconds user
29.790006000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.96, 27.92, 27.24, 23.0, 22.28, 22.28, 21.6, 20.8, 20.68, 20.76], "matrix": "email-Enron", "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 12.818164587020874, "power": [84.24, 82.72, 82.72, 72.0, 60.2, 51.88, 52.4, 59.36, 72.08, 83.88, 86.48, 84.28, 82.28, 81.12, 80.96, 80.96, 81.16], "power_after": [20.92, 20.92, 20.92, 20.92, 21.0, 20.96, 20.88, 20.84, 20.88, 20.68], "task clock (msec)": 48.76, "page faults": 3281, "cycles": 45495589, "instructions": 79104832, "branch mispredictions": 335574, "branches": 20121415, "ITLB accesses": 26011880, "ITLB misses": 5842, "DTLB misses": 16448, "DTLB accesses": 35000292, "L1I cache accesses": 32193112, "L1I cache misses": 310304, "L1D cache misses": 495806, "L1D cache accesses": 33829187, "LL cache misses": 546628, "LL cache accesses": 570044, "L2D TLB accesses": 196794, "L2D TLB misses": 24071, "L2D cache misses": 316028, "L2D cache accesses": 1836018, "instructions per cycle": 1.7387362981496954, "branch miss rate": 0.016677455338006797, "ITLB miss rate": 0.00022458968748125855, "DTLB miss rate": 0.000469938936509444, "L2D TLB miss rate": 0.1223157210077543, "L1I cache miss rate": 0.009638832058236556, "L1D cache miss rate": 0.014656160669779029, "L2D cache miss rate": 0.1721268527868463, "LL cache miss rate": 0.9589224691427328}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394152 queued and waiting for resources
srun: job 3394152 has been allocated resources
srun: job 3394986 queued and waiting for resources
srun: job 3394986 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.3626, 0.7532, 0.0782, ..., 0.6679, 0.4308, 0.6586])
tensor([0.9906, 0.9401, 0.5661, ..., 0.4491, 0.7550, 0.2452])
Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.3745801448822021 seconds
Time: 12.80848503112793 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
60.43 msec task-clock:u # 0.012 CPUs utilized
48.76 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,319 page-faults:u # 54.926 K/sec
66,114,448 cycles:u # 1.094 GHz (58.10%)
90,786,829 instructions:u # 1.37 insn per cycle (92.25%)
3,281 page-faults:u # 67.289 K/sec
45,495,589 cycles:u # 0.933 GHz (57.79%)
79,104,832 instructions:u # 1.74 insn per cycle (81.70%)
<not supported> branches:u
372,381 branch-misses:u
32,997,410 L1-dcache-loads:u # 546.070 M/sec
470,216 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
372,161 branch-misses:u
32,089,348 L1-dcache-loads:u # 658.113 M/sec
467,576 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,485,339 L1-icache-loads:u # 521.047 M/sec
294,395 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
31,376,646 dTLB-loads:u # 519.248 M/sec (10.03%)
30,688,995 L1-icache-loads:u # 629.393 M/sec
289,698 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
47,006,355 dTLB-loads:u # 964.042 M/sec (22.12%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.904488673 seconds time elapsed
16.331438990 seconds time elapsed
22.874521000 seconds user
139.276239000 seconds sys
76.869141000 seconds user
999.179638000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.2040, 0.8252, 0.0215, ..., 0.2921, 0.9143, 0.8728])
tensor([0.7565, 0.5273, 0.1038, ..., 0.9432, 0.1309, 0.5542])
Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.3087654113769531 seconds
Time: 26.91536283493042 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
341,625 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,129,354 BR_RETIRED:u
335,574 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,121,415 BR_RETIRED:u
4.644873434 seconds time elapsed
30.559245388 seconds time elapsed
22.729927000 seconds user
132.278582000 seconds sys
126.799314000 seconds user
2081.777635000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.6154, 0.6641, 0.3794, ..., 0.9736, 0.0619, 0.4790])
tensor([0.2321, 0.0702, 0.2538, ..., 0.6254, 0.6308, 0.5317])
Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.2701547145843506 seconds
Time: 14.841739892959595 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
27,441,303 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,807 ITLB_WALK:u
20,551 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,867,114 L1D_TLB:u
26,011,880 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,842 ITLB_WALK:u
16,448 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,000,292 L1D_TLB:u
4.861510767 seconds time elapsed
18.443612527 seconds time elapsed
22.111354000 seconds user
132.431608000 seconds sys
80.694133000 seconds user
1159.740575000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.4201, 0.4134, 0.8169, ..., 0.6631, 0.0087, 0.8439])
tensor([0.7091, 0.9447, 0.0959, ..., 0.0090, 0.7012, 0.6025])
Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.1176586151123047 seconds
Time: 10.863199234008789 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
31,744,243 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
271,027 L1I_CACHE_REFILL:u
464,135 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,441,141 L1D_CACHE:u
32,193,112 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
310,304 L1I_CACHE_REFILL:u
495,806 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,829,187 L1D_CACHE:u
4.693803969 seconds time elapsed
14.426841778 seconds time elapsed
21.724904000 seconds user
119.873018000 seconds sys
70.728541000 seconds user
853.184507000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
tensor([0.1285, 0.3989, 0.3903, ..., 0.7892, 0.2737, 0.2659])
tensor([0.8267, 0.6185, 0.8015, ..., 0.8593, 0.4881, 0.8599])
Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
Time: 1.196892261505127 seconds
Time: 12.076026678085327 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':
539,935 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
552,519 LL_CACHE_RD:u
188,291 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,177 L2D_TLB_REFILL:u
301,281 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,737,575 L2D_CACHE:u
546,628 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
570,044 LL_CACHE_RD:u
196,794 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,071 L2D_TLB_REFILL:u
316,028 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,836,018 L2D_CACHE:u
4.741030347 seconds time elapsed
15.581045199 seconds time elapsed
23.793930000 seconds user
125.634838000 seconds sys
77.345591000 seconds user
942.987439000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.36, 16.8, 16.76, 16.6, 16.48, 16.44, 16.28, 16.28, 16.16], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 1.0642461776733398, "power": [26.6, 27.52, 27.52, 31.16, 28.48], "power_after": [16.28, 16.4, 16.32, 16.12, 16.24, 16.0, 16.0, 16.24, 16.52, 17.04], "task clock (msec)": 50.59, "page faults": 3303, "cycles": 51318459, "instructions": 74705078, "branch mispredictions": 328853, "branches": 19620312, "ITLB accesses": 27939682, "ITLB misses": 5470, "DTLB misses": 17679, "DTLB accesses": 37425602, "L1I cache accesses": 30276633, "L1I cache misses": 291467, "L1D cache misses": 479061, "L1D cache accesses": 31689326, "LL cache misses": 529426, "LL cache accesses": 550033, "L2D TLB accesses": 171913, "L2D TLB misses": 20624, "L2D cache misses": 296662, "L2D cache accesses": 1714211, "instructions per cycle": 1.455715535028049, "branch miss rate": 0.01676084457780284, "ITLB miss rate": 0.0001957788925443031, "DTLB miss rate": 0.00047237717111404113, "L2D TLB miss rate": 0.11996765805959991, "L1I cache miss rate": 0.009626797008769106, "L1D cache miss rate": 0.015117424712661923, "L2D cache miss rate": 0.17306037588138215, "LL cache miss rate": 0.9625349751742168}

View File

@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394153 queued and waiting for resources
srun: job 3394153 has been allocated resources
srun: job 3394992 queued and waiting for resources
srun: job 3394992 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.6982, 0.7263, 0.0064, ..., 0.9256, 0.7249, 0.5065])
tensor([0.1181, 0.8387, 0.0554, ..., 0.8107, 0.4393, 0.9489])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.18009519577026367 seconds
Time: 1.061662197113037 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
67.56 msec task-clock:u # 0.019 CPUs utilized
50.59 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,829 page-faults:u # 56.674 K/sec
47,862,000 cycles:u # 0.708 GHz (59.24%)
84,392,375 instructions:u # 1.76 insn per cycle (87.61%)
3,303 page-faults:u # 65.291 K/sec
51,318,459 cycles:u # 1.014 GHz (59.34%)
74,705,078 instructions:u # 1.46 insn per cycle (83.02%)
<not supported> branches:u
368,432 branch-misses:u
32,507,448 L1-dcache-loads:u # 481.147 M/sec
481,389 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses
366,825 branch-misses:u
31,809,194 L1-dcache-loads:u # 628.781 M/sec
466,198 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,030,656 L1-icache-loads:u # 459.289 M/sec
308,582 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
34,988,046 dTLB-loads:u # 517.863 M/sec (20.00%)
30,390,161 L1-icache-loads:u # 600.731 M/sec
296,270 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
61,518,375 dTLB-loads:u # 1.216 G/sec (17.94%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.538329547 seconds time elapsed
4.302241563 seconds time elapsed
14.667604000 seconds user
29.534487000 seconds sys
16.122298000 seconds user
29.141140000 seconds sys
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.4946, 0.3509, 0.5239, ..., 0.4520, 0.4206, 0.8181])
tensor([0.7249, 0.8723, 0.3843, ..., 0.2264, 0.4891, 0.9107])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.18875432014465332 seconds
Time: 1.0079431533813477 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
331,622 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,800,140 BR_RETIRED:u
328,853 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,620,312 BR_RETIRED:u
3.556031790 seconds time elapsed
4.241400567 seconds time elapsed
14.799719000 seconds user
27.876987000 seconds sys
15.325937000 seconds user
28.223386000 seconds sys
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.2184, 0.4999, 0.9567, ..., 0.8794, 0.8213, 0.8713])
tensor([0.7608, 0.2449, 0.5322, ..., 0.5547, 0.8659, 0.8437])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.1066896915435791 seconds
Time: 1.1017234325408936 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
25,905,045 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,746 ITLB_WALK:u
17,547 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,220,079 L1D_TLB:u
27,939,682 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,470 ITLB_WALK:u
17,679 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,425,602 L1D_TLB:u
3.505367779 seconds time elapsed
4.296820500 seconds time elapsed
14.557493000 seconds user
29.642958000 seconds sys
15.875162000 seconds user
28.803412000 seconds sys
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.2180, 0.0881, 0.5532, ..., 0.4961, 0.0093, 0.4929])
tensor([0.9980, 0.9991, 0.6749, ..., 0.4225, 0.7297, 0.3717])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.12433028221130371 seconds
Time: 1.0812580585479736 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
30,359,576 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
283,204 L1I_CACHE_REFILL:u
465,520 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,843,274 L1D_CACHE:u
30,276,633 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
291,467 L1I_CACHE_REFILL:u
479,061 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,689,326 L1D_CACHE:u
3.565310130 seconds time elapsed
4.500137840 seconds time elapsed
14.913239000 seconds user
28.125605000 seconds sys
15.794710000 seconds user
27.773851000 seconds sys
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.6394, 0.6808, 0.7957, ..., 0.1529, 0.0561, 0.7834])
tensor([0.8707, 0.5871, 0.5970, ..., 0.8826, 0.4673, 0.4994])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 0.13401126861572266 seconds
Time: 0.9900743961334229 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
560,542 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
575,610 LL_CACHE_RD:u
173,643 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,499 L2D_TLB_REFILL:u
313,335 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,741,621 L2D_CACHE:u
529,426 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
550,033 LL_CACHE_RD:u
171,913 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
20,624 L2D_TLB_REFILL:u
296,662 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,714,211 L2D_CACHE:u
3.503362704 seconds time elapsed
4.284402033 seconds time elapsed
15.287949000 seconds user
28.752303000 seconds sys
15.584671000 seconds user
27.523772000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.12, 16.12, 16.36, 16.56, 16.52, 17.04, 16.76, 16.64, 16.92], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.6947758197784424, "power": [25.2, 25.2, 26.6, 26.28, 26.48], "power_after": [16.4, 16.6, 16.6, 16.64, 16.8, 16.48, 16.44, 16.16, 16.12, 16.2], "task clock (msec)": 66.78, "page faults": 3520, "cycles": 28858055, "instructions": 64429843, "branch mispredictions": 331167, "branches": 19518210, "ITLB accesses": 26964483, "ITLB misses": 4666, "DTLB misses": 14001, "DTLB accesses": 36143905, "L1I cache accesses": 31901160, "L1I cache misses": 302516, "L1D cache misses": 475663, "L1D cache accesses": 33507563, "LL cache misses": 558546, "LL cache accesses": 578676, "L2D TLB accesses": 187549, "L2D TLB misses": 22990, "L2D cache misses": 321826, "L2D cache accesses": 1816571, "instructions per cycle": 2.2326467601506756, "branch miss rate": 0.016967078435983628, "ITLB miss rate": 0.00017304244253449992, "DTLB miss rate": 0.00038736821602425086, "L2D TLB miss rate": 0.12258129875392564, "L1I cache miss rate": 0.009482915354802146, "L1D cache miss rate": 0.01419569068630864, "L2D cache miss rate": 0.1771612560147663, "LL cache miss rate": 0.9652136947099932}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394989 queued and waiting for resources
srun: job 3394989 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2470, 0.4231, 0.1036, ..., 0.7937, 0.3241, 0.7116])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.6974337100982666 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
66.78 msec task-clock:u # 0.013 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,520 page-faults:u # 52.713 K/sec
28,858,055 cycles:u # 0.432 GHz (26.93%)
64,429,843 instructions:u # 2.23 insn per cycle (67.63%)
<not supported> branches:u
296,857 branch-misses:u (84.08%)
33,646,348 L1-dcache-loads:u # 503.866 M/sec
493,998 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,070,415 L1-icache-loads:u # 480.266 M/sec
305,993 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
46,903,081 dTLB-loads:u # 702.391 M/sec (46.16%)
114,272 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (32.45%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
5.106933083 seconds time elapsed
16.391614000 seconds user
28.913912000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2307, 0.4662, 0.3789, ..., 0.0144, 0.6300, 0.7829])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.6379659175872803 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
331,167 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,518,210 BR_RETIRED:u
5.017894585 seconds time elapsed
16.446505000 seconds user
31.004338000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.7309, 0.0314, 0.4424, ..., 0.7434, 0.2124, 0.1432])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.7232718467712402 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
26,964,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,666 ITLB_WALK:u
14,001 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,143,905 L1D_TLB:u
5.053286721 seconds time elapsed
16.447780000 seconds user
28.580949000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.5695, 0.5025, 0.1946, ..., 0.7428, 0.9634, 0.4327])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.644775629043579 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
31,901,160 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,516 L1I_CACHE_REFILL:u
475,663 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,507,563 L1D_CACHE:u
4.978338941 seconds time elapsed
16.455298000 seconds user
30.249373000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.0969, 0.1950, 0.8456, ..., 0.3315, 0.1512, 0.3182])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.752812385559082 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
558,546 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
578,676 LL_CACHE_RD:u
187,549 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,990 L2D_TLB_REFILL:u
321,826 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,816,571 L2D_CACHE:u
4.952297819 seconds time elapsed
16.648691000 seconds user
27.005944000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.0, 16.4, 16.4, 16.28, 16.48, 16.6, 16.48, 16.56, 16.88, 16.92], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.4688231945037842, "power": [23.04, 29.0, 30.24, 27.96, 28.04], "power_after": [16.52, 16.68, 16.88, 17.12, 17.08, 17.04, 16.84, 16.72, 16.84, 16.84], "task clock (msec)": 48.61, "page faults": 3308, "cycles": 60072179, "instructions": 70991785, "branch mispredictions": 331765, "branches": 19906014, "ITLB accesses": 28194337, "ITLB misses": 5083, "DTLB misses": 17916, "DTLB accesses": 37944713, "L1I cache accesses": 31162212, "L1I cache misses": 270684, "L1D cache misses": 465467, "L1D cache accesses": 32857500, "LL cache misses": 541118, "LL cache accesses": 564199, "L2D TLB accesses": 194022, "L2D TLB misses": 23932, "L2D cache misses": 311476, "L2D cache accesses": 1783574, "instructions per cycle": 1.1817747613250387, "branch miss rate": 0.016666571218125335, "ITLB miss rate": 0.00018028443087702328, "DTLB miss rate": 0.00047216064066685654, "L2D TLB miss rate": 0.12334683695663379, "L1I cache miss rate": 0.008686289663904475, "L1D cache miss rate": 0.014166232975728525, "L2D cache miss rate": 0.17463587157022922, "LL cache miss rate": 0.9590906754531646}

View File

@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394140 queued and waiting for resources
srun: job 3394140 has been allocated resources
srun: job 3394994 queued and waiting for resources
srun: job 3394994 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.8199, 0.9849, 0.4642, ..., 0.7594, 0.3568, 0.4020])
tensor([0.1465, 0.4354, 0.7334, ..., 0.2837, 0.5913, 0.9525])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.19272208213806152 seconds
Time: 1.4786670207977295 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
64.71 msec task-clock:u # 0.018 CPUs utilized
48.61 msec task-clock:u # 0.010 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,319 page-faults:u # 51.288 K/sec
57,611,295 cycles:u # 0.890 GHz (39.00%)
83,148,228 instructions:u # 1.44 insn per cycle (82.73%)
3,308 page-faults:u # 68.054 K/sec
60,072,179 cycles:u # 1.236 GHz (53.26%)
70,991,785 instructions:u # 1.18 insn per cycle (71.54%)
<not supported> branches:u
375,111 branch-misses:u
32,759,228 L1-dcache-loads:u # 506.221 M/sec
475,086 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
371,197 branch-misses:u
32,964,378 L1-dcache-loads:u # 678.165 M/sec
465,448 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,366,158 L1-icache-loads:u # 484.694 M/sec
297,293 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
35,611,781 dTLB-loads:u # 550.301 M/sec (25.73%)
31,435,424 L1-icache-loads:u # 646.710 M/sec
293,561 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
56,761,270 dTLB-loads:u # 1.168 G/sec (30.54%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.578384817 seconds time elapsed
4.700046411 seconds time elapsed
14.435258000 seconds user
27.700836000 seconds sys
16.235801000 seconds user
28.396327000 seconds sys
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.0069, 0.9904, 0.5316, ..., 0.2082, 0.4858, 0.4936])
tensor([0.7780, 0.3388, 0.1540, ..., 0.2989, 0.3682, 0.9160])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.1423017978668213 seconds
Time: 1.4235138893127441 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
318,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,233,431 BR_RETIRED:u
331,765 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,906,014 BR_RETIRED:u
3.555753224 seconds time elapsed
4.757340585 seconds time elapsed
14.642518000 seconds user
30.112207000 seconds sys
16.412311000 seconds user
29.238029000 seconds sys
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.2250, 0.5676, 0.3018, ..., 0.5431, 0.7314, 0.5593])
tensor([0.4944, 0.8057, 0.8211, ..., 0.5137, 0.3388, 0.6316])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.14638042449951172 seconds
Time: 1.4664146900177002 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
27,039,805 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,375 ITLB_WALK:u
17,290 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,688,544 L1D_TLB:u
28,194,337 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,083 ITLB_WALK:u
17,916 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,944,713 L1D_TLB:u
3.566915241 seconds time elapsed
4.844329421 seconds time elapsed
16.116565000 seconds user
28.752519000 seconds sys
16.081022000 seconds user
28.021902000 seconds sys
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.0220, 0.7494, 0.7913, ..., 0.8924, 0.8542, 0.5491])
tensor([0.0963, 0.5806, 0.0397, ..., 0.1604, 0.5700, 0.8103])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.17815685272216797 seconds
Time: 1.3717434406280518 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
32,508,072 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
297,568 L1I_CACHE_REFILL:u
477,654 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,044,579 L1D_CACHE:u
31,162,212 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
270,684 L1I_CACHE_REFILL:u
465,467 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,857,500 L1D_CACHE:u
3.435706033 seconds time elapsed
4.598461782 seconds time elapsed
14.690285000 seconds user
28.763423000 seconds sys
15.609727000 seconds user
30.606837000 seconds sys
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.6277, 0.4955, 0.9335, ..., 0.1476, 0.2079, 0.0931])
tensor([0.9137, 0.5009, 0.7507, ..., 0.6623, 0.8760, 0.2991])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 0.14432048797607422 seconds
Time: 1.4291880130767822 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
549,474 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
561,939 LL_CACHE_RD:u
185,622 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,295 L2D_TLB_REFILL:u
305,878 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,763,089 L2D_CACHE:u
541,118 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
564,199 LL_CACHE_RD:u
194,022 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,932 L2D_TLB_REFILL:u
311,476 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,783,574 L2D_CACHE:u
3.538826979 seconds time elapsed
4.792239951 seconds time elapsed
15.006109000 seconds user
29.644298000 seconds sys
15.902307000 seconds user
28.747620000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.96, 22.0, 22.16, 22.16, 21.84, 22.08, 22.4, 22.08, 22.0, 21.48], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 3.504030466079712, "power": [54.2, 64.16, 67.64, 67.64, 65.92, 58.96, 59.92], "power_after": [20.72, 20.76, 20.76, 20.8, 20.8, 20.88, 20.92, 21.04, 21.04, 21.12], "task clock (msec)": 56.52, "page faults": 3194, "cycles": 58074747, "instructions": 90036443, "branch mispredictions": 327895, "branches": 20553601, "ITLB accesses": 26120611, "ITLB misses": 7531, "DTLB misses": 19097, "DTLB accesses": 35744928, "L1I cache accesses": 31819981, "L1I cache misses": 284493, "L1D cache misses": 486709, "L1D cache accesses": 33545755, "LL cache misses": 544742, "LL cache accesses": 558323, "L2D TLB accesses": 190574, "L2D TLB misses": 23746, "L2D cache misses": 305844, "L2D cache accesses": 1736964, "instructions per cycle": 1.5503544595725918, "branch miss rate": 0.015953165579111903, "ITLB miss rate": 0.00028831637973552763, "DTLB miss rate": 0.0005342576155140109, "L2D TLB miss rate": 0.12460251660772194, "L1I cache miss rate": 0.008940703012990485, "L1D cache miss rate": 0.014508810429218243, "L2D cache miss rate": 0.17607964241055082, "LL cache miss rate": 0.9756753707083534}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394991 queued and waiting for resources
srun: job 3394991 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.3046, 0.0725, 0.4580, ..., 0.0593, 0.5121, 0.2116])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 3.6646029949188232 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
56.52 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,194 page-faults:u # 56.515 K/sec
58,074,747 cycles:u # 1.028 GHz (51.20%)
90,036,443 instructions:u # 1.55 insn per cycle (89.06%)
<not supported> branches:u
363,262 branch-misses:u
33,111,438 L1-dcache-loads:u # 585.875 M/sec
454,665 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,646,314 L1-icache-loads:u # 559.951 M/sec
281,443 L1-icache-load-misses:u # 0.89% of all L1-icache accesses
43,495,524 dTLB-loads:u # 769.611 M/sec (11.87%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
7.033463989 seconds time elapsed
34.670765000 seconds user
307.031553000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9700, 0.1728, 0.2199, ..., 0.6107, 0.3357, 0.2661])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 2.3380045890808105 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
327,895 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,553,601 BR_RETIRED:u
5.895917276 seconds time elapsed
31.121063000 seconds user
208.127447000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9533, 0.7568, 0.8141, ..., 0.8395, 0.5617, 0.7830])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 4.476518869400024 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
26,120,611 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
7,531 ITLB_WALK:u
19,097 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,744,928 L1D_TLB:u
8.109622410 seconds time elapsed
38.467161000 seconds user
370.437915000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.6886, 0.7814, 0.9957, ..., 0.8460, 0.1015, 0.8097])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 2.856834888458252 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
31,819,981 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
284,493 L1I_CACHE_REFILL:u
486,709 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,545,755 L1D_CACHE:u
6.374371632 seconds time elapsed
30.817943000 seconds user
247.363843000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.8464, 0.0437, 0.1230, ..., 0.6221, 0.9268, 0.5436])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 4.838747978210449 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
544,742 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
558,323 LL_CACHE_RD:u
190,574 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,746 L2D_TLB_REFILL:u
305,844 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,736,964 L2D_CACHE:u
8.386896120 seconds time elapsed
39.861141000 seconds user
395.959334000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.2, 31.56, 31.56, 30.84, 24.52, 23.2, 21.32, 20.76, 20.84, 20.84], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 3.077709913253784, "power": [27.76, 28.28, 28.44, 28.28, 25.16, 30.44, 30.6], "power_after": [21.08, 20.88, 20.68, 20.68, 20.6, 20.56, 20.68, 20.8, 20.96, 21.24], "task clock (msec)": 64.49, "page faults": 3473, "cycles": 42783607, "instructions": 84598454, "branch mispredictions": 331326, "branches": 20438455, "ITLB accesses": 26869742, "ITLB misses": 6302, "DTLB misses": 14926, "DTLB accesses": 36876841, "L1I cache accesses": 31664385, "L1I cache misses": 301678, "L1D cache misses": 493536, "L1D cache accesses": 33219437, "LL cache misses": 552180, "LL cache accesses": 564990, "L2D TLB accesses": 167824, "L2D TLB misses": 19594, "L2D cache misses": 304114, "L2D cache accesses": 1716370, "instructions per cycle": 1.977356747877756, "branch miss rate": 0.01621091222404042, "ITLB miss rate": 0.00023453890997539165, "DTLB miss rate": 0.00040475267390718204, "L2D TLB miss rate": 0.11675326532557918, "L1I cache miss rate": 0.009527360155581737, "L1D cache miss rate": 0.014856844202386693, "L2D cache miss rate": 0.17718440662561102, "LL cache miss rate": 0.9773270323368555}

View File

@ -0,0 +1,163 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394984 queued and waiting for resources
srun: job 3394984 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.5906, 0.9651, 0.2033, ..., 0.2175, 0.4484, 0.0412])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 3.107008934020996 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
64.49 msec task-clock:u # 0.010 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,473 page-faults:u # 53.852 K/sec
42,783,607 cycles:u # 0.663 GHz (37.27%)
84,598,454 instructions:u # 1.98 insn per cycle (73.53%)
<not supported> branches:u
353,558 branch-misses:u (89.57%)
33,192,964 L1-dcache-loads:u # 514.689 M/sec
466,217 L1-dcache-load-misses:u # 1.40% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,727,502 L1-icache-loads:u # 491.965 M/sec
292,570 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
38,623,737 dTLB-loads:u # 598.898 M/sec (34.88%)
124,174 dTLB-load-misses:u # 0.32% of all dTLB cache accesses (14.74%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
6.612563197 seconds time elapsed
18.114584000 seconds user
29.808542000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.6092, 0.5511, 0.6052, ..., 0.8002, 0.0295, 0.2972])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 2.9385879039764404 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
331,326 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,438,455 BR_RETIRED:u
6.446731410 seconds time elapsed
17.939571000 seconds user
33.272929000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.3348, 0.2974, 0.2569, ..., 0.2397, 0.1965, 0.5651])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 2.972891330718994 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
26,869,742 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,302 ITLB_WALK:u
14,926 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,876,841 L1D_TLB:u
6.376775396 seconds time elapsed
17.836418000 seconds user
29.830135000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.7889, 0.7395, 0.6553, ..., 0.3938, 0.2478, 0.7923])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 2.9658284187316895 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
31,664,385 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
301,678 L1I_CACHE_REFILL:u
493,536 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,219,437 L1D_CACHE:u
6.559158078 seconds time elapsed
19.008146000 seconds user
38.233666000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
125750]),
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1256, 0.1417, 0.9800, ..., 0.2509, 0.8121, 0.6210])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 2.9228267669677734 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
552,180 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
564,990 LL_CACHE_RD:u
167,824 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
19,594 L2D_TLB_REFILL:u
304,114 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,716,370 L2D_CACHE:u
6.135787277 seconds time elapsed
18.029630000 seconds user
28.723217000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [29.88, 23.64, 23.08, 21.84, 21.4, 21.2, 21.0, 21.0, 21.16, 21.0], "matrix": "soc-sign-Slashdot090216", "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 19.113287687301636, "power": [81.08, 81.56, 71.96, 60.52, 47.16, 48.4, 53.84, 53.84, 67.4, 82.64, 90.8, 89.16, 87.96, 85.76, 84.64, 84.04, 83.64, 84.68, 84.88, 84.88, 84.64, 84.04, 83.6], "power_after": [20.72, 20.6, 20.68, 20.88, 21.2, 21.28, 21.28, 21.48, 21.56, 21.36], "task clock (msec)": 67.66, "page faults": 3317, "cycles": 41915850, "instructions": 84471787, "branch mispredictions": 344452, "branches": 20610765, "ITLB accesses": 27276117, "ITLB misses": 6358, "DTLB misses": 17361, "DTLB accesses": 36565837, "L1I cache accesses": 32022662, "L1I cache misses": 293044, "L1D cache misses": 458939, "L1D cache accesses": 33505164, "LL cache misses": 553814, "LL cache accesses": 567372, "L2D TLB accesses": 199301, "L2D TLB misses": 25193, "L2D cache misses": 313278, "L2D cache accesses": 1796299, "instructions per cycle": 2.015270762730566, "branch miss rate": 0.016712237512775483, "ITLB miss rate": 0.00023309769495416082, "DTLB miss rate": 0.0004747874361524939, "L2D TLB miss rate": 0.12640679173712124, "L1I cache miss rate": 0.009151144274014446, "L1D cache miss rate": 0.01369756017311242, "L2D cache miss rate": 0.17440192306514674, "LL cache miss rate": 0.97610386131145}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394151 queued and waiting for resources
srun: job 3394151 has been allocated resources
srun: job 3394981 queued and waiting for resources
srun: job 3394981 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.3831, 0.6714, 0.8380, ..., 0.7892, 0.5274, 0.9035])
tensor([0.6780, 0.5234, 0.1205, ..., 0.2995, 0.6275, 0.1399])
Matrix: soc-sign-Slashdot090216
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.044952392578125 seconds
Time: 30.653191089630127 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
59.01 msec task-clock:u # 0.010 CPUs utilized
67.66 msec task-clock:u # 0.002 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,448 page-faults:u # 58.432 K/sec
73,062,796 cycles:u # 1.238 GHz (59.95%)
88,329,175 instructions:u # 1.21 insn per cycle (93.89%)
3,317 page-faults:u # 49.022 K/sec
41,915,850 cycles:u # 0.619 GHz (57.88%)
84,471,787 instructions:u # 2.02 insn per cycle (88.19%)
<not supported> branches:u
365,177 branch-misses:u
31,850,867 L1-dcache-loads:u # 539.766 M/sec
473,835 L1-dcache-load-misses:u # 1.49% of all L1-dcache accesses
375,016 branch-misses:u
32,438,527 L1-dcache-loads:u # 479.407 M/sec
499,618 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,385,913 L1-icache-loads:u # 514.940 M/sec
299,969 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
24,365,554 dTLB-loads:u # 412.915 M/sec (8.42%)
30,998,693 L1-icache-loads:u # 458.127 M/sec
306,445 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
34,294,934 dTLB-loads:u # 506.842 M/sec (18.86%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
5.680365622 seconds time elapsed
34.340632995 seconds time elapsed
27.656957000 seconds user
194.823873000 seconds sys
149.743244000 seconds user
2355.852109000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.6906, 0.4067, 0.7042, ..., 0.8333, 0.7120, 0.3519])
tensor([0.9875, 0.2031, 0.7260, ..., 0.5908, 0.1575, 0.7971])
Matrix: soc-sign-Slashdot090216
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 1.3788115978240967 seconds
Time: 13.671181440353394 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
331,091 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,013,316 BR_RETIRED:u
344,452 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,610,765 BR_RETIRED:u
4.886021169 seconds time elapsed
17.331425967 seconds time elapsed
23.105025000 seconds user
141.491451000 seconds sys
83.136180000 seconds user
1069.027469000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.8755, 0.6165, 0.4104, ..., 0.6974, 0.9453, 0.9872])
tensor([0.2046, 0.3645, 0.7960, ..., 0.6490, 0.4098, 0.5342])
Matrix: soc-sign-Slashdot090216
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.8570749759674072 seconds
Time: 19.569235801696777 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
26,330,936 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,193 ITLB_WALK:u
16,837 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,930,477 L1D_TLB:u
27,276,117 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,358 ITLB_WALK:u
17,361 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,565,837 L1D_TLB:u
6.371573603 seconds time elapsed
23.323243037 seconds time elapsed
30.986329000 seconds user
254.347216000 seconds sys
108.830923000 seconds user
1521.834565000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.3573, 0.9331, 0.0611, ..., 0.9133, 0.6057, 0.2374])
tensor([0.4164, 0.2188, 0.5460, ..., 0.1057, 0.5277, 0.0624])
Matrix: soc-sign-Slashdot090216
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 2.311248540878296 seconds
Time: 26.337355375289917 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
31,853,890 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
306,147 L1I_CACHE_REFILL:u
479,933 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,426,019 L1D_CACHE:u
32,022,662 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,044 L1I_CACHE_REFILL:u
458,939 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,505,164 L1D_CACHE:u
5.718741260 seconds time elapsed
30.017812847 seconds time elapsed
28.451593000 seconds user
214.350594000 seconds sys
131.976276000 seconds user
2029.636174000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
nnz=545671, layout=torch.sparse_csr)
tensor([0.6021, 0.5679, 0.4538, ..., 0.9086, 0.9552, 0.5329])
tensor([0.7679, 0.9196, 0.3474, ..., 0.5624, 0.0163, 0.8596])
Matrix: soc-sign-Slashdot090216
Shape: torch.Size([81871, 81871])
NNZ: 545671
Density: 8.140867447881048e-05
Time: 1.8193013668060303 seconds
Time: 29.926054000854492 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
540,302 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
553,181 LL_CACHE_RD:u
173,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,390 L2D_TLB_REFILL:u
300,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,739,931 L2D_CACHE:u
553,814 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
567,372 LL_CACHE_RD:u
199,301 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,193 L2D_TLB_REFILL:u
313,278 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,796,299 L2D_CACHE:u
5.546861941 seconds time elapsed
33.553779692 seconds time elapsed
28.194596000 seconds user
181.004698000 seconds sys
154.498461000 seconds user
2293.574463000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.92, 21.84, 20.96, 20.24, 20.28, 20.16, 19.96, 19.72, 19.88, 19.76], "matrix": "soc-sign-Slashdot090221", "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 18.79910135269165, "power": [80.48, 80.08, 69.04, 69.04, 55.0, 46.8, 49.16, 56.2, 70.84, 82.84, 86.52, 84.28, 82.56, 81.2, 80.28, 80.28, 80.04, 80.16, 80.8, 81.0, 81.92, 83.04, 82.88], "power_after": [21.0, 20.96, 21.12, 20.76, 20.72, 20.56, 20.52, 20.64, 20.88, 21.04], "task clock (msec)": 58.57, "page faults": 3259, "cycles": 74509373, "instructions": 88672751, "branch mispredictions": 342121, "branches": 20436338, "ITLB accesses": 27189335, "ITLB misses": 6437, "DTLB misses": 18156, "DTLB accesses": 36676625, "L1I cache accesses": 30721032, "L1I cache misses": 302777, "L1D cache misses": 469833, "L1D cache accesses": 32109077, "LL cache misses": 551850, "LL cache accesses": 565355, "L2D TLB accesses": 200417, "L2D TLB misses": 25536, "L2D cache misses": 304133, "L2D cache accesses": 1801849, "instructions per cycle": 1.190088540941017, "branch miss rate": 0.016740817263836603, "ITLB miss rate": 0.0002367472393127673, "DTLB miss rate": 0.0004950291909356436, "L2D TLB miss rate": 0.12741434109880898, "L1I cache miss rate": 0.009855691045795596, "L1D cache miss rate": 0.014632404413244267, "L2D cache miss rate": 0.16878939356183564, "LL cache miss rate": 0.9761123541845389}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394147 queued and waiting for resources
srun: job 3394147 has been allocated resources
srun: job 3394979 queued and waiting for resources
srun: job 3394979 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.2696, 0.6106, 0.1626, ..., 0.2215, 0.5107, 0.8609])
tensor([0.4201, 0.7748, 0.6565, ..., 0.0517, 0.6958, 0.5341])
Matrix: soc-sign-Slashdot090221
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.4500706195831299 seconds
Time: 27.35153603553772 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
61.26 msec task-clock:u # 0.012 CPUs utilized
58.57 msec task-clock:u # 0.002 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,303 page-faults:u # 53.917 K/sec
44,515,786 cycles:u # 0.727 GHz (40.46%)
81,513,738 instructions:u # 1.83 insn per cycle (73.51%)
3,259 page-faults:u # 55.640 K/sec
74,509,373 cycles:u # 1.272 GHz (58.00%)
88,672,751 instructions:u # 1.19 insn per cycle (90.97%)
<not supported> branches:u
344,479 branch-misses:u (89.42%)
34,411,073 L1-dcache-loads:u # 561.710 M/sec
484,811 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
361,568 branch-misses:u
31,594,797 L1-dcache-loads:u # 539.410 M/sec
460,467 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,789,672 L1-icache-loads:u # 535.243 M/sec
293,487 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
47,065,740 dTLB-loads:u # 768.279 M/sec (32.81%)
146,215 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (13.39%)
30,148,838 L1-icache-loads:u # 514.724 M/sec
282,768 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
19,757,856 dTLB-loads:u # 337.321 M/sec (11.69%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.966101053 seconds time elapsed
31.087250856 seconds time elapsed
23.375418000 seconds user
148.052989000 seconds sys
142.716222000 seconds user
2102.420776000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.1999, 0.3932, 0.8035, ..., 0.5079, 0.5903, 0.7606])
tensor([0.7637, 0.5328, 0.8286, ..., 0.7084, 0.8903, 0.1707])
Matrix: soc-sign-Slashdot090221
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.9677543640136719 seconds
Time: 17.188836336135864 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
328,019 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,893,662 BR_RETIRED:u
342,121 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,436,338 BR_RETIRED:u
5.529871590 seconds time elapsed
20.753346873 seconds time elapsed
26.844356000 seconds user
190.429440000 seconds sys
98.605331000 seconds user
1332.291974000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.2933, 0.6999, 0.0078, ..., 0.6213, 0.9377, 0.6359])
tensor([0.9017, 0.8505, 0.0023, ..., 0.4182, 0.6895, 0.5023])
Matrix: soc-sign-Slashdot090221
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.4976201057434082 seconds
Time: 16.22375249862671 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
27,248,112 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,792 ITLB_WALK:u
16,632 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,929,042 L1D_TLB:u
27,189,335 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,437 ITLB_WALK:u
18,156 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,676,625 L1D_TLB:u
4.971341163 seconds time elapsed
19.748749363 seconds time elapsed
24.247480000 seconds user
151.276717000 seconds sys
103.049578000 seconds user
1249.814927000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.1310, 0.6695, 0.9479, ..., 0.3141, 0.9327, 0.2117])
tensor([0.4805, 0.2325, 0.2103, ..., 0.1710, 0.7638, 0.9368])
Matrix: soc-sign-Slashdot090221
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.0877256393432617 seconds
Time: 15.453373908996582 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
31,702,830 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
295,778 L1I_CACHE_REFILL:u
470,423 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,155,119 L1D_CACHE:u
30,721,032 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,777 L1I_CACHE_REFILL:u
469,833 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,109,077 L1D_CACHE:u
4.675682406 seconds time elapsed
19.090250444 seconds time elapsed
23.098007000 seconds user
119.827712000 seconds sys
94.904880000 seconds user
1195.102767000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
nnz=549202, layout=torch.sparse_csr)
tensor([0.0860, 0.5402, 0.6738, ..., 0.3856, 0.5968, 0.4203])
tensor([0.8430, 0.9439, 0.4260, ..., 0.8172, 0.4243, 0.3834])
Matrix: soc-sign-Slashdot090221
Shape: torch.Size([82144, 82144])
NNZ: 549202
Density: 8.13917555860553e-05
Time: 1.2302696704864502 seconds
Time: 29.316507816314697 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
545,220 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
562,139 LL_CACHE_RD:u
192,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,891 L2D_TLB_REFILL:u
307,033 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,782,260 L2D_CACHE:u
551,850 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
565,355 LL_CACHE_RD:u
200,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,536 L2D_TLB_REFILL:u
304,133 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,801,849 L2D_CACHE:u
4.781838296 seconds time elapsed
32.859276963 seconds time elapsed
23.716896000 seconds user
130.971947000 seconds sys
148.969816000 seconds user
2252.321936000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.32, 20.52, 20.52, 20.56, 20.6, 20.4, 20.76, 20.6, 20.36, 20.4], "matrix": "soc-sign-epinions", "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 22.52380871772766, "power": [81.24, 81.16, 74.84, 62.04, 51.6, 50.56, 52.4, 52.4, 68.24, 80.56, 91.44, 91.36, 90.28, 88.32, 86.4, 85.16, 83.64, 82.36, 82.96, 82.84, 82.84, 82.56, 82.44, 82.08, 83.64, 84.4], "power_after": [20.8, 20.88, 20.8, 20.92, 20.88, 20.88, 20.8, 20.84, 20.84, 20.6], "task clock (msec)": 63.9, "page faults": 3446, "cycles": 55931043, "instructions": 77907356, "branch mispredictions": 332778, "branches": 20000746, "ITLB accesses": 27000304, "ITLB misses": 6713, "DTLB misses": 18689, "DTLB accesses": 36395663, "L1I cache accesses": 32396405, "L1I cache misses": 292629, "L1D cache misses": 473799, "L1D cache accesses": 34061981, "LL cache misses": 542765, "LL cache accesses": 557193, "L2D TLB accesses": 203626, "L2D TLB misses": 24363, "L2D cache misses": 303397, "L2D cache accesses": 1772084, "instructions per cycle": 1.3929179901043505, "branch miss rate": 0.01663827939217867, "ITLB miss rate": 0.00024862683027568875, "DTLB miss rate": 0.0005134952480464499, "L2D TLB miss rate": 0.11964582126054629, "L1I cache miss rate": 0.009032761505481858, "L1D cache miss rate": 0.01390990735389113, "L2D cache miss rate": 0.171209152613533, "LL cache miss rate": 0.9741059202107708}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394154 queued and waiting for resources
srun: job 3394154 has been allocated resources
srun: job 3394990 queued and waiting for resources
srun: job 3394990 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
@ -15,37 +15,38 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.5842, 0.3042, 0.7358, ..., 0.7882, 0.7596, 0.5895])
tensor([0.3914, 0.2076, 0.6733, ..., 0.4758, 0.6360, 0.6316])
Matrix: soc-sign-epinions
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.4407293796539307 seconds
Time: 20.04187798500061 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
49.87 msec task-clock:u # 0.008 CPUs utilized
63.90 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,300 page-faults:u # 66.174 K/sec
51,935,476 cycles:u # 1.041 GHz (65.00%)
83,731,856 instructions:u # 1.61 insn per cycle (84.25%)
3,446 page-faults:u # 53.927 K/sec
55,931,043 cycles:u # 0.875 GHz (85.43%)
77,907,356 instructions:u # 1.39 insn per cycle
<not supported> branches:u
375,900 branch-misses:u
34,169,837 L1-dcache-loads:u # 685.197 M/sec
474,410 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
357,739 branch-misses:u
33,000,188 L1-dcache-loads:u # 516.421 M/sec
466,824 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,443,215 L1-icache-loads:u # 650.574 M/sec
294,146 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
63,709,518 dTLB-loads:u # 1.278 G/sec (16.44%)
<not counted> dTLB-load-misses:u (0.00%)
31,503,048 L1-icache-loads:u # 492.992 M/sec
301,112 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
34,740,872 dTLB-loads:u # 543.661 M/sec (18.37%)
32,355 dTLB-load-misses:u # 0.09% of all dTLB cache accesses (12.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
6.058862056 seconds time elapsed
23.478083368 seconds time elapsed
29.101578000 seconds user
224.790489000 seconds sys
119.232326000 seconds user
1541.081607000 seconds sys
@ -57,21 +58,22 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.9696, 0.8139, 0.4858, ..., 0.2374, 0.1716, 0.9756])
tensor([0.3970, 0.5643, 0.0036, ..., 0.0338, 0.0807, 0.3885])
Matrix: soc-sign-epinions
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.0945546627044678 seconds
Time: 16.115705490112305 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
326,464 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,341,367 BR_RETIRED:u
332,778 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,000,746 BR_RETIRED:u
5.525378890 seconds time elapsed
19.765627973 seconds time elapsed
28.841740000 seconds user
199.678982000 seconds sys
103.591961000 seconds user
1250.845091000 seconds sys
@ -83,23 +85,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.3478, 0.0057, 0.8574, ..., 0.6409, 0.1876, 0.8429])
tensor([0.0049, 0.4550, 0.3166, ..., 0.3734, 0.8337, 0.5156])
Matrix: soc-sign-epinions
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 2.8504912853240967 seconds
Time: 18.55180263519287 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
27,590,154 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,210 ITLB_WALK:u
17,536 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,763,243 L1D_TLB:u
27,000,304 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,713 ITLB_WALK:u
18,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,395,663 L1D_TLB:u
6.425887143 seconds time elapsed
22.333459337 seconds time elapsed
33.069094000 seconds user
256.667850000 seconds sys
109.075160000 seconds user
1441.055730000 seconds sys
@ -111,23 +114,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.5381, 0.6651, 0.4689, ..., 0.7251, 0.3759, 0.8516])
tensor([0.0560, 0.8530, 0.8946, ..., 0.4591, 0.5391, 0.2898])
Matrix: soc-sign-epinions
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 1.6941111087799072 seconds
Time: 25.587534427642822 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
31,663,300 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
289,727 L1I_CACHE_REFILL:u
462,864 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,262,254 L1D_CACHE:u
32,396,405 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
292,629 L1I_CACHE_REFILL:u
473,799 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,061,981 L1D_CACHE:u
5.304170809 seconds time elapsed
29.367381835 seconds time elapsed
25.992245000 seconds user
173.752913000 seconds sys
142.233743000 seconds user
1962.747683000 seconds sys
@ -139,25 +143,26 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
7714]),
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
tensor([0.4145, 0.8515, 0.7222, ..., 0.1386, 0.6641, 0.6662])
tensor([0.7002, 0.7829, 0.1511, ..., 0.3651, 0.2391, 0.7788])
Matrix: soc-sign-epinions
Shape: torch.Size([131828, 131828])
NNZ: 841372
Density: 4.841419648464106e-05
Time: 3.0850296020507812 seconds
Time: 23.656178951263428 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
530,272 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
551,373 LL_CACHE_RD:u
196,152 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,542 L2D_TLB_REFILL:u
301,998 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,732,662 L2D_CACHE:u
542,765 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
557,193 LL_CACHE_RD:u
203,626 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,363 L2D_TLB_REFILL:u
303,397 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,772,084 L2D_CACHE:u
6.733517838 seconds time elapsed
27.453055481 seconds time elapsed
34.030476000 seconds user
271.397968000 seconds sys
128.709934000 seconds user
1831.887905000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.28, 16.44, 16.68, 16.68, 16.84, 17.04, 16.84, 16.84, 16.72, 16.72], "matrix": "sx-mathoverflow", "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 5.405760288238525, "power": [25.64, 20.44, 21.24, 22.16, 22.28, 27.04, 26.92, 26.28, 25.32], "power_after": [16.32, 16.44, 16.4, 16.4, 16.6, 16.48, 16.56, 16.6, 16.32, 16.44], "task clock (msec)": 50.36, "page faults": 3296, "cycles": 56049457, "instructions": 72333565, "branch mispredictions": 325529, "branches": 19463406, "ITLB accesses": 27374917, "ITLB misses": 5203, "DTLB misses": 16771, "DTLB accesses": 36373182, "L1I cache accesses": 31839975, "L1I cache misses": 274158, "L1D cache misses": 471992, "L1D cache accesses": 33638817, "LL cache misses": 538067, "LL cache accesses": 557981, "L2D TLB accesses": 170169, "L2D TLB misses": 21987, "L2D cache misses": 301746, "L2D cache accesses": 1735872, "instructions per cycle": 1.2905310572411077, "branch miss rate": 0.016725181604905125, "ITLB miss rate": 0.00019006450320927, "DTLB miss rate": 0.00046108146381034247, "L2D TLB miss rate": 0.12920684731061474, "L1I cache miss rate": 0.00861049671050307, "L1D cache miss rate": 0.014031171191305569, "L2D cache miss rate": 0.1738296372082734, "LL cache miss rate": 0.9643106127269566}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394144 queued and waiting for resources
srun: job 3394144 has been allocated resources
srun: job 3394987 queued and waiting for resources
srun: job 3394987 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.7658, 0.2874, 0.7506, ..., 0.3335, 0.5056, 0.9767])
tensor([0.8864, 0.5637, 0.9805, ..., 0.0234, 0.9487, 0.4860])
Matrix: sx-mathoverflow
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.5561239719390869 seconds
Time: 5.484489917755127 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
62.49 msec task-clock:u # 0.015 CPUs utilized
50.36 msec task-clock:u # 0.006 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,312 page-faults:u # 53.003 K/sec
76,783,170 cycles:u # 1.229 GHz (62.65%)
77,095,702 instructions:u # 1.00 insn per cycle (80.20%)
3,296 page-faults:u # 65.452 K/sec
56,049,457 cycles:u # 1.113 GHz (49.66%)
72,333,565 instructions:u # 1.29 insn per cycle (66.35%)
<not supported> branches:u
370,891 branch-misses:u (94.99%)
32,730,448 L1-dcache-loads:u # 523.800 M/sec
467,718 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
369,218 branch-misses:u (86.12%)
33,730,437 L1-dcache-loads:u # 669.814 M/sec (93.88%)
459,922 L1-dcache-load-misses:u # 1.36% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,548,469 L1-icache-loads:u # 504.885 M/sec
298,966 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
61,098,419 dTLB-loads:u # 977.786 M/sec (20.67%)
64,747 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (10.91%)
<not counted> iTLB-loads:u (0.00%)
31,827,672 L1-icache-loads:u # 632.030 M/sec
295,060 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
54,366,618 dTLB-loads:u # 1.080 G/sec (35.64%)
84,768 dTLB-load-misses:u # 0.16% of all dTLB cache accesses (25.48%)
12,107,953 iTLB-loads:u # 240.438 M/sec (10.11%)
<not counted> iTLB-load-misses:u (0.00%)
4.062782709 seconds time elapsed
8.968532171 seconds time elapsed
16.106338000 seconds user
32.399716000 seconds sys
20.749643000 seconds user
28.745486000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.7531, 0.4727, 0.4126, ..., 0.1574, 0.5247, 0.8875])
tensor([0.5549, 0.0336, 0.9472, ..., 0.2657, 0.3394, 0.6185])
Matrix: sx-mathoverflow
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.6003477573394775 seconds
Time: 5.532417297363281 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
323,514 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,769,937 BR_RETIRED:u
325,529 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,463,406 BR_RETIRED:u
4.061021393 seconds time elapsed
8.912497962 seconds time elapsed
16.155442000 seconds user
31.047278000 seconds sys
20.214519000 seconds user
31.566513000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.3067, 0.4335, 0.8814, ..., 0.2370, 0.1210, 0.7695])
tensor([0.3330, 0.8843, 0.5150, ..., 0.7292, 0.0873, 0.4184])
Matrix: sx-mathoverflow
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.5404119491577148 seconds
Time: 5.457342863082886 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
26,809,325 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,925 ITLB_WALK:u
19,003 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,516,965 L1D_TLB:u
27,374,917 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,203 ITLB_WALK:u
16,771 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,373,182 L1D_TLB:u
4.031175418 seconds time elapsed
8.730534933 seconds time elapsed
15.607232000 seconds user
30.562258000 seconds sys
20.156482000 seconds user
31.426118000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.5013, 0.5961, 0.5565, ..., 0.3779, 0.1835, 0.6722])
tensor([0.5864, 0.4449, 0.4042, ..., 0.1651, 0.7793, 0.8302])
Matrix: sx-mathoverflow
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.6185996532440186 seconds
Time: 5.449937582015991 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
31,104,231 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
285,499 L1I_CACHE_REFILL:u
468,498 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,677,465 L1D_CACHE:u
31,839,975 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
274,158 L1I_CACHE_REFILL:u
471,992 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,638,817 L1D_CACHE:u
4.083129305 seconds time elapsed
8.845491835 seconds time elapsed
16.243642000 seconds user
36.578375000 seconds sys
20.577696000 seconds user
35.105662000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
tensor([0.9075, 0.2788, 0.1365, ..., 0.4240, 0.8832, 0.1064])
tensor([0.8880, 0.4700, 0.5542, ..., 0.8505, 0.9123, 0.5742])
Matrix: sx-mathoverflow
Shape: torch.Size([24818, 24818])
NNZ: 239978
Density: 0.00038961697406616504
Time: 0.54673171043396 seconds
Time: 5.400304794311523 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
559,358 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
571,935 LL_CACHE_RD:u
194,840 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,481 L2D_TLB_REFILL:u
313,487 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,779,730 L2D_CACHE:u
538,067 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
557,981 LL_CACHE_RD:u
170,169 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,987 L2D_TLB_REFILL:u
301,746 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,735,872 L2D_CACHE:u
3.961843929 seconds time elapsed
8.606800178 seconds time elapsed
15.425912000 seconds user
28.864046000 seconds sys
21.064990000 seconds user
34.158762000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.36, 20.4, 20.68, 20.64, 20.92, 20.92, 20.88, 20.68, 20.68, 20.6], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 11.10523509979248, "power": [90.68, 90.68, 88.24, 72.2, 59.48, 52.0, 54.72, 64.28, 79.24, 94.08, 96.24, 93.72, 92.36, 92.36, 90.08], "power_after": [21.24, 21.28, 20.96, 21.16, 20.92, 21.04, 21.32, 21.56, 21.16, 21.24], "task clock (msec)": 52.22, "page faults": 3288, "cycles": 67463873, "instructions": 73042754, "branch mispredictions": 344635, "branches": 20775821, "ITLB accesses": 27488750, "ITLB misses": 6494, "DTLB misses": 18293, "DTLB accesses": 36697113, "L1I cache accesses": 31066176, "L1I cache misses": 298652, "L1D cache misses": 473808, "L1D cache accesses": 32572985, "LL cache misses": 547428, "LL cache accesses": 566356, "L2D TLB accesses": 162858, "L2D TLB misses": 19852, "L2D cache misses": 304056, "L2D cache accesses": 1713420, "instructions per cycle": 1.0826943481291091, "branch miss rate": 0.01658827345499367, "ITLB miss rate": 0.00023624209904051657, "DTLB miss rate": 0.0004984860798177775, "L2D TLB miss rate": 0.12189760404769799, "L1I cache miss rate": 0.009613413636747567, "L1D cache miss rate": 0.014546041758223879, "L2D cache miss rate": 0.17745561508561825, "LL cache miss rate": 0.9665793246650517}

View File

@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394993 queued and waiting for resources
srun: job 3394993 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.6983, 0.2845, 0.5984, ..., 0.1182, 0.9468, 0.3161])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 8.604448795318604 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
52.22 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,288 page-faults:u # 62.965 K/sec
67,463,873 cycles:u # 1.292 GHz (52.95%)
73,042,754 instructions:u # 1.08 insn per cycle (71.78%)
<not supported> branches:u
376,297 branch-misses:u (87.57%)
34,189,906 L1-dcache-loads:u # 654.731 M/sec (97.72%)
471,636 L1-dcache-load-misses:u # 1.38% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,870,328 L1-icache-loads:u # 610.312 M/sec
297,680 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
57,623,823 dTLB-loads:u # 1.103 G/sec (30.16%)
75,454 dTLB-load-misses:u # 0.13% of all dTLB cache accesses (24.31%)
0 iTLB-loads:u # 0.000 /sec (3.96%)
<not counted> iTLB-load-misses:u (0.00%)
12.112100803 seconds time elapsed
66.253313000 seconds user
675.855469000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.0260, 0.8569, 0.4315, ..., 0.5243, 0.8018, 0.1763])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 8.702903270721436 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
344,635 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,775,821 BR_RETIRED:u
12.383096073 seconds time elapsed
64.544546000 seconds user
688.477174000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.7940, 0.1585, 0.6879, ..., 0.4017, 0.1738, 0.9713])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 7.38647985458374 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
27,488,750 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,494 ITLB_WALK:u
18,293 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,697,113 L1D_TLB:u
10.936742446 seconds time elapsed
63.993242000 seconds user
580.515047000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.2725, 0.6578, 0.8180, ..., 0.0148, 0.5094, 0.1155])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 12.719107389450073 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
31,066,176 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,652 L1I_CACHE_REFILL:u
473,808 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,572,985 L1D_CACHE:u
16.299576479 seconds time elapsed
86.072431000 seconds user
987.199923000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
572066]),
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
114602]),
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.1156, 0.5715, 0.3099, ..., 0.3964, 0.9672, 0.5694])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 12.682909727096558 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
547,428 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
566,356 LL_CACHE_RD:u
162,858 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
19,852 L2D_TLB_REFILL:u
304,056 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,713,420 L2D_CACHE:u
16.221517033 seconds time elapsed
79.927661000 seconds user
988.333919000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.88, 20.76, 20.76, 20.96, 20.92, 20.88, 20.72, 20.4, 20.4, 20.24], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.6774682998657227, "power": [34.12, 31.52, 30.36, 27.2, 27.16, 30.64, 31.0, 31.32], "power_after": [20.44, 20.52, 20.68, 20.72, 20.68, 20.72, 20.88, 20.8, 20.88, 20.52], "task clock (msec)": 48.59, "page faults": 3274, "cycles": 55030923, "instructions": 78222423, "branch mispredictions": 323004, "branches": 19091130, "ITLB accesses": 27178617, "ITLB misses": 6398, "DTLB misses": 19770, "DTLB accesses": 36355567, "L1I cache accesses": 31341858, "L1I cache misses": 291951, "L1D cache misses": 468242, "L1D cache accesses": 32805413, "LL cache misses": 520057, "LL cache accesses": 541186, "L2D TLB accesses": 191068, "L2D TLB misses": 22725, "L2D cache misses": 288895, "L2D cache accesses": 1728320, "instructions per cycle": 1.4214266949511278, "branch miss rate": 0.01691906136514706, "ITLB miss rate": 0.00023540564996371965, "DTLB miss rate": 0.0005437956723381593, "L2D TLB miss rate": 0.11893671363074926, "L1I cache miss rate": 0.009315050817982775, "L1D cache miss rate": 0.014273315199537345, "L2D cache miss rate": 0.16715365210146269, "LL cache miss rate": 0.9609579700879181}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394143 queued and waiting for resources
srun: job 3394143 has been allocated resources
srun: job 3394988 queued and waiting for resources
srun: job 3394988 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.9170, 0.7306, 0.1175, ..., 0.0616, 0.0147, 0.6403])
tensor([0.2022, 0.3400, 0.2561, ..., 0.8370, 0.0285, 0.6506])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.4440653324127197 seconds
Time: 3.74875545501709 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
61.63 msec task-clock:u # 0.016 CPUs utilized
48.59 msec task-clock:u # 0.007 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,304 page-faults:u # 53.611 K/sec
64,734,203 cycles:u # 1.050 GHz (50.46%)
53,597,991 instructions:u # 0.83 insn per cycle (70.10%)
3,274 page-faults:u # 67.376 K/sec
55,030,923 cycles:u # 1.132 GHz (65.54%)
78,222,423 instructions:u # 1.42 insn per cycle (83.60%)
<not supported> branches:u
347,389 branch-misses:u (91.95%)
31,363,842 L1-dcache-loads:u # 508.915 M/sec
482,780 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
369,917 branch-misses:u
32,435,815 L1-dcache-loads:u # 667.500 M/sec
467,963 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,027,001 L1-icache-loads:u # 487.223 M/sec
288,023 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
44,333,825 dTLB-loads:u # 719.368 M/sec (48.58%)
74,525 dTLB-load-misses:u # 0.17% of all dTLB cache accesses (16.71%)
31,013,287 L1-icache-loads:u # 638.226 M/sec
289,982 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
60,644,978 dTLB-loads:u # 1.248 G/sec (17.29%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.811654040 seconds time elapsed
6.978143797 seconds time elapsed
15.616953000 seconds user
30.906234000 seconds sys
18.401752000 seconds user
28.060858000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5548, 0.3514, 0.6283, ..., 0.5672, 0.1575, 0.4493])
tensor([0.3381, 0.0423, 0.5363, ..., 0.0429, 0.4077, 0.4744])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.44233155250549316 seconds
Time: 3.7925527095794678 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
330,777 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,357,034 BR_RETIRED:u
323,004 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,091,130 BR_RETIRED:u
3.835342404 seconds time elapsed
7.233250772 seconds time elapsed
15.497637000 seconds user
28.676763000 seconds sys
19.111768000 seconds user
32.178633000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.0953, 0.5790, 0.0112, ..., 0.9540, 0.3173, 0.4731])
tensor([0.7962, 0.6492, 0.2778, ..., 0.5407, 0.1159, 0.3587])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.43302106857299805 seconds
Time: 3.668635129928589 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
27,381,387 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,248 ITLB_WALK:u
17,636 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,436,110 L1D_TLB:u
27,178,617 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,398 ITLB_WALK:u
19,770 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,355,567 L1D_TLB:u
3.828586094 seconds time elapsed
6.925944164 seconds time elapsed
15.518057000 seconds user
31.389361000 seconds sys
18.970654000 seconds user
30.786317000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5456, 0.8708, 0.2037, ..., 0.8669, 0.9122, 0.2046])
tensor([0.8340, 0.3434, 0.3449, ..., 0.9828, 0.6683, 0.0312])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.4426534175872803 seconds
Time: 3.623232126235962 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
32,505,993 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
303,849 L1I_CACHE_REFILL:u
467,426 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,241,110 L1D_CACHE:u
31,341,858 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
291,951 L1I_CACHE_REFILL:u
468,242 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,805,413 L1D_CACHE:u
3.811299200 seconds time elapsed
6.941260499 seconds time elapsed
15.932195000 seconds user
30.887870000 seconds sys
18.410270000 seconds user
27.908787000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.5024, 0.2304, 0.7925, ..., 0.1397, 0.5558, 0.6450])
tensor([0.2754, 0.3661, 0.9484, ..., 0.7285, 0.5354, 0.4116])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 0.3671383857727051 seconds
Time: 3.7337992191314697 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
550,075 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
562,829 LL_CACHE_RD:u
199,285 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,424 L2D_TLB_REFILL:u
310,155 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,783,824 L2D_CACHE:u
520,057 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
541,186 LL_CACHE_RD:u
191,068 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,725 L2D_TLB_REFILL:u
288,895 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,728,320 L2D_CACHE:u
3.824434783 seconds time elapsed
7.164825085 seconds time elapsed
15.754438000 seconds user
28.226523000 seconds sys
18.193885000 seconds user
30.023194000 seconds sys

View File

@ -1 +0,0 @@
{"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 0.9615458509371094}

View File

@ -1 +0,0 @@
{"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491}

View File

@ -1 +0,0 @@
{"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127}

View File

@ -1 +0,0 @@
{"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218}

View File

@ -1 +0,0 @@
{"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719}

View File

@ -1 +0,0 @@
{"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197}

View File

@ -1 +0,0 @@
{"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281}

View File

@ -1 +0,0 @@
{"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573}

View File

@ -1 +0,0 @@
{"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372}

View File

@ -1 +0,0 @@
{"power_before": [30.44, 35.52], "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 0.29622840881347656, "power": [23.84, 29.44, 33.0, 33.04], "power_after": [36.32, 30.0], "task clock (msec)": 60.77, "page faults": 3361, "cycles": 63493475, "instructions": 91578911, "branch mispredictions": 329084, "branches": 20406595, "ITLB accesses": 26859919, "ITLB misses": 6237, "DTLB misses": 16689, "DTLB accesses": 36348977, "L1I cache accesses": 30979764, "L1I cache misses": 292038, "L1D cache misses": 469219, "L1D cache accesses": 32411890, "LL cache misses": 571870, "LL cache accesses": 598306, "L2D TLB accesses": 205488, "L2D TLB misses": 26392, "L2D cache misses": 342141, "L2D cache accesses": 1857697, "instructions per cycle": 1.442335783322617, "branch miss rate": 0.01612635522976763, "ITLB miss rate": 0.00023220472109390948, "DTLB miss rate": 0.0004591325912693499, "L2D TLB miss rate": 0.12843572374055906, "L1I cache miss rate": 0.009426734173959492, "L1D cache miss rate": 0.014476755289494072, "L2D cache miss rate": 0.1841748142996409, "LL cache miss rate": 0.9558152517273769}

View File

@ -1 +0,0 @@
{"power_before": [16.52, 16.64], "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 1.3372814655303955, "power": [23.92, 38.6, 46.04, 48.2, 48.2], "power_after": [45.0, 44.08], "task clock (msec)": 59.01, "page faults": 3448, "cycles": 73062796, "instructions": 88329175, "branch mispredictions": 331091, "branches": 20013316, "ITLB accesses": 26330936, "ITLB misses": 5193, "DTLB misses": 16837, "DTLB accesses": 35930477, "L1I cache accesses": 31853890, "L1I cache misses": 306147, "L1D cache misses": 479933, "L1D cache accesses": 33426019, "LL cache misses": 540302, "LL cache accesses": 553181, "L2D TLB accesses": 173206, "L2D TLB misses": 21390, "L2D cache misses": 300032, "L2D cache accesses": 1739931, "instructions per cycle": 1.2089487377406143, "branch miss rate": 0.016543535314187813, "ITLB miss rate": 0.0001972204861991993, "DTLB miss rate": 0.000468599401004334, "L2D TLB miss rate": 0.12349456716280037, "L1I cache miss rate": 0.009610976869701, "L1D cache miss rate": 0.014358066391334247, "L2D cache miss rate": 0.17243902200719455, "LL cache miss rate": 0.9767182893121781}

View File

@ -1 +0,0 @@
{"power_before": [53.64, 46.88], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.2292509078979492, "power": [40.64, 52.44, 54.8, 54.96, 46.8], "power_after": [47.88, 47.08], "task clock (msec)": 61.26, "page faults": 3303, "cycles": 44515786, "instructions": 81513738, "branch mispredictions": 328019, "branches": 19893662, "ITLB accesses": 27248112, "ITLB misses": 5792, "DTLB misses": 16632, "DTLB accesses": 36929042, "L1I cache accesses": 31702830, "L1I cache misses": 295778, "L1D cache misses": 470423, "L1D cache accesses": 33155119, "LL cache misses": 545220, "LL cache accesses": 562139, "L2D TLB accesses": 192206, "L2D TLB misses": 24891, "L2D cache misses": 307033, "L2D cache accesses": 1782260, "instructions per cycle": 1.8311198189334452, "branch miss rate": 0.01648861833482443, "ITLB miss rate": 0.0002125651861677609, "DTLB miss rate": 0.0004503772396803578, "L2D TLB miss rate": 0.12950168048864238, "L1I cache miss rate": 0.009329703373484323, "L1D cache miss rate": 0.014188548079106578, "L2D cache miss rate": 0.17227172241984895, "LL cache miss rate": 0.9699024618466251}

View File

@ -1 +0,0 @@
{"power_before": [30.48, 33.04], "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 2.848874092102051, "power": [65.52, 75.88, 71.16, 71.16, 59.72, 47.92, 48.68], "power_after": [68.68, 67.88], "task clock (msec)": 49.87, "page faults": 3300, "cycles": 51935476, "instructions": 83731856, "branch mispredictions": 326464, "branches": 20341367, "ITLB accesses": 27590154, "ITLB misses": 6210, "DTLB misses": 17536, "DTLB accesses": 36763243, "L1I cache accesses": 31663300, "L1I cache misses": 289727, "L1D cache misses": 462864, "L1D cache accesses": 33262254, "LL cache misses": 530272, "LL cache accesses": 551373, "L2D TLB accesses": 196152, "L2D TLB misses": 23542, "L2D cache misses": 301998, "L2D cache accesses": 1732662, "instructions per cycle": 1.6122285275675532, "branch miss rate": 0.01604926551888081, "ITLB miss rate": 0.000225080294948698, "DTLB miss rate": 0.0004769981799483794, "L2D TLB miss rate": 0.12001916880786329, "L1I cache miss rate": 0.00915024649989104, "L1D cache miss rate": 0.013915593332911234, "L2D cache miss rate": 0.17429712200071334, "LL cache miss rate": 0.9617300810884828}

View File

@ -1 +0,0 @@
{"power_before": [20.44, 20.2], "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 0.556269645690918, "power": [25.24, 32.16, 33.0, 32.52], "power_after": [34.24, 30.28], "task clock (msec)": 62.49, "page faults": 3312, "cycles": 76783170, "instructions": 77095702, "branch mispredictions": 323514, "branches": 19769937, "ITLB accesses": 26809325, "ITLB misses": 6925, "DTLB misses": 19003, "DTLB accesses": 36516965, "L1I cache accesses": 31104231, "L1I cache misses": 285499, "L1D cache misses": 468498, "L1D cache accesses": 32677465, "LL cache misses": 559358, "LL cache accesses": 571935, "L2D TLB accesses": 194840, "L2D TLB misses": 23481, "L2D cache misses": 313487, "L2D cache accesses": 1779730, "instructions per cycle": 1.004070319055595, "branch miss rate": 0.016363936819829016, "ITLB miss rate": 0.00025830564551699827, "DTLB miss rate": 0.0005203882633729282, "L2D TLB miss rate": 0.12051426811742968, "L1I cache miss rate": 0.009178783426601994, "L1D cache miss rate": 0.01433703624194839, "L2D cache miss rate": 0.1761430104566423, "LL cache miss rate": 0.9780097388689274}

View File

@ -1 +0,0 @@
{"power_before": [34.6, 37.16], "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 1.0817186832427979, "power": [34.32, 50.84, 52.12, 52.4, 52.76], "power_after": [49.0, 45.08], "task clock (msec)": 60.55, "page faults": 3490, "cycles": 49977496, "instructions": 78622993, "branch mispredictions": 327078, "branches": 20135808, "ITLB accesses": 27608093, "ITLB misses": 6616, "DTLB misses": 17185, "DTLB accesses": 36866957, "L1I cache accesses": 32639204, "L1I cache misses": 309643, "L1D cache misses": 478856, "L1D cache accesses": 34280618, "LL cache misses": 555275, "LL cache accesses": 578455, "L2D TLB accesses": 188723, "L2D TLB misses": 24635, "L2D cache misses": 319663, "L2D cache accesses": 1799940, "instructions per cycle": 1.573167911413569, "branch miss rate": 0.016243599462211798, "ITLB miss rate": 0.00023963987661154286, "DTLB miss rate": 0.00046613556958335347, "L2D TLB miss rate": 0.13053522888042263, "L1I cache miss rate": 0.009486842877663316, "L1D cache miss rate": 0.013968709665619214, "L2D cache miss rate": 0.17759647543807017, "LL cache miss rate": 0.9599277385449171}

View File

@ -1 +0,0 @@
{"power_before": [34.04, 43.96], "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 0.4164857864379883, "power": [23.72, 23.72, 29.88, 33.32], "power_after": [33.36, 32.52], "task clock (msec)": 61.63, "page faults": 3304, "cycles": 64734203, "instructions": 53597991, "branch mispredictions": 330777, "branches": 20357034, "ITLB accesses": 27381387, "ITLB misses": 6248, "DTLB misses": 17636, "DTLB accesses": 37436110, "L1I cache accesses": 32505993, "L1I cache misses": 303849, "L1D cache misses": 467426, "L1D cache accesses": 34241110, "LL cache misses": 550075, "LL cache accesses": 562829, "L2D TLB accesses": 199285, "L2D TLB misses": 24424, "L2D cache misses": 310155, "L2D cache accesses": 1783824, "instructions per cycle": 0.8279701999266138, "branch miss rate": 0.016248781625063848, "ITLB miss rate": 0.00022818420410916364, "DTLB miss rate": 0.00047109595521543235, "L2D TLB miss rate": 0.12255814536969667, "L1I cache miss rate": 0.009347476325365603, "L1D cache miss rate": 0.01365101773861887, "L2D cache miss rate": 0.17387085272986572, "LL cache miss rate": 0.9773394761108614}

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [80.64, 75.2, 61.4, 49.84, 38.12, 38.12, 24.16, 22.68, 22.36, 22.2], "matrix": "ASIC_680k", "shape": [682862, 682862], "nnz": 3871773, "% density": 8.303171256088674e-06, "time_s": 41.51614689826965, "power": [92.4, 92.44, 84.28, 73.04, 59.64, 53.28, 56.32, 65.88, 80.28, 93.76, 96.16, 94.44, 94.44, 94.2, 93.92, 92.48, 92.16, 91.84, 92.08, 91.84, 91.68, 90.68, 90.88, 90.28, 90.28, 92.44, 92.52, 92.84, 90.0, 89.64, 88.16, 87.28, 88.12, 88.24, 88.08, 85.72, 85.12, 85.12, 81.72, 82.52, 83.84, 86.32, 88.8, 91.0, 90.2], "power_after": [21.92, 21.88, 21.92, 21.88, 21.88, 21.72, 21.72, 21.72, 21.72, 21.44], "task clock (msec)": 55.74, "page faults": 3266, "cycles": 51085608, "instructions": 88049969, "branch mispredictions": 332704, "branches": 20219525, "ITLB accesses": 27856157, "ITLB misses": 6496, "DTLB misses": 17046, "DTLB accesses": 37522360, "L1I cache accesses": 31475230, "L1I cache misses": 277921, "L1D cache misses": 462005, "L1D cache accesses": 33126938, "LL cache misses": 558923, "LL cache accesses": 571263, "L2D TLB accesses": 190627, "L2D TLB misses": 24234, "L2D cache misses": 314815, "L2D cache accesses": 1760110, "instructions per cycle": 1.7235768046452535, "branch miss rate": 0.01645459030318467, "ITLB miss rate": 0.00023319799640704206, "DTLB miss rate": 0.0004542891225392006, "L2D TLB miss rate": 0.12712784652751186, "L1I cache miss rate": 0.008829832220447635, "L1D cache miss rate": 0.013946504805243395, "L2D cache miss rate": 0.17886098027964162, "LL cache miss rate": 0.978398741035215}

View File

@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395287 queued and waiting for resources
srun: job 3395287 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
3871770, 3871773]),
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
682861]),
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
0.0000e+00, 0.0000e+00, 7.9289e-02]),
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.9283, 0.0381, 0.0668, ..., 0.8379, 0.4193, 0.2544])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 29.317893266677856 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
55.74 msec task-clock:u # 0.002 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,266 page-faults:u # 58.589 K/sec
51,085,608 cycles:u # 0.916 GHz (47.05%)
88,049,969 instructions:u # 1.72 insn per cycle (92.14%)
<not supported> branches:u
360,079 branch-misses:u
31,381,953 L1-dcache-loads:u # 562.963 M/sec
471,072 L1-dcache-load-misses:u # 1.50% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,944,756 L1-icache-loads:u # 537.181 M/sec
283,203 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
20,217,238 dTLB-loads:u # 362.679 M/sec (11.38%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
33.488240295 seconds time elapsed
222.678572000 seconds user
2205.889153000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
3871770, 3871773]),
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
682861]),
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
0.0000e+00, 0.0000e+00, 7.9289e-02]),
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.3482, 0.5546, 0.8398, ..., 0.6137, 0.0654, 0.9075])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 38.4066903591156 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
332,704 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,219,525 BR_RETIRED:u
42.582064532 seconds time elapsed
238.965431000 seconds user
2914.615754000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
3871770, 3871773]),
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
682861]),
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
0.0000e+00, 0.0000e+00, 7.9289e-02]),
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.2581, 0.2884, 0.9465, ..., 0.4833, 0.3421, 0.4862])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 34.74818539619446 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
27,856,157 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,496 ITLB_WALK:u
17,046 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,522,360 L1D_TLB:u
39.019872270 seconds time elapsed
239.678206000 seconds user
2622.552757000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
3871770, 3871773]),
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
682861]),
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
0.0000e+00, 0.0000e+00, 7.9289e-02]),
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.8603, 0.0423, 0.3724, ..., 0.4873, 0.6469, 0.9634])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 33.05097770690918 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
31,475,230 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
277,921 L1I_CACHE_REFILL:u
462,005 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,126,938 L1D_CACHE:u
37.399374202 seconds time elapsed
239.238852000 seconds user
2492.385966000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767,
3871770, 3871773]),
col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874,
682861]),
values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ...,
0.0000e+00, 0.0000e+00, 7.9289e-02]),
size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.1993, 0.2167, 0.6338, ..., 0.0614, 0.0230, 0.4851])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 32.37103772163391 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':
558,923 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
571,263 LL_CACHE_RD:u
190,627 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,234 L2D_TLB_REFILL:u
314,815 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,760,110 L2D_CACHE:u
36.644016288 seconds time elapsed
233.933818000 seconds user
2439.284669000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.08, 21.88, 21.88, 21.88, 21.56, 21.64, 21.84, 21.88, 21.72, 21.92], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.7533018589019775, "power": [29.48, 30.24, 27.96, 28.4, 26.84, 30.6, 30.92], "power_after": [20.84, 21.24, 21.2, 21.24, 21.28, 20.88, 20.68, 20.56, 20.52, 20.56], "task clock (msec)": 61.38, "page faults": 3315, "cycles": 65013274, "instructions": 87442627, "branch mispredictions": 328392, "branches": 19496396, "ITLB accesses": 28311619, "ITLB misses": 6963, "DTLB misses": 17888, "DTLB accesses": 38223408, "L1I cache accesses": 30063404, "L1I cache misses": 272797, "L1D cache misses": 468341, "L1D cache accesses": 31519623, "LL cache misses": 538689, "LL cache accesses": 552789, "L2D TLB accesses": 192995, "L2D TLB misses": 23339, "L2D cache misses": 300578, "L2D cache accesses": 1764035, "instructions per cycle": 1.344996515634638, "branch miss rate": 0.016843728451145536, "ITLB miss rate": 0.0002459414277933028, "DTLB miss rate": 0.00046798548156668814, "L2D TLB miss rate": 0.12093059405684085, "L1I cache miss rate": 0.009074055619250568, "L1D cache miss rate": 0.01485871198395996, "L2D cache miss rate": 0.17039231081015965, "LL cache miss rate": 0.9744929801425137}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394139 queued and waiting for resources
srun: job 3394139 has been allocated resources
srun: job 3395278 queued and waiting for resources
srun: job 3395278 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
@ -15,37 +15,38 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.4207, 0.3943, 0.6543, ..., 0.2191, 0.5415, 0.1575])
tensor([0.3547, 0.6554, 0.2142, ..., 0.8854, 0.1041, 0.2243])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.36042284965515137 seconds
Time: 2.74495267868042 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
59.88 msec task-clock:u # 0.016 CPUs utilized
61.38 msec task-clock:u # 0.010 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,313 page-faults:u # 55.328 K/sec
58,169,777 cycles:u # 0.971 GHz (61.49%)
57,993,431 instructions:u # 1.00 insn per cycle (81.67%)
3,315 page-faults:u # 54.008 K/sec
65,013,274 cycles:u # 1.059 GHz (90.47%)
87,442,627 instructions:u # 1.34 insn per cycle
<not supported> branches:u
341,266 branch-misses:u
31,858,781 L1-dcache-loads:u # 532.049 M/sec
467,486 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
369,052 branch-misses:u
31,570,549 L1-dcache-loads:u # 514.350 M/sec
477,402 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,461,310 L1-icache-loads:u # 508.711 M/sec
294,156 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
43,828,130 dTLB-loads:u # 731.940 M/sec (40.26%)
47,836 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (25.52%)
0 iTLB-loads:u # 0.000 /sec (2.73%)
30,354,192 L1-icache-loads:u # 494.533 M/sec
294,845 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
0 dTLB-loads:u # 0.000 /sec (3.92%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.824054028 seconds time elapsed
6.232986287 seconds time elapsed
15.099361000 seconds user
28.830417000 seconds sys
17.354331000 seconds user
29.036034000 seconds sys
@ -57,21 +58,22 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.0456, 0.2095, 0.0276, ..., 0.4209, 0.6824, 0.5475])
tensor([0.3177, 0.9122, 0.6465, ..., 0.5489, 0.2254, 0.7965])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.3598823547363281 seconds
Time: 2.7603256702423096 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
330,494 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,578,427 BR_RETIRED:u
328,392 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,496,396 BR_RETIRED:u
3.781234836 seconds time elapsed
6.149991615 seconds time elapsed
14.965545000 seconds user
29.444131000 seconds sys
17.630426000 seconds user
30.586756000 seconds sys
@ -83,23 +85,24 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.9882, 0.5477, 0.6307, ..., 0.1179, 0.6903, 0.1235])
tensor([0.7815, 0.6240, 0.3715, ..., 0.5116, 0.5969, 0.4241])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.29088521003723145 seconds
Time: 2.7978765964508057 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
27,982,097 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,614 ITLB_WALK:u
17,270 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,728,899 L1D_TLB:u
28,311,619 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,963 ITLB_WALK:u
17,888 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
38,223,408 L1D_TLB:u
3.576632300 seconds time elapsed
6.151843492 seconds time elapsed
14.864601000 seconds user
29.274547000 seconds sys
17.202045000 seconds user
28.014218000 seconds sys
@ -111,23 +114,24 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.3952, 0.0475, 0.1125, ..., 0.3481, 0.1290, 0.3495])
tensor([0.9638, 0.0929, 0.0479, ..., 0.1500, 0.3117, 0.9664])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.30365920066833496 seconds
Time: 2.684640884399414 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
29,754,926 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
278,786 L1I_CACHE_REFILL:u
454,742 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,173,246 L1D_CACHE:u
30,063,404 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
272,797 L1I_CACHE_REFILL:u
468,341 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,519,623 L1D_CACHE:u
3.730995381 seconds time elapsed
5.874324363 seconds time elapsed
15.213930000 seconds user
30.995070000 seconds sys
17.629166000 seconds user
29.998701000 seconds sys
@ -139,25 +143,26 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
16949.]), size=(24115, 24115), nnz=116056,
layout=torch.sparse_csr)
tensor([0.7266, 0.7537, 0.9729, ..., 0.3349, 0.3523, 0.6532])
tensor([0.3936, 0.9167, 0.4396, ..., 0.1628, 0.6361, 0.1875])
Matrix: de2010
Shape: torch.Size([24115, 24115])
NNZ: 116056
Density: 0.0001995689928120616
Time: 0.2798902988433838 seconds
Time: 2.747934103012085 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
543,243 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
560,716 LL_CACHE_RD:u
162,281 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
19,847 L2D_TLB_REFILL:u
300,577 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,696,278 L2D_CACHE:u
538,689 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
552,789 LL_CACHE_RD:u
192,995 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,339 L2D_TLB_REFILL:u
300,578 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,764,035 L2D_CACHE:u
3.819959836 seconds time elapsed
6.102012809 seconds time elapsed
15.346035000 seconds user
29.199873000 seconds sys
18.001082000 seconds user
27.986033000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.72, 20.8, 20.96, 21.08, 21.4, 21.48, 21.48, 21.36, 21.08, 21.04], "matrix": "fl2010", "shape": [484481, 484481], "nnz": 2346294, "% density": 9.99606174861054e-06, "time_s": 14.43001127243042, "power": [93.04, 93.04, 89.16, 77.68, 62.92, 55.12, 53.84, 64.72, 77.04, 89.56, 94.4, 94.76, 93.52, 93.52, 96.04, 97.12, 96.44, 93.88, 93.72], "power_after": [21.08, 21.28, 21.28, 21.36, 21.08, 21.24, 21.08, 20.8, 21.04, 20.88], "task clock (msec)": 61.6, "page faults": 3276, "cycles": 41408849, "instructions": 49118917, "branch mispredictions": 331330, "branches": 19331189, "ITLB accesses": 27367982, "ITLB misses": 6160, "DTLB misses": 17157, "DTLB accesses": 36828216, "L1I cache accesses": 30147304, "L1I cache misses": 280082, "L1D cache misses": 454022, "L1D cache accesses": 31595140, "LL cache misses": 536056, "LL cache accesses": 550006, "L2D TLB accesses": 185998, "L2D TLB misses": 23735, "L2D cache misses": 296648, "L2D cache accesses": 1723525, "instructions per cycle": 1.1861937287848787, "branch miss rate": 0.017139659645353425, "ITLB miss rate": 0.00022508053388810325, "DTLB miss rate": 0.00046586562867992305, "L2D TLB miss rate": 0.12760889902041958, "L1I cache miss rate": 0.009290449321770198, "L1D cache miss rate": 0.014369994878959232, "L2D cache miss rate": 0.172117027603313, "LL cache miss rate": 0.97463664032756}

View File

@ -0,0 +1,169 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395283 queued and waiting for resources
srun: job 3395283 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
2346292, 2346294]),
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
484022]),
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([2.0367e-04, 1.7661e-01, 2.1772e-01, ..., 1.8646e-01, 2.2210e-01,
4.2364e-02])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 16.31556534767151 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
61.60 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,276 page-faults:u # 53.185 K/sec
41,408,849 cycles:u # 0.672 GHz (41.57%)
49,118,917 instructions:u # 1.19 insn per cycle (67.74%)
<not supported> branches:u
344,653 branch-misses:u (91.69%)
31,501,274 L1-dcache-loads:u # 511.418 M/sec
477,740 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,099,667 L1-icache-loads:u # 488.663 M/sec
285,734 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
41,879,387 dTLB-loads:u # 679.904 M/sec (54.00%)
99,044 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (13.61%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
20.288512544 seconds time elapsed
134.447078000 seconds user
1247.121046000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
2346292, 2346294]),
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
484022]),
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([0.9700, 0.5813, 0.6566, ..., 0.4126, 0.7652, 0.9833])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 16.561575651168823 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
331,330 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,331,189 BR_RETIRED:u
20.603578845 seconds time elapsed
136.555709000 seconds user
1264.382740000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
2346292, 2346294]),
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
484022]),
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([0.1770, 0.8270, 0.4236, ..., 0.0091, 0.2300, 0.5084])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 17.374610424041748 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
27,367,982 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,160 ITLB_WALK:u
17,157 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,828,216 L1D_TLB:u
21.377378255 seconds time elapsed
140.848520000 seconds user
1326.124469000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
2346292, 2346294]),
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
484022]),
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([0.1268, 0.8786, 0.9762, ..., 0.0649, 0.4474, 0.9707])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 16.753613471984863 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
30,147,304 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
280,082 L1I_CACHE_REFILL:u
454,022 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,595,140 L1D_CACHE:u
20.706929400 seconds time elapsed
139.881127000 seconds user
1278.527504000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288,
2346292, 2346294]),
col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463,
484022]),
values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([0.1394, 0.8842, 0.4362, ..., 0.8265, 0.1643, 0.9034])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 14.484151124954224 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':
536,056 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
550,006 LL_CACHE_RD:u
185,998 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,735 L2D_TLB_REFILL:u
296,648 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,723,525 L2D_CACHE:u
18.443039315 seconds time elapsed
135.498625000 seconds user
1101.745145000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [51.04, 38.64, 22.84, 22.24, 21.88, 21.88, 21.6, 21.4, 21.24, 21.28], "matrix": "ga2010", "shape": [291086, 291086], "nnz": 1418056, "% density": 1.6735964475229304e-05, "time_s": 15.249999523162842, "power": [88.88, 89.52, 78.6, 64.88, 52.64, 52.64, 54.76, 60.16, 71.44, 86.84, 90.72, 89.6, 90.56, 90.36, 91.68, 91.84, 93.4, 93.4, 92.72], "power_after": [21.68, 21.4, 21.28, 21.04, 21.04, 20.96, 20.92, 20.76, 20.8, 20.96], "task clock (msec)": 72.45, "page faults": 3289, "cycles": 24836161, "instructions": 74134706, "branch mispredictions": 325643, "branches": 19697746, "ITLB accesses": 27767290, "ITLB misses": 5832, "DTLB misses": 18134, "DTLB accesses": 37063060, "L1I cache accesses": 32135376, "L1I cache misses": 302429, "L1D cache misses": 484427, "L1D cache accesses": 33639686, "LL cache misses": 548380, "LL cache accesses": 561312, "L2D TLB accesses": 186006, "L2D TLB misses": 25022, "L2D cache misses": 304539, "L2D cache accesses": 1750107, "instructions per cycle": 2.9849502908279586, "branch miss rate": 0.01653199305138771, "ITLB miss rate": 0.00021003129941740803, "DTLB miss rate": 0.0004892742261432272, "L2D TLB miss rate": 0.13452254228358226, "L1I cache miss rate": 0.009411092622659838, "L1D cache miss rate": 0.014400461407398393, "L2D cache miss rate": 0.17401164614506429, "LL cache miss rate": 0.976961119662505}

View File

@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395281 queued and waiting for resources
srun: job 3395281 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
1418054, 1418056]),
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
290176]),
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.8043, 0.7164, 0.5687, ..., 0.1275, 0.5142, 0.8456])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 13.566045045852661 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
72.45 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,289 page-faults:u # 45.396 K/sec
24,836,161 cycles:u # 0.343 GHz (23.15%)
74,134,706 instructions:u # 2.98 insn per cycle (85.49%)
<not supported> branches:u
381,828 branch-misses:u
33,748,654 L1-dcache-loads:u # 465.814 M/sec
497,166 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,271,900 L1-icache-loads:u # 445.431 M/sec
311,814 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
43,431,516 dTLB-loads:u # 599.461 M/sec (27.81%)
33,416 dTLB-load-misses:u # 0.08% of all dTLB cache accesses (4.55%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
17.276157893 seconds time elapsed
100.320029000 seconds user
1057.703228000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
1418054, 1418056]),
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
290176]),
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.6290, 0.2236, 0.0669, ..., 0.6531, 0.4280, 0.4384])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 17.094524145126343 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
325,643 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,697,746 BR_RETIRED:u
20.849795214 seconds time elapsed
115.280665000 seconds user
1318.654953000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
1418054, 1418056]),
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
290176]),
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.1008, 0.2309, 0.3749, ..., 0.1568, 0.8852, 0.8182])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 15.106332063674927 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
27,767,290 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,832 ITLB_WALK:u
18,134 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,063,060 L1D_TLB:u
18.753509375 seconds time elapsed
112.958759000 seconds user
1167.457916000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
1418054, 1418056]),
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
290176]),
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.8347, 0.6624, 0.6196, ..., 0.2250, 0.0157, 0.1843])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 13.73094367980957 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
32,135,376 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,429 L1I_CACHE_REFILL:u
484,427 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,639,686 L1D_CACHE:u
17.400567824 seconds time elapsed
110.027662000 seconds user
1054.271122000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047,
1418054, 1418056]),
col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820,
290176]),
values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.8369, 0.3399, 0.1689, ..., 0.2081, 0.0714, 0.7388])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 15.809288501739502 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':
548,380 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
561,312 LL_CACHE_RD:u
186,006 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,022 L2D_TLB_REFILL:u
304,539 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,750,107 L2D_CACHE:u
19.626934574 seconds time elapsed
116.733174000 seconds user
1214.439657000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.04, 21.32, 21.32, 21.32, 21.12, 21.12, 21.0, 20.68, 20.72, 20.56], "matrix": "mac_econ_fwd500", "shape": [206500, 206500], "nnz": 1273389, "% density": 2.9862143765866013e-05, "time_s": 15.046087741851807, "power": [91.88, 91.12, 83.92, 72.88, 57.76, 51.24, 53.12, 62.84, 78.32, 91.64, 95.8, 95.8, 94.08, 92.48, 91.6, 89.88, 87.36, 87.84, 87.32], "power_after": [20.92, 21.04, 21.12, 20.92, 20.92, 20.88, 20.88, 20.92, 21.04, 20.96], "task clock (msec)": 62.46, "page faults": 3243, "cycles": 57150420, "instructions": 94155455, "branch mispredictions": 320781, "branches": 19491698, "ITLB accesses": 27433101, "ITLB misses": 7382, "DTLB misses": 19213, "DTLB accesses": 37123052, "L1I cache accesses": 32027284, "L1I cache misses": 290368, "L1D cache misses": 471338, "L1D cache accesses": 33366668, "LL cache misses": 571063, "LL cache accesses": 583554, "L2D TLB accesses": 196434, "L2D TLB misses": 25171, "L2D cache misses": 329198, "L2D cache accesses": 1814040, "instructions per cycle": 1.6475024155553013, "branch miss rate": 0.016457314288370363, "ITLB miss rate": 0.0002690909788142434, "DTLB miss rate": 0.0005175490420345827, "L2D TLB miss rate": 0.1281397314110592, "L1I cache miss rate": 0.009066269871650684, "L1D cache miss rate": 0.014126013421537926, "L2D cache miss rate": 0.1814722938854711, "LL cache miss rate": 0.9785949543658342}

View File

@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395279 queued and waiting for resources
srun: job 3395279 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
1273379, 1273389]),
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
206459]),
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
1.2290e-01, 2.2235e-01, -1.0000e+00]),
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.5388, 0.2921, 0.7349, ..., 0.6379, 0.9676, 0.6389])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 21.700236320495605 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
62.46 msec task-clock:u # 0.002 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,243 page-faults:u # 51.921 K/sec
57,150,420 cycles:u # 0.915 GHz (90.14%)
94,155,455 instructions:u # 1.65 insn per cycle
<not supported> branches:u
373,032 branch-misses:u
33,654,742 L1-dcache-loads:u # 538.817 M/sec
479,068 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,149,866 L1-icache-loads:u # 514.724 M/sec
293,643 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
0 dTLB-loads:u # 0.000 /sec (5.14%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
25.310174677 seconds time elapsed
125.287203000 seconds user
1680.798909000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
1273379, 1273389]),
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
206459]),
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
1.2290e-01, 2.2235e-01, -1.0000e+00]),
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.6433, 0.3677, 0.3308, ..., 0.5364, 0.2509, 0.4204])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 16.171404361724854 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
320,781 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,491,698 BR_RETIRED:u
19.988421837 seconds time elapsed
112.429117000 seconds user
1245.246161000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
1273379, 1273389]),
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
206459]),
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
1.2290e-01, 2.2235e-01, -1.0000e+00]),
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.9344, 0.9844, 0.2313, ..., 0.8634, 0.6912, 0.9693])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 11.788637161254883 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
27,433,101 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
7,382 ITLB_WALK:u
19,213 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,123,052 L1D_TLB:u
15.542834153 seconds time elapsed
99.681401000 seconds user
906.856853000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
1273379, 1273389]),
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
206459]),
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
1.2290e-01, 2.2235e-01, -1.0000e+00]),
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.2037, 0.6417, 0.9786, ..., 0.8187, 0.4933, 0.1289])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 13.596147060394287 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
32,027,284 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
290,368 L1I_CACHE_REFILL:u
471,338 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,366,668 L1D_CACHE:u
17.325855116 seconds time elapsed
101.368582000 seconds user
1053.826259000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376,
1273379, 1273389]),
col_indices=tensor([ 3, 30, 44, ..., 206363, 206408,
206459]),
values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ...,
1.2290e-01, 2.2235e-01, -1.0000e+00]),
size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.2072, 0.8681, 0.4768, ..., 0.4873, 0.8997, 0.8601])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 14.157796382904053 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':
571,063 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
583,554 LL_CACHE_RD:u
196,434 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
25,171 L2D_TLB_REFILL:u
329,198 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,814,040 L2D_CACHE:u
17.958287837 seconds time elapsed
104.145071000 seconds user
1089.962121000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [94.16, 91.68, 78.92, 60.88, 46.72, 28.36, 22.08, 21.64, 21.64, 21.64], "matrix": "mc2depi", "shape": [525825, 525825], "nnz": 2100225, "% density": 7.595972132902821e-06, "time_s": 11.03979206085205, "power": [95.44, 94.0, 88.76, 72.12, 59.48, 51.92, 53.88, 68.6, 83.2, 97.76, 98.4, 97.12, 97.12, 95.28, 94.12], "power_after": [21.48, 21.44, 21.28, 21.24, 21.16, 21.08, 21.24, 21.24, 21.24, 21.16], "task clock (msec)": 56.14, "page faults": 3289, "cycles": 47515158, "instructions": 72388154, "branch mispredictions": 327042, "branches": 19309026, "ITLB accesses": 26093030, "ITLB misses": 6189, "DTLB misses": 17253, "DTLB accesses": 35168741, "L1I cache accesses": 30539322, "L1I cache misses": 285404, "L1D cache misses": 465747, "L1D cache accesses": 31932803, "LL cache misses": 530261, "LL cache accesses": 551030, "L2D TLB accesses": 183570, "L2D TLB misses": 23883, "L2D cache misses": 297006, "L2D cache accesses": 1721848, "instructions per cycle": 1.5234749719236964, "branch miss rate": 0.01693726032581861, "ITLB miss rate": 0.0002371897782664566, "DTLB miss rate": 0.0004905776979619486, "L2D TLB miss rate": 0.13010295799967314, "L1I cache miss rate": 0.009345459601231487, "L1D cache miss rate": 0.014585221347465175, "L2D cache miss rate": 0.1724925777420539, "LL cache miss rate": 0.9623087672177558}

View File

@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395280 queued and waiting for resources
srun: job 3395280 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
2100223, 2100225]),
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
525824]),
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
tensor([0.7162, 0.9445, 0.3087, ..., 0.2863, 0.2977, 0.0994])
Matrix: mc2depi
Shape: torch.Size([525825, 525825])
NNZ: 2100225
Density: 7.595972132902821e-06
Time: 14.228392839431763 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
56.14 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,289 page-faults:u # 58.584 K/sec
47,515,158 cycles:u # 0.846 GHz (55.54%)
72,388,154 instructions:u # 1.52 insn per cycle (79.69%)
<not supported> branches:u
369,139 branch-misses:u
32,820,508 L1-dcache-loads:u # 584.601 M/sec
483,558 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,317,848 L1-icache-loads:u # 557.836 M/sec
288,398 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
39,511,659 dTLB-loads:u # 703.784 M/sec (36.64%)
0 dTLB-load-misses:u (3.47%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
18.186987302 seconds time elapsed
124.639912000 seconds user
1088.590740000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
2100223, 2100225]),
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
525824]),
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
tensor([0.4954, 0.2907, 0.0979, ..., 0.0742, 0.4519, 0.0278])
Matrix: mc2depi
Shape: torch.Size([525825, 525825])
NNZ: 2100225
Density: 7.595972132902821e-06
Time: 11.948119163513184 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
327,042 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,309,026 BR_RETIRED:u
15.715674756 seconds time elapsed
115.898749000 seconds user
910.018676000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
2100223, 2100225]),
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
525824]),
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
tensor([0.1402, 0.9048, 0.8859, ..., 0.9542, 0.3509, 0.0695])
Matrix: mc2depi
Shape: torch.Size([525825, 525825])
NNZ: 2100225
Density: 7.595972132902821e-06
Time: 14.170094966888428 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
26,093,030 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,189 ITLB_WALK:u
17,253 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,168,741 L1D_TLB:u
18.132605509 seconds time elapsed
121.020111000 seconds user
1090.508165000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
2100223, 2100225]),
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
525824]),
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
tensor([0.1192, 0.6084, 0.4643, ..., 0.3445, 0.4658, 0.7085])
Matrix: mc2depi
Shape: torch.Size([525825, 525825])
NNZ: 2100225
Density: 7.595972132902821e-06
Time: 13.925398826599121 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
30,539,322 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
285,404 L1I_CACHE_REFILL:u
465,747 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,932,803 L1D_CACHE:u
17.812911214 seconds time elapsed
119.918777000 seconds user
1067.928403000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
2100223, 2100225]),
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
525824]),
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
tensor([0.2075, 0.7442, 0.4477, ..., 0.0794, 0.0859, 0.8652])
Matrix: mc2depi
Shape: torch.Size([525825, 525825])
NNZ: 2100225
Density: 7.595972132902821e-06
Time: 12.866743564605713 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
530,261 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
551,030 LL_CACHE_RD:u
183,570 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,883 L2D_TLB_REFILL:u
297,006 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,721,848 L2D_CACHE:u
16.812811712 seconds time elapsed
117.780323000 seconds user
986.834040000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.08, 25.12, 24.68, 23.68, 22.84, 21.96, 21.08, 20.96, 20.8, 20.96], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.9992897510528564, "power": [29.48, 30.52, 31.88, 31.24, 34.32], "power_after": [20.4, 20.6, 20.64, 20.76, 20.92, 20.84, 20.88, 20.88, 20.88, 20.84], "task clock (msec)": 52.68, "page faults": 3272, "cycles": 63019732, "instructions": 73518898, "branch mispredictions": 333423, "branches": 19435905, "ITLB accesses": 27447537, "ITLB misses": 6417, "DTLB misses": 18300, "DTLB accesses": 37569384, "L1I cache accesses": 30830481, "L1I cache misses": 290545, "L1D cache misses": 473875, "L1D cache accesses": 32284772, "LL cache misses": 529403, "LL cache accesses": 549794, "L2D TLB accesses": 198306, "L2D TLB misses": 24497, "L2D cache misses": 298519, "L2D cache accesses": 1772795, "instructions per cycle": 1.1666012480027683, "branch miss rate": 0.017155002558409294, "ITLB miss rate": 0.00023379146915805232, "DTLB miss rate": 0.000487098750408045, "L2D TLB miss rate": 0.12353131019737174, "L1I cache miss rate": 0.009423952873132274, "L1D cache miss rate": 0.014677972636758903, "L2D cache miss rate": 0.16838890001381998, "LL cache miss rate": 0.9629115632400499}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395271 queued and waiting for resources
srun: job 3395271 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.3559, 0.4732, 0.3024, ..., 0.9176, 0.7712, 0.4949])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 1.0082497596740723 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
52.68 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,272 page-faults:u # 62.105 K/sec
63,019,732 cycles:u # 1.196 GHz (70.67%)
73,518,898 instructions:u # 1.17 insn per cycle (85.80%)
<not supported> branches:u
359,236 branch-misses:u (99.44%)
31,459,751 L1-dcache-loads:u # 597.131 M/sec
460,969 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,975,208 L1-icache-loads:u # 568.954 M/sec
281,710 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
59,589,523 dTLB-loads:u # 1.131 G/sec (17.10%)
0 dTLB-load-misses:u (1.27%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.456867719 seconds time elapsed
16.389568000 seconds user
29.247355000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.0123, 0.4107, 0.7785, ..., 0.7964, 0.7541, 0.4153])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 1.030029058456421 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
333,423 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,435,905 BR_RETIRED:u
4.359656946 seconds time elapsed
16.490532000 seconds user
28.366462000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.1898, 0.0740, 0.4564, ..., 0.7987, 0.1017, 0.5949])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 1.004878044128418 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
27,447,537 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,417 ITLB_WALK:u
18,300 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,569,384 L1D_TLB:u
4.355627133 seconds time elapsed
15.883078000 seconds user
27.120829000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.1682, 0.9350, 0.9210, ..., 0.3758, 0.2263, 0.1068])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 1.0207850933074951 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
30,830,481 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
290,545 L1I_CACHE_REFILL:u
473,875 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,284,772 L1D_CACHE:u
4.427088851 seconds time elapsed
15.711555000 seconds user
29.627091000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
nnz=39994, layout=torch.sparse_csr)
tensor([0.9351, 0.3836, 0.0822, ..., 0.9798, 0.3726, 0.7394])
Matrix: p2p-Gnutella04
Shape: torch.Size([10879, 10879])
NNZ: 39994
Density: 0.0003379223282393842
Time: 1.041510820388794 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
529,403 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
549,794 LL_CACHE_RD:u
198,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,497 L2D_TLB_REFILL:u
298,519 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,772,795 L2D_CACHE:u
4.454107604 seconds time elapsed
16.577921000 seconds user
29.390427000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.72, 30.6, 28.68, 26.48, 22.44, 21.4, 21.28, 21.08, 21.32, 21.6], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.718301773071289, "power": [31.52, 32.48, 33.64, 33.88, 33.44, 31.52], "power_after": [20.96, 20.84, 20.92, 20.8, 20.76, 20.76, 20.76, 20.68, 20.72, 20.92], "task clock (msec)": 67.08, "page faults": 3303, "cycles": 61261862, "instructions": 83757591, "branch mispredictions": 329248, "branches": 19953212, "ITLB accesses": 27084694, "ITLB misses": 7107, "DTLB misses": 17529, "DTLB accesses": 36684333, "L1I cache accesses": 32158234, "L1I cache misses": 286484, "L1D cache misses": 474161, "L1D cache accesses": 33730073, "LL cache misses": 550064, "LL cache accesses": 565245, "L2D TLB accesses": 191046, "L2D TLB misses": 23775, "L2D cache misses": 307419, "L2D cache accesses": 1772169, "instructions per cycle": 1.3672060930828385, "branch miss rate": 0.016501002445120115, "ITLB miss rate": 0.0002623991247602797, "DTLB miss rate": 0.0004778334118818516, "L2D TLB miss rate": 0.12444646838981188, "L1I cache miss rate": 0.008908573773049851, "L1D cache miss rate": 0.014057514788064645, "L2D cache miss rate": 0.1734704760099065, "LL cache miss rate": 0.973142619572044}

View File

@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394141 queued and waiting for resources
srun: job 3394141 has been allocated resources
srun: job 3395289 queued and waiting for resources
srun: job 3395289 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.6616, 0.1149, 0.0110, ..., 0.2481, 0.7877, 0.5589])
tensor([0.3210, 0.3418, 0.9584, ..., 0.8929, 0.9807, 0.5532])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.16974925994873047 seconds
Time: 1.6565663814544678 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
61.92 msec task-clock:u # 0.017 CPUs utilized
67.08 msec task-clock:u # 0.013 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,281 page-faults:u # 52.988 K/sec
66,250,810 cycles:u # 1.070 GHz (62.94%)
75,178,179 instructions:u # 1.13 insn per cycle (83.47%)
3,303 page-faults:u # 49.241 K/sec
61,261,862 cycles:u # 0.913 GHz (49.19%)
83,757,591 instructions:u # 1.37 insn per cycle (88.30%)
<not supported> branches:u
367,749 branch-misses:u
33,064,095 L1-dcache-loads:u # 533.986 M/sec
465,542 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
364,692 branch-misses:u
31,954,743 L1-dcache-loads:u # 476.379 M/sec
490,953 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,552,264 L1-icache-loads:u # 509.570 M/sec
296,060 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
73,155,896 dTLB-loads:u # 1.181 G/sec (17.31%)
30,490,915 L1-icache-loads:u # 454.556 M/sec
291,964 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
32,131,046 dTLB-loads:u # 479.007 M/sec (19.20%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.675971385 seconds time elapsed
5.107407925 seconds time elapsed
14.857293000 seconds user
29.791187000 seconds sys
16.045361000 seconds user
30.574855000 seconds sys
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.1683, 0.8999, 0.0578, ..., 0.5893, 0.0628, 0.8262])
tensor([0.4851, 0.2524, 0.2134, ..., 0.5976, 0.0089, 0.2284])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.2227163314819336 seconds
Time: 1.6902527809143066 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
332,366 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,076,182 BR_RETIRED:u
329,248 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,953,212 BR_RETIRED:u
3.532329673 seconds time elapsed
4.990707186 seconds time elapsed
14.883993000 seconds user
28.516661000 seconds sys
16.713526000 seconds user
27.761595000 seconds sys
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.8389, 0.5614, 0.9033, ..., 0.2231, 0.0349, 0.5167])
tensor([0.1844, 0.9003, 0.0155, ..., 0.5184, 0.1445, 0.3588])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.17095375061035156 seconds
Time: 1.6478993892669678 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
27,005,133 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,791 ITLB_WALK:u
13,403 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,457,054 L1D_TLB:u
27,084,694 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
7,107 ITLB_WALK:u
17,529 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,684,333 L1D_TLB:u
3.579041343 seconds time elapsed
5.010572757 seconds time elapsed
14.885159000 seconds user
29.562650000 seconds sys
16.570396000 seconds user
27.387405000 seconds sys
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.8849, 0.5982, 0.0578, ..., 0.9975, 0.2204, 0.0718])
tensor([0.2313, 0.8375, 0.3065, ..., 0.2374, 0.2281, 0.2100])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.18003463745117188 seconds
Time: 1.637598991394043 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
32,367,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
287,524 L1I_CACHE_REFILL:u
467,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,022,862 L1D_CACHE:u
32,158,234 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
286,484 L1I_CACHE_REFILL:u
474,161 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,730,073 L1D_CACHE:u
3.405321132 seconds time elapsed
4.963121627 seconds time elapsed
15.291636000 seconds user
28.005015000 seconds sys
16.730431000 seconds user
29.869416000 seconds sys
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2790, 0.1291, 0.6053, ..., 0.1651, 0.4973, 0.6821])
tensor([0.5006, 0.8470, 0.3527, ..., 0.3901, 0.3581, 0.1154])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 0.22036528587341309 seconds
Time: 1.6584653854370117 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
535,707 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
556,316 LL_CACHE_RD:u
150,149 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
18,418 L2D_TLB_REFILL:u
297,042 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,687,364 L2D_CACHE:u
550,064 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
565,245 LL_CACHE_RD:u
191,046 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,775 L2D_TLB_REFILL:u
307,419 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,772,169 L2D_CACHE:u
3.505209576 seconds time elapsed
5.019317303 seconds time elapsed
15.297738000 seconds user
29.848441000 seconds sys
16.518292000 seconds user
30.069880000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [86.48, 72.16, 59.36, 41.84, 28.44, 22.96, 22.92, 22.92, 23.04, 23.24], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.431199073791504, "power": [35.16, 36.2, 36.72, 37.52, 37.52], "power_after": [21.32, 21.2, 21.2, 21.28, 21.52, 21.44, 21.92, 21.68, 21.6, 21.36], "task clock (msec)": 59.85, "page faults": 3318, "cycles": 76505130, "instructions": 72343215, "branch mispredictions": 322338, "branches": 19784096, "ITLB accesses": 27270404, "ITLB misses": 6607, "DTLB misses": 17981, "DTLB accesses": 36751047, "L1I cache accesses": 30620441, "L1I cache misses": 302139, "L1D cache misses": 471011, "L1D cache accesses": 32141810, "LL cache misses": 531907, "LL cache accesses": 545159, "L2D TLB accesses": 188244, "L2D TLB misses": 23034, "L2D cache misses": 293848, "L2D cache accesses": 1757551, "instructions per cycle": 0.945599530384433, "branch miss rate": 0.016292783860329025, "ITLB miss rate": 0.00024227730546272803, "DTLB miss rate": 0.0004892649725054092, "L2D TLB miss rate": 0.12236246573595971, "L1I cache miss rate": 0.009867232153841285, "L1D cache miss rate": 0.014654152955294054, "L2D cache miss rate": 0.1671917344077071, "LL cache miss rate": 0.9756914955086498}

View File

@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395288 queued and waiting for resources
srun: job 3395288 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.9962, 0.2550, 0.9564, ..., 0.7113, 0.6635, 0.3831])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 1.4832944869995117 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
59.85 msec task-clock:u # 0.012 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,318 page-faults:u # 55.439 K/sec
76,505,130 cycles:u # 1.278 GHz (43.11%)
72,343,215 instructions:u # 0.95 insn per cycle (62.06%)
<not supported> branches:u
371,337 branch-misses:u (77.63%)
33,969,604 L1-dcache-loads:u # 567.579 M/sec (88.85%)
472,023 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,728,689 L1-icache-loads:u # 530.137 M/sec
299,356 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
50,921,898 dTLB-loads:u # 850.825 M/sec (39.93%)
90,542 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (36.53%)
11,563,883 iTLB-loads:u # 193.214 M/sec (20.26%)
<not counted> iTLB-load-misses:u (0.00%)
4.953668960 seconds time elapsed
16.652653000 seconds user
30.408692000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.9968, 0.7101, 0.9319, ..., 0.2871, 0.7386, 0.8934])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 1.3799591064453125 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
322,338 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,784,096 BR_RETIRED:u
4.633544255 seconds time elapsed
16.572749000 seconds user
26.228349000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.3551, 0.8297, 0.9950, ..., 0.9625, 0.7129, 0.2173])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 1.400240182876587 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
27,270,404 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,607 ITLB_WALK:u
17,981 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,751,047 L1D_TLB:u
4.696092090 seconds time elapsed
15.781810000 seconds user
28.383624000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.3600, 0.0388, 0.5262, ..., 0.5849, 0.3707, 0.1514])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 1.4545772075653076 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
30,620,441 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,139 L1I_CACHE_REFILL:u
471,011 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,141,810 L1D_CACHE:u
4.897499310 seconds time elapsed
16.207163000 seconds user
32.246890000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
nnz=54705, layout=torch.sparse_csr)
tensor([0.1220, 0.8435, 0.7035, ..., 0.2109, 0.0289, 0.0715])
Matrix: p2p-Gnutella25
Shape: torch.Size([22687, 22687])
NNZ: 54705
Density: 0.00010628522108964806
Time: 1.4200170040130615 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
531,907 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
545,159 LL_CACHE_RD:u
188,244 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,034 L2D_TLB_REFILL:u
293,848 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,757,551 L2D_CACHE:u
4.683262937 seconds time elapsed
16.111909000 seconds user
29.660483000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.44, 16.44, 16.44, 16.84, 16.72, 16.6, 16.72, 16.84, 16.68, 16.84], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 2.896674871444702, "power": [56.32, 68.24, 71.76, 59.48, 47.6, 48.76, 52.6], "power_after": [16.92, 17.0, 16.96, 16.8, 16.48, 16.52, 16.52, 16.52, 16.24, 16.36], "task clock (msec)": 56.47, "page faults": 3222, "cycles": 69105836, "instructions": 89065155, "branch mispredictions": 333669, "branches": 20078755, "ITLB accesses": 26015038, "ITLB misses": 5212, "DTLB misses": 17039, "DTLB accesses": 35296010, "L1I cache accesses": 31837486, "L1I cache misses": 293353, "L1D cache misses": 462358, "L1D cache accesses": 33478540, "LL cache misses": 546516, "LL cache accesses": 559865, "L2D TLB accesses": 190400, "L2D TLB misses": 23787, "L2D cache misses": 307032, "L2D cache accesses": 1768186, "instructions per cycle": 1.288822480926213, "branch miss rate": 0.016618012421586895, "ITLB miss rate": 0.00020034566161310238, "DTLB miss rate": 0.00048274578344691083, "L2D TLB miss rate": 0.12493172268907562, "L1I cache miss rate": 0.009214075508348869, "L1D cache miss rate": 0.013810578358554464, "L2D cache miss rate": 0.17364236567872385, "LL cache miss rate": 0.9761567520741607}

View File

@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394142 queued and waiting for resources
srun: job 3394142 has been allocated resources
srun: job 3395282 queued and waiting for resources
srun: job 3395282 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.5867, 0.3729, 0.0718, ..., 0.5551, 0.6046, 0.6005])
tensor([0.0302, 0.1334, 0.4142, ..., 0.9516, 0.6030, 0.3883])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.3765556812286377 seconds
Time: 2.790724277496338 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
65.91 msec task-clock:u # 0.017 CPUs utilized
56.47 msec task-clock:u # 0.009 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,247 page-faults:u # 49.267 K/sec
92,293,071 cycles:u # 1.400 GHz (58.72%)
76,208,632 instructions:u # 0.83 insn per cycle (75.47%)
3,222 page-faults:u # 57.061 K/sec
69,105,836 cycles:u # 1.224 GHz (53.55%)
89,065,155 instructions:u # 1.29 insn per cycle (92.79%)
<not supported> branches:u
336,620 branch-misses:u (89.96%)
33,256,017 L1-dcache-loads:u # 504.599 M/sec
479,188 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
367,525 branch-misses:u
32,122,654 L1-dcache-loads:u # 568.886 M/sec
467,921 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,686,331 L1-icache-loads:u # 480.782 M/sec
297,521 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
55,295,804 dTLB-loads:u # 839.012 M/sec (27.47%)
103,616 dTLB-load-misses:u # 0.19% of all dTLB cache accesses (20.17%)
30,765,438 L1-icache-loads:u # 544.850 M/sec
289,327 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
24,642,710 dTLB-loads:u # 436.418 M/sec (11.11%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.803094533 seconds time elapsed
6.334250152 seconds time elapsed
16.585763000 seconds user
62.703127000 seconds sys
32.099712000 seconds user
240.206702000 seconds sys
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.2027, 0.2128, 0.5093, ..., 0.8069, 0.6413, 0.1136])
tensor([0.6147, 0.4171, 0.2258, ..., 0.0253, 0.8932, 0.8040])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2942969799041748 seconds
Time: 2.092158079147339 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
320,083 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,285,106 BR_RETIRED:u
333,669 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,078,755 BR_RETIRED:u
3.763535833 seconds time elapsed
5.557038624 seconds time elapsed
16.476022000 seconds user
55.208213000 seconds sys
29.074016000 seconds user
186.372846000 seconds sys
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.5930, 0.8044, 0.8115, ..., 0.6366, 0.1026, 0.6914])
tensor([0.0146, 0.2151, 0.1948, ..., 0.7633, 0.4329, 0.7106])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2431955337524414 seconds
Time: 3.1269772052764893 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
26,853,940 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,728 ITLB_WALK:u
13,955 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,111,059 L1D_TLB:u
26,015,038 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,212 ITLB_WALK:u
17,039 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,296,010 L1D_TLB:u
3.752433570 seconds time elapsed
6.550798214 seconds time elapsed
16.433982000 seconds user
53.207908000 seconds sys
36.334689000 seconds user
263.614426000 seconds sys
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9666, 0.8206, 0.6252, ..., 0.5180, 0.8170, 0.7406])
tensor([0.1810, 0.5208, 0.0542, ..., 0.6108, 0.4905, 0.8918])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.15313339233398438 seconds
Time: 1.9065814018249512 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
32,554,796 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,729 L1I_CACHE_REFILL:u
473,779 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,117,102 L1D_CACHE:u
31,837,486 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
293,353 L1I_CACHE_REFILL:u
462,358 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,478,540 L1D_CACHE:u
3.595579651 seconds time elapsed
5.319975004 seconds time elapsed
15.817851000 seconds user
44.491315000 seconds sys
26.918342000 seconds user
175.603919000 seconds sys
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9800, 0.9021, 0.5677, ..., 0.3869, 0.2468, 0.3286])
tensor([0.8456, 0.8302, 0.2078, ..., 0.8155, 0.5148, 0.5853])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2539215087890625 seconds
Time: 3.8523874282836914 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
535,040 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
547,502 LL_CACHE_RD:u
179,876 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,809 L2D_TLB_REFILL:u
298,620 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,722,959 L2D_CACHE:u
546,516 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
559,865 LL_CACHE_RD:u
190,400 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,787 L2D_TLB_REFILL:u
307,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,768,186 L2D_CACHE:u
3.549060962 seconds time elapsed
7.266305868 seconds time elapsed
16.570077000 seconds user
52.238012000 seconds sys
37.085321000 seconds user
320.780766000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.6, 16.64, 17.04, 17.08, 16.92, 17.24, 16.88, 16.36, 16.4, 16.4], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 2.970583200454712, "power": [23.04, 23.28, 23.76, 24.12, 21.4, 26.28, 26.36], "power_after": [16.16, 16.16, 16.52, 16.48, 16.52, 16.44, 16.36, 16.48, 16.76, 16.6], "task clock (msec)": 52.61, "page faults": 3292, "cycles": 42915672, "instructions": 71002596, "branch mispredictions": 344300, "branches": 20224759, "ITLB accesses": 26039851, "ITLB misses": 5035, "DTLB misses": 16402, "DTLB accesses": 34820806, "L1I cache accesses": 31878105, "L1I cache misses": 299057, "L1D cache misses": 471869, "L1D cache accesses": 33450518, "LL cache misses": 530093, "LL cache accesses": 551126, "L2D TLB accesses": 188315, "L2D TLB misses": 22856, "L2D cache misses": 299885, "L2D cache accesses": 1763155, "instructions per cycle": 1.6544677664607, "branch miss rate": 0.01702368863826758, "ITLB miss rate": 0.00019335748119296073, "DTLB miss rate": 0.0004710402165877493, "L2D TLB miss rate": 0.12137110692191275, "L1I cache miss rate": 0.009381266546427399, "L1D cache miss rate": 0.014106478111938357, "L2D cache miss rate": 0.1700843090936418, "LL cache miss rate": 0.9618363132931489}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394145 queued and waiting for resources
srun: job 3394145 has been allocated resources
srun: job 3395268 queued and waiting for resources
srun: job 3395268 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1402, 0.0708, 0.4576, ..., 0.4700, 0.5629, 0.9120])
tensor([0.4029, 0.5373, 0.8376, ..., 0.9299, 0.3127, 0.4778])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3585643768310547 seconds
Time: 2.9858975410461426 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
60.77 msec task-clock:u # 0.016 CPUs utilized
52.61 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,361 page-faults:u # 55.311 K/sec
63,493,475 cycles:u # 1.045 GHz (49.59%)
91,578,911 instructions:u # 1.44 insn per cycle (92.22%)
3,292 page-faults:u # 62.576 K/sec
42,915,672 cycles:u # 0.816 GHz (55.04%)
71,002,596 instructions:u # 1.65 insn per cycle (81.89%)
<not supported> branches:u
374,941 branch-misses:u
33,905,978 L1-dcache-loads:u # 557.979 M/sec
470,553 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
369,793 branch-misses:u
33,163,106 L1-dcache-loads:u # 630.381 M/sec
471,533 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,247,376 L1-icache-loads:u # 530.684 M/sec
299,037 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
27,428,635 dTLB-loads:u # 451.384 M/sec (13.50%)
<not counted> dTLB-load-misses:u (0.00%)
31,640,002 L1-icache-loads:u # 601.429 M/sec
297,919 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
48,642,108 dTLB-loads:u # 924.614 M/sec (29.77%)
0 dTLB-load-misses:u (5.06%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
3.818532962 seconds time elapsed
6.215745697 seconds time elapsed
15.563570000 seconds user
30.194882000 seconds sys
17.600216000 seconds user
30.777524000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1841, 0.4436, 0.8281, ..., 0.0546, 0.5967, 0.9496])
tensor([0.8706, 0.3724, 0.8779, ..., 0.4299, 0.0920, 0.4238])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3050577640533447 seconds
Time: 2.9231789112091064 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
329,084 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,406,595 BR_RETIRED:u
344,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,224,759 BR_RETIRED:u
3.673527837 seconds time elapsed
6.297708483 seconds time elapsed
15.520198000 seconds user
29.068211000 seconds sys
17.546068000 seconds user
26.920857000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1849, 0.5991, 0.5040, ..., 0.4916, 0.4789, 0.8887])
tensor([0.2988, 0.0160, 0.4360, ..., 0.7543, 0.0919, 0.2321])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3605458736419678 seconds
Time: 2.9701316356658936 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
26,859,919 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,237 ITLB_WALK:u
16,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,348,977 L1D_TLB:u
26,039,851 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,035 ITLB_WALK:u
16,402 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
34,820,806 L1D_TLB:u
3.769690988 seconds time elapsed
6.227977259 seconds time elapsed
15.173839000 seconds user
29.963392000 seconds sys
17.937381000 seconds user
30.196552000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.0513, 0.4498, 0.6748, ..., 0.2114, 0.6847, 0.2188])
tensor([0.5797, 0.8992, 0.8317, ..., 0.0283, 0.7124, 0.2690])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3485410213470459 seconds
Time: 2.968733072280884 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
30,979,764 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
292,038 L1I_CACHE_REFILL:u
469,219 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,411,890 L1D_CACHE:u
31,878,105 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
299,057 L1I_CACHE_REFILL:u
471,869 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,450,518 L1D_CACHE:u
3.598754329 seconds time elapsed
6.278062824 seconds time elapsed
16.139631000 seconds user
29.287026000 seconds sys
17.822878000 seconds user
27.932170000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.7270, 0.7858, 0.3165, ..., 0.7139, 0.8270, 0.9478])
tensor([0.0630, 0.5194, 0.8720, ..., 0.9537, 0.3959, 0.5550])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3687746524810791 seconds
Time: 2.9069995880126953 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
571,870 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
598,306 LL_CACHE_RD:u
205,488 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
26,392 L2D_TLB_REFILL:u
342,141 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,857,697 L2D_CACHE:u
530,093 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
551,126 LL_CACHE_RD:u
188,315 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,856 L2D_TLB_REFILL:u
299,885 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,763,155 L2D_CACHE:u
3.726794738 seconds time elapsed
6.075529293 seconds time elapsed
15.231331000 seconds user
32.108693000 seconds sys
17.073983000 seconds user
27.811966000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.36, 30.64, 31.12, 24.52, 24.16, 23.12, 22.08, 21.28, 21.16, 20.88], "matrix": "rma10", "shape": [46835, 46835], "nnz": 2374001, "% density": 0.0010822805369125833, "time_s": 68.86891412734985, "power": [81.8, 81.32, 75.08, 63.48, 51.92, 51.96, 51.8, 65.0, 65.0, 75.12, 82.68, 82.32, 82.08, 82.76, 82.8, 83.6, 83.36, 83.08, 82.88, 83.0, 83.32, 83.32, 83.36, 84.64, 84.56, 84.24, 83.52, 83.4, 83.36, 83.36, 83.72, 84.16, 83.24, 82.76, 82.76, 82.96, 82.36, 82.24, 81.64, 81.6, 81.4, 81.6, 81.88, 82.32, 83.04, 83.48, 83.48, 84.32, 84.04, 84.32, 83.16, 82.44, 81.96, 81.4, 81.8, 82.08, 81.8, 81.84, 82.04, 82.04, 82.08, 82.44, 82.6, 82.84, 83.8, 84.24, 84.6, 85.4, 85.6, 86.0, 85.72, 85.36], "power_after": [21.96, 21.88, 21.96, 21.96, 22.0, 21.68, 21.44, 21.16, 21.04, 20.92], "task clock (msec)": 58.3, "page faults": 3281, "cycles": 81319364, "instructions": 90830397, "branch mispredictions": 342237, "branches": 20641135, "ITLB accesses": 27974213, "ITLB misses": 6660, "DTLB misses": 18441, "DTLB accesses": 37780346, "L1I cache accesses": 31166891, "L1I cache misses": 291301, "L1D cache misses": 477186, "L1D cache accesses": 32682323, "LL cache misses": 538552, "LL cache accesses": 552543, "L2D TLB accesses": 202351, "L2D TLB misses": 24178, "L2D cache misses": 298051, "L2D cache accesses": 1775481, "instructions per cycle": 1.1169590185186398, "branch miss rate": 0.01658033824205888, "ITLB miss rate": 0.00023807640272132053, "DTLB miss rate": 0.00048811093471722044, "L2D TLB miss rate": 0.11948544855226809, "L1I cache miss rate": 0.00934648887500521, "L1D cache miss rate": 0.014600736918241704, "L2D cache miss rate": 0.1678705657790762, "LL cache miss rate": 0.9746788937693537}

View File

@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395286 queued and waiting for resources
srun: job 3395286 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.4937, 0.5946, 0.4240, ..., 0.9888, 0.5278, 0.9155])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 52.320035219192505 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
58.30 msec task-clock:u # 0.001 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,281 page-faults:u # 56.279 K/sec
81,319,364 cycles:u # 1.395 GHz (62.38%)
90,830,397 instructions:u # 1.12 insn per cycle (94.62%)
<not supported> branches:u
358,947 branch-misses:u
32,561,141 L1-dcache-loads:u # 558.523 M/sec
477,147 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,044,361 L1-icache-loads:u # 532.506 M/sec
286,125 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
29,678,379 dTLB-loads:u # 509.075 M/sec (5.72%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
56.145511940 seconds time elapsed
269.541895000 seconds user
3993.928150000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.2401, 0.9608, 0.9686, ..., 0.2643, 0.1097, 0.0695])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 65.29214668273926 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
342,237 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,641,135 BR_RETIRED:u
69.131216008 seconds time elapsed
324.908899000 seconds user
4969.165543000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.5237, 0.3525, 0.2809, ..., 0.8641, 0.3894, 0.4198])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 66.05637407302856 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
27,974,213 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,660 ITLB_WALK:u
18,441 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,780,346 L1D_TLB:u
69.880637029 seconds time elapsed
320.759259000 seconds user
5037.255757000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.8185, 0.4278, 0.7553, ..., 0.5022, 0.1058, 0.0783])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 63.55399775505066 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
31,166,891 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
291,301 L1I_CACHE_REFILL:u
477,186 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,682,323 L1D_CACHE:u
67.517251505 seconds time elapsed
319.301754000 seconds user
4839.755901000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.8358, 0.0086, 0.1779, ..., 0.6354, 0.7134, 0.5745])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 63.55393171310425 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':
538,552 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
552,543 LL_CACHE_RD:u
202,351 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,178 L2D_TLB_REFILL:u
298,051 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,775,481 L2D_CACHE:u
67.538674790 seconds time elapsed
321.810383000 seconds user
4836.154538000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.56, 23.12, 22.32, 22.28, 22.28, 21.96, 21.76, 21.72, 21.88, 21.84], "matrix": "tn2010", "shape": [240116, 240116], "nnz": 1193966, "% density": 2.070855328296721e-05, "time_s": 16.282614707946777, "power": [85.48, 85.84, 79.28, 70.16, 55.52, 49.48, 49.48, 60.48, 76.32, 88.88, 91.0, 91.0, 90.68, 88.32, 86.92, 86.4, 88.08, 86.8, 87.32, 87.8], "power_after": [21.68, 21.48, 21.44, 21.36, 21.52, 21.4, 21.4, 21.32, 21.2, 21.04], "task clock (msec)": 68.11, "page faults": 3486, "cycles": 70427921, "instructions": 85638293, "branch mispredictions": 333780, "branches": 19402540, "ITLB accesses": 26935483, "ITLB misses": 5639, "DTLB misses": 16688, "DTLB accesses": 36421540, "L1I cache accesses": 33029213, "L1I cache misses": 302558, "L1D cache misses": 481598, "L1D cache accesses": 34668833, "LL cache misses": 551659, "LL cache accesses": 564579, "L2D TLB accesses": 188346, "L2D TLB misses": 24479, "L2D cache misses": 311796, "L2D cache accesses": 1767924, "instructions per cycle": 1.215970765344614, "branch miss rate": 0.017202902300420462, "ITLB miss rate": 0.0002093521025778524, "DTLB miss rate": 0.00045819040051573877, "L2D TLB miss rate": 0.12996824992301403, "L1I cache miss rate": 0.00916031514284037, "L1D cache miss rate": 0.013891381922200843, "L2D cache miss rate": 0.17636278482559206, "LL cache miss rate": 0.9771156915152707}

View File

@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395275 queued and waiting for resources
srun: job 3395275 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.2511, 0.1104, 0.8257, ..., 0.4006, 0.1534, 0.0009])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.89618182182312 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
68.11 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,486 page-faults:u # 51.182 K/sec
70,427,921 cycles:u # 1.034 GHz (46.81%)
85,638,293 instructions:u # 1.22 insn per cycle (74.19%)
<not supported> branches:u
356,748 branch-misses:u (89.74%)
34,044,117 L1-dcache-loads:u # 499.843 M/sec
481,076 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,553,977 L1-icache-loads:u # 477.965 M/sec
309,127 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
41,245,978 dTLB-loads:u # 605.583 M/sec (33.60%)
127,770 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (15.43%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
16.626373547 seconds time elapsed
101.073288000 seconds user
996.348020000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.0138, 0.1394, 0.6273, ..., 0.8681, 0.0444, 0.2705])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 14.216531038284302 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
333,780 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,402,540 BR_RETIRED:u
17.985093703 seconds time elapsed
106.904608000 seconds user
1091.172933000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.6279, 0.1696, 0.6937, ..., 0.4267, 0.4847, 0.6447])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.462992429733276 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
26,935,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,639 ITLB_WALK:u
16,688 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,421,540 L1D_TLB:u
15.984498303 seconds time elapsed
95.195897000 seconds user
962.237122000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.4060, 0.4915, 0.8557, ..., 0.9902, 0.0548, 0.2450])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 9.298198223114014 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
33,029,213 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,558 L1I_CACHE_REFILL:u
481,598 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,668,833 L1D_CACHE:u
12.985459942 seconds time elapsed
78.950722000 seconds user
727.126874000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.0166, 0.6910, 0.0311, ..., 0.6156, 0.5689, 0.9849])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.012693405151367 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':
551,659 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
564,579 LL_CACHE_RD:u
188,346 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,479 L2D_TLB_REFILL:u
311,796 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,767,924 L2D_CACHE:u
15.749851583 seconds time elapsed
98.008506000 seconds user
926.127594000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [93.52, 87.76, 71.6, 58.32, 39.6, 26.24, 26.24, 22.16, 22.24, 22.24], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 8.478580713272095, "power": [89.68, 88.92, 80.84, 68.96, 56.64, 54.52, 55.88, 70.44, 85.36, 85.36, 98.2, 96.52], "power_after": [21.24, 21.32, 21.16, 21.44, 21.68, 21.76, 21.72, 22.0, 21.72, 21.72], "task clock (msec)": 53.84, "page faults": 3291, "cycles": 66389970, "instructions": 74935543, "branch mispredictions": 330515, "branches": 19475058, "ITLB accesses": 26125490, "ITLB misses": 6431, "DTLB misses": 13728, "DTLB accesses": 35274185, "L1I cache accesses": 30428652, "L1I cache misses": 288897, "L1D cache misses": 475615, "L1D cache accesses": 31855716, "LL cache misses": 553829, "LL cache accesses": 574192, "L2D TLB accesses": 181148, "L2D TLB misses": 23202, "L2D cache misses": 307806, "L2D cache accesses": 1767037, "instructions per cycle": 1.1287178319255153, "branch miss rate": 0.016971194642911976, "ITLB miss rate": 0.00024615806248992844, "DTLB miss rate": 0.0003891797925309968, "L2D TLB miss rate": 0.12808311435952924, "L1I cache miss rate": 0.009494242465949527, "L1D cache miss rate": 0.014930287550278261, "L2D cache miss rate": 0.17419329646181717, "LL cache miss rate": 0.9645362526820297}

View File

@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394146 queued and waiting for resources
srun: job 3394146 has been allocated resources
srun: job 3395284 queued and waiting for resources
srun: job 3395284 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
@ -16,37 +16,38 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4608, 0.1516, 0.8492, ..., 0.8920, 0.4275, 0.8070])
tensor([0.1487, 0.4275, 0.9471, ..., 0.3851, 0.0801, 0.4295])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.3751039505004883 seconds
Time: 8.772023677825928 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
60.55 msec task-clock:u # 0.012 CPUs utilized
53.84 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,490 page-faults:u # 57.638 K/sec
49,977,496 cycles:u # 0.825 GHz (40.93%)
78,622,993 instructions:u # 1.57 insn per cycle (85.37%)
3,291 page-faults:u # 61.127 K/sec
66,389,970 cycles:u # 1.233 GHz (67.37%)
74,935,543 instructions:u # 1.13 insn per cycle (83.30%)
<not supported> branches:u
358,029 branch-misses:u
31,478,500 L1-dcache-loads:u # 519.877 M/sec
479,449 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
365,846 branch-misses:u
31,684,169 L1-dcache-loads:u # 588.504 M/sec
462,583 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,991,824 L1-icache-loads:u # 495.324 M/sec
294,864 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
35,154,647 dTLB-loads:u # 580.589 M/sec (23.19%)
30,260,337 L1-icache-loads:u # 562.058 M/sec
288,196 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
57,721,334 dTLB-loads:u # 1.072 G/sec (18.54%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
4.986156121 seconds time elapsed
12.179628060 seconds time elapsed
23.724703000 seconds user
145.034521000 seconds sys
68.068275000 seconds user
690.223452000 seconds sys
@ -59,21 +60,22 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4697, 0.7121, 0.5987, ..., 0.2619, 0.7308, 0.3129])
tensor([0.9553, 0.9401, 0.7135, ..., 0.8664, 0.5986, 0.8459])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.6881086826324463 seconds
Time: 8.94040060043335 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
327,078 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,135,808 BR_RETIRED:u
330,515 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,475,058 BR_RETIRED:u
5.374156677 seconds time elapsed
12.428594105 seconds time elapsed
25.609168000 seconds user
167.278028000 seconds sys
67.011228000 seconds user
709.528404000 seconds sys
@ -86,23 +88,24 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.9215, 0.6706, 0.8015, ..., 0.8507, 0.8546, 0.4441])
tensor([0.6289, 0.8171, 0.1590, ..., 0.7515, 0.5400, 0.3693])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.2785694599151611 seconds
Time: 14.403366804122925 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
27,608,093 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,616 ITLB_WALK:u
17,185 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,866,957 L1D_TLB:u
26,125,490 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,431 ITLB_WALK:u
13,728 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,274,185 L1D_TLB:u
4.861513311 seconds time elapsed
18.084508405 seconds time elapsed
23.339077000 seconds user
141.584760000 seconds sys
95.162133000 seconds user
1117.716009000 seconds sys
@ -115,23 +118,24 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.8973, 0.5228, 0.4492, ..., 0.7677, 0.7722, 0.1700])
tensor([0.8824, 0.0692, 0.7225, ..., 0.8736, 0.6854, 0.7514])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.1654376983642578 seconds
Time: 9.64679503440857 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
32,639,204 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
309,643 L1I_CACHE_REFILL:u
478,856 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,280,618 L1D_CACHE:u
30,428,652 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
288,897 L1I_CACHE_REFILL:u
475,615 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
31,855,716 L1D_CACHE:u
4.677973310 seconds time elapsed
13.170070008 seconds time elapsed
22.972655000 seconds user
125.062401000 seconds sys
68.362809000 seconds user
761.360459000 seconds sys
@ -144,25 +148,26 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4542, 0.7095, 0.5701, ..., 0.2172, 0.8829, 0.7757])
tensor([0.9552, 0.0509, 0.7738, ..., 0.7722, 0.4417, 0.7772])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.1153452396392822 seconds
Time: 12.372079133987427 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
555,275 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
578,455 LL_CACHE_RD:u
188,723 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,635 L2D_TLB_REFILL:u
319,663 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,799,940 L2D_CACHE:u
553,829 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
574,192 LL_CACHE_RD:u
181,148 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,202 L2D_TLB_REFILL:u
307,806 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,767,037 L2D_CACHE:u
4.655024760 seconds time elapsed
15.923392394 seconds time elapsed
23.104641000 seconds user
122.294597000 seconds sys
83.307253000 seconds user
958.949992000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [32.08, 31.8, 28.68, 27.6, 22.96, 22.08, 21.0, 20.84, 20.68, 20.72], "matrix": "va2010", "shape": [285762, 285762], "nnz": 1402128, "% density": 1.717033263003816e-05, "time_s": 14.632386922836304, "power": [85.16, 83.48, 76.96, 67.44, 54.04, 51.4, 54.24, 66.76, 83.2, 96.44, 96.44, 95.84, 94.24, 92.36, 91.2, 89.32, 87.48, 88.68, 88.24], "power_after": [21.12, 21.0, 21.16, 21.4, 21.32, 21.36, 21.36, 21.12, 20.76, 20.84], "task clock (msec)": 57.32, "page faults": 3280, "cycles": 39497791, "instructions": 64385555, "branch mispredictions": 332792, "branches": 19983954, "ITLB accesses": 27156853, "ITLB misses": 6466, "DTLB misses": 18244, "DTLB accesses": 36466301, "L1I cache accesses": 30929971, "L1I cache misses": 291811, "L1D cache misses": 473063, "L1D cache accesses": 32462905, "LL cache misses": 544953, "LL cache accesses": 565172, "L2D TLB accesses": 183225, "L2D TLB misses": 23924, "L2D cache misses": 301362, "L2D cache accesses": 1756590, "instructions per cycle": 1.6301052127193645, "branch miss rate": 0.01665296067034582, "ITLB miss rate": 0.00023809828038616994, "DTLB miss rate": 0.000500297521264907, "L2D TLB miss rate": 0.13057170145995362, "L1I cache miss rate": 0.009434570759862659, "L1D cache miss rate": 0.014572417348354991, "L2D cache miss rate": 0.17156080815671274, "LL cache miss rate": 0.964225050073252}

View File

@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395277 queued and waiting for resources
srun: job 3395277 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
1402123, 1402128]),
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
285760]),
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
9832.]), size=(285762, 285762), nnz=1402128,
layout=torch.sparse_csr)
tensor([0.2920, 0.3583, 0.0598, ..., 0.2208, 0.1741, 0.4955])
Matrix: va2010
Shape: torch.Size([285762, 285762])
NNZ: 1402128
Density: 1.717033263003816e-05
Time: 14.792448997497559 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
57.32 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,280 page-faults:u # 57.220 K/sec
39,497,791 cycles:u # 0.689 GHz (54.25%)
64,385,555 instructions:u # 1.63 insn per cycle (81.24%)
<not supported> branches:u
362,674 branch-misses:u
33,532,520 L1-dcache-loads:u # 584.977 M/sec
481,355 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,924,348 L1-icache-loads:u # 556.922 M/sec
296,637 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
43,420,143 dTLB-loads:u # 757.467 M/sec (40.22%)
30,923 dTLB-load-misses:u # 0.07% of all dTLB cache accesses (19.05%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)
18.678937115 seconds time elapsed
112.979167000 seconds user
1135.785668000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
1402123, 1402128]),
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
285760]),
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
9832.]), size=(285762, 285762), nnz=1402128,
layout=torch.sparse_csr)
tensor([0.7703, 0.7481, 0.5351, ..., 0.4663, 0.6089, 0.3679])
Matrix: va2010
Shape: torch.Size([285762, 285762])
NNZ: 1402128
Density: 1.717033263003816e-05
Time: 14.130552530288696 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
332,792 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,983,954 BR_RETIRED:u
17.923156218 seconds time elapsed
107.999690000 seconds user
1091.659165000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
1402123, 1402128]),
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
285760]),
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
9832.]), size=(285762, 285762), nnz=1402128,
layout=torch.sparse_csr)
tensor([0.8850, 0.1406, 0.0617, ..., 0.4325, 0.2725, 0.9292])
Matrix: va2010
Shape: torch.Size([285762, 285762])
NNZ: 1402128
Density: 1.717033263003816e-05
Time: 13.32977032661438 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
27,156,853 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,466 ITLB_WALK:u
18,244 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,466,301 L1D_TLB:u
17.186572497 seconds time elapsed
104.940187000 seconds user
1032.527271000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
1402123, 1402128]),
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
285760]),
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
9832.]), size=(285762, 285762), nnz=1402128,
layout=torch.sparse_csr)
tensor([0.6289, 0.0403, 0.9207, ..., 0.0183, 0.4807, 0.7504])
Matrix: va2010
Shape: torch.Size([285762, 285762])
NNZ: 1402128
Density: 1.717033263003816e-05
Time: 13.460915803909302 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
30,929,971 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
291,811 L1I_CACHE_REFILL:u
473,063 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,462,905 L1D_CACHE:u
17.219448483 seconds time elapsed
100.274467000 seconds user
1045.271682000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
1402123, 1402128]),
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
285760]),
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
9832.]), size=(285762, 285762), nnz=1402128,
layout=torch.sparse_csr)
tensor([0.6412, 0.1151, 0.5075, ..., 0.9251, 0.9288, 0.3560])
Matrix: va2010
Shape: torch.Size([285762, 285762])
NNZ: 1402128
Density: 1.717033263003816e-05
Time: 15.992860555648804 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
544,953 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
565,172 LL_CACHE_RD:u
183,225 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,924 L2D_TLB_REFILL:u
301,362 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,756,590 L2D_CACHE:u
19.884223259 seconds time elapsed
113.211516000 seconds user
1230.525804000 seconds sys

View File

@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [77.2, 64.12, 64.12, 48.92, 36.2, 21.72, 21.88, 22.36, 22.36, 22.44], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.5892834663391113, "power": [33.44, 30.68, 31.08, 26.96, 26.88, 32.48, 32.24], "power_after": [21.24, 21.24, 21.36, 21.36, 21.2, 21.04, 20.84, 20.72, 20.72, 20.56], "task clock (msec)": 55.26, "page faults": 3297, "cycles": 49276491, "instructions": 64763517, "branch mispredictions": 340611, "branches": 20355849, "ITLB accesses": 27946393, "ITLB misses": 6805, "DTLB misses": 17877, "DTLB accesses": 38226912, "L1I cache accesses": 31946141, "L1I cache misses": 295259, "L1D cache misses": 468136, "L1D cache accesses": 33395666, "LL cache misses": 527109, "LL cache accesses": 540409, "L2D TLB accesses": 192519, "L2D TLB misses": 24204, "L2D cache misses": 290933, "L2D cache accesses": 1743452, "instructions per cycle": 1.3142883286880147, "branch miss rate": 0.016732831924622747, "ITLB miss rate": 0.00024350190738389746, "DTLB miss rate": 0.0004676548291423592, "L2D TLB miss rate": 0.1257226559456469, "L1I cache miss rate": 0.009242399574959616, "L1D cache miss rate": 0.014017866869311724, "L2D cache miss rate": 0.16687181522634406, "LL cache miss rate": 0.9753890109158063}

View File

@ -0,0 +1,163 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395285 queued and waiting for resources
srun: job 3395285 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.1179, 0.2288, 0.5357, ..., 0.4845, 0.6375, 0.4513])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 3.628732681274414 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
55.26 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,297 page-faults:u # 59.661 K/sec
49,276,491 cycles:u # 0.892 GHz (31.65%)
64,763,517 instructions:u # 1.31 insn per cycle (57.73%)
<not supported> branches:u
357,693 branch-misses:u (76.18%)
32,426,852 L1-dcache-loads:u # 586.784 M/sec (88.36%)
469,495 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
30,941,957 L1-icache-loads:u # 559.914 M/sec
279,512 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
47,128,547 dTLB-loads:u # 852.821 M/sec (46.73%)
108,931 dTLB-load-misses:u # 0.23% of all dTLB cache accesses (32.30%)
14,189,608 iTLB-loads:u # 256.770 M/sec (19.86%)
<not counted> iTLB-load-misses:u (0.00%)
7.117399121 seconds time elapsed
18.404618000 seconds user
29.532104000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.7544, 0.0071, 0.0491, ..., 0.7236, 0.5537, 0.4901])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 3.6322426795959473 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
340,611 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,355,849 BR_RETIRED:u
7.112879848 seconds time elapsed
18.362004000 seconds user
29.398677000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.7651, 0.6605, 0.7128, ..., 0.7434, 0.6656, 0.3987])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 3.7933311462402344 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
27,946,393 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,805 ITLB_WALK:u
17,877 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
38,226,912 L1D_TLB:u
7.235266934 seconds time elapsed
18.566568000 seconds user
29.759130000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.3319, 0.1241, 0.4830, ..., 0.5188, 0.8684, 0.1488])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 3.662006378173828 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
31,946,141 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
295,259 L1I_CACHE_REFILL:u
468,136 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,395,666 L1D_CACHE:u
7.187008251 seconds time elapsed
18.275672000 seconds user
30.724065000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
155598]),
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
tensor([0.0055, 0.7774, 0.9046, ..., 0.5143, 0.0678, 0.4725])
Matrix: vt2010
Shape: torch.Size([32580, 32580])
NNZ: 155598
Density: 0.00014658915806621921
Time: 3.616023063659668 seconds
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
527,109 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
540,409 LL_CACHE_RD:u
192,519 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,204 L2D_TLB_REFILL:u
290,933 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,743,452 L2D_CACHE:u
7.030605378 seconds time elapsed
18.274323000 seconds user
28.779020000 seconds sys

View File

@ -42,6 +42,10 @@ def run_program(program: list[str]) -> tuple[dict, str]:
return (json.loads(process.stdout), process.stderr)
result = dict()
result['architecture'] = args.arch
result['iterations'] = args.iterations
result['baseline_time_s'] = args.baseline_time_s
result['baseline_delay_s'] = args.baseline_delay_s
if args.power is True:
result['power_before'] = baseline_power(args.baseline_time_s)

View File

@ -3,7 +3,7 @@ import numpy as np
import argparse
import time
import json
import sys
import sys, os
parser = argparse.ArgumentParser()
parser.add_argument('matrix_file', help='the input matrix (.mtx) file')
@ -32,6 +32,9 @@ end = time.time()
result = dict()
result['matrix'] = os.path.splitext(os.path.basename(args.matrix_file))[0]
print(f"Matrix: {result['matrix']}", file=sys.stderr)
result['shape'] = matrix.shape
print(f"Shape: {result['shape']}", file=sys.stderr)