commit 93690abfee
parent 01b1b0fc0c

    new output
@@ -4,6 +4,7 @@ import argparse
 import glob
 import os
 import subprocess
+import random
 
 parser = argparse.ArgumentParser()
 parser.add_argument('arch')
@@ -14,7 +15,7 @@ parser.add_argument('baseline_time_s', type=int)
 parser.add_argument('baseline_delay_s', type=int)
 parser.add_argument('--perf', action='store_const', const='--perf')
 parser.add_argument('--power', action='store_const', const='--power')
-parser.add_argument('--distribute', type=bool)
+parser.add_argument('--distribute', action='store_true')
 args = parser.parse_args()
 
 srun_args_altra = [
@@ -42,32 +43,36 @@ def srun(srun_args_list: list, run_args, matrix_file: str) -> list:
         run_args_list += [args.perf]
     if args.power is not None:
         run_args_list += [args.power]
-    return ['srun'] + srun_args_list + ['run.py'] + run_args_list
+    return ['srun'] + srun_args_list + ['./run.py'] + run_args_list
 
+processes = list()
+
 for i, matrix in enumerate(glob.glob(f'{args.matrix_dir.rstrip("/")}/*.mtx')):
     if args.arch == 'altra':
-        if args.distribute == True:
+        if args.distribute:
             i = i % 40
-            srun_args_altra += [f'--nodelist oasis{i:02}']
+            srun_args = srun_args_altra + ['--nodelist', f'oasis{i:02}']
+        else:
+            srun_args = srun_args_altra
 
     output_filename = '_'.join([
         args.arch,
-        str(args.iterations),
-        os.path.splitext(os.path.basename(matrix))[0],
         str(args.baseline_time_s),
-        str(args.baseline_delay_s)])
+        str(args.baseline_delay_s),
+        os.path.splitext(os.path.basename(matrix))[0],
+        str(args.iterations)])
 
     json_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.json'
    raw_filepath = f'{args.output_dir.rstrip("/")}/{output_filename}.output'
    with open(json_filepath, 'w') as json_file, open(raw_filepath, 'w') as raw_file:
-        print(srun(srun_args_altra, args, matrix))
-        proc = subprocess.run(
+        print(srun(srun_args, args, matrix))
+        print(json_filepath)
+        print(raw_filepath)
+
+        processes.append(subprocess.Popen(
             srun(srun_args_altra, args, matrix),
             stdout=json_file,
-            stderr=raw_file,
-            text=True)
-        #output = proc.communicate()
-        #print(output[0])
-        #print(output[1])
+            stderr=raw_file))
 
-    break;
+for process in processes:
+    process.wait()
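Three of the changes in this script are behavioral rather than cosmetic: `--distribute` moves from `type=bool` (argparse calls `bool()` on the supplied string, and any non-empty string, including "False", is truthy) to a plain `store_true` switch; the single `'--nodelist oasis{i:02}'` string is split into two argv elements, since subprocess passes the list to srun without shell word-splitting; and the blocking `subprocess.run(...)` call (plus the early `break`) is replaced by `subprocess.Popen(...)` with a final wait loop, so a job is launched for every matrix and they run concurrently. A minimal, self-contained sketch of the argparse and Popen patterns follows; the command list and output file names are placeholders, not the repository's real jobs.

# Minimal sketch of the two library patterns adopted above (placeholder jobs only).
import argparse
import subprocess

parser = argparse.ArgumentParser()
# type=bool is an argparse pitfall: bool('False') == True, so any value given on
# the command line enables the option. store_true makes --distribute a real switch.
parser.add_argument('--distribute', action='store_true')
args = parser.parse_args()

commands = [['echo', 'job 0'], ['echo', 'job 1']]  # placeholder commands

# Launch every job without blocking (subprocess.run would serialize them),
# sending stdout/stderr straight to per-job files, then wait for all of them.
jobs = []
for i, cmd in enumerate(commands):
    out = open(f'job_{i}.out', 'w')
    err = open(f'job_{i}.err', 'w')
    jobs.append((subprocess.Popen(cmd, stdout=out, stderr=err), out, err))

for proc, out, err in jobs:
    proc.wait()
    out.close()
    err.close()

Because the children write directly to file handles, the parent never has to drain pipes; wait() only blocks until each launched process exits.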
pytorch/output/altra_10_30_Oregon-2_1000.json (new file)
@@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.36, 22.88, 22.36, 22.72, 22.52, 22.2, 21.96, 21.8, 21.48, 21.48], "matrix": "Oregon-2", "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 1.5312557220458984, "power": [26.68, 27.84, 28.48, 29.92, 30.0], "power_after": [21.16, 21.32, 21.16, 21.16, 21.16, 20.88, 20.92, 20.76, 20.96, 21.2], "task clock (msec)": 64.81, "page faults": 3244, "cycles": 82069432, "instructions": 78292700, "branch mispredictions": 319703, "branches": 19996903, "ITLB accesses": 26988315, "ITLB misses": 5988, "DTLB misses": 14570, "DTLB accesses": 36879854, "L1I cache accesses": 30465174, "L1I cache misses": 293085, "L1D cache misses": 487330, "L1D cache accesses": 31932249, "LL cache misses": 545501, "LL cache accesses": 558084, "L2D TLB accesses": 204746, "L2D TLB misses": 25302, "L2D cache misses": 314594, "L2D cache accesses": 1828047, "instructions per cycle": 0.9539812582107307, "branch miss rate": 0.01598762568383714, "ITLB miss rate": 0.00022187379982781437, "DTLB miss rate": 0.0003950666399058955, "L2D TLB miss rate": 0.12357750578765886, "L1I cache miss rate": 0.009620329101025322, "L1D cache miss rate": 0.015261374167538278, "L2D cache miss rate": 0.17209294947011755, "LL cache miss rate": 0.9774532149282187}
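Each of the JSON records added in this commit mixes raw perf counters with derived ratios. The ratios follow directly from the raw fields; as a quick sanity check against the file above (how the upstream script actually computes them is not part of this diff, so the divisions below are an assumption about the relationship, not its code):

# Recomputing a few derived fields from the raw counters in the JSON above.
# The relative path and the exact upstream formulas are assumptions.
import json

with open('pytorch/output/altra_10_30_Oregon-2_1000.json') as f:
    result = json.load(f)

print(result['instructions'] / result['cycles'])                  # ~0.954, "instructions per cycle"
print(result['branch mispredictions'] / result['branches'])       # ~0.0160, "branch miss rate"
print(result['L1D cache misses'] / result['L1D cache accesses'])  # ~0.0153, "L1D cache miss rate"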
@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394148 queued and waiting for resources
|
srun: job 3394980 queued and waiting for resources
|
||||||
srun: job 3394148 has been allocated resources
|
srun: job 3394980 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
nnz=65460, layout=torch.sparse_csr)
|
||||||
tensor([0.3190, 0.2829, 0.6210, ..., 0.9278, 0.7514, 0.5737])
|
tensor([0.9231, 0.7723, 0.0509, ..., 0.0839, 0.6982, 0.3459])
|
||||||
|
Matrix: Oregon-2
|
||||||
Shape: torch.Size([11806, 11806])
|
Shape: torch.Size([11806, 11806])
|
||||||
NNZ: 65460
|
NNZ: 65460
|
||||||
Density: 0.0004696458003979807
|
Density: 0.0004696458003979807
|
||||||
Time: 0.22389841079711914 seconds
|
Time: 1.5677142143249512 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
||||||
|
|
||||||
42.01 msec task-clock:u # 0.012 CPUs utilized
|
64.81 msec task-clock:u # 0.013 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,263 page-faults:u # 77.672 K/sec
|
3,244 page-faults:u # 50.056 K/sec
|
||||||
47,084,933 cycles:u # 1.121 GHz (65.90%)
|
82,069,432 cycles:u # 1.266 GHz (59.04%)
|
||||||
77,895,119 instructions:u # 1.65 insn per cycle (85.49%)
|
78,292,700 instructions:u # 0.95 insn per cycle (76.75%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
352,740 branch-misses:u
|
341,509 branch-misses:u (90.97%)
|
||||||
30,958,922 L1-dcache-loads:u # 736.946 M/sec
|
33,032,555 L1-dcache-loads:u # 509.704 M/sec
|
||||||
442,351 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
|
478,674 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
29,506,648 L1-icache-loads:u # 702.376 M/sec
|
31,508,310 L1-icache-loads:u # 486.184 M/sec
|
||||||
272,063 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
297,528 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
51,646,382 dTLB-loads:u # 1.229 G/sec (15.87%)
|
49,358,091 dTLB-loads:u # 761.613 M/sec (27.83%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
88,514 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (14.82%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.513156571 seconds time elapsed
|
5.016393105 seconds time elapsed
|
||||||
|
|
||||||
15.150380000 seconds user
|
16.759527000 seconds user
|
||||||
32.922923000 seconds sys
|
31.429551000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
nnz=65460, layout=torch.sparse_csr)
|
||||||
tensor([0.0741, 0.5476, 0.1060, ..., 0.8459, 0.8270, 0.8313])
|
tensor([0.8423, 0.9339, 0.8037, ..., 0.5953, 0.0649, 0.1559])
|
||||||
|
Matrix: Oregon-2
|
||||||
Shape: torch.Size([11806, 11806])
|
Shape: torch.Size([11806, 11806])
|
||||||
NNZ: 65460
|
NNZ: 65460
|
||||||
Density: 0.0004696458003979807
|
Density: 0.0004696458003979807
|
||||||
Time: 0.20610284805297852 seconds
|
Time: 1.516484022140503 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
||||||
|
|
||||||
330,923 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
319,703 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,740,519 BR_RETIRED:u
|
19,996,903 BR_RETIRED:u
|
||||||
|
|
||||||
3.639725976 seconds time elapsed
|
4.945699041 seconds time elapsed
|
||||||
|
|
||||||
15.493122000 seconds user
|
16.431978000 seconds user
|
||||||
27.617441000 seconds sys
|
29.752452000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
nnz=65460, layout=torch.sparse_csr)
|
||||||
tensor([0.9699, 0.9368, 0.7284, ..., 0.7182, 0.5308, 0.9833])
|
tensor([0.8058, 0.2922, 0.1227, ..., 0.2176, 0.9496, 0.8838])
|
||||||
|
Matrix: Oregon-2
|
||||||
Shape: torch.Size([11806, 11806])
|
Shape: torch.Size([11806, 11806])
|
||||||
NNZ: 65460
|
NNZ: 65460
|
||||||
Density: 0.0004696458003979807
|
Density: 0.0004696458003979807
|
||||||
Time: 0.15960955619812012 seconds
|
Time: 1.6458909511566162 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
||||||
|
|
||||||
27,761,239 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
26,988,315 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,471 ITLB_WALK:u
|
5,988 ITLB_WALK:u
|
||||||
17,268 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
14,570 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,993,265 L1D_TLB:u
|
36,879,854 L1D_TLB:u
|
||||||
|
|
||||||
3.455602215 seconds time elapsed
|
5.011871473 seconds time elapsed
|
||||||
|
|
||||||
15.015027000 seconds user
|
16.529942000 seconds user
|
||||||
27.930709000 seconds sys
|
30.438432000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
nnz=65460, layout=torch.sparse_csr)
|
||||||
tensor([0.5851, 0.3425, 0.8120, ..., 0.0829, 0.5823, 0.2256])
|
tensor([0.7728, 0.1182, 0.3337, ..., 0.2555, 0.2523, 0.5746])
|
||||||
|
Matrix: Oregon-2
|
||||||
Shape: torch.Size([11806, 11806])
|
Shape: torch.Size([11806, 11806])
|
||||||
NNZ: 65460
|
NNZ: 65460
|
||||||
Density: 0.0004696458003979807
|
Density: 0.0004696458003979807
|
||||||
Time: 0.15697884559631348 seconds
|
Time: 1.529954433441162 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
||||||
|
|
||||||
31,834,980 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
30,465,174 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
298,333 L1I_CACHE_REFILL:u
|
293,085 L1I_CACHE_REFILL:u
|
||||||
466,901 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
487,330 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
33,528,976 L1D_CACHE:u
|
31,932,249 L1D_CACHE:u
|
||||||
|
|
||||||
3.452279902 seconds time elapsed
|
4.954100105 seconds time elapsed
|
||||||
|
|
||||||
14.635240000 seconds user
|
16.282966000 seconds user
|
||||||
28.262858000 seconds sys
|
28.926724000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 583, 584, ..., 65459, 65460, 65460]),
|
|||||||
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
col_indices=tensor([ 2, 23, 27, ..., 3324, 958, 841]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(11806, 11806),
|
||||||
nnz=65460, layout=torch.sparse_csr)
|
nnz=65460, layout=torch.sparse_csr)
|
||||||
tensor([0.0772, 0.9112, 0.0293, ..., 0.4016, 0.4357, 0.5368])
|
tensor([0.5613, 0.3211, 0.1739, ..., 0.5461, 0.1391, 0.8387])
|
||||||
|
Matrix: Oregon-2
|
||||||
Shape: torch.Size([11806, 11806])
|
Shape: torch.Size([11806, 11806])
|
||||||
NNZ: 65460
|
NNZ: 65460
|
||||||
Density: 0.0004696458003979807
|
Density: 0.0004696458003979807
|
||||||
Time: 0.20962285995483398 seconds
|
Time: 1.5726752281188965 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/Oregon-2.mtx 1000':
|
||||||
|
|
||||||
525,505 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
545,501 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
546,521 LL_CACHE_RD:u
|
558,084 LL_CACHE_RD:u
|
||||||
184,884 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
204,746 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
22,933 L2D_TLB_REFILL:u
|
25,302 L2D_TLB_REFILL:u
|
||||||
292,367 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
314,594 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,706,226 L2D_CACHE:u
|
1,828,047 L2D_CACHE:u
|
||||||
|
|
||||||
3.566096255 seconds time elapsed
|
4.866549675 seconds time elapsed
|
||||||
|
|
||||||
15.763579000 seconds user
|
16.609257000 seconds user
|
||||||
28.620423000 seconds sys
|
31.381282000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
pytorch/output/altra_10_30_as-caida_1000.json (new file)
@@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.6, 21.64, 21.88, 22.08, 22.2, 22.32, 22.36, 22.04, 22.0, 21.96], "matrix": "as-caida", "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 2.6254467964172363, "power": [30.92, 29.2, 29.52, 29.72, 29.72, 31.72], "power_after": [21.04, 21.28, 21.04, 21.16, 21.16, 20.96, 21.04, 20.88, 20.56, 20.84], "task clock (msec)": 61.4, "page faults": 3507, "cycles": 78967021, "instructions": 94334531, "branch mispredictions": 325893, "branches": 19069753, "ITLB accesses": 27181279, "ITLB misses": 5995, "DTLB misses": 17412, "DTLB accesses": 37016930, "L1I cache accesses": 31535482, "L1I cache misses": 292676, "L1D cache misses": 471752, "L1D cache accesses": 33119145, "LL cache misses": 540894, "LL cache accesses": 554700, "L2D TLB accesses": 191772, "L2D TLB misses": 23711, "L2D cache misses": 306195, "L2D cache accesses": 1755986, "instructions per cycle": 1.1946066827061894, "branch miss rate": 0.017089523917797993, "ITLB miss rate": 0.00022055621444450792, "DTLB miss rate": 0.00047037936425305935, "L2D TLB miss rate": 0.12364161608576851, "L1I cache miss rate": 0.009280847522799873, "L1D cache miss rate": 0.01424408752097918, "L2D cache miss rate": 0.17437211913990203, "LL cache miss rate": 0.975110870740941}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394150 queued and waiting for resources
|
srun: job 3394983 queued and waiting for resources
|
||||||
srun: job 3394150 has been allocated resources
|
srun: job 3394983 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
||||||
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
nnz=106762, layout=torch.sparse_csr)
|
||||||
tensor([0.7672, 0.5818, 0.6775, ..., 0.1052, 0.2539, 0.4347])
|
tensor([0.4886, 0.3652, 0.5691, ..., 0.6466, 0.4355, 0.8397])
|
||||||
|
Matrix: as-caida
|
||||||
Shape: torch.Size([31379, 31379])
|
Shape: torch.Size([31379, 31379])
|
||||||
NNZ: 106762
|
NNZ: 106762
|
||||||
Density: 0.00010842726485909405
|
Density: 0.00010842726485909405
|
||||||
Time: 0.28373050689697266 seconds
|
Time: 2.6297245025634766 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
||||||
|
|
||||||
60.78 msec task-clock:u # 0.017 CPUs utilized
|
61.40 msec task-clock:u # 0.010 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,300 page-faults:u # 54.293 K/sec
|
3,507 page-faults:u # 57.117 K/sec
|
||||||
66,733,059 cycles:u # 1.098 GHz (58.34%)
|
78,967,021 cycles:u # 1.286 GHz (61.13%)
|
||||||
87,889,334 instructions:u # 1.32 insn per cycle (93.45%)
|
94,334,531 instructions:u # 1.19 insn per cycle (95.16%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
369,909 branch-misses:u
|
365,239 branch-misses:u
|
||||||
31,872,708 L1-dcache-loads:u # 524.386 M/sec
|
33,334,312 L1-dcache-loads:u # 542.906 M/sec
|
||||||
465,719 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
457,950 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
30,443,353 L1-icache-loads:u # 500.870 M/sec
|
31,725,851 L1-icache-loads:u # 516.709 M/sec
|
||||||
292,371 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
297,720 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
34,702,735 dTLB-loads:u # 570.947 M/sec (6.96%)
|
25,188,580 dTLB-loads:u # 410.239 M/sec (5.16%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.683429807 seconds time elapsed
|
6.049042045 seconds time elapsed
|
||||||
|
|
||||||
15.161162000 seconds user
|
17.649315000 seconds user
|
||||||
31.335288000 seconds sys
|
29.335859000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
nnz=106762, layout=torch.sparse_csr)
|
||||||
tensor([0.2708, 0.2455, 0.7615, ..., 0.1172, 0.4072, 0.8970])
|
tensor([0.8344, 0.2588, 0.2246, ..., 0.5607, 0.8141, 0.9893])
|
||||||
|
Matrix: as-caida
|
||||||
Shape: torch.Size([31379, 31379])
|
Shape: torch.Size([31379, 31379])
|
||||||
NNZ: 106762
|
NNZ: 106762
|
||||||
Density: 0.00010842726485909405
|
Density: 0.00010842726485909405
|
||||||
Time: 0.32511067390441895 seconds
|
Time: 2.6495532989501953 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
||||||
|
|
||||||
326,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
325,893 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,832,700 BR_RETIRED:u
|
19,069,753 BR_RETIRED:u
|
||||||
|
|
||||||
3.755497210 seconds time elapsed
|
6.023780447 seconds time elapsed
|
||||||
|
|
||||||
14.681699000 seconds user
|
17.654658000 seconds user
|
||||||
29.413955000 seconds sys
|
28.848805000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
nnz=106762, layout=torch.sparse_csr)
|
||||||
tensor([0.9417, 0.0965, 0.8551, ..., 0.6665, 0.0164, 0.5102])
|
tensor([0.0814, 0.1132, 0.8515, ..., 0.8987, 0.5912, 0.5002])
|
||||||
|
Matrix: as-caida
|
||||||
Shape: torch.Size([31379, 31379])
|
Shape: torch.Size([31379, 31379])
|
||||||
NNZ: 106762
|
NNZ: 106762
|
||||||
Density: 0.00010842726485909405
|
Density: 0.00010842726485909405
|
||||||
Time: 0.33124780654907227 seconds
|
Time: 2.5444185733795166 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
||||||
|
|
||||||
27,233,629 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,181,279 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
5,868 ITLB_WALK:u
|
5,995 ITLB_WALK:u
|
||||||
16,893 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
17,412 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,409,508 L1D_TLB:u
|
37,016,930 L1D_TLB:u
|
||||||
|
|
||||||
3.751203540 seconds time elapsed
|
5.790360666 seconds time elapsed
|
||||||
|
|
||||||
14.849342000 seconds user
|
17.919315000 seconds user
|
||||||
27.706396000 seconds sys
|
30.569858000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
nnz=106762, layout=torch.sparse_csr)
|
||||||
tensor([0.9215, 0.4139, 0.1789, ..., 0.0245, 0.0029, 0.2129])
|
tensor([0.0439, 0.1884, 0.3342, ..., 0.2027, 0.5532, 0.7245])
|
||||||
|
Matrix: as-caida
|
||||||
Shape: torch.Size([31379, 31379])
|
Shape: torch.Size([31379, 31379])
|
||||||
NNZ: 106762
|
NNZ: 106762
|
||||||
Density: 0.00010842726485909405
|
Density: 0.00010842726485909405
|
||||||
Time: 0.3386805057525635 seconds
|
Time: 2.620804786682129 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
||||||
|
|
||||||
30,924,532 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
31,535,482 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
288,199 L1I_CACHE_REFILL:u
|
292,676 L1I_CACHE_REFILL:u
|
||||||
462,816 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
471,752 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
32,428,375 L1D_CACHE:u
|
33,119,145 L1D_CACHE:u
|
||||||
|
|
||||||
3.628443937 seconds time elapsed
|
6.002311801 seconds time elapsed
|
||||||
|
|
||||||
15.430937000 seconds user
|
17.427887000 seconds user
|
||||||
30.878583000 seconds sys
|
30.063688000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 0, 0, ..., 106761, 106761,
|
|||||||
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
col_indices=tensor([ 106, 329, 1040, ..., 155, 160, 12170]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(31379, 31379),
|
||||||
nnz=106762, layout=torch.sparse_csr)
|
nnz=106762, layout=torch.sparse_csr)
|
||||||
tensor([0.4983, 0.0268, 0.1695, ..., 0.6987, 0.7224, 0.8577])
|
tensor([0.1495, 0.5856, 0.8600, ..., 0.2101, 0.6229, 0.2019])
|
||||||
|
Matrix: as-caida
|
||||||
Shape: torch.Size([31379, 31379])
|
Shape: torch.Size([31379, 31379])
|
||||||
NNZ: 106762
|
NNZ: 106762
|
||||||
Density: 0.00010842726485909405
|
Density: 0.00010842726485909405
|
||||||
Time: 0.3289623260498047 seconds
|
Time: 2.561279296875 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/as-caida.mtx 1000':
|
||||||
|
|
||||||
551,997 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
540,894 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
568,528 LL_CACHE_RD:u
|
554,700 LL_CACHE_RD:u
|
||||||
193,991 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
191,772 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
24,353 L2D_TLB_REFILL:u
|
23,711 L2D_TLB_REFILL:u
|
||||||
312,207 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
306,195 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,821,196 L2D_CACHE:u
|
1,755,986 L2D_CACHE:u
|
||||||
|
|
||||||
3.698790384 seconds time elapsed
|
5.946428572 seconds time elapsed
|
||||||
|
|
||||||
15.745189000 seconds user
|
17.396567000 seconds user
|
||||||
31.063512000 seconds sys
|
32.141235000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
pytorch/output/altra_10_30_dc2_1000.json (new file)
@@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [83.04, 78.44, 65.92, 53.76, 38.68, 38.68, 25.68, 22.6, 22.52, 22.32], "matrix": "dc2", "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 14.128849267959595, "power": [89.84, 89.4, 82.8, 71.32, 57.72, 51.92, 53.0, 63.8, 78.24, 78.24, 90.2, 90.36, 90.08, 88.64, 88.64, 87.64, 87.68, 87.24], "power_after": [21.4, 21.2, 21.08, 21.08, 21.28, 21.04, 20.92, 21.12, 21.08, 21.0], "task clock (msec)": 58.45, "page faults": 3471, "cycles": 76691414, "instructions": 89547095, "branch mispredictions": 329725, "branches": 19946857, "ITLB accesses": 27648951, "ITLB misses": 6857, "DTLB misses": 18047, "DTLB accesses": 37225736, "L1I cache accesses": 32434686, "L1I cache misses": 293072, "L1D cache misses": 483557, "L1D cache accesses": 34059722, "LL cache misses": 561480, "LL cache accesses": 578369, "L2D TLB accesses": 192306, "L2D TLB misses": 25364, "L2D cache misses": 317121, "L2D cache accesses": 1812330, "instructions per cycle": 1.16762868656979, "branch miss rate": 0.01653017314958442, "ITLB miss rate": 0.00024800217556174194, "DTLB miss rate": 0.00048479901109275584, "L2D TLB miss rate": 0.13189396066685385, "L1I cache miss rate": 0.00903575881696527, "L1D cache miss rate": 0.014197326683993487, "L2D cache miss rate": 0.17497972223601663, "LL cache miss rate": 0.9707989190292011}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394149 queued and waiting for resources
|
srun: job 3394982 queued and waiting for resources
|
||||||
srun: job 3394149 has been allocated resources
|
srun: job 3394982 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
||||||
@ -16,37 +16,38 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
||||||
tensor([0.4749, 0.3788, 0.8812, ..., 0.8281, 0.8889, 0.4945])
|
tensor([0.0986, 0.6504, 0.0132, ..., 0.6525, 0.3337, 0.7557])
|
||||||
|
Matrix: dc2
|
||||||
Shape: torch.Size([116835, 116835])
|
Shape: torch.Size([116835, 116835])
|
||||||
NNZ: 766396
|
NNZ: 766396
|
||||||
Density: 5.614451099680581e-05
|
Density: 5.614451099680581e-05
|
||||||
Time: 2.2480316162109375 seconds
|
Time: 18.46260714530945 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
||||||
|
|
||||||
50.43 msec task-clock:u # 0.009 CPUs utilized
|
58.45 msec task-clock:u # 0.003 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,285 page-faults:u # 65.135 K/sec
|
3,471 page-faults:u # 59.382 K/sec
|
||||||
54,118,679 cycles:u # 1.073 GHz (60.92%)
|
76,691,414 cycles:u # 1.312 GHz (41.20%)
|
||||||
77,692,421 instructions:u # 1.44 insn per cycle (82.73%)
|
89,547,095 instructions:u # 1.17 insn per cycle (73.16%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
367,999 branch-misses:u
|
382,362 branch-misses:u (96.21%)
|
||||||
32,182,371 L1-dcache-loads:u # 638.112 M/sec
|
33,271,433 L1-dcache-loads:u # 569.211 M/sec
|
||||||
491,960 L1-dcache-load-misses:u # 1.53% of all L1-dcache accesses
|
488,730 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
30,682,258 L1-icache-loads:u # 608.367 M/sec
|
31,926,596 L1-icache-loads:u # 546.204 M/sec
|
||||||
300,874 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
|
304,792 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
||||||
55,244,523 dTLB-loads:u # 1.095 G/sec (19.09%)
|
36,392,791 dTLB-loads:u # 622.612 M/sec (31.21%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
0 dTLB-load-misses:u (5.35%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
5.813837947 seconds time elapsed
|
22.126601025 seconds time elapsed
|
||||||
|
|
||||||
28.815118000 seconds user
|
103.642372000 seconds user
|
||||||
213.749674000 seconds sys
|
1434.131491000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -59,21 +60,22 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
||||||
tensor([0.9715, 0.3920, 0.0297, ..., 0.1819, 0.5744, 0.8105])
|
tensor([0.5605, 0.9374, 0.4444, ..., 0.5937, 0.3099, 0.2252])
|
||||||
|
Matrix: dc2
|
||||||
Shape: torch.Size([116835, 116835])
|
Shape: torch.Size([116835, 116835])
|
||||||
NNZ: 766396
|
NNZ: 766396
|
||||||
Density: 5.614451099680581e-05
|
Density: 5.614451099680581e-05
|
||||||
Time: 2.2333595752716064 seconds
|
Time: 13.607120752334595 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
||||||
|
|
||||||
325,039 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
329,725 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,383,216 BR_RETIRED:u
|
19,946,857 BR_RETIRED:u
|
||||||
|
|
||||||
5.973132269 seconds time elapsed
|
17.131143957 seconds time elapsed
|
||||||
|
|
||||||
29.719778000 seconds user
|
96.945305000 seconds user
|
||||||
213.706315000 seconds sys
|
1045.242697000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -86,23 +88,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
||||||
tensor([0.3371, 0.4985, 0.9905, ..., 0.6075, 0.1568, 0.3782])
|
tensor([0.8954, 0.9777, 0.8042, ..., 0.2069, 0.7063, 0.8479])
|
||||||
|
Matrix: dc2
|
||||||
Shape: torch.Size([116835, 116835])
|
Shape: torch.Size([116835, 116835])
|
||||||
NNZ: 766396
|
NNZ: 766396
|
||||||
Density: 5.614451099680581e-05
|
Density: 5.614451099680581e-05
|
||||||
Time: 1.9790923595428467 seconds
|
Time: 17.22396969795227 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
||||||
|
|
||||||
26,060,519 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,648,951 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
4,749 ITLB_WALK:u
|
6,857 ITLB_WALK:u
|
||||||
16,865 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
18,047 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
34,819,729 L1D_TLB:u
|
37,225,736 L1D_TLB:u
|
||||||
|
|
||||||
5.575020445 seconds time elapsed
|
20.911480243 seconds time elapsed
|
||||||
|
|
||||||
26.769391000 seconds user
|
107.392462000 seconds user
|
||||||
188.138935000 seconds sys
|
1329.272154000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -115,23 +118,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
||||||
tensor([0.6806, 0.8858, 0.7035, ..., 0.6007, 0.0880, 0.4550])
|
tensor([0.9293, 0.9606, 0.8914, ..., 0.2407, 0.2843, 0.5174])
|
||||||
|
Matrix: dc2
|
||||||
Shape: torch.Size([116835, 116835])
|
Shape: torch.Size([116835, 116835])
|
||||||
NNZ: 766396
|
NNZ: 766396
|
||||||
Density: 5.614451099680581e-05
|
Density: 5.614451099680581e-05
|
||||||
Time: 1.5306556224822998 seconds
|
Time: 13.233965873718262 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
||||||
|
|
||||||
30,777,115 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
32,434,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
293,980 L1I_CACHE_REFILL:u
|
293,072 L1I_CACHE_REFILL:u
|
||||||
461,522 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
483,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
32,216,597 L1D_CACHE:u
|
34,059,722 L1D_CACHE:u
|
||||||
|
|
||||||
4.961298684 seconds time elapsed
|
16.956477005 seconds time elapsed
|
||||||
|
|
||||||
23.946357000 seconds user
|
88.393687000 seconds user
|
||||||
156.598674000 seconds sys
|
1037.101858000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -144,25 +148,26 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 766390, 766394,
|
|||||||
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
values=tensor([-1.0000e+00, -1.0000e+00, -1.0000e+00, ...,
|
||||||
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
1.0331e+01, -1.0000e-03, 1.0000e-03]),
|
||||||
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
size=(116835, 116835), nnz=766396, layout=torch.sparse_csr)
|
||||||
tensor([0.3029, 0.1908, 0.9816, ..., 0.0418, 0.8182, 0.5474])
|
tensor([0.8850, 0.9552, 0.7029, ..., 0.3357, 0.0248, 0.5395])
|
||||||
|
Matrix: dc2
|
||||||
Shape: torch.Size([116835, 116835])
|
Shape: torch.Size([116835, 116835])
|
||||||
NNZ: 766396
|
NNZ: 766396
|
||||||
Density: 5.614451099680581e-05
|
Density: 5.614451099680581e-05
|
||||||
Time: 2.28926944732666 seconds
|
Time: 13.873224973678589 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/dc2.mtx 1000':
|
||||||
|
|
||||||
567,700 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
561,480 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
588,689 LL_CACHE_RD:u
|
578,369 LL_CACHE_RD:u
|
||||||
189,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
192,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
22,360 L2D_TLB_REFILL:u
|
25,364 L2D_TLB_REFILL:u
|
||||||
328,306 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
317,121 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,908,607 L2D_CACHE:u
|
1,812,330 L2D_CACHE:u
|
||||||
|
|
||||||
5.710829283 seconds time elapsed
|
17.467787426 seconds time elapsed
|
||||||
|
|
||||||
28.671301000 seconds user
|
92.463054000 seconds user
|
||||||
213.960421000 seconds sys
|
1072.584062000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
pytorch/output/altra_10_30_de2010_1000.json (new file)
@@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.56, 28.04, 23.8, 23.08, 22.12, 21.16, 21.16, 21.0, 20.96, 20.72], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.713265895843506, "power": [33.24, 30.84, 29.96, 27.68, 25.8, 25.8, 31.16], "power_after": [20.6, 20.48, 20.24, 20.32, 20.2, 20.36, 20.4, 20.4, 20.36, 20.36], "task clock (msec)": 48.96, "page faults": 3285, "cycles": 48563060, "instructions": 73465190, "branch mispredictions": 326361, "branches": 19599354, "ITLB accesses": 26666488, "ITLB misses": 6643, "DTLB misses": 17347, "DTLB accesses": 35986736, "L1I cache accesses": 32502068, "L1I cache misses": 302739, "L1D cache misses": 480619, "L1D cache accesses": 34031072, "LL cache misses": 552815, "LL cache accesses": 567373, "L2D TLB accesses": 188248, "L2D TLB misses": 23165, "L2D cache misses": 308211, "L2D cache accesses": 1787647, "instructions per cycle": 1.5127792606149613, "branch miss rate": 0.016651620252381788, "ITLB miss rate": 0.0002491141690649327, "DTLB miss rate": 0.0004820387155978803, "L2D TLB miss rate": 0.12305575623645404, "L1I cache miss rate": 0.00931445346800702, "L1D cache miss rate": 0.014122946229845479, "L2D cache miss rate": 0.17241155552522394, "LL cache miss rate": 0.9743413944618443}
pytorch/output/altra_10_30_de2010_1000.output (new file, 168 lines)
@@ -0,0 +1,168 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3394985 queued and waiting for resources
|
||||||
|
srun: job 3394985 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
||||||
|
116056]),
|
||||||
|
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
||||||
|
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
||||||
|
16949.]), size=(24115, 24115), nnz=116056,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.6055, 0.8789, 0.0482, ..., 0.0736, 0.1316, 0.6744])
|
||||||
|
Matrix: de2010
|
||||||
|
Shape: torch.Size([24115, 24115])
|
||||||
|
NNZ: 116056
|
||||||
|
Density: 0.0001995689928120616
|
||||||
|
Time: 2.6956887245178223 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
||||||
|
|
||||||
|
48.96 msec task-clock:u # 0.008 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,285 page-faults:u # 67.090 K/sec
|
||||||
|
48,563,060 cycles:u # 0.992 GHz (59.76%)
|
||||||
|
73,465,190 instructions:u # 1.51 insn per cycle (78.23%)
|
||||||
|
<not supported> branches:u
|
||||||
|
369,314 branch-misses:u (98.16%)
|
||||||
|
31,769,641 L1-dcache-loads:u # 648.836 M/sec
|
||||||
|
479,594 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
30,338,929 L1-icache-loads:u # 619.616 M/sec
|
||||||
|
282,162 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
||||||
|
55,516,925 dTLB-loads:u # 1.134 G/sec (23.54%)
|
||||||
|
12,345 dTLB-load-misses:u # 0.02% of all dTLB cache accesses (3.47%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
6.017085179 seconds time elapsed
|
||||||
|
|
||||||
|
17.484355000 seconds user
|
||||||
|
28.678064000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
||||||
|
116056]),
|
||||||
|
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
||||||
|
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
||||||
|
16949.]), size=(24115, 24115), nnz=116056,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.2815, 0.8196, 0.3706, ..., 0.1328, 0.4062, 0.9113])
|
||||||
|
Matrix: de2010
|
||||||
|
Shape: torch.Size([24115, 24115])
|
||||||
|
NNZ: 116056
|
||||||
|
Density: 0.0001995689928120616
|
||||||
|
Time: 2.7908551692962646 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
||||||
|
|
||||||
|
326,361 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
19,599,354 BR_RETIRED:u
|
||||||
|
|
||||||
|
6.215591535 seconds time elapsed
|
||||||
|
|
||||||
|
18.097112000 seconds user
|
||||||
|
27.831633000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
||||||
|
116056]),
|
||||||
|
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
||||||
|
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
||||||
|
16949.]), size=(24115, 24115), nnz=116056,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.9002, 0.0843, 0.5558, ..., 0.3931, 0.8070, 0.7414])
|
||||||
|
Matrix: de2010
|
||||||
|
Shape: torch.Size([24115, 24115])
|
||||||
|
NNZ: 116056
|
||||||
|
Density: 0.0001995689928120616
|
||||||
|
Time: 2.819589376449585 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
||||||
|
|
||||||
|
26,666,488 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,643 ITLB_WALK:u
|
||||||
|
17,347 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
35,986,736 L1D_TLB:u
|
||||||
|
|
||||||
|
6.243883495 seconds time elapsed
|
||||||
|
|
||||||
|
17.783312000 seconds user
|
||||||
|
31.714619000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
||||||
|
116056]),
|
||||||
|
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
||||||
|
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
||||||
|
16949.]), size=(24115, 24115), nnz=116056,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.9109, 0.6392, 0.7899, ..., 0.0945, 0.3298, 0.6865])
|
||||||
|
Matrix: de2010
|
||||||
|
Shape: torch.Size([24115, 24115])
|
||||||
|
NNZ: 116056
|
||||||
|
Density: 0.0001995689928120616
|
||||||
|
Time: 2.747800827026367 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
||||||
|
|
||||||
|
32,502,068 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
302,739 L1I_CACHE_REFILL:u
|
||||||
|
480,619 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
34,031,072 L1D_CACHE:u
|
||||||
|
|
||||||
|
6.126767063 seconds time elapsed
|
||||||
|
|
||||||
|
17.702029000 seconds user
|
||||||
|
29.137072000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
|
||||||
|
116056]),
|
||||||
|
col_indices=tensor([ 250, 251, 757, ..., 23334, 23553, 24050]),
|
||||||
|
values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
|
||||||
|
16949.]), size=(24115, 24115), nnz=116056,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.7083, 0.6766, 0.7649, ..., 0.3027, 0.9885, 0.8086])
|
||||||
|
Matrix: de2010
|
||||||
|
Shape: torch.Size([24115, 24115])
|
||||||
|
NNZ: 116056
|
||||||
|
Density: 0.0001995689928120616
|
||||||
|
Time: 2.795116901397705 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':
|
||||||
|
|
||||||
|
552,815 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
567,373 LL_CACHE_RD:u
|
||||||
|
188,248 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
23,165 L2D_TLB_REFILL:u
|
||||||
|
308,211 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,787,647 L2D_CACHE:u
|
||||||
|
|
||||||
|
6.041792624 seconds time elapsed
|
||||||
|
|
||||||
|
17.791735000 seconds user
|
||||||
|
29.790006000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
pytorch/output/altra_10_30_email-Enron_1000.json (new file)
@@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [28.96, 27.92, 27.24, 23.0, 22.28, 22.28, 21.6, 20.8, 20.68, 20.76], "matrix": "email-Enron", "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 12.818164587020874, "power": [84.24, 82.72, 82.72, 72.0, 60.2, 51.88, 52.4, 59.36, 72.08, 83.88, 86.48, 84.28, 82.28, 81.12, 80.96, 80.96, 81.16], "power_after": [20.92, 20.92, 20.92, 20.92, 21.0, 20.96, 20.88, 20.84, 20.88, 20.68], "task clock (msec)": 48.76, "page faults": 3281, "cycles": 45495589, "instructions": 79104832, "branch mispredictions": 335574, "branches": 20121415, "ITLB accesses": 26011880, "ITLB misses": 5842, "DTLB misses": 16448, "DTLB accesses": 35000292, "L1I cache accesses": 32193112, "L1I cache misses": 310304, "L1D cache misses": 495806, "L1D cache accesses": 33829187, "LL cache misses": 546628, "LL cache accesses": 570044, "L2D TLB accesses": 196794, "L2D TLB misses": 24071, "L2D cache misses": 316028, "L2D cache accesses": 1836018, "instructions per cycle": 1.7387362981496954, "branch miss rate": 0.016677455338006797, "ITLB miss rate": 0.00022458968748125855, "DTLB miss rate": 0.000469938936509444, "L2D TLB miss rate": 0.1223157210077543, "L1I cache miss rate": 0.009638832058236556, "L1D cache miss rate": 0.014656160669779029, "L2D cache miss rate": 0.1721268527868463, "LL cache miss rate": 0.9589224691427328}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
-srun: job 3394152 queued and waiting for resources
+srun: job 3394986 queued and waiting for resources
-srun: job 3394152 has been allocated resources
+srun: job 3394986 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
-tensor([0.3626, 0.7532, 0.0782, ..., 0.6679, 0.4308, 0.6586])
+tensor([0.9906, 0.9401, 0.5661, ..., 0.4491, 0.7550, 0.2452])

Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
-Time: 1.3745801448822021 seconds
+Time: 12.80848503112793 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':

-60.43 msec task-clock:u # 0.012 CPUs utilized
+48.76 msec task-clock:u # 0.003 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
-3,319 page-faults:u # 54.926 K/sec
+3,281 page-faults:u # 67.289 K/sec
-66,114,448 cycles:u # 1.094 GHz (58.10%)
+45,495,589 cycles:u # 0.933 GHz (57.79%)
-90,786,829 instructions:u # 1.37 insn per cycle (92.25%)
+79,104,832 instructions:u # 1.74 insn per cycle (81.70%)
<not supported> branches:u
-372,381 branch-misses:u
+372,161 branch-misses:u
-32,997,410 L1-dcache-loads:u # 546.070 M/sec
+32,089,348 L1-dcache-loads:u # 658.113 M/sec
-470,216 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
+467,576 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
-31,485,339 L1-icache-loads:u # 521.047 M/sec
+30,688,995 L1-icache-loads:u # 629.393 M/sec
-294,395 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
+289,698 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
-31,376,646 dTLB-loads:u # 519.248 M/sec (10.03%)
+47,006,355 dTLB-loads:u # 964.042 M/sec (22.12%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

-4.904488673 seconds time elapsed
+16.331438990 seconds time elapsed

-22.874521000 seconds user
+76.869141000 seconds user
-139.276239000 seconds sys
+999.179638000 seconds sys

@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
-tensor([0.2040, 0.8252, 0.0215, ..., 0.2921, 0.9143, 0.8728])
+tensor([0.7565, 0.5273, 0.1038, ..., 0.9432, 0.1309, 0.5542])

Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
-Time: 1.3087654113769531 seconds
+Time: 26.91536283493042 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':

-341,625 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
+335,574 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
-20,129,354 BR_RETIRED:u
+20,121,415 BR_RETIRED:u

-4.644873434 seconds time elapsed
+30.559245388 seconds time elapsed

-22.729927000 seconds user
+126.799314000 seconds user
-132.278582000 seconds sys
+2081.777635000 seconds sys

@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
-tensor([0.6154, 0.6641, 0.3794, ..., 0.9736, 0.0619, 0.4790])
+tensor([0.2321, 0.0702, 0.2538, ..., 0.6254, 0.6308, 0.5317])

Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
-Time: 1.2701547145843506 seconds
+Time: 14.841739892959595 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':

-27,441,303 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
+26,011,880 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
-6,807 ITLB_WALK:u
+5,842 ITLB_WALK:u
-20,551 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
+16,448 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
-36,867,114 L1D_TLB:u
+35,000,292 L1D_TLB:u

-4.861510767 seconds time elapsed
+18.443612527 seconds time elapsed

-22.111354000 seconds user
+80.694133000 seconds user
-132.431608000 seconds sys
+1159.740575000 seconds sys

@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
-tensor([0.4201, 0.4134, 0.8169, ..., 0.6631, 0.0087, 0.8439])
+tensor([0.7091, 0.9447, 0.0959, ..., 0.0090, 0.7012, 0.6025])

Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
-Time: 1.1176586151123047 seconds
+Time: 10.863199234008789 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':

-31,744,243 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
+32,193,112 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
-271,027 L1I_CACHE_REFILL:u
+310,304 L1I_CACHE_REFILL:u
-464,135 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
+495,806 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
-33,441,141 L1D_CACHE:u
+33,829,187 L1D_CACHE:u

-4.693803969 seconds time elapsed
+14.426841778 seconds time elapsed

-21.724904000 seconds user
+70.728541000 seconds user
-119.873018000 seconds sys
+853.184507000 seconds sys

@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 1, 71, ..., 367660, 367661,
col_indices=tensor([ 1, 0, 2, ..., 36690, 36689, 8203]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36692, 36692),
nnz=367662, layout=torch.sparse_csr)
-tensor([0.1285, 0.3989, 0.3903, ..., 0.7892, 0.2737, 0.2659])
+tensor([0.8267, 0.6185, 0.8015, ..., 0.8593, 0.4881, 0.8599])

Matrix: email-Enron
Shape: torch.Size([36692, 36692])
NNZ: 367662
Density: 0.0002730901120626302
-Time: 1.196892261505127 seconds
+Time: 12.076026678085327 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/email-Enron.mtx 1000':

-539,935 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
+546,628 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
-552,519 LL_CACHE_RD:u
+570,044 LL_CACHE_RD:u
-188,291 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
+196,794 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
-24,177 L2D_TLB_REFILL:u
+24,071 L2D_TLB_REFILL:u
-301,281 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
+316,028 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
-1,737,575 L2D_CACHE:u
+1,836,018 L2D_CACHE:u

-4.741030347 seconds time elapsed
+15.581045199 seconds time elapsed

-23.793930000 seconds user
+77.345591000 seconds user
-125.634838000 seconds sys
+942.987439000 seconds sys

1
pytorch/output/altra_10_30_p2p-Gnutella04_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.36, 16.8, 16.76, 16.6, 16.48, 16.44, 16.28, 16.28, 16.16], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 1.0642461776733398, "power": [26.6, 27.52, 27.52, 31.16, 28.48], "power_after": [16.28, 16.4, 16.32, 16.12, 16.24, 16.0, 16.0, 16.24, 16.52, 17.04], "task clock (msec)": 50.59, "page faults": 3303, "cycles": 51318459, "instructions": 74705078, "branch mispredictions": 328853, "branches": 19620312, "ITLB accesses": 27939682, "ITLB misses": 5470, "DTLB misses": 17679, "DTLB accesses": 37425602, "L1I cache accesses": 30276633, "L1I cache misses": 291467, "L1D cache misses": 479061, "L1D cache accesses": 31689326, "LL cache misses": 529426, "LL cache accesses": 550033, "L2D TLB accesses": 171913, "L2D TLB misses": 20624, "L2D cache misses": 296662, "L2D cache accesses": 1714211, "instructions per cycle": 1.455715535028049, "branch miss rate": 0.01676084457780284, "ITLB miss rate": 0.0001957788925443031, "DTLB miss rate": 0.00047237717111404113, "L2D TLB miss rate": 0.11996765805959991, "L1I cache miss rate": 0.009626797008769106, "L1D cache miss rate": 0.015117424712661923, "L2D cache miss rate": 0.17306037588138215, "LL cache miss rate": 0.9625349751742168}
@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394153 queued and waiting for resources
|
srun: job 3394992 queued and waiting for resources
|
||||||
srun: job 3394153 has been allocated resources
|
srun: job 3394992 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
tensor([0.6982, 0.7263, 0.0064, ..., 0.9256, 0.7249, 0.5065])
|
tensor([0.1181, 0.8387, 0.0554, ..., 0.8107, 0.4393, 0.9489])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
Shape: torch.Size([10879, 10879])
|
Shape: torch.Size([10879, 10879])
|
||||||
NNZ: 39994
|
NNZ: 39994
|
||||||
Density: 0.0003379223282393842
|
Density: 0.0003379223282393842
|
||||||
Time: 0.18009519577026367 seconds
|
Time: 1.061662197113037 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
67.56 msec task-clock:u # 0.019 CPUs utilized
|
50.59 msec task-clock:u # 0.012 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,829 page-faults:u # 56.674 K/sec
|
3,303 page-faults:u # 65.291 K/sec
|
||||||
47,862,000 cycles:u # 0.708 GHz (59.24%)
|
51,318,459 cycles:u # 1.014 GHz (59.34%)
|
||||||
84,392,375 instructions:u # 1.76 insn per cycle (87.61%)
|
74,705,078 instructions:u # 1.46 insn per cycle (83.02%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
368,432 branch-misses:u
|
366,825 branch-misses:u
|
||||||
32,507,448 L1-dcache-loads:u # 481.147 M/sec
|
31,809,194 L1-dcache-loads:u # 628.781 M/sec
|
||||||
481,389 L1-dcache-load-misses:u # 1.48% of all L1-dcache accesses
|
466,198 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
31,030,656 L1-icache-loads:u # 459.289 M/sec
|
30,390,161 L1-icache-loads:u # 600.731 M/sec
|
||||||
308,582 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
|
296,270 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
|
||||||
34,988,046 dTLB-loads:u # 517.863 M/sec (20.00%)
|
61,518,375 dTLB-loads:u # 1.216 G/sec (17.94%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.538329547 seconds time elapsed
|
4.302241563 seconds time elapsed
|
||||||
|
|
||||||
14.667604000 seconds user
|
16.122298000 seconds user
|
||||||
29.534487000 seconds sys
|
29.141140000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
tensor([0.4946, 0.3509, 0.5239, ..., 0.4520, 0.4206, 0.8181])
|
tensor([0.7249, 0.8723, 0.3843, ..., 0.2264, 0.4891, 0.9107])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
Shape: torch.Size([10879, 10879])
|
Shape: torch.Size([10879, 10879])
|
||||||
NNZ: 39994
|
NNZ: 39994
|
||||||
Density: 0.0003379223282393842
|
Density: 0.0003379223282393842
|
||||||
Time: 0.18875432014465332 seconds
|
Time: 1.0079431533813477 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
331,622 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
328,853 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,800,140 BR_RETIRED:u
|
19,620,312 BR_RETIRED:u
|
||||||
|
|
||||||
3.556031790 seconds time elapsed
|
4.241400567 seconds time elapsed
|
||||||
|
|
||||||
14.799719000 seconds user
|
15.325937000 seconds user
|
||||||
27.876987000 seconds sys
|
28.223386000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
tensor([0.2184, 0.4999, 0.9567, ..., 0.8794, 0.8213, 0.8713])
|
tensor([0.7608, 0.2449, 0.5322, ..., 0.5547, 0.8659, 0.8437])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
Shape: torch.Size([10879, 10879])
|
Shape: torch.Size([10879, 10879])
|
||||||
NNZ: 39994
|
NNZ: 39994
|
||||||
Density: 0.0003379223282393842
|
Density: 0.0003379223282393842
|
||||||
Time: 0.1066896915435791 seconds
|
Time: 1.1017234325408936 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
25,905,045 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,939,682 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,746 ITLB_WALK:u
|
5,470 ITLB_WALK:u
|
||||||
17,547 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
17,679 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
35,220,079 L1D_TLB:u
|
37,425,602 L1D_TLB:u
|
||||||
|
|
||||||
3.505367779 seconds time elapsed
|
4.296820500 seconds time elapsed
|
||||||
|
|
||||||
14.557493000 seconds user
|
15.875162000 seconds user
|
||||||
29.642958000 seconds sys
|
28.803412000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
tensor([0.2180, 0.0881, 0.5532, ..., 0.4961, 0.0093, 0.4929])
|
tensor([0.9980, 0.9991, 0.6749, ..., 0.4225, 0.7297, 0.3717])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
Shape: torch.Size([10879, 10879])
|
Shape: torch.Size([10879, 10879])
|
||||||
NNZ: 39994
|
NNZ: 39994
|
||||||
Density: 0.0003379223282393842
|
Density: 0.0003379223282393842
|
||||||
Time: 0.12433028221130371 seconds
|
Time: 1.0812580585479736 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
30,359,576 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
30,276,633 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
283,204 L1I_CACHE_REFILL:u
|
291,467 L1I_CACHE_REFILL:u
|
||||||
465,520 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
479,061 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
31,843,274 L1D_CACHE:u
|
31,689,326 L1D_CACHE:u
|
||||||
|
|
||||||
3.565310130 seconds time elapsed
|
4.500137840 seconds time elapsed
|
||||||
|
|
||||||
14.913239000 seconds user
|
15.794710000 seconds user
|
||||||
28.125605000 seconds sys
|
27.773851000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
nnz=39994, layout=torch.sparse_csr)
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
tensor([0.6394, 0.6808, 0.7957, ..., 0.1529, 0.0561, 0.7834])
|
tensor([0.8707, 0.5871, 0.5970, ..., 0.8826, 0.4673, 0.4994])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
Shape: torch.Size([10879, 10879])
|
Shape: torch.Size([10879, 10879])
|
||||||
NNZ: 39994
|
NNZ: 39994
|
||||||
Density: 0.0003379223282393842
|
Density: 0.0003379223282393842
|
||||||
Time: 0.13401126861572266 seconds
|
Time: 0.9900743961334229 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
560,542 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
529,426 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
575,610 LL_CACHE_RD:u
|
550,033 LL_CACHE_RD:u
|
||||||
173,643 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
171,913 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
21,499 L2D_TLB_REFILL:u
|
20,624 L2D_TLB_REFILL:u
|
||||||
313,335 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
296,662 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,741,621 L2D_CACHE:u
|
1,714,211 L2D_CACHE:u
|
||||||
|
|
||||||
3.503362704 seconds time elapsed
|
4.284402033 seconds time elapsed
|
||||||
|
|
||||||
15.287949000 seconds user
|
15.584671000 seconds user
|
||||||
28.752303000 seconds sys
|
27.523772000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output/altra_10_30_p2p-Gnutella24_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.12, 16.12, 16.12, 16.36, 16.56, 16.52, 17.04, 16.76, 16.64, 16.92], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.6947758197784424, "power": [25.2, 25.2, 26.6, 26.28, 26.48], "power_after": [16.4, 16.6, 16.6, 16.64, 16.8, 16.48, 16.44, 16.16, 16.12, 16.2], "task clock (msec)": 66.78, "page faults": 3520, "cycles": 28858055, "instructions": 64429843, "branch mispredictions": 331167, "branches": 19518210, "ITLB accesses": 26964483, "ITLB misses": 4666, "DTLB misses": 14001, "DTLB accesses": 36143905, "L1I cache accesses": 31901160, "L1I cache misses": 302516, "L1D cache misses": 475663, "L1D cache accesses": 33507563, "LL cache misses": 558546, "LL cache accesses": 578676, "L2D TLB accesses": 187549, "L2D TLB misses": 22990, "L2D cache misses": 321826, "L2D cache accesses": 1816571, "instructions per cycle": 2.2326467601506756, "branch miss rate": 0.016967078435983628, "ITLB miss rate": 0.00017304244253449992, "DTLB miss rate": 0.00038736821602425086, "L2D TLB miss rate": 0.12258129875392564, "L1I cache miss rate": 0.009482915354802146, "L1D cache miss rate": 0.01419569068630864, "L2D cache miss rate": 0.1771612560147663, "LL cache miss rate": 0.9652136947099932}
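The "% density" field in these records is consistent with nnz divided by the total number of entries in the square matrix. A quick illustrative check (not project code) for the p2p-Gnutella24 record above:

shape, nnz = (26518, 26518), 65369
density = nnz / (shape[0] * shape[1])
print(density)  # 9.295875717624285e-05, matching the "% density" value stored above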
158
pytorch/output/altra_10_30_p2p-Gnutella24_1000.output
Normal file
@ -0,0 +1,158 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394989 queued and waiting for resources
srun: job 3394989 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2470, 0.4231, 0.1036, ..., 0.7937, 0.3241, 0.7116])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.6974337100982666 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':

66.78 msec task-clock:u # 0.013 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,520 page-faults:u # 52.713 K/sec
28,858,055 cycles:u # 0.432 GHz (26.93%)
64,429,843 instructions:u # 2.23 insn per cycle (67.63%)
<not supported> branches:u
296,857 branch-misses:u (84.08%)
33,646,348 L1-dcache-loads:u # 503.866 M/sec
493,998 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,070,415 L1-icache-loads:u # 480.266 M/sec
305,993 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
46,903,081 dTLB-loads:u # 702.391 M/sec (46.16%)
114,272 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (32.45%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

5.106933083 seconds time elapsed

16.391614000 seconds user
28.913912000 seconds sys


/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.2307, 0.4662, 0.3789, ..., 0.0144, 0.6300, 0.7829])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.6379659175872803 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':

331,167 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,518,210 BR_RETIRED:u

5.017894585 seconds time elapsed

16.446505000 seconds user
31.004338000 seconds sys


/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.7309, 0.0314, 0.4424, ..., 0.7434, 0.2124, 0.1432])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.7232718467712402 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':

26,964,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
4,666 ITLB_WALK:u
14,001 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,143,905 L1D_TLB:u

5.053286721 seconds time elapsed

16.447780000 seconds user
28.580949000 seconds sys


/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.5695, 0.5025, 0.1946, ..., 0.7428, 0.9634, 0.4327])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.644775629043579 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':

31,901,160 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,516 L1I_CACHE_REFILL:u
475,663 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,507,563 L1D_CACHE:u

4.978338941 seconds time elapsed

16.455298000 seconds user
30.249373000 seconds sys


/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
nnz=65369, layout=torch.sparse_csr)
tensor([0.0969, 0.1950, 0.8456, ..., 0.3315, 0.1512, 0.3182])
Matrix: p2p-Gnutella24
Shape: torch.Size([26518, 26518])
NNZ: 65369
Density: 9.295875717624285e-05
Time: 1.752812385559082 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':

558,546 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
578,676 LL_CACHE_RD:u
187,549 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
22,990 L2D_TLB_REFILL:u
321,826 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,816,571 L2D_CACHE:u

4.952297819 seconds time elapsed

16.648691000 seconds user
27.005944000 seconds sys

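The runs above show what spmv.py prints per invocation: the CSR tensor, a random dense vector, the matrix name, shape, NNZ, density, and the wall time of the multiplication loop for the requested iteration count. A minimal sketch of such a benchmark, assuming scipy.io.mmread for the .mtx file and a timed y = A @ x loop; names and structure here are guesses for illustration, not the repository's actual spmv.py:

import sys, time
import torch
from scipy.io import mmread  # assumption: the matrices/ files are Matrix Market .mtx

path, iterations = sys.argv[1], int(sys.argv[2])
coo = mmread(path)
A = torch.sparse_coo_tensor(
    torch.tensor([coo.row, coo.col]), torch.tensor(coo.data), coo.shape
).to_sparse_csr().type(torch.float)   # same conversion the UserWarning points at
x = torch.rand(A.shape[1])            # random dense vector, as printed in the logs
print(A)
print(x)
start = time.time()
for _ in range(iterations):
    y = A @ x                         # sparse CSR matrix-vector product
elapsed = time.time() - start
print(f'Matrix: {path}')
print(f'Shape: {A.shape}')
print(f'NNZ: {A.values().shape[0]}')
print(f'Density: {A.values().shape[0] / (A.shape[0] * A.shape[1])}')
print(f'Time: {elapsed} seconds')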
1
pytorch/output/altra_10_30_p2p-Gnutella25_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.0, 16.4, 16.4, 16.28, 16.48, 16.6, 16.48, 16.56, 16.88, 16.92], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.4688231945037842, "power": [23.04, 29.0, 30.24, 27.96, 28.04], "power_after": [16.52, 16.68, 16.88, 17.12, 17.08, 17.04, 16.84, 16.72, 16.84, 16.84], "task clock (msec)": 48.61, "page faults": 3308, "cycles": 60072179, "instructions": 70991785, "branch mispredictions": 331765, "branches": 19906014, "ITLB accesses": 28194337, "ITLB misses": 5083, "DTLB misses": 17916, "DTLB accesses": 37944713, "L1I cache accesses": 31162212, "L1I cache misses": 270684, "L1D cache misses": 465467, "L1D cache accesses": 32857500, "LL cache misses": 541118, "LL cache accesses": 564199, "L2D TLB accesses": 194022, "L2D TLB misses": 23932, "L2D cache misses": 311476, "L2D cache accesses": 1783574, "instructions per cycle": 1.1817747613250387, "branch miss rate": 0.016666571218125335, "ITLB miss rate": 0.00018028443087702328, "DTLB miss rate": 0.00047216064066685654, "L2D TLB miss rate": 0.12334683695663379, "L1I cache miss rate": 0.008686289663904475, "L1D cache miss rate": 0.014166232975728525, "L2D cache miss rate": 0.17463587157022922, "LL cache miss rate": 0.9590906754531646}
@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394140 queued and waiting for resources
|
srun: job 3394994 queued and waiting for resources
|
||||||
srun: job 3394140 has been allocated resources
|
srun: job 3394994 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
tensor([0.8199, 0.9849, 0.4642, ..., 0.7594, 0.3568, 0.4020])
|
tensor([0.1465, 0.4354, 0.7334, ..., 0.2837, 0.5913, 0.9525])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
Shape: torch.Size([22687, 22687])
|
Shape: torch.Size([22687, 22687])
|
||||||
NNZ: 54705
|
NNZ: 54705
|
||||||
Density: 0.00010628522108964806
|
Density: 0.00010628522108964806
|
||||||
Time: 0.19272208213806152 seconds
|
Time: 1.4786670207977295 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
64.71 msec task-clock:u # 0.018 CPUs utilized
|
48.61 msec task-clock:u # 0.010 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,319 page-faults:u # 51.288 K/sec
|
3,308 page-faults:u # 68.054 K/sec
|
||||||
57,611,295 cycles:u # 0.890 GHz (39.00%)
|
60,072,179 cycles:u # 1.236 GHz (53.26%)
|
||||||
83,148,228 instructions:u # 1.44 insn per cycle (82.73%)
|
70,991,785 instructions:u # 1.18 insn per cycle (71.54%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
375,111 branch-misses:u
|
371,197 branch-misses:u
|
||||||
32,759,228 L1-dcache-loads:u # 506.221 M/sec
|
32,964,378 L1-dcache-loads:u # 678.165 M/sec
|
||||||
475,086 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
|
465,448 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
31,366,158 L1-icache-loads:u # 484.694 M/sec
|
31,435,424 L1-icache-loads:u # 646.710 M/sec
|
||||||
297,293 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
293,561 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
||||||
35,611,781 dTLB-loads:u # 550.301 M/sec (25.73%)
|
56,761,270 dTLB-loads:u # 1.168 G/sec (30.54%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.578384817 seconds time elapsed
|
4.700046411 seconds time elapsed
|
||||||
|
|
||||||
14.435258000 seconds user
|
16.235801000 seconds user
|
||||||
27.700836000 seconds sys
|
28.396327000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
tensor([0.0069, 0.9904, 0.5316, ..., 0.2082, 0.4858, 0.4936])
|
tensor([0.7780, 0.3388, 0.1540, ..., 0.2989, 0.3682, 0.9160])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
Shape: torch.Size([22687, 22687])
|
Shape: torch.Size([22687, 22687])
|
||||||
NNZ: 54705
|
NNZ: 54705
|
||||||
Density: 0.00010628522108964806
|
Density: 0.00010628522108964806
|
||||||
Time: 0.1423017978668213 seconds
|
Time: 1.4235138893127441 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
318,386 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
331,765 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,233,431 BR_RETIRED:u
|
19,906,014 BR_RETIRED:u
|
||||||
|
|
||||||
3.555753224 seconds time elapsed
|
4.757340585 seconds time elapsed
|
||||||
|
|
||||||
14.642518000 seconds user
|
16.412311000 seconds user
|
||||||
30.112207000 seconds sys
|
29.238029000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
tensor([0.2250, 0.5676, 0.3018, ..., 0.5431, 0.7314, 0.5593])
|
tensor([0.4944, 0.8057, 0.8211, ..., 0.5137, 0.3388, 0.6316])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
Shape: torch.Size([22687, 22687])
|
Shape: torch.Size([22687, 22687])
|
||||||
NNZ: 54705
|
NNZ: 54705
|
||||||
Density: 0.00010628522108964806
|
Density: 0.00010628522108964806
|
||||||
Time: 0.14638042449951172 seconds
|
Time: 1.4664146900177002 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
27,039,805 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
28,194,337 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,375 ITLB_WALK:u
|
5,083 ITLB_WALK:u
|
||||||
17,290 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
17,916 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,688,544 L1D_TLB:u
|
37,944,713 L1D_TLB:u
|
||||||
|
|
||||||
3.566915241 seconds time elapsed
|
4.844329421 seconds time elapsed
|
||||||
|
|
||||||
16.116565000 seconds user
|
16.081022000 seconds user
|
||||||
28.752519000 seconds sys
|
28.021902000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
tensor([0.0220, 0.7494, 0.7913, ..., 0.8924, 0.8542, 0.5491])
|
tensor([0.0963, 0.5806, 0.0397, ..., 0.1604, 0.5700, 0.8103])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
Shape: torch.Size([22687, 22687])
|
Shape: torch.Size([22687, 22687])
|
||||||
NNZ: 54705
|
NNZ: 54705
|
||||||
Density: 0.00010628522108964806
|
Density: 0.00010628522108964806
|
||||||
Time: 0.17815685272216797 seconds
|
Time: 1.3717434406280518 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
32,508,072 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
31,162,212 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
297,568 L1I_CACHE_REFILL:u
|
270,684 L1I_CACHE_REFILL:u
|
||||||
477,654 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
465,467 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
34,044,579 L1D_CACHE:u
|
32,857,500 L1D_CACHE:u
|
||||||
|
|
||||||
3.435706033 seconds time elapsed
|
4.598461782 seconds time elapsed
|
||||||
|
|
||||||
14.690285000 seconds user
|
15.609727000 seconds user
|
||||||
28.763423000 seconds sys
|
30.606837000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
nnz=54705, layout=torch.sparse_csr)
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
tensor([0.6277, 0.4955, 0.9335, ..., 0.1476, 0.2079, 0.0931])
|
tensor([0.9137, 0.5009, 0.7507, ..., 0.6623, 0.8760, 0.2991])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
Shape: torch.Size([22687, 22687])
|
Shape: torch.Size([22687, 22687])
|
||||||
NNZ: 54705
|
NNZ: 54705
|
||||||
Density: 0.00010628522108964806
|
Density: 0.00010628522108964806
|
||||||
Time: 0.14432048797607422 seconds
|
Time: 1.4291880130767822 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
549,474 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
541,118 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
561,939 LL_CACHE_RD:u
|
564,199 LL_CACHE_RD:u
|
||||||
185,622 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
194,022 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
23,295 L2D_TLB_REFILL:u
|
23,932 L2D_TLB_REFILL:u
|
||||||
305,878 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
311,476 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,763,089 L2D_CACHE:u
|
1,783,574 L2D_CACHE:u
|
||||||
|
|
||||||
3.538826979 seconds time elapsed
|
4.792239951 seconds time elapsed
|
||||||
|
|
||||||
15.006109000 seconds user
|
15.902307000 seconds user
|
||||||
29.644298000 seconds sys
|
28.747620000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output/altra_10_30_p2p-Gnutella30_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.96, 22.0, 22.16, 22.16, 21.84, 22.08, 22.4, 22.08, 22.0, 21.48], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 3.504030466079712, "power": [54.2, 64.16, 67.64, 67.64, 65.92, 58.96, 59.92], "power_after": [20.72, 20.76, 20.76, 20.8, 20.8, 20.88, 20.92, 21.04, 21.04, 21.12], "task clock (msec)": 56.52, "page faults": 3194, "cycles": 58074747, "instructions": 90036443, "branch mispredictions": 327895, "branches": 20553601, "ITLB accesses": 26120611, "ITLB misses": 7531, "DTLB misses": 19097, "DTLB accesses": 35744928, "L1I cache accesses": 31819981, "L1I cache misses": 284493, "L1D cache misses": 486709, "L1D cache accesses": 33545755, "LL cache misses": 544742, "LL cache accesses": 558323, "L2D TLB accesses": 190574, "L2D TLB misses": 23746, "L2D cache misses": 305844, "L2D cache accesses": 1736964, "instructions per cycle": 1.5503544595725918, "branch miss rate": 0.015953165579111903, "ITLB miss rate": 0.00028831637973552763, "DTLB miss rate": 0.0005342576155140109, "L2D TLB miss rate": 0.12460251660772194, "L1I cache miss rate": 0.008940703012990485, "L1D cache miss rate": 0.014508810429218243, "L2D cache miss rate": 0.17607964241055082, "LL cache miss rate": 0.9756753707083534}
158
pytorch/output/altra_10_30_p2p-Gnutella30_1000.output
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3394991 queued and waiting for resources
|
||||||
|
srun: job 3394991 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
||||||
|
nnz=88328, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3046, 0.0725, 0.4580, ..., 0.0593, 0.5121, 0.2116])
|
||||||
|
Matrix: p2p-Gnutella30
|
||||||
|
Shape: torch.Size([36682, 36682])
|
||||||
|
NNZ: 88328
|
||||||
|
Density: 6.564359899804003e-05
|
||||||
|
Time: 3.6646029949188232 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
||||||
|
|
||||||
|
56.52 msec task-clock:u # 0.008 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,194 page-faults:u # 56.515 K/sec
|
||||||
|
58,074,747 cycles:u # 1.028 GHz (51.20%)
|
||||||
|
90,036,443 instructions:u # 1.55 insn per cycle (89.06%)
|
||||||
|
<not supported> branches:u
|
||||||
|
363,262 branch-misses:u
|
||||||
|
33,111,438 L1-dcache-loads:u # 585.875 M/sec
|
||||||
|
454,665 L1-dcache-load-misses:u # 1.37% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,646,314 L1-icache-loads:u # 559.951 M/sec
|
||||||
|
281,443 L1-icache-load-misses:u # 0.89% of all L1-icache accesses
|
||||||
|
43,495,524 dTLB-loads:u # 769.611 M/sec (11.87%)
|
||||||
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
7.033463989 seconds time elapsed
|
||||||
|
|
||||||
|
34.670765000 seconds user
|
||||||
|
307.031553000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
       col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
tensor([0.9700, 0.1728, 0.2199, ..., 0.6107, 0.3357, 0.2661])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 2.3380045890808105 seconds

 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

327,895 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,553,601 BR_RETIRED:u

5.895917276 seconds time elapsed

31.121063000 seconds user
208.127447000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
       col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
tensor([0.9533, 0.7568, 0.8141, ..., 0.8395, 0.5617, 0.7830])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 4.476518869400024 seconds

 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

26,120,611 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
7,531 ITLB_WALK:u
19,097 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
35,744,928 L1D_TLB:u

8.109622410 seconds time elapsed

38.467161000 seconds user
370.437915000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
       col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
tensor([0.6886, 0.7814, 0.9957, ..., 0.8460, 0.1015, 0.8097])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 2.856834888458252 seconds

 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

31,819,981 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
284,493 L1I_CACHE_REFILL:u
486,709 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
33,545,755 L1D_CACHE:u

6.374371632 seconds time elapsed

30.817943000 seconds user
247.363843000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
       col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
       values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
       nnz=88328, layout=torch.sparse_csr)
tensor([0.8464, 0.0437, 0.1230, ..., 0.6221, 0.9268, 0.5436])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 4.838747978210449 seconds

 Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

544,742 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
558,323 LL_CACHE_RD:u
190,574 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
23,746 L2D_TLB_REFILL:u
305,844 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,736,964 L2D_CACHE:u

8.386896120 seconds time elapsed

39.861141000 seconds user
395.959334000 seconds sys

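Reviewer's aside (not part of the committed output): the derived figures that perf prints above are simple ratios, so they can be sanity-checked by hand. "CPUs utilized" is task-clock divided by wall-clock time, and the cache-miss percentages are misses over accesses. A minimal sketch for the run with the full counter set above, with the numbers copied straight from that block:

# Editorial sketch only: reproduce two of perf's derived annotations.
task_clock_s = 56.52e-3          # "56.52 msec task-clock:u"
elapsed_s = 7.033463989          # "seconds time elapsed"
print(f'{task_clock_s / elapsed_s:.3f} CPUs utilized')                   # ~0.008

l1d_misses, l1d_loads = 454_665, 33_111_438
print(f'{100 * l1d_misses / l1d_loads:.2f}% of all L1-dcache accesses')  # ~1.37%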
pytorch/output/altra_10_30_ri2010_1000.json (new file, 1 line)
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.2, 31.56, 31.56, 30.84, 24.52, 23.2, 21.32, 20.76, 20.84, 20.84], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 3.077709913253784, "power": [27.76, 28.28, 28.44, 28.28, 25.16, 30.44, 30.6], "power_after": [21.08, 20.88, 20.68, 20.68, 20.6, 20.56, 20.68, 20.8, 20.96, 21.24], "task clock (msec)": 64.49, "page faults": 3473, "cycles": 42783607, "instructions": 84598454, "branch mispredictions": 331326, "branches": 20438455, "ITLB accesses": 26869742, "ITLB misses": 6302, "DTLB misses": 14926, "DTLB accesses": 36876841, "L1I cache accesses": 31664385, "L1I cache misses": 301678, "L1D cache misses": 493536, "L1D cache accesses": 33219437, "LL cache misses": 552180, "LL cache accesses": 564990, "L2D TLB accesses": 167824, "L2D TLB misses": 19594, "L2D cache misses": 304114, "L2D cache accesses": 1716370, "instructions per cycle": 1.977356747877756, "branch miss rate": 0.01621091222404042, "ITLB miss rate": 0.00023453890997539165, "DTLB miss rate": 0.00040475267390718204, "L2D TLB miss rate": 0.11675326532557918, "L1I cache miss rate": 0.009527360155581737, "L1D cache miss rate": 0.014856844202386693, "L2D cache miss rate": 0.17718440662561102, "LL cache miss rate": 0.9773270323368555}
pytorch/output/altra_10_30_ri2010_1000.output (new file, 163 lines)
@ -0,0 +1,163 @@
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3394984 queued and waiting for resources
|
||||||
|
srun: job 3394984 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
||||||
|
125750]),
|
||||||
|
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
||||||
|
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
||||||
|
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
||||||
|
tensor([0.5906, 0.9651, 0.2033, ..., 0.2175, 0.4484, 0.0412])
|
||||||
|
Matrix: ri2010
|
||||||
|
Shape: torch.Size([25181, 25181])
|
||||||
|
NNZ: 125750
|
||||||
|
Density: 0.00019831796057928155
|
||||||
|
Time: 3.107008934020996 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
||||||
|
|
||||||
|
64.49 msec task-clock:u # 0.010 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,473 page-faults:u # 53.852 K/sec
|
||||||
|
42,783,607 cycles:u # 0.663 GHz (37.27%)
|
||||||
|
84,598,454 instructions:u # 1.98 insn per cycle (73.53%)
|
||||||
|
<not supported> branches:u
|
||||||
|
353,558 branch-misses:u (89.57%)
|
||||||
|
33,192,964 L1-dcache-loads:u # 514.689 M/sec
|
||||||
|
466,217 L1-dcache-load-misses:u # 1.40% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,727,502 L1-icache-loads:u # 491.965 M/sec
|
||||||
|
292,570 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
||||||
|
38,623,737 dTLB-loads:u # 598.898 M/sec (34.88%)
|
||||||
|
124,174 dTLB-load-misses:u # 0.32% of all dTLB cache accesses (14.74%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
6.612563197 seconds time elapsed
|
||||||
|
|
||||||
|
18.114584000 seconds user
|
||||||
|
29.808542000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
||||||
|
125750]),
|
||||||
|
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
||||||
|
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
||||||
|
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
||||||
|
tensor([0.6092, 0.5511, 0.6052, ..., 0.8002, 0.0295, 0.2972])
|
||||||
|
Matrix: ri2010
|
||||||
|
Shape: torch.Size([25181, 25181])
|
||||||
|
NNZ: 125750
|
||||||
|
Density: 0.00019831796057928155
|
||||||
|
Time: 2.9385879039764404 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
||||||
|
|
||||||
|
331,326 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
20,438,455 BR_RETIRED:u
|
||||||
|
|
||||||
|
6.446731410 seconds time elapsed
|
||||||
|
|
||||||
|
17.939571000 seconds user
|
||||||
|
33.272929000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
||||||
|
125750]),
|
||||||
|
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
||||||
|
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
||||||
|
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3348, 0.2974, 0.2569, ..., 0.2397, 0.1965, 0.5651])
|
||||||
|
Matrix: ri2010
|
||||||
|
Shape: torch.Size([25181, 25181])
|
||||||
|
NNZ: 125750
|
||||||
|
Density: 0.00019831796057928155
|
||||||
|
Time: 2.972891330718994 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
||||||
|
|
||||||
|
26,869,742 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,302 ITLB_WALK:u
|
||||||
|
14,926 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
36,876,841 L1D_TLB:u
|
||||||
|
|
||||||
|
6.376775396 seconds time elapsed
|
||||||
|
|
||||||
|
17.836418000 seconds user
|
||||||
|
29.830135000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
||||||
|
125750]),
|
||||||
|
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
||||||
|
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
||||||
|
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
||||||
|
tensor([0.7889, 0.7395, 0.6553, ..., 0.3938, 0.2478, 0.7923])
|
||||||
|
Matrix: ri2010
|
||||||
|
Shape: torch.Size([25181, 25181])
|
||||||
|
NNZ: 125750
|
||||||
|
Density: 0.00019831796057928155
|
||||||
|
Time: 2.9658284187316895 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
||||||
|
|
||||||
|
31,664,385 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
301,678 L1I_CACHE_REFILL:u
|
||||||
|
493,536 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
33,219,437 L1D_CACHE:u
|
||||||
|
|
||||||
|
6.559158078 seconds time elapsed
|
||||||
|
|
||||||
|
19.008146000 seconds user
|
||||||
|
38.233666000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
|
||||||
|
125750]),
|
||||||
|
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
|
||||||
|
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
|
||||||
|
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1256, 0.1417, 0.9800, ..., 0.2509, 0.8121, 0.6210])
|
||||||
|
Matrix: ri2010
|
||||||
|
Shape: torch.Size([25181, 25181])
|
||||||
|
NNZ: 125750
|
||||||
|
Density: 0.00019831796057928155
|
||||||
|
Time: 2.9228267669677734 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':
|
||||||
|
|
||||||
|
552,180 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
564,990 LL_CACHE_RD:u
|
||||||
|
167,824 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
19,594 L2D_TLB_REFILL:u
|
||||||
|
304,114 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,716,370 L2D_CACHE:u
|
||||||
|
|
||||||
|
6.135787277 seconds time elapsed
|
||||||
|
|
||||||
|
18.029630000 seconds user
|
||||||
|
28.723217000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [29.88, 23.64, 23.08, 21.84, 21.4, 21.2, 21.0, 21.0, 21.16, 21.0], "matrix": "soc-sign-Slashdot090216", "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 19.113287687301636, "power": [81.08, 81.56, 71.96, 60.52, 47.16, 48.4, 53.84, 53.84, 67.4, 82.64, 90.8, 89.16, 87.96, 85.76, 84.64, 84.04, 83.64, 84.68, 84.88, 84.88, 84.64, 84.04, 83.6], "power_after": [20.72, 20.6, 20.68, 20.88, 21.2, 21.28, 21.28, 21.48, 21.56, 21.36], "task clock (msec)": 67.66, "page faults": 3317, "cycles": 41915850, "instructions": 84471787, "branch mispredictions": 344452, "branches": 20610765, "ITLB accesses": 27276117, "ITLB misses": 6358, "DTLB misses": 17361, "DTLB accesses": 36565837, "L1I cache accesses": 32022662, "L1I cache misses": 293044, "L1D cache misses": 458939, "L1D cache accesses": 33505164, "LL cache misses": 553814, "LL cache accesses": 567372, "L2D TLB accesses": 199301, "L2D TLB misses": 25193, "L2D cache misses": 313278, "L2D cache accesses": 1796299, "instructions per cycle": 2.015270762730566, "branch miss rate": 0.016712237512775483, "ITLB miss rate": 0.00023309769495416082, "DTLB miss rate": 0.0004747874361524939, "L2D TLB miss rate": 0.12640679173712124, "L1I cache miss rate": 0.009151144274014446, "L1D cache miss rate": 0.01369756017311242, "L2D cache miss rate": 0.17440192306514674, "LL cache miss rate": 0.97610386131145}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394151 queued and waiting for resources
|
srun: job 3394981 queued and waiting for resources
|
||||||
srun: job 3394151 has been allocated resources
|
srun: job 3394981 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
||||||
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
nnz=545671, layout=torch.sparse_csr)
|
||||||
tensor([0.3831, 0.6714, 0.8380, ..., 0.7892, 0.5274, 0.9035])
|
tensor([0.6780, 0.5234, 0.1205, ..., 0.2995, 0.6275, 0.1399])
|
||||||
|
Matrix: soc-sign-Slashdot090216
|
||||||
Shape: torch.Size([81871, 81871])
|
Shape: torch.Size([81871, 81871])
|
||||||
NNZ: 545671
|
NNZ: 545671
|
||||||
Density: 8.140867447881048e-05
|
Density: 8.140867447881048e-05
|
||||||
Time: 2.044952392578125 seconds
|
Time: 30.653191089630127 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
||||||
|
|
||||||
59.01 msec task-clock:u # 0.010 CPUs utilized
|
67.66 msec task-clock:u # 0.002 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,448 page-faults:u # 58.432 K/sec
|
3,317 page-faults:u # 49.022 K/sec
|
||||||
73,062,796 cycles:u # 1.238 GHz (59.95%)
|
41,915,850 cycles:u # 0.619 GHz (57.88%)
|
||||||
88,329,175 instructions:u # 1.21 insn per cycle (93.89%)
|
84,471,787 instructions:u # 2.02 insn per cycle (88.19%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
365,177 branch-misses:u
|
375,016 branch-misses:u
|
||||||
31,850,867 L1-dcache-loads:u # 539.766 M/sec
|
32,438,527 L1-dcache-loads:u # 479.407 M/sec
|
||||||
473,835 L1-dcache-load-misses:u # 1.49% of all L1-dcache accesses
|
499,618 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
30,385,913 L1-icache-loads:u # 514.940 M/sec
|
30,998,693 L1-icache-loads:u # 458.127 M/sec
|
||||||
299,969 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
|
306,445 L1-icache-load-misses:u # 0.99% of all L1-icache accesses
|
||||||
24,365,554 dTLB-loads:u # 412.915 M/sec (8.42%)
|
34,294,934 dTLB-loads:u # 506.842 M/sec (18.86%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
5.680365622 seconds time elapsed
|
34.340632995 seconds time elapsed
|
||||||
|
|
||||||
27.656957000 seconds user
|
149.743244000 seconds user
|
||||||
194.823873000 seconds sys
|
2355.852109000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
nnz=545671, layout=torch.sparse_csr)
|
||||||
tensor([0.6906, 0.4067, 0.7042, ..., 0.8333, 0.7120, 0.3519])
|
tensor([0.9875, 0.2031, 0.7260, ..., 0.5908, 0.1575, 0.7971])
|
||||||
|
Matrix: soc-sign-Slashdot090216
|
||||||
Shape: torch.Size([81871, 81871])
|
Shape: torch.Size([81871, 81871])
|
||||||
NNZ: 545671
|
NNZ: 545671
|
||||||
Density: 8.140867447881048e-05
|
Density: 8.140867447881048e-05
|
||||||
Time: 1.3788115978240967 seconds
|
Time: 13.671181440353394 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
||||||
|
|
||||||
331,091 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
344,452 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
20,013,316 BR_RETIRED:u
|
20,610,765 BR_RETIRED:u
|
||||||
|
|
||||||
4.886021169 seconds time elapsed
|
17.331425967 seconds time elapsed
|
||||||
|
|
||||||
23.105025000 seconds user
|
83.136180000 seconds user
|
||||||
141.491451000 seconds sys
|
1069.027469000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
nnz=545671, layout=torch.sparse_csr)
|
||||||
tensor([0.8755, 0.6165, 0.4104, ..., 0.6974, 0.9453, 0.9872])
|
tensor([0.2046, 0.3645, 0.7960, ..., 0.6490, 0.4098, 0.5342])
|
||||||
|
Matrix: soc-sign-Slashdot090216
|
||||||
Shape: torch.Size([81871, 81871])
|
Shape: torch.Size([81871, 81871])
|
||||||
NNZ: 545671
|
NNZ: 545671
|
||||||
Density: 8.140867447881048e-05
|
Density: 8.140867447881048e-05
|
||||||
Time: 2.8570749759674072 seconds
|
Time: 19.569235801696777 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
||||||
|
|
||||||
26,330,936 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,276,117 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
5,193 ITLB_WALK:u
|
6,358 ITLB_WALK:u
|
||||||
16,837 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
17,361 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
35,930,477 L1D_TLB:u
|
36,565,837 L1D_TLB:u
|
||||||
|
|
||||||
6.371573603 seconds time elapsed
|
23.323243037 seconds time elapsed
|
||||||
|
|
||||||
30.986329000 seconds user
|
108.830923000 seconds user
|
||||||
254.347216000 seconds sys
|
1521.834565000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
nnz=545671, layout=torch.sparse_csr)
|
||||||
tensor([0.3573, 0.9331, 0.0611, ..., 0.9133, 0.6057, 0.2374])
|
tensor([0.4164, 0.2188, 0.5460, ..., 0.1057, 0.5277, 0.0624])
|
||||||
|
Matrix: soc-sign-Slashdot090216
|
||||||
Shape: torch.Size([81871, 81871])
|
Shape: torch.Size([81871, 81871])
|
||||||
NNZ: 545671
|
NNZ: 545671
|
||||||
Density: 8.140867447881048e-05
|
Density: 8.140867447881048e-05
|
||||||
Time: 2.311248540878296 seconds
|
Time: 26.337355375289917 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
||||||
|
|
||||||
31,853,890 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
32,022,662 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
306,147 L1I_CACHE_REFILL:u
|
293,044 L1I_CACHE_REFILL:u
|
||||||
479,933 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
458,939 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
33,426,019 L1D_CACHE:u
|
33,505,164 L1D_CACHE:u
|
||||||
|
|
||||||
5.718741260 seconds time elapsed
|
30.017812847 seconds time elapsed
|
||||||
|
|
||||||
28.451593000 seconds user
|
131.976276000 seconds user
|
||||||
214.350594000 seconds sys
|
2029.636174000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 545669, 545669,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
col_indices=tensor([ 1, 2, 3, ..., 81869, 81699, 81863]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(81871, 81871),
|
||||||
nnz=545671, layout=torch.sparse_csr)
|
nnz=545671, layout=torch.sparse_csr)
|
||||||
tensor([0.6021, 0.5679, 0.4538, ..., 0.9086, 0.9552, 0.5329])
|
tensor([0.7679, 0.9196, 0.3474, ..., 0.5624, 0.0163, 0.8596])
|
||||||
|
Matrix: soc-sign-Slashdot090216
|
||||||
Shape: torch.Size([81871, 81871])
|
Shape: torch.Size([81871, 81871])
|
||||||
NNZ: 545671
|
NNZ: 545671
|
||||||
Density: 8.140867447881048e-05
|
Density: 8.140867447881048e-05
|
||||||
Time: 1.8193013668060303 seconds
|
Time: 29.926054000854492 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090216.mtx 1000':
|
||||||
|
|
||||||
540,302 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
553,814 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
553,181 LL_CACHE_RD:u
|
567,372 LL_CACHE_RD:u
|
||||||
173,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
199,301 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
21,390 L2D_TLB_REFILL:u
|
25,193 L2D_TLB_REFILL:u
|
||||||
300,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
313,278 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,739,931 L2D_CACHE:u
|
1,796,299 L2D_CACHE:u
|
||||||
|
|
||||||
5.546861941 seconds time elapsed
|
33.553779692 seconds time elapsed
|
||||||
|
|
||||||
28.194596000 seconds user
|
154.498461000 seconds user
|
||||||
181.004698000 seconds sys
|
2293.574463000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [21.92, 21.84, 20.96, 20.24, 20.28, 20.16, 19.96, 19.72, 19.88, 19.76], "matrix": "soc-sign-Slashdot090221", "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 18.79910135269165, "power": [80.48, 80.08, 69.04, 69.04, 55.0, 46.8, 49.16, 56.2, 70.84, 82.84, 86.52, 84.28, 82.56, 81.2, 80.28, 80.28, 80.04, 80.16, 80.8, 81.0, 81.92, 83.04, 82.88], "power_after": [21.0, 20.96, 21.12, 20.76, 20.72, 20.56, 20.52, 20.64, 20.88, 21.04], "task clock (msec)": 58.57, "page faults": 3259, "cycles": 74509373, "instructions": 88672751, "branch mispredictions": 342121, "branches": 20436338, "ITLB accesses": 27189335, "ITLB misses": 6437, "DTLB misses": 18156, "DTLB accesses": 36676625, "L1I cache accesses": 30721032, "L1I cache misses": 302777, "L1D cache misses": 469833, "L1D cache accesses": 32109077, "LL cache misses": 551850, "LL cache accesses": 565355, "L2D TLB accesses": 200417, "L2D TLB misses": 25536, "L2D cache misses": 304133, "L2D cache accesses": 1801849, "instructions per cycle": 1.190088540941017, "branch miss rate": 0.016740817263836603, "ITLB miss rate": 0.0002367472393127673, "DTLB miss rate": 0.0004950291909356436, "L2D TLB miss rate": 0.12741434109880898, "L1I cache miss rate": 0.009855691045795596, "L1D cache miss rate": 0.014632404413244267, "L2D cache miss rate": 0.16878939356183564, "LL cache miss rate": 0.9761123541845389}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394147 queued and waiting for resources
|
srun: job 3394979 queued and waiting for resources
|
||||||
srun: job 3394147 has been allocated resources
|
srun: job 3394979 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
||||||
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
nnz=549202, layout=torch.sparse_csr)
|
||||||
tensor([0.2696, 0.6106, 0.1626, ..., 0.2215, 0.5107, 0.8609])
|
tensor([0.4201, 0.7748, 0.6565, ..., 0.0517, 0.6958, 0.5341])
|
||||||
|
Matrix: soc-sign-Slashdot090221
|
||||||
Shape: torch.Size([82144, 82144])
|
Shape: torch.Size([82144, 82144])
|
||||||
NNZ: 549202
|
NNZ: 549202
|
||||||
Density: 8.13917555860553e-05
|
Density: 8.13917555860553e-05
|
||||||
Time: 1.4500706195831299 seconds
|
Time: 27.35153603553772 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
||||||
|
|
||||||
61.26 msec task-clock:u # 0.012 CPUs utilized
|
58.57 msec task-clock:u # 0.002 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,303 page-faults:u # 53.917 K/sec
|
3,259 page-faults:u # 55.640 K/sec
|
||||||
44,515,786 cycles:u # 0.727 GHz (40.46%)
|
74,509,373 cycles:u # 1.272 GHz (58.00%)
|
||||||
81,513,738 instructions:u # 1.83 insn per cycle (73.51%)
|
88,672,751 instructions:u # 1.19 insn per cycle (90.97%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
344,479 branch-misses:u (89.42%)
|
361,568 branch-misses:u
|
||||||
34,411,073 L1-dcache-loads:u # 561.710 M/sec
|
31,594,797 L1-dcache-loads:u # 539.410 M/sec
|
||||||
484,811 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
460,467 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
32,789,672 L1-icache-loads:u # 535.243 M/sec
|
30,148,838 L1-icache-loads:u # 514.724 M/sec
|
||||||
293,487 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
|
282,768 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
47,065,740 dTLB-loads:u # 768.279 M/sec (32.81%)
|
19,757,856 dTLB-loads:u # 337.321 M/sec (11.69%)
|
||||||
146,215 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (13.39%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
4.966101053 seconds time elapsed
|
31.087250856 seconds time elapsed
|
||||||
|
|
||||||
23.375418000 seconds user
|
142.716222000 seconds user
|
||||||
148.052989000 seconds sys
|
2102.420776000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
nnz=549202, layout=torch.sparse_csr)
|
||||||
tensor([0.1999, 0.3932, 0.8035, ..., 0.5079, 0.5903, 0.7606])
|
tensor([0.7637, 0.5328, 0.8286, ..., 0.7084, 0.8903, 0.1707])
|
||||||
|
Matrix: soc-sign-Slashdot090221
|
||||||
Shape: torch.Size([82144, 82144])
|
Shape: torch.Size([82144, 82144])
|
||||||
NNZ: 549202
|
NNZ: 549202
|
||||||
Density: 8.13917555860553e-05
|
Density: 8.13917555860553e-05
|
||||||
Time: 1.9677543640136719 seconds
|
Time: 17.188836336135864 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
||||||
|
|
||||||
328,019 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
342,121 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,893,662 BR_RETIRED:u
|
20,436,338 BR_RETIRED:u
|
||||||
|
|
||||||
5.529871590 seconds time elapsed
|
20.753346873 seconds time elapsed
|
||||||
|
|
||||||
26.844356000 seconds user
|
98.605331000 seconds user
|
||||||
190.429440000 seconds sys
|
1332.291974000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
nnz=549202, layout=torch.sparse_csr)
|
||||||
tensor([0.2933, 0.6999, 0.0078, ..., 0.6213, 0.9377, 0.6359])
|
tensor([0.9017, 0.8505, 0.0023, ..., 0.4182, 0.6895, 0.5023])
|
||||||
|
Matrix: soc-sign-Slashdot090221
|
||||||
Shape: torch.Size([82144, 82144])
|
Shape: torch.Size([82144, 82144])
|
||||||
NNZ: 549202
|
NNZ: 549202
|
||||||
Density: 8.13917555860553e-05
|
Density: 8.13917555860553e-05
|
||||||
Time: 1.4976201057434082 seconds
|
Time: 16.22375249862671 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
||||||
|
|
||||||
27,248,112 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,189,335 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
5,792 ITLB_WALK:u
|
6,437 ITLB_WALK:u
|
||||||
16,632 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
18,156 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,929,042 L1D_TLB:u
|
36,676,625 L1D_TLB:u
|
||||||
|
|
||||||
4.971341163 seconds time elapsed
|
19.748749363 seconds time elapsed
|
||||||
|
|
||||||
24.247480000 seconds user
|
103.049578000 seconds user
|
||||||
151.276717000 seconds sys
|
1249.814927000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
nnz=549202, layout=torch.sparse_csr)
|
||||||
tensor([0.1310, 0.6695, 0.9479, ..., 0.3141, 0.9327, 0.2117])
|
tensor([0.4805, 0.2325, 0.2103, ..., 0.1710, 0.7638, 0.9368])
|
||||||
|
Matrix: soc-sign-Slashdot090221
|
||||||
Shape: torch.Size([82144, 82144])
|
Shape: torch.Size([82144, 82144])
|
||||||
NNZ: 549202
|
NNZ: 549202
|
||||||
Density: 8.13917555860553e-05
|
Density: 8.13917555860553e-05
|
||||||
Time: 1.0877256393432617 seconds
|
Time: 15.453373908996582 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
||||||
|
|
||||||
31,702,830 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
30,721,032 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
295,778 L1I_CACHE_REFILL:u
|
302,777 L1I_CACHE_REFILL:u
|
||||||
470,423 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
469,833 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
33,155,119 L1D_CACHE:u
|
32,109,077 L1D_CACHE:u
|
||||||
|
|
||||||
4.675682406 seconds time elapsed
|
19.090250444 seconds time elapsed
|
||||||
|
|
||||||
23.098007000 seconds user
|
94.904880000 seconds user
|
||||||
119.827712000 seconds sys
|
1195.102767000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 29, 124, ..., 549200, 549200,
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
col_indices=tensor([ 1, 2, 3, ..., 82142, 81974, 82136]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(82144, 82144),
|
||||||
nnz=549202, layout=torch.sparse_csr)
|
nnz=549202, layout=torch.sparse_csr)
|
||||||
tensor([0.0860, 0.5402, 0.6738, ..., 0.3856, 0.5968, 0.4203])
|
tensor([0.8430, 0.9439, 0.4260, ..., 0.8172, 0.4243, 0.3834])
|
||||||
|
Matrix: soc-sign-Slashdot090221
|
||||||
Shape: torch.Size([82144, 82144])
|
Shape: torch.Size([82144, 82144])
|
||||||
NNZ: 549202
|
NNZ: 549202
|
||||||
Density: 8.13917555860553e-05
|
Density: 8.13917555860553e-05
|
||||||
Time: 1.2302696704864502 seconds
|
Time: 29.316507816314697 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-Slashdot090221.mtx 1000':
|
||||||
|
|
||||||
545,220 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
551,850 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
562,139 LL_CACHE_RD:u
|
565,355 LL_CACHE_RD:u
|
||||||
192,206 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
200,417 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
24,891 L2D_TLB_REFILL:u
|
25,536 L2D_TLB_REFILL:u
|
||||||
307,033 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
304,133 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,782,260 L2D_CACHE:u
|
1,801,849 L2D_CACHE:u
|
||||||
|
|
||||||
4.781838296 seconds time elapsed
|
32.859276963 seconds time elapsed
|
||||||
|
|
||||||
23.716896000 seconds user
|
148.969816000 seconds user
|
||||||
130.971947000 seconds sys
|
2252.321936000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
pytorch/output/altra_10_30_soc-sign-epinions_1000.json (new file, 1 line)
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.32, 20.52, 20.52, 20.56, 20.6, 20.4, 20.76, 20.6, 20.36, 20.4], "matrix": "soc-sign-epinions", "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 22.52380871772766, "power": [81.24, 81.16, 74.84, 62.04, 51.6, 50.56, 52.4, 52.4, 68.24, 80.56, 91.44, 91.36, 90.28, 88.32, 86.4, 85.16, 83.64, 82.36, 82.96, 82.84, 82.84, 82.56, 82.44, 82.08, 83.64, 84.4], "power_after": [20.8, 20.88, 20.8, 20.92, 20.88, 20.88, 20.8, 20.84, 20.84, 20.6], "task clock (msec)": 63.9, "page faults": 3446, "cycles": 55931043, "instructions": 77907356, "branch mispredictions": 332778, "branches": 20000746, "ITLB accesses": 27000304, "ITLB misses": 6713, "DTLB misses": 18689, "DTLB accesses": 36395663, "L1I cache accesses": 32396405, "L1I cache misses": 292629, "L1D cache misses": 473799, "L1D cache accesses": 34061981, "LL cache misses": 542765, "LL cache accesses": 557193, "L2D TLB accesses": 203626, "L2D TLB misses": 24363, "L2D cache misses": 303397, "L2D cache accesses": 1772084, "instructions per cycle": 1.3929179901043505, "branch miss rate": 0.01663827939217867, "ITLB miss rate": 0.00024862683027568875, "DTLB miss rate": 0.0005134952480464499, "L2D TLB miss rate": 0.11964582126054629, "L1I cache miss rate": 0.009032761505481858, "L1D cache miss rate": 0.01390990735389113, "L2D cache miss rate": 0.171209152613533, "LL cache miss rate": 0.9741059202107708}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394154 queued and waiting for resources
|
srun: job 3394990 queued and waiting for resources
|
||||||
srun: job 3394154 has been allocated resources
|
srun: job 3394990 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
||||||
@ -15,37 +15,38 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|||||||
7714]),
|
7714]),
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
||||||
tensor([0.5842, 0.3042, 0.7358, ..., 0.7882, 0.7596, 0.5895])
|
tensor([0.3914, 0.2076, 0.6733, ..., 0.4758, 0.6360, 0.6316])
|
||||||
|
Matrix: soc-sign-epinions
|
||||||
Shape: torch.Size([131828, 131828])
|
Shape: torch.Size([131828, 131828])
|
||||||
NNZ: 841372
|
NNZ: 841372
|
||||||
Density: 4.841419648464106e-05
|
Density: 4.841419648464106e-05
|
||||||
Time: 2.4407293796539307 seconds
|
Time: 20.04187798500061 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
||||||
|
|
||||||
49.87 msec task-clock:u # 0.008 CPUs utilized
|
63.90 msec task-clock:u # 0.003 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,300 page-faults:u # 66.174 K/sec
|
3,446 page-faults:u # 53.927 K/sec
|
||||||
51,935,476 cycles:u # 1.041 GHz (65.00%)
|
55,931,043 cycles:u # 0.875 GHz (85.43%)
|
||||||
83,731,856 instructions:u # 1.61 insn per cycle (84.25%)
|
77,907,356 instructions:u # 1.39 insn per cycle
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
375,900 branch-misses:u
|
357,739 branch-misses:u
|
||||||
34,169,837 L1-dcache-loads:u # 685.197 M/sec
|
33,000,188 L1-dcache-loads:u # 516.421 M/sec
|
||||||
474,410 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
|
466,824 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
32,443,215 L1-icache-loads:u # 650.574 M/sec
|
31,503,048 L1-icache-loads:u # 492.992 M/sec
|
||||||
294,146 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
|
301,112 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
||||||
63,709,518 dTLB-loads:u # 1.278 G/sec (16.44%)
|
34,740,872 dTLB-loads:u # 543.661 M/sec (18.37%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
32,355 dTLB-load-misses:u # 0.09% of all dTLB cache accesses (12.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
6.058862056 seconds time elapsed
|
23.478083368 seconds time elapsed
|
||||||
|
|
||||||
29.101578000 seconds user
|
119.232326000 seconds user
|
||||||
224.790489000 seconds sys
|
1541.081607000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -57,21 +58,22 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|||||||
7714]),
|
7714]),
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
||||||
tensor([0.9696, 0.8139, 0.4858, ..., 0.2374, 0.1716, 0.9756])
|
tensor([0.3970, 0.5643, 0.0036, ..., 0.0338, 0.0807, 0.3885])
|
||||||
|
Matrix: soc-sign-epinions
|
||||||
Shape: torch.Size([131828, 131828])
|
Shape: torch.Size([131828, 131828])
|
||||||
NNZ: 841372
|
NNZ: 841372
|
||||||
Density: 4.841419648464106e-05
|
Density: 4.841419648464106e-05
|
||||||
Time: 2.0945546627044678 seconds
|
Time: 16.115705490112305 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
||||||
|
|
||||||
326,464 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
332,778 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
20,341,367 BR_RETIRED:u
|
20,000,746 BR_RETIRED:u
|
||||||
|
|
||||||
5.525378890 seconds time elapsed
|
19.765627973 seconds time elapsed
|
||||||
|
|
||||||
28.841740000 seconds user
|
103.591961000 seconds user
|
||||||
199.678982000 seconds sys
|
1250.845091000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -83,23 +85,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|||||||
7714]),
|
7714]),
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
||||||
tensor([0.3478, 0.0057, 0.8574, ..., 0.6409, 0.1876, 0.8429])
|
tensor([0.0049, 0.4550, 0.3166, ..., 0.3734, 0.8337, 0.5156])
|
||||||
|
Matrix: soc-sign-epinions
|
||||||
Shape: torch.Size([131828, 131828])
|
Shape: torch.Size([131828, 131828])
|
||||||
NNZ: 841372
|
NNZ: 841372
|
||||||
Density: 4.841419648464106e-05
|
Density: 4.841419648464106e-05
|
||||||
Time: 2.8504912853240967 seconds
|
Time: 18.55180263519287 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
||||||
|
|
||||||
27,590,154 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,000,304 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,210 ITLB_WALK:u
|
6,713 ITLB_WALK:u
|
||||||
17,536 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
18,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,763,243 L1D_TLB:u
|
36,395,663 L1D_TLB:u
|
||||||
|
|
||||||
6.425887143 seconds time elapsed
|
22.333459337 seconds time elapsed
|
||||||
|
|
||||||
33.069094000 seconds user
|
109.075160000 seconds user
|
||||||
256.667850000 seconds sys
|
1441.055730000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -111,23 +114,24 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|||||||
7714]),
|
7714]),
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
||||||
tensor([0.5381, 0.6651, 0.4689, ..., 0.7251, 0.3759, 0.8516])
|
tensor([0.0560, 0.8530, 0.8946, ..., 0.4591, 0.5391, 0.2898])
|
||||||
|
Matrix: soc-sign-epinions
|
||||||
Shape: torch.Size([131828, 131828])
|
Shape: torch.Size([131828, 131828])
|
||||||
NNZ: 841372
|
NNZ: 841372
|
||||||
Density: 4.841419648464106e-05
|
Density: 4.841419648464106e-05
|
||||||
Time: 1.6941111087799072 seconds
|
Time: 25.587534427642822 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
||||||
|
|
||||||
31,663,300 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
32,396,405 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
289,727 L1I_CACHE_REFILL:u
|
292,629 L1I_CACHE_REFILL:u
|
||||||
462,864 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
473,799 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
33,262,254 L1D_CACHE:u
|
34,061,981 L1D_CACHE:u
|
||||||
|
|
||||||
5.304170809 seconds time elapsed
|
29.367381835 seconds time elapsed
|
||||||
|
|
||||||
25.992245000 seconds user
|
142.233743000 seconds user
|
||||||
173.752913000 seconds sys
|
1962.747683000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -139,25 +143,26 @@ tensor(crow_indices=tensor([ 0, 1, 2, ..., 841371, 841371,
|
|||||||
7714]),
|
7714]),
|
||||||
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
values=tensor([-1., -1., 1., ..., 1., 1., 1.]),
|
||||||
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
size=(131828, 131828), nnz=841372, layout=torch.sparse_csr)
|
||||||
tensor([0.4145, 0.8515, 0.7222, ..., 0.1386, 0.6641, 0.6662])
|
tensor([0.7002, 0.7829, 0.1511, ..., 0.3651, 0.2391, 0.7788])
|
||||||
|
Matrix: soc-sign-epinions
|
||||||
Shape: torch.Size([131828, 131828])
|
Shape: torch.Size([131828, 131828])
|
||||||
NNZ: 841372
|
NNZ: 841372
|
||||||
Density: 4.841419648464106e-05
|
Density: 4.841419648464106e-05
|
||||||
Time: 3.0850296020507812 seconds
|
Time: 23.656178951263428 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/soc-sign-epinions.mtx 1000':
|
||||||
|
|
||||||
530,272 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
542,765 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
551,373 LL_CACHE_RD:u
|
557,193 LL_CACHE_RD:u
|
||||||
196,152 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
203,626 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
23,542 L2D_TLB_REFILL:u
|
24,363 L2D_TLB_REFILL:u
|
||||||
301,998 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
303,397 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,732,662 L2D_CACHE:u
|
1,772,084 L2D_CACHE:u
|
||||||
|
|
||||||
6.733517838 seconds time elapsed
|
27.453055481 seconds time elapsed
|
||||||
|
|
||||||
34.030476000 seconds user
|
128.709934000 seconds user
|
||||||
271.397968000 seconds sys
|
1831.887905000 seconds sys
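
The metric-group ratios that perf prints above are rounded to one decimal place ("0.0", "0.1", "0.2", "1.0" per access), which hides most of the information; the JSON records in this commit keep the same quantities at full precision, and each one is simply a walk/refill/miss counter divided by its matching access counter. A minimal sketch recomputing them from the soc-sign-epinions counters shown above (the dict and variable names are illustrative only, not part of run.py or spmv.py):

# Recompute the one-decimal metric-group ratios from the raw counters above.
counters = {
    'ITLB_WALK': 6_210, 'L1I_TLB': 27_590_154,
    'DTLB_WALK': 17_536, 'L1D_TLB': 36_763_243,
    'LL_CACHE_MISS_RD': 530_272, 'LL_CACHE_RD': 551_373,
    'L2D_TLB_REFILL': 23_542, 'L2D_TLB': 196_152,
    'L2D_CACHE_REFILL': 301_998, 'L2D_CACHE': 1_732_662,
}

ratios = {
    'itlb_walk_ratio': counters['ITLB_WALK'] / counters['L1I_TLB'],
    'dtlb_walk_ratio': counters['DTLB_WALK'] / counters['L1D_TLB'],
    'll_cache_read_miss_ratio': counters['LL_CACHE_MISS_RD'] / counters['LL_CACHE_RD'],
    'l2_tlb_miss_ratio': counters['L2D_TLB_REFILL'] / counters['L2D_TLB'],
    'l2_cache_miss_ratio': counters['L2D_CACHE_REFILL'] / counters['L2D_CACHE'],
}

for name, value in ratios.items():
    print(f'{name}: {value:.4f}')

For example, 530,272 / 551,373 is roughly 0.96, which perf displays as "1.0 per cache access" for ll_cache_read_miss_ratio; the full-precision value is what ends up in the JSON as "LL cache miss rate".
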
1
pytorch/output/altra_10_30_sx-mathoverflow_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.28, 16.44, 16.68, 16.68, 16.84, 17.04, 16.84, 16.84, 16.72, 16.72], "matrix": "sx-mathoverflow", "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 5.405760288238525, "power": [25.64, 20.44, 21.24, 22.16, 22.28, 27.04, 26.92, 26.28, 25.32], "power_after": [16.32, 16.44, 16.4, 16.4, 16.6, 16.48, 16.56, 16.6, 16.32, 16.44], "task clock (msec)": 50.36, "page faults": 3296, "cycles": 56049457, "instructions": 72333565, "branch mispredictions": 325529, "branches": 19463406, "ITLB accesses": 27374917, "ITLB misses": 5203, "DTLB misses": 16771, "DTLB accesses": 36373182, "L1I cache accesses": 31839975, "L1I cache misses": 274158, "L1D cache misses": 471992, "L1D cache accesses": 33638817, "LL cache misses": 538067, "LL cache accesses": 557981, "L2D TLB accesses": 170169, "L2D TLB misses": 21987, "L2D cache misses": 301746, "L2D cache accesses": 1735872, "instructions per cycle": 1.2905310572411077, "branch miss rate": 0.016725181604905125, "ITLB miss rate": 0.00019006450320927, "DTLB miss rate": 0.00046108146381034247, "L2D TLB miss rate": 0.12920684731061474, "L1I cache miss rate": 0.00861049671050307, "L1D cache miss rate": 0.014031171191305569, "L2D cache miss rate": 0.1738296372082734, "LL cache miss rate": 0.9643106127269566}
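
Each of the JSON records added in this commit, like the sx-mathoverflow one above, stores both the raw counters and the derived rates. The derived fields are plain ratios of the raw ones, and despite its name the "% density" field holds the raw fraction nnz / (rows * cols) rather than a percentage (it matches the "Density" line in the .output files). A small sketch of those relationships, reading the file added above; the relative path and variable names are assumptions for illustration:

import json

with open('pytorch/output/altra_10_30_sx-mathoverflow_1000.json') as f:
    rec = json.load(f)

rows, cols = rec['shape']
# Derived fields are exact ratios of the raw counters stored alongside them.
assert abs(rec['instructions per cycle'] - rec['instructions'] / rec['cycles']) < 1e-9
assert abs(rec['branch miss rate'] - rec['branch mispredictions'] / rec['branches']) < 1e-9
assert abs(rec['ITLB miss rate'] - rec['ITLB misses'] / rec['ITLB accesses']) < 1e-9
assert abs(rec['% density'] - rec['nnz'] / (rows * cols)) < 1e-9
print(rec['matrix'], rec['instructions'] / rec['cycles'])  # sx-mathoverflow 1.2905...
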
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394144 queued and waiting for resources
|
srun: job 3394987 queued and waiting for resources
|
||||||
srun: job 3394144 has been allocated resources
|
srun: job 3394987 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
||||||
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
||||||
tensor([0.7658, 0.2874, 0.7506, ..., 0.3335, 0.5056, 0.9767])
|
tensor([0.8864, 0.5637, 0.9805, ..., 0.0234, 0.9487, 0.4860])
|
||||||
|
Matrix: sx-mathoverflow
|
||||||
Shape: torch.Size([24818, 24818])
|
Shape: torch.Size([24818, 24818])
|
||||||
NNZ: 239978
|
NNZ: 239978
|
||||||
Density: 0.00038961697406616504
|
Density: 0.00038961697406616504
|
||||||
Time: 0.5561239719390869 seconds
|
Time: 5.484489917755127 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
||||||
|
|
||||||
62.49 msec task-clock:u # 0.015 CPUs utilized
|
50.36 msec task-clock:u # 0.006 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,312 page-faults:u # 53.003 K/sec
|
3,296 page-faults:u # 65.452 K/sec
|
||||||
76,783,170 cycles:u # 1.229 GHz (62.65%)
|
56,049,457 cycles:u # 1.113 GHz (49.66%)
|
||||||
77,095,702 instructions:u # 1.00 insn per cycle (80.20%)
|
72,333,565 instructions:u # 1.29 insn per cycle (66.35%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
370,891 branch-misses:u (94.99%)
|
369,218 branch-misses:u (86.12%)
|
||||||
32,730,448 L1-dcache-loads:u # 523.800 M/sec
|
33,730,437 L1-dcache-loads:u # 669.814 M/sec (93.88%)
|
||||||
467,718 L1-dcache-load-misses:u # 1.43% of all L1-dcache accesses
|
459,922 L1-dcache-load-misses:u # 1.36% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
31,548,469 L1-icache-loads:u # 504.885 M/sec
|
31,827,672 L1-icache-loads:u # 632.030 M/sec
|
||||||
298,966 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
|
295,060 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
||||||
61,098,419 dTLB-loads:u # 977.786 M/sec (20.67%)
|
54,366,618 dTLB-loads:u # 1.080 G/sec (35.64%)
|
||||||
64,747 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (10.91%)
|
84,768 dTLB-load-misses:u # 0.16% of all dTLB cache accesses (25.48%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
12,107,953 iTLB-loads:u # 240.438 M/sec (10.11%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
4.062782709 seconds time elapsed
|
8.968532171 seconds time elapsed
|
||||||
|
|
||||||
16.106338000 seconds user
|
20.749643000 seconds user
|
||||||
32.399716000 seconds sys
|
28.745486000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
||||||
tensor([0.7531, 0.4727, 0.4126, ..., 0.1574, 0.5247, 0.8875])
|
tensor([0.5549, 0.0336, 0.9472, ..., 0.2657, 0.3394, 0.6185])
|
||||||
|
Matrix: sx-mathoverflow
|
||||||
Shape: torch.Size([24818, 24818])
|
Shape: torch.Size([24818, 24818])
|
||||||
NNZ: 239978
|
NNZ: 239978
|
||||||
Density: 0.00038961697406616504
|
Density: 0.00038961697406616504
|
||||||
Time: 0.6003477573394775 seconds
|
Time: 5.532417297363281 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
||||||
|
|
||||||
323,514 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
325,529 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,769,937 BR_RETIRED:u
|
19,463,406 BR_RETIRED:u
|
||||||
|
|
||||||
4.061021393 seconds time elapsed
|
8.912497962 seconds time elapsed
|
||||||
|
|
||||||
16.155442000 seconds user
|
20.214519000 seconds user
|
||||||
31.047278000 seconds sys
|
31.566513000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
||||||
tensor([0.3067, 0.4335, 0.8814, ..., 0.2370, 0.1210, 0.7695])
|
tensor([0.3330, 0.8843, 0.5150, ..., 0.7292, 0.0873, 0.4184])
|
||||||
|
Matrix: sx-mathoverflow
|
||||||
Shape: torch.Size([24818, 24818])
|
Shape: torch.Size([24818, 24818])
|
||||||
NNZ: 239978
|
NNZ: 239978
|
||||||
Density: 0.00038961697406616504
|
Density: 0.00038961697406616504
|
||||||
Time: 0.5404119491577148 seconds
|
Time: 5.457342863082886 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
||||||
|
|
||||||
26,809,325 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,374,917 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,925 ITLB_WALK:u
|
5,203 ITLB_WALK:u
|
||||||
19,003 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
16,771 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,516,965 L1D_TLB:u
|
36,373,182 L1D_TLB:u
|
||||||
|
|
||||||
4.031175418 seconds time elapsed
|
8.730534933 seconds time elapsed
|
||||||
|
|
||||||
15.607232000 seconds user
|
20.156482000 seconds user
|
||||||
30.562258000 seconds sys
|
31.426118000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
||||||
tensor([0.5013, 0.5961, 0.5565, ..., 0.3779, 0.1835, 0.6722])
|
tensor([0.5864, 0.4449, 0.4042, ..., 0.1651, 0.7793, 0.8302])
|
||||||
|
Matrix: sx-mathoverflow
|
||||||
Shape: torch.Size([24818, 24818])
|
Shape: torch.Size([24818, 24818])
|
||||||
NNZ: 239978
|
NNZ: 239978
|
||||||
Density: 0.00038961697406616504
|
Density: 0.00038961697406616504
|
||||||
Time: 0.6185996532440186 seconds
|
Time: 5.449937582015991 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
||||||
|
|
||||||
31,104,231 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
31,839,975 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
285,499 L1I_CACHE_REFILL:u
|
274,158 L1I_CACHE_REFILL:u
|
||||||
468,498 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
471,992 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
32,677,465 L1D_CACHE:u
|
33,638,817 L1D_CACHE:u
|
||||||
|
|
||||||
4.083129305 seconds time elapsed
|
8.845491835 seconds time elapsed
|
||||||
|
|
||||||
16.243642000 seconds user
|
20.577696000 seconds user
|
||||||
36.578375000 seconds sys
|
35.105662000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 317, 416, ..., 239976, 239977,
|
|||||||
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
col_indices=tensor([ 0, 1, 2, ..., 1483, 2179, 24817]),
|
||||||
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
values=tensor([151., 17., 6., ..., 1., 1., 1.]),
|
||||||
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
size=(24818, 24818), nnz=239978, layout=torch.sparse_csr)
|
||||||
tensor([0.9075, 0.2788, 0.1365, ..., 0.4240, 0.8832, 0.1064])
|
tensor([0.8880, 0.4700, 0.5542, ..., 0.8505, 0.9123, 0.5742])
|
||||||
|
Matrix: sx-mathoverflow
|
||||||
Shape: torch.Size([24818, 24818])
|
Shape: torch.Size([24818, 24818])
|
||||||
NNZ: 239978
|
NNZ: 239978
|
||||||
Density: 0.00038961697406616504
|
Density: 0.00038961697406616504
|
||||||
Time: 0.54673171043396 seconds
|
Time: 5.400304794311523 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/sx-mathoverflow.mtx 1000':
|
||||||
|
|
||||||
559,358 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
538,067 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
571,935 LL_CACHE_RD:u
|
557,981 LL_CACHE_RD:u
|
||||||
194,840 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
170,169 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
23,481 L2D_TLB_REFILL:u
|
21,987 L2D_TLB_REFILL:u
|
||||||
313,487 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
301,746 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,779,730 L2D_CACHE:u
|
1,735,872 L2D_CACHE:u
|
||||||
|
|
||||||
3.961843929 seconds time elapsed
|
8.606800178 seconds time elapsed
|
||||||
|
|
||||||
15.425912000 seconds user
|
21.064990000 seconds user
|
||||||
28.864046000 seconds sys
|
34.158762000 seconds sys
1
pytorch/output/altra_10_30_ut2010_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.36, 20.4, 20.68, 20.64, 20.92, 20.92, 20.88, 20.68, 20.68, 20.6], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 11.10523509979248, "power": [90.68, 90.68, 88.24, 72.2, 59.48, 52.0, 54.72, 64.28, 79.24, 94.08, 96.24, 93.72, 92.36, 92.36, 90.08], "power_after": [21.24, 21.28, 20.96, 21.16, 20.92, 21.04, 21.32, 21.56, 21.16, 21.24], "task clock (msec)": 52.22, "page faults": 3288, "cycles": 67463873, "instructions": 73042754, "branch mispredictions": 344635, "branches": 20775821, "ITLB accesses": 27488750, "ITLB misses": 6494, "DTLB misses": 18293, "DTLB accesses": 36697113, "L1I cache accesses": 31066176, "L1I cache misses": 298652, "L1D cache misses": 473808, "L1D cache accesses": 32572985, "LL cache misses": 547428, "LL cache accesses": 566356, "L2D TLB accesses": 162858, "L2D TLB misses": 19852, "L2D cache misses": 304056, "L2D cache accesses": 1713420, "instructions per cycle": 1.0826943481291091, "branch miss rate": 0.01658827345499367, "ITLB miss rate": 0.00023624209904051657, "DTLB miss rate": 0.0004984860798177775, "L2D TLB miss rate": 0.12189760404769799, "L1I cache miss rate": 0.009613413636747567, "L1D cache miss rate": 0.014546041758223879, "L2D cache miss rate": 0.17745561508561825, "LL cache miss rate": 0.9665793246650517}
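
The ut2010 record above also carries the power trace: power_before and power_after are the idle baselines sampled for baseline_time_s seconds (after waiting baseline_delay_s), and power holds the samples taken while the SpMV loop ran. The JSON does not store the sampling interval, so treating the samples as evenly spread over the measured window is an assumption; with that caveat, a rough sketch of average draw and extra energy (names and path are illustrative):

import json
from statistics import mean

with open('pytorch/output/altra_10_30_ut2010_1000.json') as f:
    rec = json.load(f)

idle_w = mean(rec['power_before'] + rec['power_after'])  # about 21 W for this run
run_w = mean(rec['power'])                               # about 81 W while the loop runs
extra_w = run_w - idle_w

# Very rough: assumes evenly spaced samples and uses time_s (the total over all
# iterations) as the integration window.
extra_energy_j = extra_w * rec['time_s']
print(f'{rec["matrix"]}: ~{extra_w:.0f} W above idle, ~{extra_energy_j:.0f} J extra')
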
173
pytorch/output/altra_10_30_ut2010_1000.output
Normal file
@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3394993 queued and waiting for resources
|
||||||
|
srun: job 3394993 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
||||||
|
572066]),
|
||||||
|
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
||||||
|
114602]),
|
||||||
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.6983, 0.2845, 0.5984, ..., 0.1182, 0.9468, 0.3161])
|
||||||
|
Matrix: ut2010
|
||||||
|
Shape: torch.Size([115406, 115406])
|
||||||
|
NNZ: 572066
|
||||||
|
Density: 4.295259032005559e-05
|
||||||
|
Time: 8.604448795318604 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
|
52.22 msec task-clock:u # 0.004 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,288 page-faults:u # 62.965 K/sec
|
||||||
|
67,463,873 cycles:u # 1.292 GHz (52.95%)
|
||||||
|
73,042,754 instructions:u # 1.08 insn per cycle (71.78%)
|
||||||
|
<not supported> branches:u
|
||||||
|
376,297 branch-misses:u (87.57%)
|
||||||
|
34,189,906 L1-dcache-loads:u # 654.731 M/sec (97.72%)
|
||||||
|
471,636 L1-dcache-load-misses:u # 1.38% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,870,328 L1-icache-loads:u # 610.312 M/sec
|
||||||
|
297,680 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
||||||
|
57,623,823 dTLB-loads:u # 1.103 G/sec (30.16%)
|
||||||
|
75,454 dTLB-load-misses:u # 0.13% of all dTLB cache accesses (24.31%)
|
||||||
|
0 iTLB-loads:u # 0.000 /sec (3.96%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
12.112100803 seconds time elapsed
|
||||||
|
|
||||||
|
66.253313000 seconds user
|
||||||
|
675.855469000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
||||||
|
572066]),
|
||||||
|
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
||||||
|
114602]),
|
||||||
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.0260, 0.8569, 0.4315, ..., 0.5243, 0.8018, 0.1763])
|
||||||
|
Matrix: ut2010
|
||||||
|
Shape: torch.Size([115406, 115406])
|
||||||
|
NNZ: 572066
|
||||||
|
Density: 4.295259032005559e-05
|
||||||
|
Time: 8.702903270721436 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
|
344,635 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
20,775,821 BR_RETIRED:u
|
||||||
|
|
||||||
|
12.383096073 seconds time elapsed
|
||||||
|
|
||||||
|
64.544546000 seconds user
|
||||||
|
688.477174000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
||||||
|
572066]),
|
||||||
|
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
||||||
|
114602]),
|
||||||
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.7940, 0.1585, 0.6879, ..., 0.4017, 0.1738, 0.9713])
|
||||||
|
Matrix: ut2010
|
||||||
|
Shape: torch.Size([115406, 115406])
|
||||||
|
NNZ: 572066
|
||||||
|
Density: 4.295259032005559e-05
|
||||||
|
Time: 7.38647985458374 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
|
27,488,750 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,494 ITLB_WALK:u
|
||||||
|
18,293 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
36,697,113 L1D_TLB:u
|
||||||
|
|
||||||
|
10.936742446 seconds time elapsed
|
||||||
|
|
||||||
|
63.993242000 seconds user
|
||||||
|
580.515047000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
||||||
|
572066]),
|
||||||
|
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
||||||
|
114602]),
|
||||||
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.2725, 0.6578, 0.8180, ..., 0.0148, 0.5094, 0.1155])
|
||||||
|
Matrix: ut2010
|
||||||
|
Shape: torch.Size([115406, 115406])
|
||||||
|
NNZ: 572066
|
||||||
|
Density: 4.295259032005559e-05
|
||||||
|
Time: 12.719107389450073 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
|
31,066,176 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
298,652 L1I_CACHE_REFILL:u
|
||||||
|
473,808 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
32,572,985 L1D_CACHE:u
|
||||||
|
|
||||||
|
16.299576479 seconds time elapsed
|
||||||
|
|
||||||
|
86.072431000 seconds user
|
||||||
|
987.199923000 seconds sys
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
||||||
|
572066]),
|
||||||
|
col_indices=tensor([ 453, 1291, 1979, ..., 113521, 114509,
|
||||||
|
114602]),
|
||||||
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.1156, 0.5715, 0.3099, ..., 0.3964, 0.9672, 0.5694])
|
||||||
|
Matrix: ut2010
|
||||||
|
Shape: torch.Size([115406, 115406])
|
||||||
|
NNZ: 572066
|
||||||
|
Density: 4.295259032005559e-05
|
||||||
|
Time: 12.682909727096558 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
|
547,428 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
566,356 LL_CACHE_RD:u
|
||||||
|
162,858 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
19,852 L2D_TLB_REFILL:u
|
||||||
|
304,056 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,713,420 L2D_CACHE:u
|
||||||
|
|
||||||
|
16.221517033 seconds time elapsed
|
||||||
|
|
||||||
|
79.927661000 seconds user
|
||||||
|
988.333919000 seconds sys
1
pytorch/output/altra_10_30_vt2010_1000.json
Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.88, 20.76, 20.76, 20.96, 20.92, 20.88, 20.72, 20.4, 20.4, 20.24], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.6774682998657227, "power": [34.12, 31.52, 30.36, 27.2, 27.16, 30.64, 31.0, 31.32], "power_after": [20.44, 20.52, 20.68, 20.72, 20.68, 20.72, 20.88, 20.8, 20.88, 20.52], "task clock (msec)": 48.59, "page faults": 3274, "cycles": 55030923, "instructions": 78222423, "branch mispredictions": 323004, "branches": 19091130, "ITLB accesses": 27178617, "ITLB misses": 6398, "DTLB misses": 19770, "DTLB accesses": 36355567, "L1I cache accesses": 31341858, "L1I cache misses": 291951, "L1D cache misses": 468242, "L1D cache accesses": 32805413, "LL cache misses": 520057, "LL cache accesses": 541186, "L2D TLB accesses": 191068, "L2D TLB misses": 22725, "L2D cache misses": 288895, "L2D cache accesses": 1728320, "instructions per cycle": 1.4214266949511278, "branch miss rate": 0.01691906136514706, "ITLB miss rate": 0.00023540564996371965, "DTLB miss rate": 0.0005437956723381593, "L2D TLB miss rate": 0.11893671363074926, "L1I cache miss rate": 0.009315050817982775, "L1D cache miss rate": 0.014273315199537345, "L2D cache miss rate": 0.16715365210146269, "LL cache miss rate": 0.9609579700879181}
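
With one JSON record per matrix landing in pytorch/output/, the runs are easy to compare side by side once they are folded into a table. A standard-library sketch of that aggregation; the glob pattern and selected keys follow the files shown in this diff and are not part of the benchmark scripts:

import glob
import json

rows = []
for path in sorted(glob.glob('pytorch/output/altra_*_1000.json')):
    with open(path) as f:
        rec = json.load(f)
    rows.append((rec['matrix'], rec['nnz'], rec['time_s'],
                 rec['instructions per cycle'], rec['LL cache miss rate']))

print(f'{"matrix":<20}{"nnz":>10}{"time_s":>10}{"ipc":>8}{"ll_miss":>9}')
for matrix, nnz, time_s, ipc, ll_miss in rows:
    print(f'{matrix:<20}{nnz:>10}{time_s:>10.2f}{ipc:>8.2f}{ll_miss:>9.2f}')
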
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394143 queued and waiting for resources
|
srun: job 3394988 queued and waiting for resources
|
||||||
srun: job 3394143 has been allocated resources
|
srun: job 3394988 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
tensor([0.9170, 0.7306, 0.1175, ..., 0.0616, 0.0147, 0.6403])
|
tensor([0.2022, 0.3400, 0.2561, ..., 0.8370, 0.0285, 0.6506])
|
||||||
|
Matrix: vt2010
|
||||||
Shape: torch.Size([32580, 32580])
|
Shape: torch.Size([32580, 32580])
|
||||||
NNZ: 155598
|
NNZ: 155598
|
||||||
Density: 0.00014658915806621921
|
Density: 0.00014658915806621921
|
||||||
Time: 0.4440653324127197 seconds
|
Time: 3.74875545501709 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
61.63 msec task-clock:u # 0.016 CPUs utilized
|
48.59 msec task-clock:u # 0.007 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,304 page-faults:u # 53.611 K/sec
|
3,274 page-faults:u # 67.376 K/sec
|
||||||
64,734,203 cycles:u # 1.050 GHz (50.46%)
|
55,030,923 cycles:u # 1.132 GHz (65.54%)
|
||||||
53,597,991 instructions:u # 0.83 insn per cycle (70.10%)
|
78,222,423 instructions:u # 1.42 insn per cycle (83.60%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
347,389 branch-misses:u (91.95%)
|
369,917 branch-misses:u
|
||||||
31,363,842 L1-dcache-loads:u # 508.915 M/sec
|
32,435,815 L1-dcache-loads:u # 667.500 M/sec
|
||||||
482,780 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
|
467,963 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
30,027,001 L1-icache-loads:u # 487.223 M/sec
|
31,013,287 L1-icache-loads:u # 638.226 M/sec
|
||||||
288,023 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
289,982 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
44,333,825 dTLB-loads:u # 719.368 M/sec (48.58%)
|
60,644,978 dTLB-loads:u # 1.248 G/sec (17.29%)
|
||||||
74,525 dTLB-load-misses:u # 0.17% of all dTLB cache accesses (16.71%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.811654040 seconds time elapsed
|
6.978143797 seconds time elapsed
|
||||||
|
|
||||||
15.616953000 seconds user
|
18.401752000 seconds user
|
||||||
30.906234000 seconds sys
|
28.060858000 seconds sys
@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
tensor([0.5548, 0.3514, 0.6283, ..., 0.5672, 0.1575, 0.4493])
|
tensor([0.3381, 0.0423, 0.5363, ..., 0.0429, 0.4077, 0.4744])
|
||||||
|
Matrix: vt2010
|
||||||
Shape: torch.Size([32580, 32580])
|
Shape: torch.Size([32580, 32580])
|
||||||
NNZ: 155598
|
NNZ: 155598
|
||||||
Density: 0.00014658915806621921
|
Density: 0.00014658915806621921
|
||||||
Time: 0.44233155250549316 seconds
|
Time: 3.7925527095794678 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
330,777 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
323,004 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
20,357,034 BR_RETIRED:u
|
19,091,130 BR_RETIRED:u
|
||||||
|
|
||||||
3.835342404 seconds time elapsed
|
7.233250772 seconds time elapsed
|
||||||
|
|
||||||
15.497637000 seconds user
|
19.111768000 seconds user
|
||||||
28.676763000 seconds sys
|
32.178633000 seconds sys
@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
tensor([0.0953, 0.5790, 0.0112, ..., 0.9540, 0.3173, 0.4731])
|
tensor([0.7962, 0.6492, 0.2778, ..., 0.5407, 0.1159, 0.3587])
|
||||||
|
Matrix: vt2010
|
||||||
Shape: torch.Size([32580, 32580])
|
Shape: torch.Size([32580, 32580])
|
||||||
NNZ: 155598
|
NNZ: 155598
|
||||||
Density: 0.00014658915806621921
|
Density: 0.00014658915806621921
|
||||||
Time: 0.43302106857299805 seconds
|
Time: 3.668635129928589 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
27,381,387 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,178,617 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
6,248 ITLB_WALK:u
|
6,398 ITLB_WALK:u
|
||||||
17,636 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
19,770 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
37,436,110 L1D_TLB:u
|
36,355,567 L1D_TLB:u
|
||||||
|
|
||||||
3.828586094 seconds time elapsed
|
6.925944164 seconds time elapsed
|
||||||
|
|
||||||
15.518057000 seconds user
|
18.970654000 seconds user
|
||||||
31.389361000 seconds sys
|
30.786317000 seconds sys
@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
tensor([0.5456, 0.8708, 0.2037, ..., 0.8669, 0.9122, 0.2046])
|
tensor([0.8340, 0.3434, 0.3449, ..., 0.9828, 0.6683, 0.0312])
|
||||||
|
Matrix: vt2010
|
||||||
Shape: torch.Size([32580, 32580])
|
Shape: torch.Size([32580, 32580])
|
||||||
NNZ: 155598
|
NNZ: 155598
|
||||||
Density: 0.00014658915806621921
|
Density: 0.00014658915806621921
|
||||||
Time: 0.4426534175872803 seconds
|
Time: 3.623232126235962 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
32,505,993 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
31,341,858 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
303,849 L1I_CACHE_REFILL:u
|
291,951 L1I_CACHE_REFILL:u
|
||||||
467,426 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
468,242 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
34,241,110 L1D_CACHE:u
|
32,805,413 L1D_CACHE:u
|
||||||
|
|
||||||
3.811299200 seconds time elapsed
|
6.941260499 seconds time elapsed
|
||||||
|
|
||||||
15.932195000 seconds user
|
18.410270000 seconds user
|
||||||
30.887870000 seconds sys
|
27.908787000 seconds sys
@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
|||||||
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
tensor([0.5024, 0.2304, 0.7925, ..., 0.1397, 0.5558, 0.6450])
|
tensor([0.2754, 0.3661, 0.9484, ..., 0.7285, 0.5354, 0.4116])
|
||||||
|
Matrix: vt2010
|
||||||
Shape: torch.Size([32580, 32580])
|
Shape: torch.Size([32580, 32580])
|
||||||
NNZ: 155598
|
NNZ: 155598
|
||||||
Density: 0.00014658915806621921
|
Density: 0.00014658915806621921
|
||||||
Time: 0.3671383857727051 seconds
|
Time: 3.7337992191314697 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
550,075 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
520,057 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
562,829 LL_CACHE_RD:u
|
541,186 LL_CACHE_RD:u
|
||||||
199,285 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
191,068 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
24,424 L2D_TLB_REFILL:u
|
22,725 L2D_TLB_REFILL:u
|
||||||
310,155 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
288,895 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,783,824 L2D_CACHE:u
|
1,728,320 L2D_CACHE:u
|
||||||
|
|
||||||
3.824434783 seconds time elapsed
|
7.164825085 seconds time elapsed
|
||||||
|
|
||||||
15.754438000 seconds user
|
18.193885000 seconds user
|
||||||
28.226523000 seconds sys
|
30.023194000 seconds sys
@ -1 +0,0 @@
{"power_before": [50.88, 50.88], "shape": [11806, 11806], "nnz": 65460, "% density": 0.0004696458003979807, "time_s": 0.1896660327911377, "power": [25.52, 32.28, 33.12, 33.12], "power_after": [32.88, 26.52], "task clock (msec)": 42.01, "page faults": 3263, "cycles": 47084933, "instructions": 77895119, "branch mispredictions": 330923, "branches": 19740519, "ITLB accesses": 27761239, "ITLB misses": 6471, "DTLB misses": 17268, "DTLB accesses": 36993265, "L1I cache accesses": 31834980, "L1I cache misses": 298333, "L1D cache misses": 466901, "L1D cache accesses": 33528976, "LL cache misses": 525505, "LL cache accesses": 546521, "L2D TLB accesses": 184884, "L2D TLB misses": 22933, "L2D cache misses": 292367, "L2D cache accesses": 1706226, "instructions per cycle": 1.6543534000568716, "branch miss rate": 0.016763642333821112, "ITLB miss rate": 0.00023309478370183695, "DTLB miss rate": 0.0004667876706746485, "L2D TLB miss rate": 0.12403993855606758, "L1I cache miss rate": 0.009371232524725947, "L1D cache miss rate": 0.013925298523879763, "L2D cache miss rate": 0.1713530329510862, "LL cache miss rate": 0.9615458509371094}
@ -1 +0,0 @@
{"power_before": [20.16, 20.08], "shape": [31379, 31379], "nnz": 106762, "% density": 0.00010842726485909405, "time_s": 0.336850643157959, "power": [24.28, 30.72, 30.72, 34.56], "power_after": [37.32, 32.92], "task clock (msec)": 60.78, "page faults": 3300, "cycles": 66733059, "instructions": 87889334, "branch mispredictions": 326300, "branches": 19832700, "ITLB accesses": 27233629, "ITLB misses": 5868, "DTLB misses": 16893, "DTLB accesses": 36409508, "L1I cache accesses": 30924532, "L1I cache misses": 288199, "L1D cache misses": 462816, "L1D cache accesses": 32428375, "LL cache misses": 551997, "LL cache accesses": 568528, "L2D TLB accesses": 193991, "L2D TLB misses": 24353, "L2D cache misses": 312207, "L2D cache accesses": 1821196, "instructions per cycle": 1.3170284011707, "branch miss rate": 0.016452626218316214, "ITLB miss rate": 0.0002154688969288669, "DTLB miss rate": 0.00046397221297250155, "L2D TLB miss rate": 0.125536751704976, "L1I cache miss rate": 0.009319429635992551, "L1D cache miss rate": 0.014271945479845968, "L2D cache miss rate": 0.17142965391973186, "LL cache miss rate": 0.9709231559395491}
@ -1 +0,0 @@
{"power_before": [16.32, 16.2], "shape": [116835, 116835], "nnz": 766396, "% density": 5.614451099680581e-05, "time_s": 2.2665774822235107, "power": [35.16, 50.8, 53.4, 53.4, 46.08, 46.88], "power_after": [58.4, 57.32], "task clock (msec)": 50.43, "page faults": 3285, "cycles": 54118679, "instructions": 77692421, "branch mispredictions": 325039, "branches": 19383216, "ITLB accesses": 26060519, "ITLB misses": 4749, "DTLB misses": 16865, "DTLB accesses": 34819729, "L1I cache accesses": 30777115, "L1I cache misses": 293980, "L1D cache misses": 461522, "L1D cache accesses": 32216597, "LL cache misses": 567700, "LL cache accesses": 588689, "L2D TLB accesses": 189417, "L2D TLB misses": 22360, "L2D cache misses": 328306, "L2D cache accesses": 1908607, "instructions per cycle": 1.4355934482436277, "branch miss rate": 0.0167690954896236, "ITLB miss rate": 0.00018222967854170517, "DTLB miss rate": 0.00048435184547243316, "L2D TLB miss rate": 0.11804642666708902, "L1I cache miss rate": 0.009551902444397404, "L1D cache miss rate": 0.014325597455249542, "L2D cache miss rate": 0.172013410827897, "LL cache miss rate": 0.9643461997761127}
@ -1 +0,0 @@
{"power_before": [20.48, 20.96], "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 0.3271017074584961, "power": [25.28, 26.08, 31.28, 32.96], "power_after": [33.4, 30.24], "task clock (msec)": 59.88, "page faults": 3313, "cycles": 58169777, "instructions": 57993431, "branch mispredictions": 330494, "branches": 20578427, "ITLB accesses": 27982097, "ITLB misses": 6614, "DTLB misses": 17270, "DTLB accesses": 37728899, "L1I cache accesses": 29754926, "L1I cache misses": 278786, "L1D cache misses": 454742, "L1D cache accesses": 31173246, "LL cache misses": 543243, "LL cache accesses": 560716, "L2D TLB accesses": 162281, "L2D TLB misses": 19847, "L2D cache misses": 300577, "L2D cache accesses": 1696278, "instructions per cycle": 0.9969684257170179, "branch miss rate": 0.016060216847478187, "ITLB miss rate": 0.0002363654160729984, "DTLB miss rate": 0.00045773930482307474, "L2D TLB miss rate": 0.12230020766448321, "L1I cache miss rate": 0.009369406598423401, "L1D cache miss rate": 0.014587572946365611, "L2D cache miss rate": 0.1771979592967662, "LL cache miss rate": 0.9688380570556218}
@ -1 +0,0 @@
{"power_before": [20.28, 20.32], "shape": [36692, 36692], "nnz": 367662, "% density": 0.0002730901120626302, "time_s": 1.030203104019165, "power": [32.08, 47.84, 55.76, 58.08, 58.24], "power_after": [48.76, 45.16], "task clock (msec)": 60.43, "page faults": 3319, "cycles": 66114448, "instructions": 90786829, "branch mispredictions": 341625, "branches": 20129354, "ITLB accesses": 27441303, "ITLB misses": 6807, "DTLB misses": 20551, "DTLB accesses": 36867114, "L1I cache accesses": 31744243, "L1I cache misses": 271027, "L1D cache misses": 464135, "L1D cache accesses": 33441141, "LL cache misses": 539935, "LL cache accesses": 552519, "L2D TLB accesses": 188291, "L2D TLB misses": 24177, "L2D cache misses": 301281, "L2D cache accesses": 1737575, "instructions per cycle": 1.3731768432824245, "branch miss rate": 0.016971483535934636, "ITLB miss rate": 0.00024805673404065397, "DTLB miss rate": 0.0005574344658494288, "L2D TLB miss rate": 0.12840231344036623, "L1I cache miss rate": 0.008537831568388637, "L1D cache miss rate": 0.01387916159918108, "L2D cache miss rate": 0.17339165215889962, "LL cache miss rate": 0.9772243126480719}
@ -1 +0,0 @@
{"power_before": [50.68, 49.4], "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.11296772956848145, "power": [26.2, 29.76, 33.64, 34.44], "power_after": [36.84, 29.44], "task clock (msec)": 67.56, "page faults": 3829, "cycles": 47862000, "instructions": 84392375, "branch mispredictions": 331622, "branches": 19800140, "ITLB accesses": 25905045, "ITLB misses": 6746, "DTLB misses": 17547, "DTLB accesses": 35220079, "L1I cache accesses": 30359576, "L1I cache misses": 283204, "L1D cache misses": 465520, "L1D cache accesses": 31843274, "LL cache misses": 560542, "LL cache accesses": 575610, "L2D TLB accesses": 173643, "L2D TLB misses": 21499, "L2D cache misses": 313335, "L2D cache accesses": 1741621, "instructions per cycle": 1.7632438051063475, "branch miss rate": 0.016748467435078743, "ITLB miss rate": 0.0002604125953072075, "DTLB miss rate": 0.0004982101261044871, "L2D TLB miss rate": 0.12381149830399152, "L1I cache miss rate": 0.009328325270418797, "L1D cache miss rate": 0.014619099782264852, "L2D cache miss rate": 0.17990998041479747, "LL cache miss rate": 0.9738225534650197}
@ -1 +0,0 @@
{"power_before": [16.52, 16.24], "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 0.1715233325958252, "power": [18.56, 24.92, 27.84, 27.84], "power_after": [33.2, 27.28], "task clock (msec)": 61.92, "page faults": 3281, "cycles": 66250810, "instructions": 75178179, "branch mispredictions": 332366, "branches": 19076182, "ITLB accesses": 27005133, "ITLB misses": 4791, "DTLB misses": 13403, "DTLB accesses": 36457054, "L1I cache accesses": 32367686, "L1I cache misses": 287524, "L1D cache misses": 467557, "L1D cache accesses": 34022862, "LL cache misses": 535707, "LL cache accesses": 556316, "L2D TLB accesses": 150149, "L2D TLB misses": 18418, "L2D cache misses": 297042, "L2D cache accesses": 1687364, "instructions per cycle": 1.1347510920998551, "branch miss rate": 0.017423088121092577, "ITLB miss rate": 0.00017741071669597036, "DTLB miss rate": 0.00036763804338112453, "L2D TLB miss rate": 0.12266481961251822, "L1I cache miss rate": 0.008883057009388932, "L1D cache miss rate": 0.013742435895016709, "L2D cache miss rate": 0.1760390763344483, "LL cache miss rate": 0.9629545078696281}
@ -1 +0,0 @@
{"power_before": [29.76, 33.16], "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 0.14322686195373535, "power": [22.6, 22.6, 26.16, 29.2], "power_after": [34.0, 30.16], "task clock (msec)": 64.71, "page faults": 3319, "cycles": 57611295, "instructions": 83148228, "branch mispredictions": 318386, "branches": 19233431, "ITLB accesses": 27039805, "ITLB misses": 6375, "DTLB misses": 17290, "DTLB accesses": 36688544, "L1I cache accesses": 32508072, "L1I cache misses": 297568, "L1D cache misses": 477654, "L1D cache accesses": 34044579, "LL cache misses": 549474, "LL cache accesses": 561939, "L2D TLB accesses": 185622, "L2D TLB misses": 23295, "L2D cache misses": 305878, "L2D cache accesses": 1763089, "instructions per cycle": 1.4432626102225268, "branch miss rate": 0.01655378075809771, "ITLB miss rate": 0.00023576353453732377, "DTLB miss rate": 0.00047126427257511227, "L2D TLB miss rate": 0.12549697772893298, "L1I cache miss rate": 0.009153664972810446, "L1D cache miss rate": 0.014030251336049713, "L2D cache miss rate": 0.17348982382625042, "LL cache miss rate": 0.9778178770293573}
@ -1 +0,0 @@
{"power_before": [20.56, 20.28], "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 0.30861377716064453, "power": [23.88, 27.6, 39.8, 40.12], "power_after": [39.28, 35.2], "task clock (msec)": 65.91, "page faults": 3247, "cycles": 92293071, "instructions": 76208632, "branch mispredictions": 320083, "branches": 19285106, "ITLB accesses": 26853940, "ITLB misses": 6728, "DTLB misses": 13955, "DTLB accesses": 37111059, "L1I cache accesses": 32554796, "L1I cache misses": 298729, "L1D cache misses": 473779, "L1D cache accesses": 34117102, "LL cache misses": 535040, "LL cache accesses": 547502, "L2D TLB accesses": 179876, "L2D TLB misses": 21809, "L2D cache misses": 298620, "L2D cache accesses": 1722959, "instructions per cycle": 0.8257243059990929, "branch miss rate": 0.016597419791210898, "ITLB miss rate": 0.0002505405165871377, "DTLB miss rate": 0.0003760334621547717, "L2D TLB miss rate": 0.12124463519313304, "L1I cache miss rate": 0.009176190199440968, "L1D cache miss rate": 0.013886847716432655, "L2D cache miss rate": 0.17331811145825293, "LL cache miss rate": 0.9772384393116372}
@ -1 +0,0 @@
{"power_before": [30.44, 35.52], "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 0.29622840881347656, "power": [23.84, 29.44, 33.0, 33.04], "power_after": [36.32, 30.0], "task clock (msec)": 60.77, "page faults": 3361, "cycles": 63493475, "instructions": 91578911, "branch mispredictions": 329084, "branches": 20406595, "ITLB accesses": 26859919, "ITLB misses": 6237, "DTLB misses": 16689, "DTLB accesses": 36348977, "L1I cache accesses": 30979764, "L1I cache misses": 292038, "L1D cache misses": 469219, "L1D cache accesses": 32411890, "LL cache misses": 571870, "LL cache accesses": 598306, "L2D TLB accesses": 205488, "L2D TLB misses": 26392, "L2D cache misses": 342141, "L2D cache accesses": 1857697, "instructions per cycle": 1.442335783322617, "branch miss rate": 0.01612635522976763, "ITLB miss rate": 0.00023220472109390948, "DTLB miss rate": 0.0004591325912693499, "L2D TLB miss rate": 0.12843572374055906, "L1I cache miss rate": 0.009426734173959492, "L1D cache miss rate": 0.014476755289494072, "L2D cache miss rate": 0.1841748142996409, "LL cache miss rate": 0.9558152517273769}
@ -1 +0,0 @@
{"power_before": [16.52, 16.64], "shape": [81871, 81871], "nnz": 545671, "% density": 8.140867447881048e-05, "time_s": 1.3372814655303955, "power": [23.92, 38.6, 46.04, 48.2, 48.2], "power_after": [45.0, 44.08], "task clock (msec)": 59.01, "page faults": 3448, "cycles": 73062796, "instructions": 88329175, "branch mispredictions": 331091, "branches": 20013316, "ITLB accesses": 26330936, "ITLB misses": 5193, "DTLB misses": 16837, "DTLB accesses": 35930477, "L1I cache accesses": 31853890, "L1I cache misses": 306147, "L1D cache misses": 479933, "L1D cache accesses": 33426019, "LL cache misses": 540302, "LL cache accesses": 553181, "L2D TLB accesses": 173206, "L2D TLB misses": 21390, "L2D cache misses": 300032, "L2D cache accesses": 1739931, "instructions per cycle": 1.2089487377406143, "branch miss rate": 0.016543535314187813, "ITLB miss rate": 0.0001972204861991993, "DTLB miss rate": 0.000468599401004334, "L2D TLB miss rate": 0.12349456716280037, "L1I cache miss rate": 0.009610976869701, "L1D cache miss rate": 0.014358066391334247, "L2D cache miss rate": 0.17243902200719455, "LL cache miss rate": 0.9767182893121781}
@ -1 +0,0 @@
{"power_before": [53.64, 46.88], "shape": [82144, 82144], "nnz": 549202, "% density": 8.13917555860553e-05, "time_s": 1.2292509078979492, "power": [40.64, 52.44, 54.8, 54.96, 46.8], "power_after": [47.88, 47.08], "task clock (msec)": 61.26, "page faults": 3303, "cycles": 44515786, "instructions": 81513738, "branch mispredictions": 328019, "branches": 19893662, "ITLB accesses": 27248112, "ITLB misses": 5792, "DTLB misses": 16632, "DTLB accesses": 36929042, "L1I cache accesses": 31702830, "L1I cache misses": 295778, "L1D cache misses": 470423, "L1D cache accesses": 33155119, "LL cache misses": 545220, "LL cache accesses": 562139, "L2D TLB accesses": 192206, "L2D TLB misses": 24891, "L2D cache misses": 307033, "L2D cache accesses": 1782260, "instructions per cycle": 1.8311198189334452, "branch miss rate": 0.01648861833482443, "ITLB miss rate": 0.0002125651861677609, "DTLB miss rate": 0.0004503772396803578, "L2D TLB miss rate": 0.12950168048864238, "L1I cache miss rate": 0.009329703373484323, "L1D cache miss rate": 0.014188548079106578, "L2D cache miss rate": 0.17227172241984895, "LL cache miss rate": 0.9699024618466251}
@ -1 +0,0 @@
{"power_before": [30.48, 33.04], "shape": [131828, 131828], "nnz": 841372, "% density": 4.841419648464106e-05, "time_s": 2.848874092102051, "power": [65.52, 75.88, 71.16, 71.16, 59.72, 47.92, 48.68], "power_after": [68.68, 67.88], "task clock (msec)": 49.87, "page faults": 3300, "cycles": 51935476, "instructions": 83731856, "branch mispredictions": 326464, "branches": 20341367, "ITLB accesses": 27590154, "ITLB misses": 6210, "DTLB misses": 17536, "DTLB accesses": 36763243, "L1I cache accesses": 31663300, "L1I cache misses": 289727, "L1D cache misses": 462864, "L1D cache accesses": 33262254, "LL cache misses": 530272, "LL cache accesses": 551373, "L2D TLB accesses": 196152, "L2D TLB misses": 23542, "L2D cache misses": 301998, "L2D cache accesses": 1732662, "instructions per cycle": 1.6122285275675532, "branch miss rate": 0.01604926551888081, "ITLB miss rate": 0.000225080294948698, "DTLB miss rate": 0.0004769981799483794, "L2D TLB miss rate": 0.12001916880786329, "L1I cache miss rate": 0.00915024649989104, "L1D cache miss rate": 0.013915593332911234, "L2D cache miss rate": 0.17429712200071334, "LL cache miss rate": 0.9617300810884828}
@ -1 +0,0 @@
{"power_before": [20.44, 20.2], "shape": [24818, 24818], "nnz": 239978, "% density": 0.00038961697406616504, "time_s": 0.556269645690918, "power": [25.24, 32.16, 33.0, 32.52], "power_after": [34.24, 30.28], "task clock (msec)": 62.49, "page faults": 3312, "cycles": 76783170, "instructions": 77095702, "branch mispredictions": 323514, "branches": 19769937, "ITLB accesses": 26809325, "ITLB misses": 6925, "DTLB misses": 19003, "DTLB accesses": 36516965, "L1I cache accesses": 31104231, "L1I cache misses": 285499, "L1D cache misses": 468498, "L1D cache accesses": 32677465, "LL cache misses": 559358, "LL cache accesses": 571935, "L2D TLB accesses": 194840, "L2D TLB misses": 23481, "L2D cache misses": 313487, "L2D cache accesses": 1779730, "instructions per cycle": 1.004070319055595, "branch miss rate": 0.016363936819829016, "ITLB miss rate": 0.00025830564551699827, "DTLB miss rate": 0.0005203882633729282, "L2D TLB miss rate": 0.12051426811742968, "L1I cache miss rate": 0.009178783426601994, "L1D cache miss rate": 0.01433703624194839, "L2D cache miss rate": 0.1761430104566423, "LL cache miss rate": 0.9780097388689274}
@ -1 +0,0 @@
{"power_before": [34.6, 37.16], "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 1.0817186832427979, "power": [34.32, 50.84, 52.12, 52.4, 52.76], "power_after": [49.0, 45.08], "task clock (msec)": 60.55, "page faults": 3490, "cycles": 49977496, "instructions": 78622993, "branch mispredictions": 327078, "branches": 20135808, "ITLB accesses": 27608093, "ITLB misses": 6616, "DTLB misses": 17185, "DTLB accesses": 36866957, "L1I cache accesses": 32639204, "L1I cache misses": 309643, "L1D cache misses": 478856, "L1D cache accesses": 34280618, "LL cache misses": 555275, "LL cache accesses": 578455, "L2D TLB accesses": 188723, "L2D TLB misses": 24635, "L2D cache misses": 319663, "L2D cache accesses": 1799940, "instructions per cycle": 1.573167911413569, "branch miss rate": 0.016243599462211798, "ITLB miss rate": 0.00023963987661154286, "DTLB miss rate": 0.00046613556958335347, "L2D TLB miss rate": 0.13053522888042263, "L1I cache miss rate": 0.009486842877663316, "L1D cache miss rate": 0.013968709665619214, "L2D cache miss rate": 0.17759647543807017, "LL cache miss rate": 0.9599277385449171}
@ -1 +0,0 @@
{"power_before": [34.04, 43.96], "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 0.4164857864379883, "power": [23.72, 23.72, 29.88, 33.32], "power_after": [33.36, 32.52], "task clock (msec)": 61.63, "page faults": 3304, "cycles": 64734203, "instructions": 53597991, "branch mispredictions": 330777, "branches": 20357034, "ITLB accesses": 27381387, "ITLB misses": 6248, "DTLB misses": 17636, "DTLB accesses": 37436110, "L1I cache accesses": 32505993, "L1I cache misses": 303849, "L1D cache misses": 467426, "L1D cache accesses": 34241110, "LL cache misses": 550075, "LL cache accesses": 562829, "L2D TLB accesses": 199285, "L2D TLB misses": 24424, "L2D cache misses": 310155, "L2D cache accesses": 1783824, "instructions per cycle": 0.8279701999266138, "branch miss rate": 0.016248781625063848, "ITLB miss rate": 0.00022818420410916364, "DTLB miss rate": 0.00047109595521543235, "L2D TLB miss rate": 0.12255814536969667, "L1I cache miss rate": 0.009347476325365603, "L1D cache miss rate": 0.01365101773861887, "L2D cache miss rate": 0.17387085272986572, "LL cache miss rate": 0.9773394761108614}
1 pytorch/output_HPC/altra_10_30_ASIC_680k_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [80.64, 75.2, 61.4, 49.84, 38.12, 38.12, 24.16, 22.68, 22.36, 22.2], "matrix": "ASIC_680k", "shape": [682862, 682862], "nnz": 3871773, "% density": 8.303171256088674e-06, "time_s": 41.51614689826965, "power": [92.4, 92.44, 84.28, 73.04, 59.64, 53.28, 56.32, 65.88, 80.28, 93.76, 96.16, 94.44, 94.44, 94.2, 93.92, 92.48, 92.16, 91.84, 92.08, 91.84, 91.68, 90.68, 90.88, 90.28, 90.28, 92.44, 92.52, 92.84, 90.0, 89.64, 88.16, 87.28, 88.12, 88.24, 88.08, 85.72, 85.12, 85.12, 81.72, 82.52, 83.84, 86.32, 88.8, 91.0, 90.2], "power_after": [21.92, 21.88, 21.92, 21.88, 21.88, 21.72, 21.72, 21.72, 21.72, 21.44], "task clock (msec)": 55.74, "page faults": 3266, "cycles": 51085608, "instructions": 88049969, "branch mispredictions": 332704, "branches": 20219525, "ITLB accesses": 27856157, "ITLB misses": 6496, "DTLB misses": 17046, "DTLB accesses": 37522360, "L1I cache accesses": 31475230, "L1I cache misses": 277921, "L1D cache misses": 462005, "L1D cache accesses": 33126938, "LL cache misses": 558923, "LL cache accesses": 571263, "L2D TLB accesses": 190627, "L2D TLB misses": 24234, "L2D cache misses": 314815, "L2D cache accesses": 1760110, "instructions per cycle": 1.7235768046452535, "branch miss rate": 0.01645459030318467, "ITLB miss rate": 0.00023319799640704206, "DTLB miss rate": 0.0004542891225392006, "L2D TLB miss rate": 0.12712784652751186, "L1I cache miss rate": 0.008829832220447635, "L1D cache miss rate": 0.013946504805243395, "L2D cache miss rate": 0.17886098027964162, "LL cache miss rate": 0.978398741035215}
173 pytorch/output_HPC/altra_10_30_ASIC_680k_1000.output Normal file
@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395287 queued and waiting for resources
srun: job 3395287 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
  ).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 4, ..., 3871767, 3871770, 3871773]),
       col_indices=tensor([ 0, 11698, 11699, ..., 169456, 645874, 682861]),
       values=tensor([ 3.8333e-04, -3.3333e-04, -5.0000e-05, ..., 0.0000e+00, 0.0000e+00, 7.9289e-02]),
       size=(682862, 682862), nnz=3871773, layout=torch.sparse_csr)
tensor([0.9283, 0.0381, 0.0668, ..., 0.8379, 0.4193, 0.2544])
Matrix: ASIC_680k
Shape: torch.Size([682862, 682862])
NNZ: 3871773
Density: 8.303171256088674e-06
Time: 29.317893266677856 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ASIC_680k.mtx 1000':

    55.74 msec task-clock:u # 0.002 CPUs utilized
    0 context-switches:u # 0.000 /sec
    0 cpu-migrations:u # 0.000 /sec
    3,266 page-faults:u # 58.589 K/sec
    51,085,608 cycles:u # 0.916 GHz (47.05%)
    88,049,969 instructions:u # 1.72 insn per cycle (92.14%)
    <not supported> branches:u
    360,079 branch-misses:u
    31,381,953 L1-dcache-loads:u # 562.963 M/sec
    471,072 L1-dcache-load-misses:u # 1.50% of all L1-dcache accesses
    <not supported> LLC-loads:u
    <not supported> LLC-load-misses:u
    29,944,756 L1-icache-loads:u # 537.181 M/sec
    283,203 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
    20,217,238 dTLB-loads:u # 362.679 M/sec (11.38%)
    <not counted> dTLB-load-misses:u (0.00%)
    <not counted> iTLB-loads:u (0.00%)
    <not counted> iTLB-load-misses:u (0.00%)

    33.488240295 seconds time elapsed
    222.678572000 seconds user
    2205.889153000 seconds sys

Time: 38.4066903591156 seconds
    332,704 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
    20,219,525 BR_RETIRED:u
    42.582064532 seconds time elapsed
    238.965431000 seconds user
    2914.615754000 seconds sys

Time: 34.74818539619446 seconds
    27,856,157 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
    6,496 ITLB_WALK:u
    17,046 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
    37,522,360 L1D_TLB:u
    39.019872270 seconds time elapsed
    239.678206000 seconds user
    2622.552757000 seconds sys

Time: 33.05097770690918 seconds
    31,475,230 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
    277,921 L1I_CACHE_REFILL:u
    462,005 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
    33,126,938 L1D_CACHE:u
    37.399374202 seconds time elapsed
    239.238852000 seconds user
    2492.385966000 seconds sys

Time: 32.37103772163391 seconds
    558,923 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
    571,263 LL_CACHE_RD:u
    190,627 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
    24,234 L2D_TLB_REFILL:u
    314,815 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
    1,760,110 L2D_CACHE:u
    36.644016288 seconds time elapsed
    233.933818000 seconds user
    2439.284669000 seconds sys
1 pytorch/output_HPC/altra_10_30_de2010_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.08, 21.88, 21.88, 21.88, 21.56, 21.64, 21.84, 21.88, 21.72, 21.92], "matrix": "de2010", "shape": [24115, 24115], "nnz": 116056, "% density": 0.0001995689928120616, "time_s": 2.7533018589019775, "power": [29.48, 30.24, 27.96, 28.4, 26.84, 30.6, 30.92], "power_after": [20.84, 21.24, 21.2, 21.24, 21.28, 20.88, 20.68, 20.56, 20.52, 20.56], "task clock (msec)": 61.38, "page faults": 3315, "cycles": 65013274, "instructions": 87442627, "branch mispredictions": 328392, "branches": 19496396, "ITLB accesses": 28311619, "ITLB misses": 6963, "DTLB misses": 17888, "DTLB accesses": 38223408, "L1I cache accesses": 30063404, "L1I cache misses": 272797, "L1D cache misses": 468341, "L1D cache accesses": 31519623, "LL cache misses": 538689, "LL cache accesses": 552789, "L2D TLB accesses": 192995, "L2D TLB misses": 23339, "L2D cache misses": 300578, "L2D cache accesses": 1764035, "instructions per cycle": 1.344996515634638, "branch miss rate": 0.016843728451145536, "ITLB miss rate": 0.0002459414277933028, "DTLB miss rate": 0.00046798548156668814, "L2D TLB miss rate": 0.12093059405684085, "L1I cache miss rate": 0.009074055619250568, "L1D cache miss rate": 0.01485871198395996, "L2D cache miss rate": 0.17039231081015965, "LL cache miss rate": 0.9744929801425137}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
 srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
 srun: # nodes may need to be rebuilt to properly execute on these nodes. #
 srun: ################################################################################
-srun: job 3394139 queued and waiting for resources
+srun: job 3395278 queued and waiting for resources
-srun: job 3394139 has been allocated resources
+srun: job 3395278 has been allocated resources
 /nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
 ).to_sparse_csr().type(torch.float)
 tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
@ -15,37 +15,38 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
 values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
 16949.]), size=(24115, 24115), nnz=116056,
 layout=torch.sparse_csr)
-tensor([0.4207, 0.3943, 0.6543, ..., 0.2191, 0.5415, 0.1575])
+tensor([0.3547, 0.6554, 0.2142, ..., 0.8854, 0.1041, 0.2243])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
-Time: 0.36042284965515137 seconds
+Time: 2.74495267868042 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':

-59.88 msec task-clock:u # 0.016 CPUs utilized
+61.38 msec task-clock:u # 0.010 CPUs utilized
 0 context-switches:u # 0.000 /sec
 0 cpu-migrations:u # 0.000 /sec
-3,313 page-faults:u # 55.328 K/sec
+3,315 page-faults:u # 54.008 K/sec
-58,169,777 cycles:u # 0.971 GHz (61.49%)
+65,013,274 cycles:u # 1.059 GHz (90.47%)
-57,993,431 instructions:u # 1.00 insn per cycle (81.67%)
+87,442,627 instructions:u # 1.34 insn per cycle
 <not supported> branches:u
-341,266 branch-misses:u
+369,052 branch-misses:u
-31,858,781 L1-dcache-loads:u # 532.049 M/sec
+31,570,549 L1-dcache-loads:u # 514.350 M/sec
-467,486 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
+477,402 L1-dcache-load-misses:u # 1.51% of all L1-dcache accesses
 <not supported> LLC-loads:u
 <not supported> LLC-load-misses:u
-30,461,310 L1-icache-loads:u # 508.711 M/sec
+30,354,192 L1-icache-loads:u # 494.533 M/sec
-294,156 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
+294,845 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
-43,828,130 dTLB-loads:u # 731.940 M/sec (40.26%)
+0 dTLB-loads:u # 0.000 /sec (3.92%)
-47,836 dTLB-load-misses:u # 0.11% of all dTLB cache accesses (25.52%)
+<not counted> dTLB-load-misses:u (0.00%)
-0 iTLB-loads:u # 0.000 /sec (2.73%)
+<not counted> iTLB-loads:u (0.00%)
 <not counted> iTLB-load-misses:u (0.00%)

-3.824054028 seconds time elapsed
+6.232986287 seconds time elapsed

-15.099361000 seconds user
+17.354331000 seconds user
-28.830417000 seconds sys
+29.036034000 seconds sys

@ -57,21 +58,22 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
 values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
 16949.]), size=(24115, 24115), nnz=116056,
 layout=torch.sparse_csr)
-tensor([0.0456, 0.2095, 0.0276, ..., 0.4209, 0.6824, 0.5475])
+tensor([0.3177, 0.9122, 0.6465, ..., 0.5489, 0.2254, 0.7965])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
-Time: 0.3598823547363281 seconds
+Time: 2.7603256702423096 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':

-330,494 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
+328,392 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
-20,578,427 BR_RETIRED:u
+19,496,396 BR_RETIRED:u

-3.781234836 seconds time elapsed
+6.149991615 seconds time elapsed

-14.965545000 seconds user
+17.630426000 seconds user
-29.444131000 seconds sys
+30.586756000 seconds sys

@ -83,23 +85,24 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
 values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
 16949.]), size=(24115, 24115), nnz=116056,
 layout=torch.sparse_csr)
-tensor([0.9882, 0.5477, 0.6307, ..., 0.1179, 0.6903, 0.1235])
+tensor([0.7815, 0.6240, 0.3715, ..., 0.5116, 0.5969, 0.4241])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
-Time: 0.29088521003723145 seconds
+Time: 2.7978765964508057 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':

-27,982,097 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
+28,311,619 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
-6,614 ITLB_WALK:u
+6,963 ITLB_WALK:u
-17,270 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
+17,888 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
-37,728,899 L1D_TLB:u
+38,223,408 L1D_TLB:u

-3.576632300 seconds time elapsed
+6.151843492 seconds time elapsed

-14.864601000 seconds user
+17.202045000 seconds user
-29.274547000 seconds sys
+28.014218000 seconds sys

@ -111,23 +114,24 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
 values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
 16949.]), size=(24115, 24115), nnz=116056,
 layout=torch.sparse_csr)
-tensor([0.3952, 0.0475, 0.1125, ..., 0.3481, 0.1290, 0.3495])
+tensor([0.9638, 0.0929, 0.0479, ..., 0.1500, 0.3117, 0.9664])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
-Time: 0.30365920066833496 seconds
+Time: 2.684640884399414 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':

-29,754,926 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
+30,063,404 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
-278,786 L1I_CACHE_REFILL:u
+272,797 L1I_CACHE_REFILL:u
-454,742 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
+468,341 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
-31,173,246 L1D_CACHE:u
+31,519,623 L1D_CACHE:u

-3.730995381 seconds time elapsed
+5.874324363 seconds time elapsed

-15.213930000 seconds user
+17.629166000 seconds user
-30.995070000 seconds sys
+29.998701000 seconds sys

@ -139,25 +143,26 @@ tensor(crow_indices=tensor([ 0, 13, 21, ..., 116047, 116051,
 values=tensor([ 14900., 33341., 20255., ..., 164227., 52413.,
 16949.]), size=(24115, 24115), nnz=116056,
 layout=torch.sparse_csr)
-tensor([0.7266, 0.7537, 0.9729, ..., 0.3349, 0.3523, 0.6532])
+tensor([0.3936, 0.9167, 0.4396, ..., 0.1628, 0.6361, 0.1875])
 Matrix: de2010
 Shape: torch.Size([24115, 24115])
 NNZ: 116056
 Density: 0.0001995689928120616
-Time: 0.2798902988433838 seconds
+Time: 2.747934103012085 seconds

-Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 100':
+Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/de2010.mtx 1000':

-543,243 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
+538,689 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
-560,716 LL_CACHE_RD:u
+552,789 LL_CACHE_RD:u
-162,281 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
+192,995 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
-19,847 L2D_TLB_REFILL:u
+23,339 L2D_TLB_REFILL:u
-300,577 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
+300,578 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
-1,696,278 L2D_CACHE:u
+1,764,035 L2D_CACHE:u

-3.819959836 seconds time elapsed
+6.102012809 seconds time elapsed

-15.346035000 seconds user
+18.001082000 seconds user
-29.199873000 seconds sys
+27.986033000 seconds sys
1 pytorch/output_HPC/altra_10_30_fl2010_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [20.72, 20.8, 20.96, 21.08, 21.4, 21.48, 21.48, 21.36, 21.08, 21.04], "matrix": "fl2010", "shape": [484481, 484481], "nnz": 2346294, "% density": 9.99606174861054e-06, "time_s": 14.43001127243042, "power": [93.04, 93.04, 89.16, 77.68, 62.92, 55.12, 53.84, 64.72, 77.04, 89.56, 94.4, 94.76, 93.52, 93.52, 96.04, 97.12, 96.44, 93.88, 93.72], "power_after": [21.08, 21.28, 21.28, 21.36, 21.08, 21.24, 21.08, 20.8, 21.04, 20.88], "task clock (msec)": 61.6, "page faults": 3276, "cycles": 41408849, "instructions": 49118917, "branch mispredictions": 331330, "branches": 19331189, "ITLB accesses": 27367982, "ITLB misses": 6160, "DTLB misses": 17157, "DTLB accesses": 36828216, "L1I cache accesses": 30147304, "L1I cache misses": 280082, "L1D cache misses": 454022, "L1D cache accesses": 31595140, "LL cache misses": 536056, "LL cache accesses": 550006, "L2D TLB accesses": 185998, "L2D TLB misses": 23735, "L2D cache misses": 296648, "L2D cache accesses": 1723525, "instructions per cycle": 1.1861937287848787, "branch miss rate": 0.017139659645353425, "ITLB miss rate": 0.00022508053388810325, "DTLB miss rate": 0.00046586562867992305, "L2D TLB miss rate": 0.12760889902041958, "L1I cache miss rate": 0.009290449321770198, "L1D cache miss rate": 0.014369994878959232, "L2D cache miss rate": 0.172117027603313, "LL cache miss rate": 0.97463664032756}
169 pytorch/output_HPC/altra_10_30_fl2010_1000.output Normal file
@ -0,0 +1,169 @@
srun: job 3395283 queued and waiting for resources
srun: job 3395283 has been allocated resources
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2346288, 2346292, 2346294]),
       col_indices=tensor([ 1513, 5311, 947, ..., 484460, 482463, 484022]),
       values=tensor([28364., 12497., 11567., ..., 8532., 22622., 35914.]),
       size=(484481, 484481), nnz=2346294, layout=torch.sparse_csr)
tensor([2.0367e-04, 1.7661e-01, 2.1772e-01, ..., 1.8646e-01, 2.2210e-01, 4.2364e-02])
Matrix: fl2010
Shape: torch.Size([484481, 484481])
NNZ: 2346294
Density: 9.99606174861054e-06
Time: 16.31556534767151 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/fl2010.mtx 1000':

    61.60 msec task-clock:u # 0.003 CPUs utilized
    0 context-switches:u # 0.000 /sec
    0 cpu-migrations:u # 0.000 /sec
    3,276 page-faults:u # 53.185 K/sec
    41,408,849 cycles:u # 0.672 GHz (41.57%)
    49,118,917 instructions:u # 1.19 insn per cycle (67.74%)
    <not supported> branches:u
    344,653 branch-misses:u (91.69%)
    31,501,274 L1-dcache-loads:u # 511.418 M/sec
    477,740 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
    <not supported> LLC-loads:u
    <not supported> LLC-load-misses:u
    30,099,667 L1-icache-loads:u # 488.663 M/sec
    285,734 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
    41,879,387 dTLB-loads:u # 679.904 M/sec (54.00%)
    99,044 dTLB-load-misses:u # 0.24% of all dTLB cache accesses (13.61%)
    <not counted> iTLB-loads:u (0.00%)
    <not counted> iTLB-load-misses:u (0.00%)

    20.288512544 seconds time elapsed
    134.447078000 seconds user
    1247.121046000 seconds sys

Time: 16.561575651168823 seconds
    331,330 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
    19,331,189 BR_RETIRED:u
    20.603578845 seconds time elapsed
    136.555709000 seconds user
    1264.382740000 seconds sys

Time: 17.374610424041748 seconds
    27,367,982 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
    6,160 ITLB_WALK:u
    17,157 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
    36,828,216 L1D_TLB:u
    21.377378255 seconds time elapsed
    140.848520000 seconds user
    1326.124469000 seconds sys

Time: 16.753613471984863 seconds
    30,147,304 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
    280,082 L1I_CACHE_REFILL:u
    454,022 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
    31,595,140 L1D_CACHE:u
    20.706929400 seconds time elapsed
    139.881127000 seconds user
    1278.527504000 seconds sys

Time: 14.484151124954224 seconds
    536,056 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
    550,006 LL_CACHE_RD:u
    185,998 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
    23,735 L2D_TLB_REFILL:u
    296,648 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
    1,723,525 L2D_CACHE:u
    18.443039315 seconds time elapsed
    135.498625000 seconds user
    1101.745145000 seconds sys
1 pytorch/output_HPC/altra_10_30_ga2010_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [51.04, 38.64, 22.84, 22.24, 21.88, 21.88, 21.6, 21.4, 21.24, 21.28], "matrix": "ga2010", "shape": [291086, 291086], "nnz": 1418056, "% density": 1.6735964475229304e-05, "time_s": 15.249999523162842, "power": [88.88, 89.52, 78.6, 64.88, 52.64, 52.64, 54.76, 60.16, 71.44, 86.84, 90.72, 89.6, 90.56, 90.36, 91.68, 91.84, 93.4, 93.4, 92.72], "power_after": [21.68, 21.4, 21.28, 21.04, 21.04, 20.96, 20.92, 20.76, 20.8, 20.96], "task clock (msec)": 72.45, "page faults": 3289, "cycles": 24836161, "instructions": 74134706, "branch mispredictions": 325643, "branches": 19697746, "ITLB accesses": 27767290, "ITLB misses": 5832, "DTLB misses": 18134, "DTLB accesses": 37063060, "L1I cache accesses": 32135376, "L1I cache misses": 302429, "L1D cache misses": 484427, "L1D cache accesses": 33639686, "LL cache misses": 548380, "LL cache accesses": 561312, "L2D TLB accesses": 186006, "L2D TLB misses": 25022, "L2D cache misses": 304539, "L2D cache accesses": 1750107, "instructions per cycle": 2.9849502908279586, "branch miss rate": 0.01653199305138771, "ITLB miss rate": 0.00021003129941740803, "DTLB miss rate": 0.0004892742261432272, "L2D TLB miss rate": 0.13452254228358226, "L1I cache miss rate": 0.009411092622659838, "L1D cache miss rate": 0.014400461407398393, "L2D cache miss rate": 0.17401164614506429, "LL cache miss rate": 0.976961119662505}
168 pytorch/output_HPC/altra_10_30_ga2010_1000.output Normal file
@ -0,0 +1,168 @@
srun: job 3395281 queued and waiting for resources
srun: job 3395281 has been allocated resources
tensor(crow_indices=tensor([ 0, 3, 10, ..., 1418047, 1418054, 1418056]),
       col_indices=tensor([ 1566, 1871, 1997, ..., 291064, 289820, 290176]),
       values=tensor([18760., 17851., 18847., ..., 65219., 56729., 77629.]),
       size=(291086, 291086), nnz=1418056, layout=torch.sparse_csr)
tensor([0.8043, 0.7164, 0.5687, ..., 0.1275, 0.5142, 0.8456])
Matrix: ga2010
Shape: torch.Size([291086, 291086])
NNZ: 1418056
Density: 1.6735964475229304e-05
Time: 13.566045045852661 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ga2010.mtx 1000':

    72.45 msec task-clock:u # 0.004 CPUs utilized
    0 context-switches:u # 0.000 /sec
    0 cpu-migrations:u # 0.000 /sec
    3,289 page-faults:u # 45.396 K/sec
    24,836,161 cycles:u # 0.343 GHz (23.15%)
    74,134,706 instructions:u # 2.98 insn per cycle (85.49%)
    <not supported> branches:u
    381,828 branch-misses:u
    33,748,654 L1-dcache-loads:u # 465.814 M/sec
    497,166 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
    <not supported> LLC-loads:u
    <not supported> LLC-load-misses:u
    32,271,900 L1-icache-loads:u # 445.431 M/sec
    311,814 L1-icache-load-misses:u # 0.97% of all L1-icache accesses
    43,431,516 dTLB-loads:u # 599.461 M/sec (27.81%)
    33,416 dTLB-load-misses:u # 0.08% of all dTLB cache accesses (4.55%)
    <not counted> iTLB-loads:u (0.00%)
    <not counted> iTLB-load-misses:u (0.00%)

    17.276157893 seconds time elapsed
    100.320029000 seconds user
    1057.703228000 seconds sys

Time: 17.094524145126343 seconds
    325,643 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
    19,697,746 BR_RETIRED:u
    20.849795214 seconds time elapsed
    115.280665000 seconds user
    1318.654953000 seconds sys

Time: 15.106332063674927 seconds
    27,767,290 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
    5,832 ITLB_WALK:u
    18,134 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
    37,063,060 L1D_TLB:u
    18.753509375 seconds time elapsed
    112.958759000 seconds user
    1167.457916000 seconds sys

Time: 13.73094367980957 seconds
    32,135,376 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
    302,429 L1I_CACHE_REFILL:u
    484,427 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
    33,639,686 L1D_CACHE:u
    17.400567824 seconds time elapsed
    110.027662000 seconds user
    1054.271122000 seconds sys

Time: 15.809288501739502 seconds
    548,380 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
    561,312 LL_CACHE_RD:u
    186,006 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
    25,022 L2D_TLB_REFILL:u
    304,539 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
    1,750,107 L2D_CACHE:u
    19.626934574 seconds time elapsed
    116.733174000 seconds user
    1214.439657000 seconds sys
1 pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [22.04, 21.32, 21.32, 21.32, 21.12, 21.12, 21.0, 20.68, 20.72, 20.56], "matrix": "mac_econ_fwd500", "shape": [206500, 206500], "nnz": 1273389, "% density": 2.9862143765866013e-05, "time_s": 15.046087741851807, "power": [91.88, 91.12, 83.92, 72.88, 57.76, 51.24, 53.12, 62.84, 78.32, 91.64, 95.8, 95.8, 94.08, 92.48, 91.6, 89.88, 87.36, 87.84, 87.32], "power_after": [20.92, 21.04, 21.12, 20.92, 20.92, 20.88, 20.88, 20.92, 21.04, 20.96], "task clock (msec)": 62.46, "page faults": 3243, "cycles": 57150420, "instructions": 94155455, "branch mispredictions": 320781, "branches": 19491698, "ITLB accesses": 27433101, "ITLB misses": 7382, "DTLB misses": 19213, "DTLB accesses": 37123052, "L1I cache accesses": 32027284, "L1I cache misses": 290368, "L1D cache misses": 471338, "L1D cache accesses": 33366668, "LL cache misses": 571063, "LL cache accesses": 583554, "L2D TLB accesses": 196434, "L2D TLB misses": 25171, "L2D cache misses": 329198, "L2D cache accesses": 1814040, "instructions per cycle": 1.6475024155553013, "branch miss rate": 0.016457314288370363, "ITLB miss rate": 0.0002690909788142434, "DTLB miss rate": 0.0005175490420345827, "L2D TLB miss rate": 0.1281397314110592, "L1I cache miss rate": 0.009066269871650684, "L1D cache miss rate": 0.014126013421537926, "L2D cache miss rate": 0.1814722938854711, "LL cache miss rate": 0.9785949543658342}
173 pytorch/output_HPC/altra_10_30_mac_econ_fwd500_1000.output Normal file
@ -0,0 +1,173 @@
srun: job 3395279 queued and waiting for resources
srun: job 3395279 has been allocated resources
tensor(crow_indices=tensor([ 0, 3, 8, ..., 1273376, 1273379, 1273389]),
       col_indices=tensor([ 3, 30, 44, ..., 206363, 206408, 206459]),
       values=tensor([-3.7877e-03, -1.5420e-01, 9.5305e-04, ..., 1.2290e-01, 2.2235e-01, -1.0000e+00]),
       size=(206500, 206500), nnz=1273389, layout=torch.sparse_csr)
tensor([0.5388, 0.2921, 0.7349, ..., 0.6379, 0.9676, 0.6389])
Matrix: mac_econ_fwd500
Shape: torch.Size([206500, 206500])
NNZ: 1273389
Density: 2.9862143765866013e-05
Time: 21.700236320495605 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mac_econ_fwd500.mtx 1000':

    62.46 msec task-clock:u # 0.002 CPUs utilized
    0 context-switches:u # 0.000 /sec
    0 cpu-migrations:u # 0.000 /sec
    3,243 page-faults:u # 51.921 K/sec
    57,150,420 cycles:u # 0.915 GHz (90.14%)
    94,155,455 instructions:u # 1.65 insn per cycle
    <not supported> branches:u
    373,032 branch-misses:u
    33,654,742 L1-dcache-loads:u # 538.817 M/sec
    479,068 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
    <not supported> LLC-loads:u
    <not supported> LLC-load-misses:u
    32,149,866 L1-icache-loads:u # 514.724 M/sec
    293,643 L1-icache-load-misses:u # 0.91% of all L1-icache accesses
    0 dTLB-loads:u # 0.000 /sec (5.14%)
    <not counted> dTLB-load-misses:u (0.00%)
    <not counted> iTLB-loads:u (0.00%)
    <not counted> iTLB-load-misses:u (0.00%)

    25.310174677 seconds time elapsed
    125.287203000 seconds user
    1680.798909000 seconds sys

Time: 16.171404361724854 seconds
    320,781 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
    19,491,698 BR_RETIRED:u
    19.988421837 seconds time elapsed
    112.429117000 seconds user
    1245.246161000 seconds sys

Time: 11.788637161254883 seconds
    27,433,101 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
    7,382 ITLB_WALK:u
    19,213 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
    37,123,052 L1D_TLB:u
    15.542834153 seconds time elapsed
    99.681401000 seconds user
    906.856853000 seconds sys

Time: 13.596147060394287 seconds
    32,027,284 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
    290,368 L1I_CACHE_REFILL:u
    471,338 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
    33,366,668 L1D_CACHE:u
    17.325855116 seconds time elapsed
    101.368582000 seconds user
    1053.826259000 seconds sys

Time: 14.157796382904053 seconds
    571,063 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
    583,554 LL_CACHE_RD:u
    196,434 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
    25,171 L2D_TLB_REFILL:u
    329,198 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
    1,814,040 L2D_CACHE:u
    17.958287837 seconds time elapsed
    104.145071000 seconds user
    1089.962121000 seconds sys
1
pytorch/output_HPC/altra_10_30_mc2depi_1000.json
Normal file
1
pytorch/output_HPC/altra_10_30_mc2depi_1000.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [94.16, 91.68, 78.92, 60.88, 46.72, 28.36, 22.08, 21.64, 21.64, 21.64], "matrix": "mc2depi", "shape": [525825, 525825], "nnz": 2100225, "% density": 7.595972132902821e-06, "time_s": 11.03979206085205, "power": [95.44, 94.0, 88.76, 72.12, 59.48, 51.92, 53.88, 68.6, 83.2, 97.76, 98.4, 97.12, 97.12, 95.28, 94.12], "power_after": [21.48, 21.44, 21.28, 21.24, 21.16, 21.08, 21.24, 21.24, 21.24, 21.16], "task clock (msec)": 56.14, "page faults": 3289, "cycles": 47515158, "instructions": 72388154, "branch mispredictions": 327042, "branches": 19309026, "ITLB accesses": 26093030, "ITLB misses": 6189, "DTLB misses": 17253, "DTLB accesses": 35168741, "L1I cache accesses": 30539322, "L1I cache misses": 285404, "L1D cache misses": 465747, "L1D cache accesses": 31932803, "LL cache misses": 530261, "LL cache accesses": 551030, "L2D TLB accesses": 183570, "L2D TLB misses": 23883, "L2D cache misses": 297006, "L2D cache accesses": 1721848, "instructions per cycle": 1.5234749719236964, "branch miss rate": 0.01693726032581861, "ITLB miss rate": 0.0002371897782664566, "DTLB miss rate": 0.0004905776979619486, "L2D TLB miss rate": 0.13010295799967314, "L1I cache miss rate": 0.009345459601231487, "L1D cache miss rate": 0.014585221347465175, "L2D cache miss rate": 0.1724925777420539, "LL cache miss rate": 0.9623087672177558}
|
168
pytorch/output_HPC/altra_10_30_mc2depi_1000.output
Normal file
168
pytorch/output_HPC/altra_10_30_mc2depi_1000.output
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3395280 queued and waiting for resources
|
||||||
|
srun: job 3395280 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
||||||
|
2100223, 2100225]),
|
||||||
|
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
||||||
|
525824]),
|
||||||
|
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
||||||
|
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
||||||
|
tensor([0.7162, 0.9445, 0.3087, ..., 0.2863, 0.2977, 0.0994])
|
||||||
|
Matrix: mc2depi
|
||||||
|
Shape: torch.Size([525825, 525825])
|
||||||
|
NNZ: 2100225
|
||||||
|
Density: 7.595972132902821e-06
|
||||||
|
Time: 14.228392839431763 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
||||||
|
|
||||||
|
56.14 msec task-clock:u # 0.003 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,289 page-faults:u # 58.584 K/sec
|
||||||
|
47,515,158 cycles:u # 0.846 GHz (55.54%)
|
||||||
|
72,388,154 instructions:u # 1.52 insn per cycle (79.69%)
|
||||||
|
<not supported> branches:u
|
||||||
|
369,139 branch-misses:u
|
||||||
|
32,820,508 L1-dcache-loads:u # 584.601 M/sec
|
||||||
|
483,558 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,317,848 L1-icache-loads:u # 557.836 M/sec
|
||||||
|
288,398 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
|
||||||
|
39,511,659 dTLB-loads:u # 703.784 M/sec (36.64%)
|
||||||
|
0 dTLB-load-misses:u (3.47%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
18.186987302 seconds time elapsed
|
||||||
|
|
||||||
|
124.639912000 seconds user
|
||||||
|
1088.590740000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
||||||
|
2100223, 2100225]),
|
||||||
|
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
||||||
|
525824]),
|
||||||
|
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
||||||
|
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
||||||
|
tensor([0.4954, 0.2907, 0.0979, ..., 0.0742, 0.4519, 0.0278])
|
||||||
|
Matrix: mc2depi
|
||||||
|
Shape: torch.Size([525825, 525825])
|
||||||
|
NNZ: 2100225
|
||||||
|
Density: 7.595972132902821e-06
|
||||||
|
Time: 11.948119163513184 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
||||||
|
|
||||||
|
327,042 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
19,309,026 BR_RETIRED:u
|
||||||
|
|
||||||
|
15.715674756 seconds time elapsed
|
||||||
|
|
||||||
|
115.898749000 seconds user
|
||||||
|
910.018676000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
||||||
|
2100223, 2100225]),
|
||||||
|
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
||||||
|
525824]),
|
||||||
|
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
||||||
|
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1402, 0.9048, 0.8859, ..., 0.9542, 0.3509, 0.0695])
|
||||||
|
Matrix: mc2depi
|
||||||
|
Shape: torch.Size([525825, 525825])
|
||||||
|
NNZ: 2100225
|
||||||
|
Density: 7.595972132902821e-06
|
||||||
|
Time: 14.170094966888428 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
||||||
|
|
||||||
|
26,093,030 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,189 ITLB_WALK:u
|
||||||
|
17,253 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
35,168,741 L1D_TLB:u
|
||||||
|
|
||||||
|
18.132605509 seconds time elapsed
|
||||||
|
|
||||||
|
121.020111000 seconds user
|
||||||
|
1090.508165000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
||||||
|
2100223, 2100225]),
|
||||||
|
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
||||||
|
525824]),
|
||||||
|
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
||||||
|
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1192, 0.6084, 0.4643, ..., 0.3445, 0.4658, 0.7085])
|
||||||
|
Matrix: mc2depi
|
||||||
|
Shape: torch.Size([525825, 525825])
|
||||||
|
NNZ: 2100225
|
||||||
|
Density: 7.595972132902821e-06
|
||||||
|
Time: 13.925398826599121 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
||||||
|
|
||||||
|
30,539,322 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
285,404 L1I_CACHE_REFILL:u
|
||||||
|
465,747 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
31,932,803 L1D_CACHE:u
|
||||||
|
|
||||||
|
17.812911214 seconds time elapsed
|
||||||
|
|
||||||
|
119.918777000 seconds user
|
||||||
|
1067.928403000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 5, ..., 2100220,
|
||||||
|
2100223, 2100225]),
|
||||||
|
col_indices=tensor([ 0, 1, 1, ..., 525824, 525821,
|
||||||
|
525824]),
|
||||||
|
values=tensor([-2025., 2025., -2026., ..., 2025., 1024., -1024.]),
|
||||||
|
size=(525825, 525825), nnz=2100225, layout=torch.sparse_csr)
|
||||||
|
tensor([0.2075, 0.7442, 0.4477, ..., 0.0794, 0.0859, 0.8652])
|
||||||
|
Matrix: mc2depi
|
||||||
|
Shape: torch.Size([525825, 525825])
|
||||||
|
NNZ: 2100225
|
||||||
|
Density: 7.595972132902821e-06
|
||||||
|
Time: 12.866743564605713 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/mc2depi.mtx 1000':
|
||||||
|
|
||||||
|
530,261 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
551,030 LL_CACHE_RD:u
|
||||||
|
183,570 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
23,883 L2D_TLB_REFILL:u
|
||||||
|
297,006 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,721,848 L2D_CACHE:u
|
||||||
|
|
||||||
|
16.812811712 seconds time elapsed
|
||||||
|
|
||||||
|
117.780323000 seconds user
|
||||||
|
986.834040000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.json
Normal file
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.08, 25.12, 24.68, 23.68, 22.84, 21.96, 21.08, 20.96, 20.8, 20.96], "matrix": "p2p-Gnutella04", "shape": [10879, 10879], "nnz": 39994, "% density": 0.0003379223282393842, "time_s": 0.9992897510528564, "power": [29.48, 30.52, 31.88, 31.24, 34.32], "power_after": [20.4, 20.6, 20.64, 20.76, 20.92, 20.84, 20.88, 20.88, 20.88, 20.84], "task clock (msec)": 52.68, "page faults": 3272, "cycles": 63019732, "instructions": 73518898, "branch mispredictions": 333423, "branches": 19435905, "ITLB accesses": 27447537, "ITLB misses": 6417, "DTLB misses": 18300, "DTLB accesses": 37569384, "L1I cache accesses": 30830481, "L1I cache misses": 290545, "L1D cache misses": 473875, "L1D cache accesses": 32284772, "LL cache misses": 529403, "LL cache accesses": 549794, "L2D TLB accesses": 198306, "L2D TLB misses": 24497, "L2D cache misses": 298519, "L2D cache accesses": 1772795, "instructions per cycle": 1.1666012480027683, "branch miss rate": 0.017155002558409294, "ITLB miss rate": 0.00023379146915805232, "DTLB miss rate": 0.000487098750408045, "L2D TLB miss rate": 0.12353131019737174, "L1I cache miss rate": 0.009423952873132274, "L1D cache miss rate": 0.014677972636758903, "L2D cache miss rate": 0.16838890001381998, "LL cache miss rate": 0.9629115632400499}
|
158
pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.output
Normal file
158
pytorch/output_HPC/altra_10_30_p2p-Gnutella04_1000.output
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3395271 queued and waiting for resources
|
||||||
|
srun: job 3395271 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3559, 0.4732, 0.3024, ..., 0.9176, 0.7712, 0.4949])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
|
Shape: torch.Size([10879, 10879])
|
||||||
|
NNZ: 39994
|
||||||
|
Density: 0.0003379223282393842
|
||||||
|
Time: 1.0082497596740723 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
|
52.68 msec task-clock:u # 0.012 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,272 page-faults:u # 62.105 K/sec
|
||||||
|
63,019,732 cycles:u # 1.196 GHz (70.67%)
|
||||||
|
73,518,898 instructions:u # 1.17 insn per cycle (85.80%)
|
||||||
|
<not supported> branches:u
|
||||||
|
359,236 branch-misses:u (99.44%)
|
||||||
|
31,459,751 L1-dcache-loads:u # 597.131 M/sec
|
||||||
|
460,969 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
29,975,208 L1-icache-loads:u # 568.954 M/sec
|
||||||
|
281,710 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
|
59,589,523 dTLB-loads:u # 1.131 G/sec (17.10%)
|
||||||
|
0 dTLB-load-misses:u (1.27%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
4.456867719 seconds time elapsed
|
||||||
|
|
||||||
|
16.389568000 seconds user
|
||||||
|
29.247355000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
|
tensor([0.0123, 0.4107, 0.7785, ..., 0.7964, 0.7541, 0.4153])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
|
Shape: torch.Size([10879, 10879])
|
||||||
|
NNZ: 39994
|
||||||
|
Density: 0.0003379223282393842
|
||||||
|
Time: 1.030029058456421 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
|
333,423 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
19,435,905 BR_RETIRED:u
|
||||||
|
|
||||||
|
4.359656946 seconds time elapsed
|
||||||
|
|
||||||
|
16.490532000 seconds user
|
||||||
|
28.366462000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1898, 0.0740, 0.4564, ..., 0.7987, 0.1017, 0.5949])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
|
Shape: torch.Size([10879, 10879])
|
||||||
|
NNZ: 39994
|
||||||
|
Density: 0.0003379223282393842
|
||||||
|
Time: 1.004878044128418 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
|
27,447,537 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,417 ITLB_WALK:u
|
||||||
|
18,300 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
37,569,384 L1D_TLB:u
|
||||||
|
|
||||||
|
4.355627133 seconds time elapsed
|
||||||
|
|
||||||
|
15.883078000 seconds user
|
||||||
|
27.120829000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1682, 0.9350, 0.9210, ..., 0.3758, 0.2263, 0.1068])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
|
Shape: torch.Size([10879, 10879])
|
||||||
|
NNZ: 39994
|
||||||
|
Density: 0.0003379223282393842
|
||||||
|
Time: 1.0207850933074951 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
|
30,830,481 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
290,545 L1I_CACHE_REFILL:u
|
||||||
|
473,875 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
32,284,772 L1D_CACHE:u
|
||||||
|
|
||||||
|
4.427088851 seconds time elapsed
|
||||||
|
|
||||||
|
15.711555000 seconds user
|
||||||
|
29.627091000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 10, 20, ..., 39994, 39994, 39994]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 9711, 10875, 10876]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(10879, 10879),
|
||||||
|
nnz=39994, layout=torch.sparse_csr)
|
||||||
|
tensor([0.9351, 0.3836, 0.0822, ..., 0.9798, 0.3726, 0.7394])
|
||||||
|
Matrix: p2p-Gnutella04
|
||||||
|
Shape: torch.Size([10879, 10879])
|
||||||
|
NNZ: 39994
|
||||||
|
Density: 0.0003379223282393842
|
||||||
|
Time: 1.041510820388794 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella04.mtx 1000':
|
||||||
|
|
||||||
|
529,403 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
549,794 LL_CACHE_RD:u
|
||||||
|
198,306 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
24,497 L2D_TLB_REFILL:u
|
||||||
|
298,519 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,772,795 L2D_CACHE:u
|
||||||
|
|
||||||
|
4.454107604 seconds time elapsed
|
||||||
|
|
||||||
|
16.577921000 seconds user
|
||||||
|
29.390427000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.json
Normal file
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella24_1000.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [30.72, 30.6, 28.68, 26.48, 22.44, 21.4, 21.28, 21.08, 21.32, 21.6], "matrix": "p2p-Gnutella24", "shape": [26518, 26518], "nnz": 65369, "% density": 9.295875717624285e-05, "time_s": 1.718301773071289, "power": [31.52, 32.48, 33.64, 33.88, 33.44, 31.52], "power_after": [20.96, 20.84, 20.92, 20.8, 20.76, 20.76, 20.76, 20.68, 20.72, 20.92], "task clock (msec)": 67.08, "page faults": 3303, "cycles": 61261862, "instructions": 83757591, "branch mispredictions": 329248, "branches": 19953212, "ITLB accesses": 27084694, "ITLB misses": 7107, "DTLB misses": 17529, "DTLB accesses": 36684333, "L1I cache accesses": 32158234, "L1I cache misses": 286484, "L1D cache misses": 474161, "L1D cache accesses": 33730073, "LL cache misses": 550064, "LL cache accesses": 565245, "L2D TLB accesses": 191046, "L2D TLB misses": 23775, "L2D cache misses": 307419, "L2D cache accesses": 1772169, "instructions per cycle": 1.3672060930828385, "branch miss rate": 0.016501002445120115, "ITLB miss rate": 0.0002623991247602797, "DTLB miss rate": 0.0004778334118818516, "L2D TLB miss rate": 0.12444646838981188, "L1I cache miss rate": 0.008908573773049851, "L1D cache miss rate": 0.014057514788064645, "L2D cache miss rate": 0.1734704760099065, "LL cache miss rate": 0.973142619572044}
|
@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394141 queued and waiting for resources
|
srun: job 3395289 queued and waiting for resources
|
||||||
srun: job 3394141 has been allocated resources
|
srun: job 3395289 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
nnz=65369, layout=torch.sparse_csr)
|
||||||
tensor([0.6616, 0.1149, 0.0110, ..., 0.2481, 0.7877, 0.5589])
|
tensor([0.3210, 0.3418, 0.9584, ..., 0.8929, 0.9807, 0.5532])
|
||||||
|
Matrix: p2p-Gnutella24
|
||||||
Shape: torch.Size([26518, 26518])
|
Shape: torch.Size([26518, 26518])
|
||||||
NNZ: 65369
|
NNZ: 65369
|
||||||
Density: 9.295875717624285e-05
|
Density: 9.295875717624285e-05
|
||||||
Time: 0.16974925994873047 seconds
|
Time: 1.6565663814544678 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
||||||
|
|
||||||
61.92 msec task-clock:u # 0.017 CPUs utilized
|
67.08 msec task-clock:u # 0.013 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,281 page-faults:u # 52.988 K/sec
|
3,303 page-faults:u # 49.241 K/sec
|
||||||
66,250,810 cycles:u # 1.070 GHz (62.94%)
|
61,261,862 cycles:u # 0.913 GHz (49.19%)
|
||||||
75,178,179 instructions:u # 1.13 insn per cycle (83.47%)
|
83,757,591 instructions:u # 1.37 insn per cycle (88.30%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
367,749 branch-misses:u
|
364,692 branch-misses:u
|
||||||
33,064,095 L1-dcache-loads:u # 533.986 M/sec
|
31,954,743 L1-dcache-loads:u # 476.379 M/sec
|
||||||
465,542 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
|
490,953 L1-dcache-load-misses:u # 1.54% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
31,552,264 L1-icache-loads:u # 509.570 M/sec
|
30,490,915 L1-icache-loads:u # 454.556 M/sec
|
||||||
296,060 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
291,964 L1-icache-load-misses:u # 0.96% of all L1-icache accesses
|
||||||
73,155,896 dTLB-loads:u # 1.181 G/sec (17.31%)
|
32,131,046 dTLB-loads:u # 479.007 M/sec (19.20%)
|
||||||
<not counted> dTLB-load-misses:u (0.00%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.675971385 seconds time elapsed
|
5.107407925 seconds time elapsed
|
||||||
|
|
||||||
14.857293000 seconds user
|
16.045361000 seconds user
|
||||||
29.791187000 seconds sys
|
30.574855000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
nnz=65369, layout=torch.sparse_csr)
|
||||||
tensor([0.1683, 0.8999, 0.0578, ..., 0.5893, 0.0628, 0.8262])
|
tensor([0.4851, 0.2524, 0.2134, ..., 0.5976, 0.0089, 0.2284])
|
||||||
|
Matrix: p2p-Gnutella24
|
||||||
Shape: torch.Size([26518, 26518])
|
Shape: torch.Size([26518, 26518])
|
||||||
NNZ: 65369
|
NNZ: 65369
|
||||||
Density: 9.295875717624285e-05
|
Density: 9.295875717624285e-05
|
||||||
Time: 0.2227163314819336 seconds
|
Time: 1.6902527809143066 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
||||||
|
|
||||||
332,366 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
329,248 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
19,076,182 BR_RETIRED:u
|
19,953,212 BR_RETIRED:u
|
||||||
|
|
||||||
3.532329673 seconds time elapsed
|
4.990707186 seconds time elapsed
|
||||||
|
|
||||||
14.883993000 seconds user
|
16.713526000 seconds user
|
||||||
28.516661000 seconds sys
|
27.761595000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
nnz=65369, layout=torch.sparse_csr)
|
||||||
tensor([0.8389, 0.5614, 0.9033, ..., 0.2231, 0.0349, 0.5167])
|
tensor([0.1844, 0.9003, 0.0155, ..., 0.5184, 0.1445, 0.3588])
|
||||||
|
Matrix: p2p-Gnutella24
|
||||||
Shape: torch.Size([26518, 26518])
|
Shape: torch.Size([26518, 26518])
|
||||||
NNZ: 65369
|
NNZ: 65369
|
||||||
Density: 9.295875717624285e-05
|
Density: 9.295875717624285e-05
|
||||||
Time: 0.17095375061035156 seconds
|
Time: 1.6478993892669678 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
||||||
|
|
||||||
27,005,133 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
27,084,694 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
4,791 ITLB_WALK:u
|
7,107 ITLB_WALK:u
|
||||||
13,403 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
17,529 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
36,457,054 L1D_TLB:u
|
36,684,333 L1D_TLB:u
|
||||||
|
|
||||||
3.579041343 seconds time elapsed
|
5.010572757 seconds time elapsed
|
||||||
|
|
||||||
14.885159000 seconds user
|
16.570396000 seconds user
|
||||||
29.562650000 seconds sys
|
27.387405000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
nnz=65369, layout=torch.sparse_csr)
|
||||||
tensor([0.8849, 0.5982, 0.0578, ..., 0.9975, 0.2204, 0.0718])
|
tensor([0.2313, 0.8375, 0.3065, ..., 0.2374, 0.2281, 0.2100])
|
||||||
|
Matrix: p2p-Gnutella24
|
||||||
Shape: torch.Size([26518, 26518])
|
Shape: torch.Size([26518, 26518])
|
||||||
NNZ: 65369
|
NNZ: 65369
|
||||||
Density: 9.295875717624285e-05
|
Density: 9.295875717624285e-05
|
||||||
Time: 0.18003463745117188 seconds
|
Time: 1.637598991394043 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
||||||
|
|
||||||
32,367,686 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
32,158,234 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
287,524 L1I_CACHE_REFILL:u
|
286,484 L1I_CACHE_REFILL:u
|
||||||
467,557 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
474,161 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
34,022,862 L1D_CACHE:u
|
33,730,073 L1D_CACHE:u
|
||||||
|
|
||||||
3.405321132 seconds time elapsed
|
4.963121627 seconds time elapsed
|
||||||
|
|
||||||
15.291636000 seconds user
|
16.730431000 seconds user
|
||||||
28.005015000 seconds sys
|
29.869416000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 9, 9, ..., 65369, 65369, 65369]),
|
|||||||
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
col_indices=tensor([ 1, 2, 3, ..., 15065, 9401, 26517]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(26518, 26518),
|
||||||
nnz=65369, layout=torch.sparse_csr)
|
nnz=65369, layout=torch.sparse_csr)
|
||||||
tensor([0.2790, 0.1291, 0.6053, ..., 0.1651, 0.4973, 0.6821])
|
tensor([0.5006, 0.8470, 0.3527, ..., 0.3901, 0.3581, 0.1154])
|
||||||
|
Matrix: p2p-Gnutella24
|
||||||
Shape: torch.Size([26518, 26518])
|
Shape: torch.Size([26518, 26518])
|
||||||
NNZ: 65369
|
NNZ: 65369
|
||||||
Density: 9.295875717624285e-05
|
Density: 9.295875717624285e-05
|
||||||
Time: 0.22036528587341309 seconds
|
Time: 1.6584653854370117 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella24.mtx 1000':
|
||||||
|
|
||||||
535,707 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
550,064 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
556,316 LL_CACHE_RD:u
|
565,245 LL_CACHE_RD:u
|
||||||
150,149 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
191,046 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
18,418 L2D_TLB_REFILL:u
|
23,775 L2D_TLB_REFILL:u
|
||||||
297,042 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
307,419 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,687,364 L2D_CACHE:u
|
1,772,169 L2D_CACHE:u
|
||||||
|
|
||||||
3.505209576 seconds time elapsed
|
5.019317303 seconds time elapsed
|
||||||
|
|
||||||
15.297738000 seconds user
|
16.518292000 seconds user
|
||||||
29.848441000 seconds sys
|
30.069880000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.json
Normal file
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [86.48, 72.16, 59.36, 41.84, 28.44, 22.96, 22.92, 22.92, 23.04, 23.24], "matrix": "p2p-Gnutella25", "shape": [22687, 22687], "nnz": 54705, "% density": 0.00010628522108964806, "time_s": 1.431199073791504, "power": [35.16, 36.2, 36.72, 37.52, 37.52], "power_after": [21.32, 21.2, 21.2, 21.28, 21.52, 21.44, 21.92, 21.68, 21.6, 21.36], "task clock (msec)": 59.85, "page faults": 3318, "cycles": 76505130, "instructions": 72343215, "branch mispredictions": 322338, "branches": 19784096, "ITLB accesses": 27270404, "ITLB misses": 6607, "DTLB misses": 17981, "DTLB accesses": 36751047, "L1I cache accesses": 30620441, "L1I cache misses": 302139, "L1D cache misses": 471011, "L1D cache accesses": 32141810, "LL cache misses": 531907, "LL cache accesses": 545159, "L2D TLB accesses": 188244, "L2D TLB misses": 23034, "L2D cache misses": 293848, "L2D cache accesses": 1757551, "instructions per cycle": 0.945599530384433, "branch miss rate": 0.016292783860329025, "ITLB miss rate": 0.00024227730546272803, "DTLB miss rate": 0.0004892649725054092, "L2D TLB miss rate": 0.12236246573595971, "L1I cache miss rate": 0.009867232153841285, "L1D cache miss rate": 0.014654152955294054, "L2D cache miss rate": 0.1671917344077071, "LL cache miss rate": 0.9756914955086498}
|
158
pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.output
Normal file
158
pytorch/output_HPC/altra_10_30_p2p-Gnutella25_1000.output
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3395288 queued and waiting for resources
|
||||||
|
srun: job 3395288 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
|
tensor([0.9962, 0.2550, 0.9564, ..., 0.7113, 0.6635, 0.3831])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
|
Shape: torch.Size([22687, 22687])
|
||||||
|
NNZ: 54705
|
||||||
|
Density: 0.00010628522108964806
|
||||||
|
Time: 1.4832944869995117 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
|
59.85 msec task-clock:u # 0.012 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,318 page-faults:u # 55.439 K/sec
|
||||||
|
76,505,130 cycles:u # 1.278 GHz (43.11%)
|
||||||
|
72,343,215 instructions:u # 0.95 insn per cycle (62.06%)
|
||||||
|
<not supported> branches:u
|
||||||
|
371,337 branch-misses:u (77.63%)
|
||||||
|
33,969,604 L1-dcache-loads:u # 567.579 M/sec (88.85%)
|
||||||
|
472,023 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,728,689 L1-icache-loads:u # 530.137 M/sec
|
||||||
|
299,356 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
|
50,921,898 dTLB-loads:u # 850.825 M/sec (39.93%)
|
||||||
|
90,542 dTLB-load-misses:u # 0.18% of all dTLB cache accesses (36.53%)
|
||||||
|
11,563,883 iTLB-loads:u # 193.214 M/sec (20.26%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
4.953668960 seconds time elapsed
|
||||||
|
|
||||||
|
16.652653000 seconds user
|
||||||
|
30.408692000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
|
tensor([0.9968, 0.7101, 0.9319, ..., 0.2871, 0.7386, 0.8934])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
|
Shape: torch.Size([22687, 22687])
|
||||||
|
NNZ: 54705
|
||||||
|
Density: 0.00010628522108964806
|
||||||
|
Time: 1.3799591064453125 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
|
322,338 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
19,784,096 BR_RETIRED:u
|
||||||
|
|
||||||
|
4.633544255 seconds time elapsed
|
||||||
|
|
||||||
|
16.572749000 seconds user
|
||||||
|
26.228349000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3551, 0.8297, 0.9950, ..., 0.9625, 0.7129, 0.2173])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
|
Shape: torch.Size([22687, 22687])
|
||||||
|
NNZ: 54705
|
||||||
|
Density: 0.00010628522108964806
|
||||||
|
Time: 1.400240182876587 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
|
27,270,404 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,607 ITLB_WALK:u
|
||||||
|
17,981 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
36,751,047 L1D_TLB:u
|
||||||
|
|
||||||
|
4.696092090 seconds time elapsed
|
||||||
|
|
||||||
|
15.781810000 seconds user
|
||||||
|
28.383624000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3600, 0.0388, 0.5262, ..., 0.5849, 0.3707, 0.1514])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
|
Shape: torch.Size([22687, 22687])
|
||||||
|
NNZ: 54705
|
||||||
|
Density: 0.00010628522108964806
|
||||||
|
Time: 1.4545772075653076 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
|
30,620,441 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
302,139 L1I_CACHE_REFILL:u
|
||||||
|
471,011 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
32,141,810 L1D_CACHE:u
|
||||||
|
|
||||||
|
4.897499310 seconds time elapsed
|
||||||
|
|
||||||
|
16.207163000 seconds user
|
||||||
|
32.246890000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 9, 9, ..., 54704, 54704, 54705]),
|
||||||
|
col_indices=tensor([ 1, 2, 3, ..., 17949, 22685, 144]),
|
||||||
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(22687, 22687),
|
||||||
|
nnz=54705, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1220, 0.8435, 0.7035, ..., 0.2109, 0.0289, 0.0715])
|
||||||
|
Matrix: p2p-Gnutella25
|
||||||
|
Shape: torch.Size([22687, 22687])
|
||||||
|
NNZ: 54705
|
||||||
|
Density: 0.00010628522108964806
|
||||||
|
Time: 1.4200170040130615 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella25.mtx 1000':
|
||||||
|
|
||||||
|
531,907 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
545,159 LL_CACHE_RD:u
|
||||||
|
188,244 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
23,034 L2D_TLB_REFILL:u
|
||||||
|
293,848 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,757,551 L2D_CACHE:u
|
||||||
|
|
||||||
|
4.683262937 seconds time elapsed
|
||||||
|
|
||||||
|
16.111909000 seconds user
|
||||||
|
29.660483000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.json
Normal file
1
pytorch/output_HPC/altra_10_30_p2p-Gnutella30_1000.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.44, 16.44, 16.44, 16.84, 16.72, 16.6, 16.72, 16.84, 16.68, 16.84], "matrix": "p2p-Gnutella30", "shape": [36682, 36682], "nnz": 88328, "% density": 6.564359899804003e-05, "time_s": 2.896674871444702, "power": [56.32, 68.24, 71.76, 59.48, 47.6, 48.76, 52.6], "power_after": [16.92, 17.0, 16.96, 16.8, 16.48, 16.52, 16.52, 16.52, 16.24, 16.36], "task clock (msec)": 56.47, "page faults": 3222, "cycles": 69105836, "instructions": 89065155, "branch mispredictions": 333669, "branches": 20078755, "ITLB accesses": 26015038, "ITLB misses": 5212, "DTLB misses": 17039, "DTLB accesses": 35296010, "L1I cache accesses": 31837486, "L1I cache misses": 293353, "L1D cache misses": 462358, "L1D cache accesses": 33478540, "LL cache misses": 546516, "LL cache accesses": 559865, "L2D TLB accesses": 190400, "L2D TLB misses": 23787, "L2D cache misses": 307032, "L2D cache accesses": 1768186, "instructions per cycle": 1.288822480926213, "branch miss rate": 0.016618012421586895, "ITLB miss rate": 0.00020034566161310238, "DTLB miss rate": 0.00048274578344691083, "L2D TLB miss rate": 0.12493172268907562, "L1I cache miss rate": 0.009214075508348869, "L1D cache miss rate": 0.013810578358554464, "L2D cache miss rate": 0.17364236567872385, "LL cache miss rate": 0.9761567520741607}
|
@ -5,45 +5,46 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
|
|||||||
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
srun: ################################################################################
|
srun: ################################################################################
|
||||||
srun: job 3394142 queued and waiting for resources
|
srun: job 3395282 queued and waiting for resources
|
||||||
srun: job 3394142 has been allocated resources
|
srun: job 3395282 has been allocated resources
|
||||||
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
).to_sparse_csr().type(torch.float)
|
).to_sparse_csr().type(torch.float)
|
||||||
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
|
||||||
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
|
||||||
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
|
||||||
nnz=88328, layout=torch.sparse_csr)
|
nnz=88328, layout=torch.sparse_csr)
|
||||||
tensor([0.5867, 0.3729, 0.0718, ..., 0.5551, 0.6046, 0.6005])
|
tensor([0.0302, 0.1334, 0.4142, ..., 0.9516, 0.6030, 0.3883])
|
||||||
|
Matrix: p2p-Gnutella30
|
||||||
Shape: torch.Size([36682, 36682])
|
Shape: torch.Size([36682, 36682])
|
||||||
NNZ: 88328
|
NNZ: 88328
|
||||||
Density: 6.564359899804003e-05
|
Density: 6.564359899804003e-05
|
||||||
Time: 0.3765556812286377 seconds
|
Time: 2.790724277496338 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':
|
||||||
|
|
||||||
65.91 msec task-clock:u # 0.017 CPUs utilized
|
56.47 msec task-clock:u # 0.009 CPUs utilized
|
||||||
0 context-switches:u # 0.000 /sec
|
0 context-switches:u # 0.000 /sec
|
||||||
0 cpu-migrations:u # 0.000 /sec
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
3,247 page-faults:u # 49.267 K/sec
|
3,222 page-faults:u # 57.061 K/sec
|
||||||
92,293,071 cycles:u # 1.400 GHz (58.72%)
|
69,105,836 cycles:u # 1.224 GHz (53.55%)
|
||||||
76,208,632 instructions:u # 0.83 insn per cycle (75.47%)
|
89,065,155 instructions:u # 1.29 insn per cycle (92.79%)
|
||||||
<not supported> branches:u
|
<not supported> branches:u
|
||||||
336,620 branch-misses:u (89.96%)
|
367,525 branch-misses:u
|
||||||
33,256,017 L1-dcache-loads:u # 504.599 M/sec
|
32,122,654 L1-dcache-loads:u # 568.886 M/sec
|
||||||
479,188 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
|
467,921 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
|
||||||
<not supported> LLC-loads:u
|
<not supported> LLC-loads:u
|
||||||
<not supported> LLC-load-misses:u
|
<not supported> LLC-load-misses:u
|
||||||
31,686,331 L1-icache-loads:u # 480.782 M/sec
|
30,765,438 L1-icache-loads:u # 544.850 M/sec
|
||||||
297,521 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
289,327 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
|
||||||
55,295,804 dTLB-loads:u # 839.012 M/sec (27.47%)
|
24,642,710 dTLB-loads:u # 436.418 M/sec (11.11%)
|
||||||
103,616 dTLB-load-misses:u # 0.19% of all dTLB cache accesses (20.17%)
|
<not counted> dTLB-load-misses:u (0.00%)
|
||||||
<not counted> iTLB-loads:u (0.00%)
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
<not counted> iTLB-load-misses:u (0.00%)
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
3.803094533 seconds time elapsed
|
6.334250152 seconds time elapsed
|
||||||
|
|
||||||
16.585763000 seconds user
|
32.099712000 seconds user
|
||||||
62.703127000 seconds sys
|
240.206702000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -53,21 +54,22 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.2027, 0.2128, 0.5093, ..., 0.8069, 0.6413, 0.1136])
tensor([0.6147, 0.4171, 0.2258, ..., 0.0253, 0.8932, 0.8040])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2942969799041748 seconds
Time: 2.092158079147339 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

320,083 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
333,669 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,285,106 BR_RETIRED:u
20,078,755 BR_RETIRED:u

3.763535833 seconds time elapsed
5.557038624 seconds time elapsed

16.476022000 seconds user
29.074016000 seconds user
55.208213000 seconds sys
186.372846000 seconds sys

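Note: the "per branch branch_misprediction_ratio" metric that perf prints is rounded to one decimal place, which is why it shows as 0.0 here; the underlying ratio is simply BR_MIS_PRED_RETIRED / BR_RETIRED. A quick check (my own arithmetic, not part of the log):

```python
# Hypothetical recomputation of the ratio perf rounds to one decimal place.
br_miss, br_retired = 320_083, 19_285_106
print(br_miss / br_retired)  # ~0.0166, i.e. about 1.7% of branches mispredicted
```

The same ratio for the 1000-iteration run (333,669 / 20,078,755) is also about 1.7%, consistent with the "branch miss rate" fields stored in the .json result files added by this commit.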
@ -77,23 +79,24 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.5930, 0.8044, 0.8115, ..., 0.6366, 0.1026, 0.6914])
tensor([0.0146, 0.2151, 0.1948, ..., 0.7633, 0.4329, 0.7106])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2431955337524414 seconds
Time: 3.1269772052764893 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

26,853,940 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
26,015,038 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,728 ITLB_WALK:u
5,212 ITLB_WALK:u
13,955 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
17,039 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,111,059 L1D_TLB:u
35,296,010 L1D_TLB:u

3.752433570 seconds time elapsed
6.550798214 seconds time elapsed

16.433982000 seconds user
36.334689000 seconds user
53.207908000 seconds sys
263.614426000 seconds sys

@ -103,23 +106,24 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9666, 0.8206, 0.6252, ..., 0.5180, 0.8170, 0.7406])
tensor([0.1810, 0.5208, 0.0542, ..., 0.6108, 0.4905, 0.8918])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.15313339233398438 seconds
Time: 1.9065814018249512 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

32,554,796 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
31,837,486 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
298,729 L1I_CACHE_REFILL:u
293,353 L1I_CACHE_REFILL:u
473,779 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
462,358 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,117,102 L1D_CACHE:u
33,478,540 L1D_CACHE:u

3.595579651 seconds time elapsed
5.319975004 seconds time elapsed

15.817851000 seconds user
26.918342000 seconds user
44.491315000 seconds sys
175.603919000 seconds sys

@ -129,25 +133,26 @@ tensor(crow_indices=tensor([ 0, 10, 10, ..., 88328, 88328, 88328]),
col_indices=tensor([ 1, 2, 3, ..., 36675, 36676, 36677]),
values=tensor([1., 1., 1., ..., 1., 1., 1.]), size=(36682, 36682),
nnz=88328, layout=torch.sparse_csr)
tensor([0.9800, 0.9021, 0.5677, ..., 0.3869, 0.2468, 0.3286])
tensor([0.8456, 0.8302, 0.2078, ..., 0.8155, 0.5148, 0.5853])
Matrix: p2p-Gnutella30
Shape: torch.Size([36682, 36682])
NNZ: 88328
Density: 6.564359899804003e-05
Time: 0.2539215087890625 seconds
Time: 3.8523874282836914 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/p2p-Gnutella30.mtx 1000':

535,040 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
546,516 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
547,502 LL_CACHE_RD:u
559,865 LL_CACHE_RD:u
179,876 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
190,400 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
21,809 L2D_TLB_REFILL:u
23,787 L2D_TLB_REFILL:u
298,620 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
307,032 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,722,959 L2D_CACHE:u
1,768,186 L2D_CACHE:u

3.549060962 seconds time elapsed
7.266305868 seconds time elapsed

16.570077000 seconds user
37.085321000 seconds user
52.238012000 seconds sys
320.780766000 seconds sys

pytorch/output_HPC/altra_10_30_ri2010_1000.json
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [16.6, 16.64, 17.04, 17.08, 16.92, 17.24, 16.88, 16.36, 16.4, 16.4], "matrix": "ri2010", "shape": [25181, 25181], "nnz": 125750, "% density": 0.00019831796057928155, "time_s": 2.970583200454712, "power": [23.04, 23.28, 23.76, 24.12, 21.4, 26.28, 26.36], "power_after": [16.16, 16.16, 16.52, 16.48, 16.52, 16.44, 16.36, 16.48, 16.76, 16.6], "task clock (msec)": 52.61, "page faults": 3292, "cycles": 42915672, "instructions": 71002596, "branch mispredictions": 344300, "branches": 20224759, "ITLB accesses": 26039851, "ITLB misses": 5035, "DTLB misses": 16402, "DTLB accesses": 34820806, "L1I cache accesses": 31878105, "L1I cache misses": 299057, "L1D cache misses": 471869, "L1D cache accesses": 33450518, "LL cache misses": 530093, "LL cache accesses": 551126, "L2D TLB accesses": 188315, "L2D TLB misses": 22856, "L2D cache misses": 299885, "L2D cache accesses": 1763155, "instructions per cycle": 1.6544677664607, "branch miss rate": 0.01702368863826758, "ITLB miss rate": 0.00019335748119296073, "DTLB miss rate": 0.0004710402165877493, "L2D TLB miss rate": 0.12137110692191275, "L1I cache miss rate": 0.009381266546427399, "L1D cache miss rate": 0.014106478111938357, "L2D cache miss rate": 0.1700843090936418, "LL cache miss rate": 0.9618363132931489}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394145 queued and waiting for resources
srun: job 3395268 queued and waiting for resources
srun: job 3394145 has been allocated resources
srun: job 3395268 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
@ -14,37 +14,38 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1402, 0.0708, 0.4576, ..., 0.4700, 0.5629, 0.9120])
tensor([0.4029, 0.5373, 0.8376, ..., 0.9299, 0.3127, 0.4778])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3585643768310547 seconds
Time: 2.9858975410461426 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':

60.77 msec task-clock:u # 0.016 CPUs utilized
52.61 msec task-clock:u # 0.008 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,361 page-faults:u # 55.311 K/sec
3,292 page-faults:u # 62.576 K/sec
63,493,475 cycles:u # 1.045 GHz (49.59%)
42,915,672 cycles:u # 0.816 GHz (55.04%)
91,578,911 instructions:u # 1.44 insn per cycle (92.22%)
71,002,596 instructions:u # 1.65 insn per cycle (81.89%)
<not supported> branches:u
374,941 branch-misses:u
369,793 branch-misses:u
33,905,978 L1-dcache-loads:u # 557.979 M/sec
33,163,106 L1-dcache-loads:u # 630.381 M/sec
470,553 L1-dcache-load-misses:u # 1.39% of all L1-dcache accesses
471,533 L1-dcache-load-misses:u # 1.42% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,247,376 L1-icache-loads:u # 530.684 M/sec
31,640,002 L1-icache-loads:u # 601.429 M/sec
299,037 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
297,919 L1-icache-load-misses:u # 0.94% of all L1-icache accesses
27,428,635 dTLB-loads:u # 451.384 M/sec (13.50%)
48,642,108 dTLB-loads:u # 924.614 M/sec (29.77%)
<not counted> dTLB-load-misses:u (0.00%)
0 dTLB-load-misses:u (5.06%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

3.818532962 seconds time elapsed
6.215745697 seconds time elapsed

15.563570000 seconds user
17.600216000 seconds user
30.194882000 seconds sys
30.777524000 seconds sys

@ -55,21 +56,22 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1841, 0.4436, 0.8281, ..., 0.0546, 0.5967, 0.9496])
tensor([0.8706, 0.3724, 0.8779, ..., 0.4299, 0.0920, 0.4238])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3050577640533447 seconds
Time: 2.9231789112091064 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':

329,084 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
344,300 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,406,595 BR_RETIRED:u
20,224,759 BR_RETIRED:u

3.673527837 seconds time elapsed
6.297708483 seconds time elapsed

15.520198000 seconds user
17.546068000 seconds user
29.068211000 seconds sys
26.920857000 seconds sys

@ -80,23 +82,24 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.1849, 0.5991, 0.5040, ..., 0.4916, 0.4789, 0.8887])
tensor([0.2988, 0.0160, 0.4360, ..., 0.7543, 0.0919, 0.2321])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3605458736419678 seconds
Time: 2.9701316356658936 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':

26,859,919 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
26,039,851 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,237 ITLB_WALK:u
5,035 ITLB_WALK:u
16,689 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
16,402 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,348,977 L1D_TLB:u
34,820,806 L1D_TLB:u

3.769690988 seconds time elapsed
6.227977259 seconds time elapsed

15.173839000 seconds user
17.937381000 seconds user
29.963392000 seconds sys
30.196552000 seconds sys

@ -107,23 +110,24 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.0513, 0.4498, 0.6748, ..., 0.2114, 0.6847, 0.2188])
tensor([0.5797, 0.8992, 0.8317, ..., 0.0283, 0.7124, 0.2690])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3485410213470459 seconds
Time: 2.968733072280884 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':

30,979,764 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
31,878,105 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
292,038 L1I_CACHE_REFILL:u
299,057 L1I_CACHE_REFILL:u
469,219 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
471,869 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,411,890 L1D_CACHE:u
33,450,518 L1D_CACHE:u

3.598754329 seconds time elapsed
6.278062824 seconds time elapsed

16.139631000 seconds user
17.822878000 seconds user
29.287026000 seconds sys
27.932170000 seconds sys

@ -134,25 +138,26 @@ tensor(crow_indices=tensor([ 0, 3, 8, ..., 125742, 125747,
col_indices=tensor([ 25, 56, 662, ..., 21738, 22279, 23882]),
values=tensor([17171., 37318., 5284., ..., 25993., 24918., 803.]),
size=(25181, 25181), nnz=125750, layout=torch.sparse_csr)
tensor([0.7270, 0.7858, 0.3165, ..., 0.7139, 0.8270, 0.9478])
tensor([0.0630, 0.5194, 0.8720, ..., 0.9537, 0.3959, 0.5550])
Matrix: ri2010
Shape: torch.Size([25181, 25181])
NNZ: 125750
Density: 0.00019831796057928155
Time: 0.3687746524810791 seconds
Time: 2.9069995880126953 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ri2010.mtx 1000':

571,870 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
530,093 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
598,306 LL_CACHE_RD:u
551,126 LL_CACHE_RD:u
205,488 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
188,315 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
26,392 L2D_TLB_REFILL:u
22,856 L2D_TLB_REFILL:u
342,141 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
299,885 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,857,697 L2D_CACHE:u
1,763,155 L2D_CACHE:u

3.726794738 seconds time elapsed
6.075529293 seconds time elapsed

15.231331000 seconds user
17.073983000 seconds user
32.108693000 seconds sys
27.811966000 seconds sys

pytorch/output_HPC/altra_10_30_rma10_1000.json
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [31.36, 30.64, 31.12, 24.52, 24.16, 23.12, 22.08, 21.28, 21.16, 20.88], "matrix": "rma10", "shape": [46835, 46835], "nnz": 2374001, "% density": 0.0010822805369125833, "time_s": 68.86891412734985, "power": [81.8, 81.32, 75.08, 63.48, 51.92, 51.96, 51.8, 65.0, 65.0, 75.12, 82.68, 82.32, 82.08, 82.76, 82.8, 83.6, 83.36, 83.08, 82.88, 83.0, 83.32, 83.32, 83.36, 84.64, 84.56, 84.24, 83.52, 83.4, 83.36, 83.36, 83.72, 84.16, 83.24, 82.76, 82.76, 82.96, 82.36, 82.24, 81.64, 81.6, 81.4, 81.6, 81.88, 82.32, 83.04, 83.48, 83.48, 84.32, 84.04, 84.32, 83.16, 82.44, 81.96, 81.4, 81.8, 82.08, 81.8, 81.84, 82.04, 82.04, 82.08, 82.44, 82.6, 82.84, 83.8, 84.24, 84.6, 85.4, 85.6, 86.0, 85.72, 85.36], "power_after": [21.96, 21.88, 21.96, 21.96, 22.0, 21.68, 21.44, 21.16, 21.04, 20.92], "task clock (msec)": 58.3, "page faults": 3281, "cycles": 81319364, "instructions": 90830397, "branch mispredictions": 342237, "branches": 20641135, "ITLB accesses": 27974213, "ITLB misses": 6660, "DTLB misses": 18441, "DTLB accesses": 37780346, "L1I cache accesses": 31166891, "L1I cache misses": 291301, "L1D cache misses": 477186, "L1D cache accesses": 32682323, "LL cache misses": 538552, "LL cache accesses": 552543, "L2D TLB accesses": 202351, "L2D TLB misses": 24178, "L2D cache misses": 298051, "L2D cache accesses": 1775481, "instructions per cycle": 1.1169590185186398, "branch miss rate": 0.01658033824205888, "ITLB miss rate": 0.00023807640272132053, "DTLB miss rate": 0.00048811093471722044, "L2D TLB miss rate": 0.11948544855226809, "L1I cache miss rate": 0.00934648887500521, "L1D cache miss rate": 0.014600736918241704, "L2D cache miss rate": 0.1678705657790762, "LL cache miss rate": 0.9746788937693537}
pytorch/output_HPC/altra_10_30_rma10_1000.output
@ -0,0 +1,168 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395286 queued and waiting for resources
srun: job 3395286 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.4937, 0.5946, 0.4240, ..., 0.9888, 0.5278, 0.9155])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 52.320035219192505 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':

58.30 msec task-clock:u # 0.001 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,281 page-faults:u # 56.279 K/sec
81,319,364 cycles:u # 1.395 GHz (62.38%)
90,830,397 instructions:u # 1.12 insn per cycle (94.62%)
<not supported> branches:u
358,947 branch-misses:u
32,561,141 L1-dcache-loads:u # 558.523 M/sec
477,147 L1-dcache-load-misses:u # 1.47% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
31,044,361 L1-icache-loads:u # 532.506 M/sec
286,125 L1-icache-load-misses:u # 0.92% of all L1-icache accesses
29,678,379 dTLB-loads:u # 509.075 M/sec (5.72%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

56.145511940 seconds time elapsed

269.541895000 seconds user
3993.928150000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.2401, 0.9608, 0.9686, ..., 0.2643, 0.1097, 0.0695])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 65.29214668273926 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':

342,237 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,641,135 BR_RETIRED:u

69.131216008 seconds time elapsed

324.908899000 seconds user
4969.165543000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.5237, 0.3525, 0.2809, ..., 0.8641, 0.3894, 0.4198])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 66.05637407302856 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':

27,974,213 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,660 ITLB_WALK:u
18,441 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
37,780,346 L1D_TLB:u

69.880637029 seconds time elapsed

320.759259000 seconds user
5037.255757000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.8185, 0.4278, 0.7553, ..., 0.5022, 0.1058, 0.0783])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 63.55399775505066 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':

31,166,891 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
291,301 L1I_CACHE_REFILL:u
477,186 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
32,682,323 L1D_CACHE:u

67.517251505 seconds time elapsed

319.301754000 seconds user
4839.755901000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 17, 34, ..., 2373939,
2373970, 2374001]),
col_indices=tensor([ 0, 1, 2, ..., 46831, 46833, 46834]),
values=tensor([ 1.2636e+05, -1.6615e+07, -8.2015e+04, ...,
8.3378e+01, 2.5138e+00, 1.2184e+03]),
size=(46835, 46835), nnz=2374001, layout=torch.sparse_csr)
tensor([0.8358, 0.0086, 0.1779, ..., 0.6354, 0.7134, 0.5745])
Matrix: rma10
Shape: torch.Size([46835, 46835])
NNZ: 2374001
Density: 0.0010822805369125833
Time: 63.55393171310425 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/rma10.mtx 1000':

538,552 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
552,543 LL_CACHE_RD:u
202,351 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,178 L2D_TLB_REFILL:u
298,051 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,775,481 L2D_CACHE:u

67.538674790 seconds time elapsed

321.810383000 seconds user
4836.154538000 seconds sys

pytorch/output_HPC/altra_10_30_tn2010_1000.json
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [37.56, 23.12, 22.32, 22.28, 22.28, 21.96, 21.76, 21.72, 21.88, 21.84], "matrix": "tn2010", "shape": [240116, 240116], "nnz": 1193966, "% density": 2.070855328296721e-05, "time_s": 16.282614707946777, "power": [85.48, 85.84, 79.28, 70.16, 55.52, 49.48, 49.48, 60.48, 76.32, 88.88, 91.0, 91.0, 90.68, 88.32, 86.92, 86.4, 88.08, 86.8, 87.32, 87.8], "power_after": [21.68, 21.48, 21.44, 21.36, 21.52, 21.4, 21.4, 21.32, 21.2, 21.04], "task clock (msec)": 68.11, "page faults": 3486, "cycles": 70427921, "instructions": 85638293, "branch mispredictions": 333780, "branches": 19402540, "ITLB accesses": 26935483, "ITLB misses": 5639, "DTLB misses": 16688, "DTLB accesses": 36421540, "L1I cache accesses": 33029213, "L1I cache misses": 302558, "L1D cache misses": 481598, "L1D cache accesses": 34668833, "LL cache misses": 551659, "LL cache accesses": 564579, "L2D TLB accesses": 188346, "L2D TLB misses": 24479, "L2D cache misses": 311796, "L2D cache accesses": 1767924, "instructions per cycle": 1.215970765344614, "branch miss rate": 0.017202902300420462, "ITLB miss rate": 0.0002093521025778524, "DTLB miss rate": 0.00045819040051573877, "L2D TLB miss rate": 0.12996824992301403, "L1I cache miss rate": 0.00916031514284037, "L1D cache miss rate": 0.013891381922200843, "L2D cache miss rate": 0.17636278482559206, "LL cache miss rate": 0.9771156915152707}
pytorch/output_HPC/altra_10_30_tn2010_1000.output
@ -0,0 +1,173 @@
srun: Job time limit was unset; set to partition default of 60 minutes
srun: ################################################################################
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3395275 queued and waiting for resources
srun: job 3395275 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.2511, 0.1104, 0.8257, ..., 0.4006, 0.1534, 0.0009])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.89618182182312 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':

68.11 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,486 page-faults:u # 51.182 K/sec
70,427,921 cycles:u # 1.034 GHz (46.81%)
85,638,293 instructions:u # 1.22 insn per cycle (74.19%)
<not supported> branches:u
356,748 branch-misses:u (89.74%)
34,044,117 L1-dcache-loads:u # 499.843 M/sec
481,076 L1-dcache-load-misses:u # 1.41% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
32,553,977 L1-icache-loads:u # 477.965 M/sec
309,127 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
41,245,978 dTLB-loads:u # 605.583 M/sec (33.60%)
127,770 dTLB-load-misses:u # 0.31% of all dTLB cache accesses (15.43%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

16.626373547 seconds time elapsed

101.073288000 seconds user
996.348020000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.0138, 0.1394, 0.6273, ..., 0.8681, 0.0444, 0.2705])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 14.216531038284302 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':

333,780 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
19,402,540 BR_RETIRED:u

17.985093703 seconds time elapsed

106.904608000 seconds user
1091.172933000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.6279, 0.1696, 0.6937, ..., 0.4267, 0.4847, 0.6447])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.462992429733276 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':

26,935,483 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
5,639 ITLB_WALK:u
16,688 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,421,540 L1D_TLB:u

15.984498303 seconds time elapsed

95.195897000 seconds user
962.237122000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.4060, 0.4915, 0.8557, ..., 0.9902, 0.0548, 0.2450])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 9.298198223114014 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':

33,029,213 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
302,558 L1I_CACHE_REFILL:u
481,598 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,668,833 L1D_CACHE:u

12.985459942 seconds time elapsed

78.950722000 seconds user
727.126874000 seconds sys

/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 4, 20, ..., 1193961,
1193963, 1193966]),
col_indices=tensor([ 1152, 1272, 1961, ..., 238254, 239142,
240113]),
values=tensor([ 5728., 2871., 418449., ..., 10058., 33324.,
34928.]), size=(240116, 240116), nnz=1193966,
layout=torch.sparse_csr)
tensor([0.0166, 0.6910, 0.0311, ..., 0.6156, 0.5689, 0.9849])
Matrix: tn2010
Shape: torch.Size([240116, 240116])
NNZ: 1193966
Density: 2.070855328296721e-05
Time: 12.012693405151367 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/tn2010.mtx 1000':

551,659 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
564,579 LL_CACHE_RD:u
188,346 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
24,479 L2D_TLB_REFILL:u
311,796 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
1,767,924 L2D_CACHE:u

15.749851583 seconds time elapsed

98.008506000 seconds user
926.127594000 seconds sys

pytorch/output_HPC/altra_10_30_ut2010_1000.json
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [93.52, 87.76, 71.6, 58.32, 39.6, 26.24, 26.24, 22.16, 22.24, 22.24], "matrix": "ut2010", "shape": [115406, 115406], "nnz": 572066, "% density": 4.295259032005559e-05, "time_s": 8.478580713272095, "power": [89.68, 88.92, 80.84, 68.96, 56.64, 54.52, 55.88, 70.44, 85.36, 85.36, 98.2, 96.52], "power_after": [21.24, 21.32, 21.16, 21.44, 21.68, 21.76, 21.72, 22.0, 21.72, 21.72], "task clock (msec)": 53.84, "page faults": 3291, "cycles": 66389970, "instructions": 74935543, "branch mispredictions": 330515, "branches": 19475058, "ITLB accesses": 26125490, "ITLB misses": 6431, "DTLB misses": 13728, "DTLB accesses": 35274185, "L1I cache accesses": 30428652, "L1I cache misses": 288897, "L1D cache misses": 475615, "L1D cache accesses": 31855716, "LL cache misses": 553829, "LL cache accesses": 574192, "L2D TLB accesses": 181148, "L2D TLB misses": 23202, "L2D cache misses": 307806, "L2D cache accesses": 1767037, "instructions per cycle": 1.1287178319255153, "branch miss rate": 0.016971194642911976, "ITLB miss rate": 0.00024615806248992844, "DTLB miss rate": 0.0003891797925309968, "L2D TLB miss rate": 0.12808311435952924, "L1I cache miss rate": 0.009494242465949527, "L1D cache miss rate": 0.014930287550278261, "L2D cache miss rate": 0.17419329646181717, "LL cache miss rate": 0.9645362526820297}
@ -5,8 +5,8 @@ srun: # All submission nodes and all other compute nodes have x86_64 architectur
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
srun: ################################################################################
srun: job 3394146 queued and waiting for resources
srun: job 3395284 queued and waiting for resources
srun: job 3394146 has been allocated resources
srun: job 3395284 has been allocated resources
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
).to_sparse_csr().type(torch.float)
tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
@ -16,37 +16,38 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4608, 0.1516, 0.8492, ..., 0.8920, 0.4275, 0.8070])
tensor([0.1487, 0.4275, 0.9471, ..., 0.3851, 0.0801, 0.4295])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.3751039505004883 seconds
Time: 8.772023677825928 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':

60.55 msec task-clock:u # 0.012 CPUs utilized
53.84 msec task-clock:u # 0.004 CPUs utilized
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
3,490 page-faults:u # 57.638 K/sec
3,291 page-faults:u # 61.127 K/sec
49,977,496 cycles:u # 0.825 GHz (40.93%)
66,389,970 cycles:u # 1.233 GHz (67.37%)
78,622,993 instructions:u # 1.57 insn per cycle (85.37%)
74,935,543 instructions:u # 1.13 insn per cycle (83.30%)
<not supported> branches:u
358,029 branch-misses:u
365,846 branch-misses:u
31,478,500 L1-dcache-loads:u # 519.877 M/sec
31,684,169 L1-dcache-loads:u # 588.504 M/sec
479,449 L1-dcache-load-misses:u # 1.52% of all L1-dcache accesses
462,583 L1-dcache-load-misses:u # 1.46% of all L1-dcache accesses
<not supported> LLC-loads:u
<not supported> LLC-load-misses:u
29,991,824 L1-icache-loads:u # 495.324 M/sec
30,260,337 L1-icache-loads:u # 562.058 M/sec
294,864 L1-icache-load-misses:u # 0.98% of all L1-icache accesses
288,196 L1-icache-load-misses:u # 0.95% of all L1-icache accesses
35,154,647 dTLB-loads:u # 580.589 M/sec (23.19%)
57,721,334 dTLB-loads:u # 1.072 G/sec (18.54%)
<not counted> dTLB-load-misses:u (0.00%)
<not counted> iTLB-loads:u (0.00%)
<not counted> iTLB-load-misses:u (0.00%)

4.986156121 seconds time elapsed
12.179628060 seconds time elapsed

23.724703000 seconds user
68.068275000 seconds user
145.034521000 seconds sys
690.223452000 seconds sys

@ -59,21 +60,22 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.4697, 0.7121, 0.5987, ..., 0.2619, 0.7308, 0.3129])
tensor([0.9553, 0.9401, 0.7135, ..., 0.8664, 0.5986, 0.8459])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.6881086826324463 seconds
Time: 8.94040060043335 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':

327,078 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
330,515 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
20,135,808 BR_RETIRED:u
19,475,058 BR_RETIRED:u

5.374156677 seconds time elapsed
12.428594105 seconds time elapsed

25.609168000 seconds user
67.011228000 seconds user
167.278028000 seconds sys
709.528404000 seconds sys

@ -86,23 +88,24 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.9215, 0.6706, 0.8015, ..., 0.8507, 0.8546, 0.4441])
tensor([0.6289, 0.8171, 0.1590, ..., 0.7515, 0.5400, 0.3693])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.2785694599151611 seconds
Time: 14.403366804122925 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':

27,608,093 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
26,125,490 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
6,616 ITLB_WALK:u
6,431 ITLB_WALK:u
17,185 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
13,728 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
36,866,957 L1D_TLB:u
35,274,185 L1D_TLB:u

4.861513311 seconds time elapsed
18.084508405 seconds time elapsed

23.339077000 seconds user
95.162133000 seconds user
141.584760000 seconds sys
1117.716009000 seconds sys

@ -115,23 +118,24 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
18651.]), size=(115406, 115406), nnz=572066,
layout=torch.sparse_csr)
tensor([0.8973, 0.5228, 0.4492, ..., 0.7677, 0.7722, 0.1700])
tensor([0.8824, 0.0692, 0.7225, ..., 0.8736, 0.6854, 0.7514])
Matrix: ut2010
Shape: torch.Size([115406, 115406])
NNZ: 572066
Density: 4.295259032005559e-05
Time: 1.1654376983642578 seconds
Time: 9.64679503440857 seconds

Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':

32,639,204 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
30,428,652 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
309,643 L1I_CACHE_REFILL:u
288,897 L1I_CACHE_REFILL:u
478,856 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
475,615 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
34,280,618 L1D_CACHE:u
31,855,716 L1D_CACHE:u

4.677973310 seconds time elapsed
13.170070008 seconds time elapsed

22.972655000 seconds user
68.362809000 seconds user
125.062401000 seconds sys
761.360459000 seconds sys

@ -144,25 +148,26 @@ tensor(crow_indices=tensor([ 0, 3, 9, ..., 572056, 572061,
|
|||||||
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
values=tensor([160642., 31335., 282373., ..., 88393., 99485.,
|
||||||
18651.]), size=(115406, 115406), nnz=572066,
|
18651.]), size=(115406, 115406), nnz=572066,
|
||||||
layout=torch.sparse_csr)
|
layout=torch.sparse_csr)
|
||||||
tensor([0.4542, 0.7095, 0.5701, ..., 0.2172, 0.8829, 0.7757])
|
tensor([0.9552, 0.0509, 0.7738, ..., 0.7722, 0.4417, 0.7772])
|
||||||
|
Matrix: ut2010
|
||||||
Shape: torch.Size([115406, 115406])
|
Shape: torch.Size([115406, 115406])
|
||||||
NNZ: 572066
|
NNZ: 572066
|
||||||
Density: 4.295259032005559e-05
|
Density: 4.295259032005559e-05
|
||||||
Time: 1.1153452396392822 seconds
|
Time: 12.372079133987427 seconds
|
||||||
|
|
||||||
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 100':
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/ut2010.mtx 1000':
|
||||||
|
|
||||||
555,275 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
553,829 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
578,455 LL_CACHE_RD:u
|
574,192 LL_CACHE_RD:u
|
||||||
188,723 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
181,148 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
24,635 L2D_TLB_REFILL:u
|
23,202 L2D_TLB_REFILL:u
|
||||||
319,663 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
307,806 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
1,799,940 L2D_CACHE:u
|
1,767,037 L2D_CACHE:u
|
||||||
|
|
||||||
4.655024760 seconds time elapsed
|
15.923392394 seconds time elapsed
|
||||||
|
|
||||||
23.104641000 seconds user
|
83.307253000 seconds user
|
||||||
122.294597000 seconds sys
|
958.949992000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
1 pytorch/output_HPC/altra_10_30_va2010_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [32.08, 31.8, 28.68, 27.6, 22.96, 22.08, 21.0, 20.84, 20.68, 20.72], "matrix": "va2010", "shape": [285762, 285762], "nnz": 1402128, "% density": 1.717033263003816e-05, "time_s": 14.632386922836304, "power": [85.16, 83.48, 76.96, 67.44, 54.04, 51.4, 54.24, 66.76, 83.2, 96.44, 96.44, 95.84, 94.24, 92.36, 91.2, 89.32, 87.48, 88.68, 88.24], "power_after": [21.12, 21.0, 21.16, 21.4, 21.32, 21.36, 21.36, 21.12, 20.76, 20.84], "task clock (msec)": 57.32, "page faults": 3280, "cycles": 39497791, "instructions": 64385555, "branch mispredictions": 332792, "branches": 19983954, "ITLB accesses": 27156853, "ITLB misses": 6466, "DTLB misses": 18244, "DTLB accesses": 36466301, "L1I cache accesses": 30929971, "L1I cache misses": 291811, "L1D cache misses": 473063, "L1D cache accesses": 32462905, "LL cache misses": 544953, "LL cache accesses": 565172, "L2D TLB accesses": 183225, "L2D TLB misses": 23924, "L2D cache misses": 301362, "L2D cache accesses": 1756590, "instructions per cycle": 1.6301052127193645, "branch miss rate": 0.01665296067034582, "ITLB miss rate": 0.00023809828038616994, "DTLB miss rate": 0.000500297521264907, "L2D TLB miss rate": 0.13057170145995362, "L1I cache miss rate": 0.009434570759862659, "L1D cache miss rate": 0.014572417348354991, "L2D cache miss rate": 0.17156080815671274, "LL cache miss rate": 0.964225050073252}
173 pytorch/output_HPC/altra_10_30_va2010_1000.output Normal file
@ -0,0 +1,173 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3395277 queued and waiting for resources
|
||||||
|
srun: job 3395277 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
||||||
|
1402123, 1402128]),
|
||||||
|
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
||||||
|
285760]),
|
||||||
|
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
||||||
|
9832.]), size=(285762, 285762), nnz=1402128,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.2920, 0.3583, 0.0598, ..., 0.2208, 0.1741, 0.4955])
|
||||||
|
Matrix: va2010
|
||||||
|
Shape: torch.Size([285762, 285762])
|
||||||
|
NNZ: 1402128
|
||||||
|
Density: 1.717033263003816e-05
|
||||||
|
Time: 14.792448997497559 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
||||||
|
|
||||||
|
57.32 msec task-clock:u # 0.003 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,280 page-faults:u # 57.220 K/sec
|
||||||
|
39,497,791 cycles:u # 0.689 GHz (54.25%)
|
||||||
|
64,385,555 instructions:u # 1.63 insn per cycle (81.24%)
|
||||||
|
<not supported> branches:u
|
||||||
|
362,674 branch-misses:u
|
||||||
|
33,532,520 L1-dcache-loads:u # 584.977 M/sec
|
||||||
|
481,355 L1-dcache-load-misses:u # 1.44% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
31,924,348 L1-icache-loads:u # 556.922 M/sec
|
||||||
|
296,637 L1-icache-load-misses:u # 0.93% of all L1-icache accesses
|
||||||
|
43,420,143 dTLB-loads:u # 757.467 M/sec (40.22%)
|
||||||
|
30,923 dTLB-load-misses:u # 0.07% of all dTLB cache accesses (19.05%)
|
||||||
|
<not counted> iTLB-loads:u (0.00%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
18.678937115 seconds time elapsed
|
||||||
|
|
||||||
|
112.979167000 seconds user
|
||||||
|
1135.785668000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
||||||
|
1402123, 1402128]),
|
||||||
|
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
||||||
|
285760]),
|
||||||
|
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
||||||
|
9832.]), size=(285762, 285762), nnz=1402128,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.7703, 0.7481, 0.5351, ..., 0.4663, 0.6089, 0.3679])
|
||||||
|
Matrix: va2010
|
||||||
|
Shape: torch.Size([285762, 285762])
|
||||||
|
NNZ: 1402128
|
||||||
|
Density: 1.717033263003816e-05
|
||||||
|
Time: 14.130552530288696 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
||||||
|
|
||||||
|
332,792 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
19,983,954 BR_RETIRED:u
|
||||||
|
|
||||||
|
17.923156218 seconds time elapsed
|
||||||
|
|
||||||
|
107.999690000 seconds user
|
||||||
|
1091.659165000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
||||||
|
1402123, 1402128]),
|
||||||
|
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
||||||
|
285760]),
|
||||||
|
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
||||||
|
9832.]), size=(285762, 285762), nnz=1402128,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.8850, 0.1406, 0.0617, ..., 0.4325, 0.2725, 0.9292])
|
||||||
|
Matrix: va2010
|
||||||
|
Shape: torch.Size([285762, 285762])
|
||||||
|
NNZ: 1402128
|
||||||
|
Density: 1.717033263003816e-05
|
||||||
|
Time: 13.32977032661438 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
||||||
|
|
||||||
|
27,156,853 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,466 ITLB_WALK:u
|
||||||
|
18,244 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
36,466,301 L1D_TLB:u
|
||||||
|
|
||||||
|
17.186572497 seconds time elapsed
|
||||||
|
|
||||||
|
104.940187000 seconds user
|
||||||
|
1032.527271000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
||||||
|
1402123, 1402128]),
|
||||||
|
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
||||||
|
285760]),
|
||||||
|
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
||||||
|
9832.]), size=(285762, 285762), nnz=1402128,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.6289, 0.0403, 0.9207, ..., 0.0183, 0.4807, 0.7504])
|
||||||
|
Matrix: va2010
|
||||||
|
Shape: torch.Size([285762, 285762])
|
||||||
|
NNZ: 1402128
|
||||||
|
Density: 1.717033263003816e-05
|
||||||
|
Time: 13.460915803909302 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
||||||
|
|
||||||
|
30,929,971 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
291,811 L1I_CACHE_REFILL:u
|
||||||
|
473,063 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
32,462,905 L1D_CACHE:u
|
||||||
|
|
||||||
|
17.219448483 seconds time elapsed
|
||||||
|
|
||||||
|
100.274467000 seconds user
|
||||||
|
1045.271682000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 2, 8, ..., 1402119,
|
||||||
|
1402123, 1402128]),
|
||||||
|
col_indices=tensor([ 2006, 2464, 1166, ..., 285581, 285634,
|
||||||
|
285760]),
|
||||||
|
values=tensor([125334., 3558., 1192., ..., 10148., 1763.,
|
||||||
|
9832.]), size=(285762, 285762), nnz=1402128,
|
||||||
|
layout=torch.sparse_csr)
|
||||||
|
tensor([0.6412, 0.1151, 0.5075, ..., 0.9251, 0.9288, 0.3560])
|
||||||
|
Matrix: va2010
|
||||||
|
Shape: torch.Size([285762, 285762])
|
||||||
|
NNZ: 1402128
|
||||||
|
Density: 1.717033263003816e-05
|
||||||
|
Time: 15.992860555648804 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/va2010.mtx 1000':
|
||||||
|
|
||||||
|
544,953 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
565,172 LL_CACHE_RD:u
|
||||||
|
183,225 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
23,924 L2D_TLB_REFILL:u
|
||||||
|
301,362 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,756,590 L2D_CACHE:u
|
||||||
|
|
||||||
|
19.884223259 seconds time elapsed
|
||||||
|
|
||||||
|
113.211516000 seconds user
|
||||||
|
1230.525804000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
1 pytorch/output_HPC/altra_10_30_vt2010_1000.json Normal file
@ -0,0 +1 @@
{"architecture": "altra", "iterations": 1000, "baseline_time_s": 10, "baseline_delay_s": 30, "power_before": [77.2, 64.12, 64.12, 48.92, 36.2, 21.72, 21.88, 22.36, 22.36, 22.44], "matrix": "vt2010", "shape": [32580, 32580], "nnz": 155598, "% density": 0.00014658915806621921, "time_s": 3.5892834663391113, "power": [33.44, 30.68, 31.08, 26.96, 26.88, 32.48, 32.24], "power_after": [21.24, 21.24, 21.36, 21.36, 21.2, 21.04, 20.84, 20.72, 20.72, 20.56], "task clock (msec)": 55.26, "page faults": 3297, "cycles": 49276491, "instructions": 64763517, "branch mispredictions": 340611, "branches": 20355849, "ITLB accesses": 27946393, "ITLB misses": 6805, "DTLB misses": 17877, "DTLB accesses": 38226912, "L1I cache accesses": 31946141, "L1I cache misses": 295259, "L1D cache misses": 468136, "L1D cache accesses": 33395666, "LL cache misses": 527109, "LL cache accesses": 540409, "L2D TLB accesses": 192519, "L2D TLB misses": 24204, "L2D cache misses": 290933, "L2D cache accesses": 1743452, "instructions per cycle": 1.3142883286880147, "branch miss rate": 0.016732831924622747, "ITLB miss rate": 0.00024350190738389746, "DTLB miss rate": 0.0004676548291423592, "L2D TLB miss rate": 0.1257226559456469, "L1I cache miss rate": 0.009242399574959616, "L1D cache miss rate": 0.014017866869311724, "L2D cache miss rate": 0.16687181522634406, "LL cache miss rate": 0.9753890109158063}
163 pytorch/output_HPC/altra_10_30_vt2010_1000.output Normal file
@ -0,0 +1,163 @@
|
|||||||
|
srun: Job time limit was unset; set to partition default of 60 minutes
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: # Please note that the oasis compute nodes have aarch64 architecture CPUs. #
|
||||||
|
srun: # All submission nodes and all other compute nodes have x86_64 architecture #
|
||||||
|
srun: # CPUs. Programs, environments, or other software that was built on x86_64 #
|
||||||
|
srun: # nodes may need to be rebuilt to properly execute on these nodes. #
|
||||||
|
srun: ################################################################################
|
||||||
|
srun: job 3395285 queued and waiting for resources
|
||||||
|
srun: job 3395285 has been allocated resources
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
|
155598]),
|
||||||
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
|
tensor([0.1179, 0.2288, 0.5357, ..., 0.4845, 0.6375, 0.4513])
|
||||||
|
Matrix: vt2010
|
||||||
|
Shape: torch.Size([32580, 32580])
|
||||||
|
NNZ: 155598
|
||||||
|
Density: 0.00014658915806621921
|
||||||
|
Time: 3.628732681274414 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
|
55.26 msec task-clock:u # 0.008 CPUs utilized
|
||||||
|
0 context-switches:u # 0.000 /sec
|
||||||
|
0 cpu-migrations:u # 0.000 /sec
|
||||||
|
3,297 page-faults:u # 59.661 K/sec
|
||||||
|
49,276,491 cycles:u # 0.892 GHz (31.65%)
|
||||||
|
64,763,517 instructions:u # 1.31 insn per cycle (57.73%)
|
||||||
|
<not supported> branches:u
|
||||||
|
357,693 branch-misses:u (76.18%)
|
||||||
|
32,426,852 L1-dcache-loads:u # 586.784 M/sec (88.36%)
|
||||||
|
469,495 L1-dcache-load-misses:u # 1.45% of all L1-dcache accesses
|
||||||
|
<not supported> LLC-loads:u
|
||||||
|
<not supported> LLC-load-misses:u
|
||||||
|
30,941,957 L1-icache-loads:u # 559.914 M/sec
|
||||||
|
279,512 L1-icache-load-misses:u # 0.90% of all L1-icache accesses
|
||||||
|
47,128,547 dTLB-loads:u # 852.821 M/sec (46.73%)
|
||||||
|
108,931 dTLB-load-misses:u # 0.23% of all dTLB cache accesses (32.30%)
|
||||||
|
14,189,608 iTLB-loads:u # 256.770 M/sec (19.86%)
|
||||||
|
<not counted> iTLB-load-misses:u (0.00%)
|
||||||
|
|
||||||
|
7.117399121 seconds time elapsed
|
||||||
|
|
||||||
|
18.404618000 seconds user
|
||||||
|
29.532104000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
|
155598]),
|
||||||
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
|
tensor([0.7544, 0.0071, 0.0491, ..., 0.7236, 0.5537, 0.4901])
|
||||||
|
Matrix: vt2010
|
||||||
|
Shape: torch.Size([32580, 32580])
|
||||||
|
NNZ: 155598
|
||||||
|
Density: 0.00014658915806621921
|
||||||
|
Time: 3.6322426795959473 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
|
340,611 BR_MIS_PRED_RETIRED:u # 0.0 per branch branch_misprediction_ratio
|
||||||
|
20,355,849 BR_RETIRED:u
|
||||||
|
|
||||||
|
7.112879848 seconds time elapsed
|
||||||
|
|
||||||
|
18.362004000 seconds user
|
||||||
|
29.398677000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
|
155598]),
|
||||||
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
|
tensor([0.7651, 0.6605, 0.7128, ..., 0.7434, 0.6656, 0.3987])
|
||||||
|
Matrix: vt2010
|
||||||
|
Shape: torch.Size([32580, 32580])
|
||||||
|
NNZ: 155598
|
||||||
|
Density: 0.00014658915806621921
|
||||||
|
Time: 3.7933311462402344 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
|
27,946,393 L1I_TLB:u # 0.0 per TLB access itlb_walk_ratio
|
||||||
|
6,805 ITLB_WALK:u
|
||||||
|
17,877 DTLB_WALK:u # 0.0 per TLB access dtlb_walk_ratio
|
||||||
|
38,226,912 L1D_TLB:u
|
||||||
|
|
||||||
|
7.235266934 seconds time elapsed
|
||||||
|
|
||||||
|
18.566568000 seconds user
|
||||||
|
29.759130000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
|
155598]),
|
||||||
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
|
tensor([0.3319, 0.1241, 0.4830, ..., 0.5188, 0.8684, 0.1488])
|
||||||
|
Matrix: vt2010
|
||||||
|
Shape: torch.Size([32580, 32580])
|
||||||
|
NNZ: 155598
|
||||||
|
Density: 0.00014658915806621921
|
||||||
|
Time: 3.662006378173828 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
|
31,946,141 L1I_CACHE:u # 0.0 per cache access l1i_cache_miss_ratio
|
||||||
|
295,259 L1I_CACHE_REFILL:u
|
||||||
|
468,136 L1D_CACHE_REFILL:u # 0.0 per cache access l1d_cache_miss_ratio
|
||||||
|
33,395,666 L1D_CACHE:u
|
||||||
|
|
||||||
|
7.187008251 seconds time elapsed
|
||||||
|
|
||||||
|
18.275672000 seconds user
|
||||||
|
30.724065000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/nfshomes/vut/ampere_research/pytorch/spmv.py:20: UserWarning: Sparse CSR tensor support is in beta state. If you miss a functionality in the sparse tensor support, please submit a feature request to https://github.com/pytorch/pytorch/issues. (Triggered internally at /space/jenkins/workspace/Releases/pytorch-dls/pytorch-dls/aten/src/ATen/SparseCsrTensorImpl.cpp:55.)
|
||||||
|
).to_sparse_csr().type(torch.float)
|
||||||
|
tensor(crow_indices=tensor([ 0, 4, 7, ..., 155588, 155592,
|
||||||
|
155598]),
|
||||||
|
col_indices=tensor([ 131, 561, 996, ..., 32237, 32238, 32570]),
|
||||||
|
values=tensor([79040., 7820., 15136., ..., 2828., 17986., 2482.]),
|
||||||
|
size=(32580, 32580), nnz=155598, layout=torch.sparse_csr)
|
||||||
|
tensor([0.0055, 0.7774, 0.9046, ..., 0.5143, 0.0678, 0.4725])
|
||||||
|
Matrix: vt2010
|
||||||
|
Shape: torch.Size([32580, 32580])
|
||||||
|
NNZ: 155598
|
||||||
|
Density: 0.00014658915806621921
|
||||||
|
Time: 3.616023063659668 seconds
|
||||||
|
|
||||||
|
Performance counter stats for 'apptainer run pytorch-altra.sif -c numactl --cpunodebind=0 --membind=0 python spmv.py matrices/vt2010.mtx 1000':
|
||||||
|
|
||||||
|
527,109 LL_CACHE_MISS_RD:u # 1.0 per cache access ll_cache_read_miss_ratio
|
||||||
|
540,409 LL_CACHE_RD:u
|
||||||
|
192,519 L2D_TLB:u # 0.1 per TLB access l2_tlb_miss_ratio
|
||||||
|
24,204 L2D_TLB_REFILL:u
|
||||||
|
290,933 L2D_CACHE_REFILL:u # 0.2 per cache access l2_cache_miss_ratio
|
||||||
|
1,743,452 L2D_CACHE:u
|
||||||
|
|
||||||
|
7.030605378 seconds time elapsed
|
||||||
|
|
||||||
|
18.274323000 seconds user
|
||||||
|
28.779020000 seconds sys
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -42,6 +42,10 @@ def run_program(program: list[str]) -> tuple[dict, str]:
return (json.loads(process.stdout), process.stderr)
return (json.loads(process.stdout), process.stderr)

result = dict()
result = dict()
result['architecture'] = args.arch
result['iterations'] = args.iterations
result['baseline_time_s'] = args.baseline_time_s
result['baseline_delay_s'] = args.baseline_delay_s

if args.power is True:
if args.power is True:
result['power_before'] = baseline_power(args.baseline_time_s)
result['power_before'] = baseline_power(args.baseline_time_s)
@ -3,7 +3,7 @@ import numpy as np
import argparse
import argparse
import time
import time
import json
import json
import sys
import sys, os

parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser()
parser.add_argument('matrix_file', help='the input matrix (.mtx) file')
parser.add_argument('matrix_file', help='the input matrix (.mtx) file')
@ -32,6 +32,9 @@ end = time.time()

result = dict()
result = dict()

result['matrix'] = os.path.splitext(os.path.basename(args.matrix_file))[0]
print(f"Matrix: {result['matrix']}", file=sys.stderr)

result['shape'] = matrix.shape
result['shape'] = matrix.shape
print(f"Shape: {result['shape']}", file=sys.stderr)
print(f"Shape: {result['shape']}", file=sys.stderr)
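For context on the spmv.py hunks above: the script prints the human-readable lines seen in the .output files to stderr and emits the result dict as JSON on stdout, which run_program() parses with json.loads(). The following is a minimal sketch of that measurement core under stated assumptions (the .mtx file is read with scipy.io.mmread and multiplied against a random dense vector); it is an illustration, not the committed script.

# Hypothetical condensation of spmv.py's measurement core; argument parsing,
# the tensor dump, and the extra printed fields are omitted.
import json
import os
import sys
import time

import numpy as np
import torch
from scipy.io import mmread  # assumption: .mtx files are loaded via scipy

def time_spmv(matrix_file: str, iterations: int) -> dict:
    coo = mmread(matrix_file)  # scipy COO matrix
    indices = torch.tensor(np.vstack((coo.row, coo.col)), dtype=torch.int64)
    matrix = torch.sparse_coo_tensor(
        indices, torch.tensor(coo.data), coo.shape
    ).to_sparse_csr().type(torch.float)          # conversion the UserWarning points at
    x = torch.rand(matrix.shape[1])              # random dense right-hand side

    start = time.time()
    for _ in range(iterations):
        y = matrix @ x                           # sparse CSR matrix-vector product
    end = time.time()

    result = {
        'matrix': os.path.splitext(os.path.basename(matrix_file))[0],
        'shape': list(matrix.shape),
        'nnz': coo.nnz,
        '% density': coo.nnz / (coo.shape[0] * coo.shape[1]),
        'time_s': end - start,
    }
    print(f"Time: {result['time_s']} seconds", file=sys.stderr)
    return result

if __name__ == '__main__':
    print(json.dumps(time_spmv(sys.argv[1], int(sys.argv[2]))))  # JSON on stdout for run.py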