"""Benchmark repeated sparse matrix x dense vector multiplication in PyTorch.

The matrix is either read from a Matrix Market (.mtx) file or generated
randomly with a requested size and density, optionally converted to CSR,
and multiplied by a random dense vector ``iterations`` times.

Human-readable statistics go to stderr; a single JSON object with the same
statistics is written to stdout so that a caller can parse it.
"""
import argparse
import json
import os
import sys
import time

import numpy as np
import scipy
import scipy.io
import scipy.sparse
import torch

from data_stat import Stat, Format, MatrixType

device = 'cpu'

# --------------------------------------------------------------------------
# Command line
# --------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('matrix_type', type=str,
                    choices=[t.name.lower() for t in MatrixType],
                    help='the type of matrix')
parser.add_argument('format', type=str,
                    choices=[fmt.name.lower() for fmt in Format],
                    help='the sparse format to use')
parser.add_argument('iterations', type=int,
                    help='the number of iterations of multiplication to perform')
parser.add_argument('-m', '--matrix_file', type=str,
                    help='the input matrix (.mtx) file')
parser.add_argument('-ss', '--synthetic_size', type=int,
                    help='the synthetic matrix parameters size (rows)')
parser.add_argument('-sd', '--synthetic_density', type=float,
                    help='the synthetic matrix density')
parser.add_argument('-c', '--cores', type=int,
                    help='the number of cores to use')

args = parser.parse_args()
# Replace the lower-case CLI strings with the corresponding enum members.
args.matrix_type = MatrixType[args.matrix_type.upper()]
args.format = Format[args.format.upper()]

if args.cores is not None:
    torch.set_num_threads(args.cores)

# --------------------------------------------------------------------------
# Build the sparse matrix (always COO at this point)
# --------------------------------------------------------------------------
if args.matrix_type == MatrixType.SUITESPARSE:
    if args.matrix_file is None:
        sys.exit("Matrix file not specified!")

    matrix = scipy.io.mmread(args.matrix_file)
    matrix = torch.sparse_coo_tensor(
        np.vstack((matrix.row, matrix.col)),
        matrix.data, matrix.shape,
        device=device, dtype=torch.float32)
elif args.matrix_type == MatrixType.SYNTHETIC:
    # Both parameters are required: either one missing makes generation fail.
    if args.synthetic_size is None or args.synthetic_density is None:
        sys.exit("Synthetic matrix parameters not specified!")

    matrix = scipy.sparse.random(
        args.synthetic_size, args.synthetic_size,
        density=args.synthetic_density,
        format='coo', dtype=np.float32,
        random_state=np.random.default_rng())

    # Indices must be integral: float32 cannot represent integers above
    # 2**24 exactly, which would corrupt coordinates of large matrices.
    indices = torch.tensor(np.vstack([matrix.row, matrix.col]),
                           dtype=torch.int64, device=device)
    values = torch.tensor(matrix.data, dtype=torch.float32, device=device)

    matrix = torch.sparse_coo_tensor(
        indices, values, size=matrix.shape,
        dtype=torch.float32, device=device)
else:
    sys.exit("Unrecognized matrix type!")

# --------------------------------------------------------------------------
# Convert to the requested storage format
# --------------------------------------------------------------------------
if args.format == Format.CSR:
    matrix = matrix.to_sparse_csr().type(torch.float32)
elif args.format == Format.COO:
    pass  # already COO
else:
    sys.exit("Unrecognized format!")

vector = torch.rand(matrix.shape[1], device=device)

print(matrix, file=sys.stderr)
print(vector, file=sys.stderr)

# --------------------------------------------------------------------------
# Timed kernel
# --------------------------------------------------------------------------
# perf_counter() is monotonic and high resolution, so the measurement is not
# affected by system clock adjustments during the run.
start = time.perf_counter()
for _ in range(args.iterations):
    # The vector is viewed as an (n, 1) matrix so torch.mm can be used;
    # the product itself is discarded, only the elapsed time matters.
    torch.mm(matrix, vector.unsqueeze(-1))
end = time.perf_counter()

# --------------------------------------------------------------------------
# Report
# --------------------------------------------------------------------------
result = dict()

result[Stat.MATRIX_TYPE.name] = args.matrix_type.value
print(f"Matrix Type: {result[Stat.MATRIX_TYPE.name]}", file=sys.stderr)

if args.matrix_type == MatrixType.SUITESPARSE:
    # Report the file name without its directory and extension.
    result[Stat.MATRIX_FILE.name] = os.path.splitext(
        os.path.basename(args.matrix_file))[0]
    print(f"Matrix: {result[Stat.MATRIX_FILE.name]}", file=sys.stderr)

result[Stat.MATRIX_FORMAT.name] = args.format.value
print(f"Matrix Format: {result[Stat.MATRIX_FORMAT.name]}", file=sys.stderr)

result[Stat.MATRIX_SHAPE.name] = matrix.shape
print(f"Shape: {result[Stat.MATRIX_SHAPE.name]}", file=sys.stderr)

result[Stat.MATRIX_ROWS.name] = matrix.shape[0]
print(f"Rows: {result[Stat.MATRIX_ROWS.name]}", file=sys.stderr)

result[Stat.MATRIX_SIZE.name] = matrix.shape[0] * matrix.shape[1]
print(f"Size: {result[Stat.MATRIX_SIZE.name]}", file=sys.stderr)

# The format was validated above, so anything that is not CSR is COO here.
if args.format == Format.CSR:
    nnz = matrix.values().shape[0]
else:
    # values() on a COO tensor requires it to be coalesced first.
    nnz = matrix.coalesce().values().shape[0]

result[Stat.MATRIX_NNZ.name] = nnz
print(f"NNZ: {result[Stat.MATRIX_NNZ.name]}", file=sys.stderr)

result[Stat.MATRIX_DENSITY.name] = nnz / result[Stat.MATRIX_SIZE.name]
print(f"Density: {result[Stat.MATRIX_DENSITY.name]}", file=sys.stderr)

result[Stat.TIME_S.name] = end - start
print(f"Time: {result[Stat.TIME_S.name]} seconds", file=sys.stderr)

# Machine-readable output for the caller.
print(json.dumps(result))