# ampere_research/pytorch/spmv.py

import argparse
import json
import os
import sys
import time

import numpy as np
import scipy
import scipy.io
import scipy.sparse
import torch

from data_stat import Stat, Format, MatrixType

# Every tensor created by this script is placed on this device.
device = 'cpu'

# Command-line interface. The positional choices are derived from the
# MatrixType and Format enums (lower-cased member names).
parser = argparse.ArgumentParser()
parser.add_argument(
    'matrix_type',
    type=str,
    choices=[kind.name.lower() for kind in MatrixType],
    help='the type of matrix',
)
parser.add_argument(
    'format',
    type=str,
    choices=[layout.name.lower() for layout in Format],
    help='the sparse format to use',
)
parser.add_argument(
    'iterations',
    type=int,
    help='the number of iterations of multiplication to perform',
)
parser.add_argument(
    '-m', '--matrix_file',
    type=str,
    help='the input matrix (.mtx) file',
)
parser.add_argument(
    '-ss', '--synthetic_size',
    type=int,
    help='the synthetic matrix parameters size (rows)',
)
parser.add_argument(
    '-sd', '--synthetic_density',
    type=float,
    help='the synthetic matrix density',
)
parser.add_argument(
    '-c', '--cores',
    type=int,
    help='the number of cores to use',
)
args = parser.parse_args()

# Swap the validated strings for the enum members they name, so the rest of
# the script can compare against MatrixType / Format directly.
args.format = Format[args.format.upper()]
args.matrix_type = MatrixType[args.matrix_type.upper()]

# Only override PyTorch's thread count when the user asked for one.
if args.cores is not None:
    torch.set_num_threads(args.cores)

# Build the sparse operand `matrix` as a float32 COO tensor on `device`,
# either from a Matrix Market file or from a randomly generated square matrix.
if args.matrix_type == MatrixType.SUITESPARSE:
    if args.matrix_file is None:
        exit("Matrix file not specified!")

    # NOTE(review): this relies on mmread returning a SciPy COO object
    # (with .row/.col/.data), i.e. on the file being in coordinate format.
    matrix = scipy.io.mmread(args.matrix_file)
    matrix = torch.sparse_coo_tensor(
        np.vstack((matrix.row, matrix.col)),
        matrix.data, matrix.shape,
        device=device, dtype=torch.float32)
elif args.matrix_type == MatrixType.SYNTHETIC:
    # Both parameters are required: reject the run if either one is missing
    # instead of letting a None reach scipy.sparse.random.
    if args.synthetic_size is None or args.synthetic_density is None:
        exit("Synthetic matrix parameters not specified!")

    matrix = scipy.sparse.random(
            args.synthetic_size, args.synthetic_size,
            density=args.synthetic_density,
            format='coo', dtype=np.float32,
            random_state=np.random.default_rng())
    # Indices are integer coordinates and must stay integral: float32 cannot
    # represent every integer above 2**24, so large matrices would end up
    # with wrong coordinates if the indices went through a float32 tensor.
    indices = torch.tensor(np.vstack([matrix.row, matrix.col]),
            dtype=torch.int64, device=device)
    values = torch.tensor(matrix.data,
            dtype=torch.float32, device=device)
    matrix = torch.sparse_coo_tensor(
            indices, values, size=matrix.shape,
            dtype=torch.float32, device=device)
else:
    exit("Unrecognized matrix type!")

# The matrix is built in COO layout; only CSR needs a conversion, and any
# format other than CSR or COO aborts the run.
if args.format not in (Format.CSR, Format.COO):
    exit("Unrecognized format!")

if args.format == Format.CSR:
    matrix = matrix.to_sparse_csr().type(torch.float32)

# Dense random right-hand side with one entry per matrix column.
vector = torch.rand(matrix.shape[1], device=device)

# Dump both operands to stderr so stdout stays reserved for the JSON result.
for operand in (matrix, vector):
    print(operand, file=sys.stderr)

# Time `args.iterations` back-to-back sparse-matrix x dense-vector products.
#
# The vector is viewed as a single-column matrix once, outside the timed
# region, since torch.mm takes a 2-D right-hand side and the reshape is
# loop-invariant. Other formulations noted in the original script:
#   torch.mv(matrix, vector)
#   torch.sparse.mm(matrix, vector.unsqueeze(-1)).squeeze(-1)
column = vector.unsqueeze(-1)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock and may be adjusted
# while the benchmark is running.
start = time.perf_counter()
for _ in range(args.iterations):
    torch.mm(matrix, column)
end = time.perf_counter()

# Collected statistics, keyed by Stat member name; emitted as JSON at the end.
result = {}


def _record(stat, label, value, suffix=""):
    """Store `value` under `stat.name` in `result` and echo it to stderr."""
    result[stat.name] = value
    print(f"{label}: {value}{suffix}", file=sys.stderr)


_record(Stat.MATRIX_TYPE, "Matrix Type", args.matrix_type.value)

if args.matrix_type == MatrixType.SUITESPARSE:
    # Report the file's base name without its directory or extension.
    stem, _extension = os.path.splitext(os.path.basename(args.matrix_file))
    _record(Stat.MATRIX_FILE, "Matrix", stem)

_record(Stat.MATRIX_FORMAT, "Matrix Format", args.format.value)
_record(Stat.MATRIX_SHAPE, "Shape", matrix.shape)
_record(Stat.MATRIX_ROWS, "Rows", matrix.shape[0])
_record(Stat.MATRIX_SIZE, "Size", matrix.shape[0] * matrix.shape[1])

# Despite its name, `rows` holds the number of stored values (the NNZ count).
# The COO tensor is coalesced first, before its values are counted.
if args.format == Format.CSR:
    rows = matrix.values().size(0)
elif args.format == Format.COO:
    rows = matrix.coalesce().values().size(0)
else:
    exit("Unrecognized format!")

_record(Stat.MATRIX_NNZ, "NNZ", rows)
_record(Stat.MATRIX_DENSITY, "Density", rows / result[Stat.MATRIX_SIZE.name])
_record(Stat.TIME_S, "Time", end - start, suffix=" seconds")

# The machine-readable result is the only thing written to stdout.
print(json.dumps(result))