ashvardanian/read_matrix.py

## read_matrix.py
def read_matrix(filename: str, start_row: int = 0, count_rows: Optional[int] = None):
    """
    Read *.ibin, *.hbin, *.fbin, *.dbin files with matrixes.

    The file begins with two int32 values (number of rows, number of columns),
    followed by the scalars, which are reshaped to ``(rows, cols)``. The scalar
    type is chosen from the file extension:
    ``.fbin`` -> float32, ``.dbin`` -> float64, ``.hbin`` -> float16,
    ``.ibin`` -> int32.

    Args:
        :param filename (str): path to the matrix file
        :param start_row (int): start reading vectors from this index
        :param count_rows (int): number of vectors to read. If None, read all
            vectors from ``start_row`` to the end. Values larger than the
            number of remaining rows are clamped to what is available.
    Returns:
        Parsed matrix (numpy.ndarray) of shape ``(rows_read, cols)``
    Raises:
        ValueError: if the extension is not recognised, the header is
            incomplete, ``start_row`` is outside ``[0, rows]``, or
            ``count_rows`` is negative.
    """
    dtypes_by_suffix = {
        '.fbin': np.float32,
        '.dbin': np.float64,
        '.hbin': np.float16,
        '.ibin': np.int32,
    }
    for suffix, scalar_type in dtypes_by_suffix.items():
        if filename.endswith(suffix):
            dtype = np.dtype(scalar_type)
            break
    else:
        raise ValueError('Unknown file type')

    if count_rows is not None and count_rows < 0:
        raise ValueError(f'count_rows must be non-negative, got {count_rows}')

    with open(filename, 'rb') as f:
        header = np.fromfile(f, count=2, dtype=np.int32)
        if header.size != 2:
            raise ValueError('Matrix file is too short to contain a header')
        # Convert to Python ints: np.int32 arithmetic below (rows * cols and
        # the byte offset) would silently wrap around for large matrices.
        total_rows, cols = int(header[0]), int(header[1])

        if not 0 <= start_row <= total_rows:
            raise ValueError(
                f'start_row {start_row} is outside the valid range '
                f'[0, {total_rows}]')

        remaining = total_rows - start_row
        rows = remaining if count_rows is None else min(count_rows, remaining)

        # Nothing to read: build the empty result directly instead of issuing
        # a zero-length read.
        if rows * cols == 0:
            return np.empty((rows, cols), dtype=dtype)

        # `offset` is in bytes, relative to the current position, which is
        # just past the 8-byte header.
        arr = np.fromfile(
            f, count=rows * cols, dtype=dtype,
            offset=start_row * dtype.itemsize * cols)
    return arr.reshape(rows, cols)