Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions graphblas/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
"""


def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offset, *, mask=None):
def _update_matrix_array(arr, matrix, rows, row_offset, columns, column_offset, *, mask=None):
if rows is None and columns is None:
if mask is None:
submatrix = matrix
Expand Down Expand Up @@ -167,13 +167,17 @@ def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offse
np_type = submatrix.dtype.np_type
if submatrix.dtype._is_udt and np_type.subdtype is not None:
vals = vals.tolist()
df.values[rows, cols] = vals
if isinstance(vals, np.ndarray) and vals.dtype.names is not None:
# Structured array: convert numpy.void elements to tuples for consistent display
arr[rows, cols] = [tuple(v) for v in vals]
else:
arr[rows, cols] = vals
if np.issubdtype(np_type, np.inexact):
nulls = np.isnan(vals)
df.values[rows[nulls], cols[nulls]] = "nan"
arr[rows[nulls], cols[nulls]] = "nan"


def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None):
def _update_vector_array(arr, vector, columns, column_offset, *, mask=None):
if columns is None:
if mask is None:
subvector = vector
Expand Down Expand Up @@ -205,9 +209,13 @@ def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None):
np_type = subvector.dtype.np_type
if subvector.dtype._is_udt and np_type.subdtype is not None:
vals = vals.tolist()
df.values[0, cols] = vals
if isinstance(vals, np.ndarray) and vals.dtype.names is not None:
# Structured array: convert numpy.void elements to tuples for consistent display
arr[0, cols] = [tuple(v) for v in vals]
else:
arr[0, cols] = vals
if np.issubdtype(np_type, np.inexact):
df.values[0, cols[np.isnan(vals)]] = "nan"
arr[0, cols[np.isnan(vals)]] = "nan"


def _get_max_columns():
Expand Down Expand Up @@ -244,18 +252,19 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None)
max_columns = _get_max_columns()
rows, row_groups = _get_chunk(matrix._nrows, min_rows, max_rows)
columns, column_groups = _get_chunk(matrix._ncols, max_columns, max_columns)
df = pd.DataFrame(columns=columns, index=rows)
arr = np.full((len(rows), len(columns)), np.nan, dtype=object)
for row_group, row_offset in row_groups:
for column_group, column_offset in column_groups:
_update_matrix_dataframe(
df,
_update_matrix_array(
arr,
matrix,
row_group,
row_offset,
column_group,
column_offset,
mask=mask,
)
df = pd.DataFrame(arr, columns=columns, index=rows)
if (
(mask is None or mask.structure)
and df.shape != matrix.shape
Expand Down Expand Up @@ -306,9 +315,10 @@ def _get_vector_dataframe(vector, max_rows, min_rows, max_columns, *, mask=None)
if max_columns is None: # pragma: no branch
max_columns = _get_max_columns()
columns, column_groups = _get_chunk(vector._size, max_columns, max_columns)
df = pd.DataFrame(columns=columns, index=[""])
arr = np.full((1, len(columns)), np.nan, dtype=object)
for column_group, column_offset in column_groups:
_update_vector_dataframe(df, vector, column_group, column_offset, mask=mask)
_update_vector_array(arr, vector, column_group, column_offset, mask=mask)
df = pd.DataFrame(arr, columns=columns, index=[""])
if (
(mask is None or mask.structure)
and df.size != vector._size
Expand Down
23 changes: 16 additions & 7 deletions graphblas/core/ss/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4088,13 +4088,22 @@ def serialize(self, compression="default", level=None, **opts):
dtype_size = ffi_new("size_t*")
status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then get the name
dtype_char = ffi_new(f"char[{dtype_size[0]}]")
status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then set the name
status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME)
check_status_carg(status, "Matrix", parent._carg)
if dtype_size[0] >= lib.GxB_MAX_NAME_LEN:
# The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE
# segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short
# registered name instead; anonymous UDTs cannot round-trip without dtype=.
if not parent.dtype._is_anonymous:
val_obj = ffi.new("char[]", parent.dtype.name.encode())
status = lib.GrB_Matrix_set_String(parent._carg, val_obj, lib.GrB_NAME)
check_status_carg(status, "Matrix", parent._carg)
else:
# Then get the name
dtype_char = ffi_new(f"char[{dtype_size[0]}]")
status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then set the name
status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME)
check_status_carg(status, "Matrix", parent._carg)

check_status(
lib.GxB_Matrix_serialize(
Expand Down
23 changes: 16 additions & 7 deletions graphblas/core/ss/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -1659,13 +1659,22 @@ def serialize(self, compression="default", level=None, **opts):
dtype_size = ffi_new("size_t*")
status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then get the name
dtype_char = ffi_new(f"char[{dtype_size[0]}]")
status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then set the name
status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME)
check_status_carg(status, "Vector", parent._carg)
if dtype_size[0] >= lib.GxB_MAX_NAME_LEN:
# The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE
# segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short
# registered name instead; anonymous UDTs cannot round-trip without dtype=.
if not parent.dtype._is_anonymous:
val_obj = ffi.new("char[]", parent.dtype.name.encode())
status = lib.GrB_Vector_set_String(parent._carg, val_obj, lib.GrB_NAME)
check_status_carg(status, "Vector", parent._carg)
else:
# Then get the name
dtype_char = ffi_new(f"char[{dtype_size[0]}]")
status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME)
check_status_carg(status, "Type", parent.dtype.gb_obj[0])
# Then set the name
status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME)
check_status_carg(status, "Vector", parent._carg)

check_status(
lib.GxB_Vector_serialize(
Expand Down
2 changes: 1 addition & 1 deletion graphblas/tests/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -2794,7 +2794,7 @@ def test_ss_concat(A, v):
expected[:, A.ncols] = v
assert B5.isequal(expected)

with pytest.raises(TypeError, match=""):
with pytest.raises(TypeError):
gb.ss.concat([v, [v]])
with pytest.raises(TypeError):
gb.ss.concat([[v], v])
Expand Down
7 changes: 2 additions & 5 deletions graphblas/tests/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2221,11 +2221,8 @@ def test_udt():
if suitesparse:
vv = Vector.ss.deserialize(v.ss.serialize(), dtype=long_udt)
assert v.isequal(vv, check_dtype=True)
if ss_version_major < 9:
with pytest.raises(SyntaxError):
# The size of the UDT name is limited
Vector.ss.deserialize(v.ss.serialize())
else:
with pytest.raises(SyntaxError):
# The dtype name is too long to embed in the blob; dtype= must be provided
Vector.ss.deserialize(v.ss.serialize())
# May be able to look up non-anonymous dtypes by name if their names are too long
named_long_dtype = np.dtype([("x", np.bool_), ("y" * 1000, np.float64)], align=False)
Expand Down
Loading