Source code for graphizy.data_interface

"""
Data interface for Graphizy

.. moduleauthor:: Charles Fosseprez
.. contact:: charles.fosseprez.pro@gmail.com
.. license:: GPL2 or later
.. copyright:: Copyright (C) 2025 Charles Fosseprez
"""

import logging
import time
import random
import timeit
from typing import List, Tuple, Dict, Any, Union, Optional
from scipy.spatial.distance import cdist, pdist, squareform
import numpy as np
from collections import defaultdict, deque

from graphizy.exceptions import (
    InvalidPointArrayError, SubdivisionError, TriangulationError,
    GraphCreationError, PositionGenerationError, DependencyError,
    IgraphMethodError, handle_subdivision_bounds_error, InvalidDataShapeError
)



[docs] class DataInterface: """Interface for handling different data formats""" def __init__(self, data_shape: Optional[List[Tuple[str, type]]] = None): """Initialize data interface Args: data_shape: List of tuples defining data structure Raises: InvalidDataShapeError: If data shape is invalid """ try: # Set default data shape if none provided if data_shape is None: data_shape = [('id', int), ('x', float), ('y', float)] # Validate data_shape if not isinstance(data_shape, list): raise InvalidDataShapeError("Data shape input should be a list") if not data_shape: raise InvalidDataShapeError("Data shape cannot be empty") if not all(isinstance(item, tuple) and len(item) == 2 for item in data_shape): raise InvalidDataShapeError("Data shape elements should be tuples of (name, type)") # Keep data_shape self.data_shape = data_shape # Find data indexes data_idx = {} for i, variable in enumerate(data_shape): if not isinstance(variable[0], str): raise InvalidDataShapeError("Variable names must be strings") data_idx[variable[0]] = i self.data_idx = data_idx # Validate required fields required_fields = ['id', 'x', 'y'] missing_fields = [field for field in required_fields if field not in self.data_idx] if missing_fields: raise InvalidDataShapeError(f"Required fields missing: {missing_fields}") except Exception as e: raise InvalidDataShapeError(f"Failed to initialize data interface: {str(e)}")
[docs] def getidx_id(self) -> int: """Get index of id column""" return self.data_idx["id"]
[docs] def getidx_xpos(self) -> int: """Get index of x position column""" return self.data_idx["x"]
[docs] def getidx_ypos(self) -> int: """Get index of y position column""" return self.data_idx["y"]
[docs] def validate_array(self, point_array: np.ndarray) -> None: """ Validate that array has the correct structure for this data interface. Args: point_array: Array to validate Raises: InvalidPointArrayError: If array structure is invalid """ try: if point_array is None or point_array.size == 0: raise InvalidPointArrayError("Point array cannot be None or empty") if point_array.ndim != 2: raise InvalidPointArrayError("Point array must be 2D") required_cols = max(self.getidx_id(), self.getidx_xpos(), self.getidx_ypos()) + 1 if point_array.shape[1] < required_cols: raise InvalidPointArrayError( f"Point array doesn't have enough columns. " f"Need at least {required_cols}, got {point_array.shape[1]}" ) id_col_idx = self.getidx_id() if point_array.shape[1] > id_col_idx: id_column_dtype = point_array[:, id_col_idx].dtype if not np.issubdtype(id_column_dtype, np.number): raise InvalidPointArrayError("Object IDs must be numeric, not string or object type") except Exception as e: if isinstance(e, InvalidPointArrayError): raise raise InvalidPointArrayError(f"Array validation failed: {str(e)}")
[docs] def to_array(self, data_points: Union[np.ndarray, Dict[str, Any]], validate_data: bool = False) -> np.ndarray: """ Convert data points to standardized array format. Automatically detects input format and converts accordingly. Args: data_points: Input data in array or dict format validate_data: Whether to validate the data (careful at each call this will degrade the performances) Returns: np.ndarray: Data in standardized array format Raises: InvalidPointArrayError: If conversion fails or data is invalid """ try: if isinstance(data_points, np.ndarray): if validate_data: self.validate_array(data_points) return data_points elif isinstance(data_points, dict): # Convert dictionary to array required_keys = ["id", "x", "y"] if not all(k in data_points for k in required_keys): raise InvalidPointArrayError(f"Dict data must contain keys: {required_keys}") if not data_points["id"]: raise InvalidPointArrayError("Input dictionary cannot be empty") # Check all values are lists/arrays of same length lengths = [len(v) for v in data_points.values()] if len(set(lengths)) > 1: raise InvalidPointArrayError("All dict values must have same length") # Build the array using only the columns present in the input dictionary, # but ordered according to the instance's data_shape. ordered_columns = [] for attr_name, _ in self.data_shape: if attr_name in data_points: ordered_columns.append(data_points[attr_name]) if not ordered_columns: raise InvalidPointArrayError( "Could not construct array from dictionary; no matching keys found in data_shape.") return np.column_stack(ordered_columns) else: raise InvalidPointArrayError( f"Invalid data type: {type(data_points)}. " f"Expected numpy array or dictionary." ) except Exception as e: if isinstance(e, InvalidPointArrayError): raise raise InvalidPointArrayError(f"Failed to convert data to array format: {str(e)}")
[docs] def to_dict(self, point_array: np.ndarray) -> Dict[str, Any]: """Convert point array to dictionary format Args: point_array: Array to convert Returns: Dictionary with id, x, y keys Raises: InvalidPointArrayError: If conversion fails """ try: if point_array is None or point_array.size == 0: raise InvalidPointArrayError("Point array cannot be None or empty") if point_array.ndim != 2: raise InvalidPointArrayError("Point array must be 2D") if point_array.shape[1] < max(self.getidx_id(), self.getidx_xpos(), self.getidx_ypos()) + 1: raise InvalidPointArrayError("Point array doesn't have enough columns for the specified data shape") max_required_index = max(self.getidx_id(), self.getidx_xpos(), self.getidx_ypos()) if point_array.shape[1] <= max_required_index: raise InvalidPointArrayError( f"Point array has {point_array.shape[1]} columns, but the data shape " f"requires an index of at least {max_required_index}." ) point_dict = { "id": point_array[:, self.getidx_id()], "x": point_array[:, self.getidx_xpos()], "y": point_array[:, self.getidx_ypos()] } return point_dict except Exception as e: raise InvalidPointArrayError(f"Failed to convert point array: {str(e)}")