Source code for graphizy.utils

"""
Just some utilities functions
"""
from pathlib import Path
import numpy as np


[docs] def setup_output_directory(): """Create output directory for saving images""" output_dir = Path("output") output_dir.mkdir(parents=True, exist_ok=True) return output_dir
[docs] def validate_graphizy_input(data_points, aspect="array", data_shape=None, dimension=(1200, 1200), proximity_thresh=None, verbose=True): """ Helper function to validate input data for graphizy operations Args: data_points: Input data (numpy array or dict) aspect: "array" or "dict" data_shape: Expected data shape structure dimension: Image dimensions (width, height) proximity_thresh: Proximity threshold if applicable verbose: Print detailed validation info Returns: dict: Validation results with errors, warnings, and info """ results = { "valid": True, "errors": [], "warnings": [], "info": {}, "suggestions": [] } try: # Validate dimension FIRST (independent of data) if not isinstance(dimension, (tuple, list)) or len(dimension) != 2: results["errors"].append("Dimension must be tuple/list of 2 integers") results["valid"] = False elif dimension[0] <= 0 or dimension[1] <= 0: results["errors"].append("Dimension values must be positive") results["valid"] = False else: results["info"]["dimension"] = dimension # Validate proximity threshold if provided if proximity_thresh is not None: if proximity_thresh <= 0: results["warnings"].append("Proximity threshold should be positive") results["info"]["proximity_threshold"] = proximity_thresh # Check aspect parameter if aspect not in ["array", "dict"]: results["errors"].append(f"Invalid aspect '{aspect}'. Must be 'array' or 'dict'") results["valid"] = False # Validate data_points based on aspect if aspect == "array": if not isinstance(data_points, np.ndarray): results["errors"].append(f"Expected numpy array for aspect 'array', got {type(data_points)}") results["valid"] = False else: # Check array properties results["info"]["shape"] = data_points.shape results["info"]["dtype"] = str(data_points.dtype) # Check for minimum required columns (id, x, y) if data_points.ndim != 2: results["errors"].append(f"Data array must be 2D, got {data_points.ndim}D") results["valid"] = False elif data_points.shape[1] < 3: results["errors"].append( f"Data array needs at least 3 columns (id, x, y), got {data_points.shape[1]}") results["valid"] = False else: # Check for string/object IDs (which cause issues) if data_points.dtype.kind in ['U', 'S', 'O']: results["errors"].append("Object IDs must be numeric, not string type") results["valid"] = False # Check coordinate ranges if data_points.shape[0] > 0: x_coords = data_points[:, 1] # assuming x is column 1 y_coords = data_points[:, 2] # assuming y is column 2 results["info"]["x_range"] = (float(np.min(x_coords)), float(np.max(x_coords))) results["info"]["y_range"] = (float(np.min(y_coords)), float(np.max(y_coords))) results["info"]["num_points"] = data_points.shape[0] # Check if coordinates are within dimension bounds if np.any(x_coords < 0) or np.any(x_coords >= dimension[0]): results["warnings"].append(f"X coordinates outside dimension bounds [0, {dimension[0]})") if np.any(y_coords < 0) or np.any(y_coords >= dimension[1]): results["warnings"].append(f"Y coordinates outside dimension bounds [0, {dimension[1]})") # Check for duplicate coordinates unique_coords = np.unique(np.column_stack((x_coords, y_coords)), axis=0) if len(unique_coords) < len(x_coords): results["warnings"].append( f"Found {len(x_coords) - len(unique_coords)} duplicate coordinate pairs") else: # Empty array case results["info"]["num_points"] = 0 elif aspect == "dict": if isinstance(data_points, dict): # Check required keys required_keys = ["id", "x", "y"] missing_keys = [key for key in required_keys if key not in data_points] if missing_keys: results["errors"].append(f"Missing required keys: {missing_keys}") results["valid"] = False else: # Check array lengths match lengths = {key: len(data_points[key]) for key in required_keys} if len(set(lengths.values())) > 1: results["errors"].append(f"Mismatched array lengths: {lengths}") results["valid"] = False else: results["info"]["num_points"] = lengths["x"] if lengths["x"] > 0: results["info"]["x_range"] = (float(min(data_points["x"])), float(max(data_points["x"]))) results["info"]["y_range"] = (float(min(data_points["y"])), float(max(data_points["y"]))) # Check coordinate bounds if any(x < 0 or x >= dimension[0] for x in data_points["x"]): results["warnings"].append(f"X coordinates outside dimension bounds [0, {dimension[0]})") if any(y < 0 or y >= dimension[1] for y in data_points["y"]): results["warnings"].append(f"Y coordinates outside dimension bounds [0, {dimension[1]})") elif isinstance(data_points, np.ndarray): results["info"]["note"] = "Numpy array provided for dict aspect - will be auto-converted" # Validate as array first temp_result = validate_graphizy_input(data_points, "array", data_shape, dimension, proximity_thresh, False) if not temp_result["valid"]: results["errors"].extend(temp_result["errors"]) results["valid"] = False else: results["errors"].append(f"For dict aspect, expected dict or numpy array, got {type(data_points)}") results["valid"] = False # Add suggestions based on findings if results["valid"]: if results["info"].get("num_points", 0) < 3: results["suggestions"].append("Need at least 3 points for Delaunay triangulation") if results["info"].get("num_points", 0) > 10000: results["suggestions"].append("Large number of points - consider performance implications") if verbose: print("=== GRAPHIZY INPUT VALIDATION ===") print(f"Valid: {results['valid']}") if results["errors"]: print("\nERRORS:") for error in results["errors"]: print(f" ❌ {error}") if results["warnings"]: print("\nWARNINGS:") for warning in results["warnings"]: print(f" ⚠️ {warning}") if results["info"]: print("\nINFO:") for key, value in results["info"].items(): print(f" ℹ️ {key}: {value}") if results["suggestions"]: print("\nSUGGESTIONS:") for suggestion in results["suggestions"]: print(f" 💡 {suggestion}") except Exception as e: results["valid"] = False results["errors"].append(f"Validation failed with exception: {str(e)}") return results