Source code for interscellar.api.wrapper_2d

# Import

import pandas as pd
import numpy as np
from typing import Optional, Tuple
from tqdm import tqdm
import time
import os

from ..core.find_cell_neighbors_2d import (
    create_neighbor_edge_table_database_2d,
    find_all_neighbors_by_surface_distance_2d,
    get_cells_dataframe,
    query_cell_type_pairs,
    get_graph_statistics,
    export_to_anndata,
    export_graph_tables,
    build_global_mask_2d,
    build_global_mask_2d_with_mapping
)

try:
    import anndata as ad
    ANNDATA_AVAILABLE = True
except ImportError:
    ANNDATA_AVAILABLE = False

# API: Wrapper functions

[docs] def find_cell_neighbors_2d( polygon_json_path: str, metadata_csv_path: str, max_distance_um: float = 1.0, pixel_size_um: float = 0.1085, centroid_prefilter_radius_um: float = 75.0, cell_id: str = 'cell_id', cell_type: str = 'subclass', centroid_x: str = 'X', centroid_y: str = 'Y', db_path: Optional[str] = None, output_csv: Optional[str] = None, output_anndata: Optional[str] = None, n_jobs: int = 1, return_connection: bool = False, save_surfaces_pickle: Optional[str] = None, load_surfaces_pickle: Optional[str] = None, save_graph_state_pickle: Optional[str] = None ) -> Tuple[Optional[pd.DataFrame], Optional[object], Optional[object]]: print("=" * 60) print("InterSCellar: Surface-based Cell Neighbor Detection - 2D") print("=" * 60) overall_start_time = time.time() print(f"\n1. Loading metadata from: {metadata_csv_path}...") step1_start = time.time() try: metadata_df = pd.read_csv(metadata_csv_path) print(f"Loaded {len(metadata_df)} cells") except Exception as e: raise ValueError(f"Error loading metadata CSV: {e}") required_cols = [cell_id, cell_type, centroid_x, centroid_y] missing_cols = [col for col in required_cols if col not in metadata_df.columns] if missing_cols: raise ValueError(f"Missing required columns in metadata: {missing_cols}") step1_time = time.time() - step1_start print(f"Step 1 completed in {step1_time:.2f} seconds") metadata_dir = os.path.dirname(metadata_csv_path) if os.path.dirname(metadata_csv_path) else "." base_name = os.path.splitext(os.path.basename(metadata_csv_path))[0] if db_path is None: db_path = os.path.join(metadata_dir, f"{base_name}_neighbor_graph_2d.db") print(f"db_path: {db_path}") if output_csv is None: output_csv = os.path.join(metadata_dir, f"{base_name}_neighbors_2d.csv") print(f"output_csv: {output_csv}") if output_anndata is None: output_anndata = os.path.join(metadata_dir, f"{base_name}_neighbors_2d.h5ad") print(f"output_anndata: {output_anndata}") print(f"\n2. Validating input file: {polygon_json_path}...") step2_start = time.time() if not os.path.exists(polygon_json_path): raise FileNotFoundError(f"JSON file not found: {polygon_json_path}") print(f"JSON file found") step2_time = time.time() - step2_start print(f"Step 2 completed in {step2_time:.2f} seconds") print(f"\n3. Building neighbor graph...") print(f"Parameters: max_distance={max_distance_um}μm, n_jobs={n_jobs}") if max_distance_um == 0.0: print(f"Mode: Touching cells only") else: print(f"Mode: Touching cells + near-neighbors") step3_start = time.time() try: conn = create_neighbor_edge_table_database_2d( polygon_json_path=polygon_json_path, metadata_df=metadata_df, max_distance_um=max_distance_um, pixel_size_um=pixel_size_um, centroid_prefilter_radius_um=centroid_prefilter_radius_um, db_path=db_path, cell_id=cell_id, cell_type=cell_type, centroid_x=centroid_x, centroid_y=centroid_y, output_csv=output_csv, output_anndata=output_anndata, n_jobs=n_jobs, save_surfaces_pickle=save_surfaces_pickle, load_surfaces_pickle=load_surfaces_pickle, save_graph_state_pickle=save_graph_state_pickle ) step3_time = time.time() - step3_start print(f"Neighbor graph created successfully") print(f"Step 3 completed in {step3_time:.2f} seconds") except Exception as e: raise RuntimeError(f"Error in neighbor detection pipeline: {e}") print(f"\n4. Retrieving results...") step4_start = time.time() neighbor_table_df = None if output_csv: try: neighbor_table_df = pd.read_sql_query("SELECT * FROM neighbors", conn) print(f"Neighbor table: {len(neighbor_table_df)} pairs") except Exception as e: print(f"Warning: Could not retrieve neighbor table: {e}") adata = None if output_anndata: try: if ANNDATA_AVAILABLE: adata = ad.read_h5ad(output_anndata) print(f"AnnData object loaded: {adata.shape}") else: print(f"Warning: AnnData not available (install with: pip install anndata)") except Exception as e: print(f"Warning: Could not load AnnData object: {e}") try: stats = get_graph_statistics(conn) print(f"Graph statistics: {stats['total_cells']} cells, {stats['total_edges']} pairs") except Exception as e: print(f"Warning: Could not retrieve statistics: {e}") step4_time = time.time() - step4_start print(f"Step 4 completed in {step4_time:.2f} seconds") overall_time = time.time() - overall_start_time print(f"\n5. Pipeline completed successfully!") print(f"Total execution time: {overall_time:.2f} seconds") print(f"Database: {db_path}") if output_csv: print(f"CSV output: {output_csv}") if output_anndata: print(f"AnnData output: {output_anndata}") print("=" * 60) if return_connection: return neighbor_table_df, adata, conn else: conn.close() return neighbor_table_df, adata, None