From 3b6d8775b63ee9fb14cf61f4fc6d419ef82d3e23 Mon Sep 17 00:00:00 2001 From: dariarom94 Date: Wed, 24 Jun 2026 18:12:50 +0200 Subject: [PATCH 1/3] fix bug --- src/methods_segmentation/custom_segmentation/script.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/methods_segmentation/custom_segmentation/script.py b/src/methods_segmentation/custom_segmentation/script.py index c108117f..d29cb9ab 100644 --- a/src/methods_segmentation/custom_segmentation/script.py +++ b/src/methods_segmentation/custom_segmentation/script.py @@ -2,6 +2,7 @@ import anndata as ad import os import shutil +import pandas as pd ## VIASH START par = { @@ -20,14 +21,18 @@ assert par["labels_key"] in sdata.labels, f"Key '{par['labels_key']}' not found in input data." print(f"Copy segmentation from '{par['labels_key']}'", flush=True) +metadata = sdata.tables["metadata"] +# Select only the columns that exist — Xenium provides cell_id and region, +# Vizgen uses different column names (or an empty obs) so we take what's available. 
+obs_cols = [c for c in ["cell_id", "region"] if c in metadata.obs.columns] sdata_segmentation_only = sd.SpatialData( labels={ "segmentation": sdata[par["labels_key"]] }, tables={ "table": ad.AnnData( - obs=sdata.tables["metadata"].obs[["cell_id", "region"]], - var=sdata.tables["metadata"].var[[]] + obs=metadata.obs[obs_cols], + var=metadata.var[[]] ) } ) From 207d00f190940b5e35931b1a198689f6c5bf2e0d Mon Sep 17 00:00:00 2001 From: dariarom94 Date: Wed, 24 Jun 2026 19:59:07 +0200 Subject: [PATCH 2/3] adjust stardist --- src/methods_segmentation/stardist/config.vsh.yaml | 1 + .../basic_transcript_assignment/script.py | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/methods_segmentation/stardist/config.vsh.yaml b/src/methods_segmentation/stardist/config.vsh.yaml index 5b5207c0..b8a6e7dc 100644 --- a/src/methods_segmentation/stardist/config.vsh.yaml +++ b/src/methods_segmentation/stardist/config.vsh.yaml @@ -34,6 +34,7 @@ engines: - stardist - tensorflow==2.17.0 - numpy<2.0.0 + - scipy<1.15.0 - type: native runners: diff --git a/src/methods_transcript_assignment/basic_transcript_assignment/script.py b/src/methods_transcript_assignment/basic_transcript_assignment/script.py index e28fed31..6e0967db 100644 --- a/src/methods_transcript_assignment/basic_transcript_assignment/script.py +++ b/src/methods_transcript_assignment/basic_transcript_assignment/script.py @@ -1,6 +1,7 @@ import numpy as np import xarray as xr import dask +import dask.dataframe as dd import spatialdata as sd import anndata as ad import pandas as pd @@ -33,14 +34,14 @@ assert par['coordinate_system'] in segmentation_coord_systems, f"Coordinate system '{par['coordinate_system']}' not found in input data." print('Transforming transcripts coordinates', flush=True) -# Parquet partitions each start from index 0, causing duplicate index values in the -# combined dask DataFrame. 
sd.transform() internally builds pd.Series(..., index=transformed.index) -# which fails with "cannot reindex on an axis with duplicate labels". -# Fix: reset to a global monotonic index before transforming; restore attrs explicitly -# because reset_index() drops them, which would break spatialdata's PointsModel check. +# Multi-partition parquet files each start with a 0-based index, producing duplicate index +# values in the combined dask DataFrame. sd.transform() internally creates a pd.Series with +# index=transformed.index; when that dask index is computed it triggers an assign expression +# that fails on duplicate/lazy indices. Fix: materialize to pandas and rebuild as a single +# dask partition with a clean RangeIndex before transforming. # The original sdata[transcripts_key] is left unchanged so lines below remain consistent. transcripts_input = sdata[par['transcripts_key']] -transcripts_reset = transcripts_input.reset_index(drop=True) +transcripts_reset = dd.from_pandas(transcripts_input.compute().reset_index(drop=True), npartitions=1) transcripts_reset.attrs.update(transcripts_input.attrs) transcripts = sd.transform(transcripts_reset, to_coordinate_system=par['coordinate_system']) From 8eb9becb455bddafed19ff7e29d1ac062c0d3ed6 Mon Sep 17 00:00:00 2001 From: dariarom94 Date: Wed, 24 Jun 2026 20:06:28 +0200 Subject: [PATCH 3/3] adjust mem for similarity --- src/base/labels_nebius.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/labels_nebius.config b/src/base/labels_nebius.config index 409ba948..bce93b74 100644 --- a/src/base/labels_nebius.config +++ b/src/base/labels_nebius.config @@ -124,8 +124,8 @@ withLabel: veryhightime { time = 24.h } // similarity metric does not need veryhighmem resources withName: '.*similarity_process' { - memory = '50.GB' - disk = '50.GB' + memory = '100.GB' + disk = '100.GB' } }