Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/base/labels_nebius.config
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ withLabel: veryhightime { time = 24.h }

// similarity metric does not need veryhighmem resources
withName: '.*similarity_process' {
memory = '50.GB'
disk = '50.GB'
memory = '100.GB'
disk = '100.GB'
}
}

Expand Down
9 changes: 7 additions & 2 deletions src/methods_segmentation/custom_segmentation/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import anndata as ad
import os
import shutil
import pandas as pd

## VIASH START
par = {
Expand All @@ -20,14 +21,18 @@
assert par["labels_key"] in sdata.labels, f"Key '{par['labels_key']}' not found in input data."

print(f"Copy segmentation from '{par['labels_key']}'", flush=True)
metadata = sdata.tables["metadata"]
# Select only the columns that exist: Xenium provides cell_id and region, whereas
# Vizgen uses different column names (or an empty obs), so we take what's available.
obs_cols = [c for c in ["cell_id", "region"] if c in metadata.obs.columns]
sdata_segmentation_only = sd.SpatialData(
labels={
"segmentation": sdata[par["labels_key"]]
},
tables={
"table": ad.AnnData(
obs=sdata.tables["metadata"].obs[["cell_id", "region"]],
var=sdata.tables["metadata"].var[[]]
obs=metadata.obs[obs_cols],
var=metadata.var[[]]
)
}
)
Expand Down
1 change: 1 addition & 0 deletions src/methods_segmentation/stardist/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ engines:
- stardist
- tensorflow==2.17.0
- numpy<2.0.0
- scipy<1.15.0
- type: native

runners:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import xarray as xr
import dask
import dask.dataframe as dd
import spatialdata as sd
import anndata as ad
import pandas as pd
Expand Down Expand Up @@ -33,14 +34,14 @@
assert par['coordinate_system'] in segmentation_coord_systems, f"Coordinate system '{par['coordinate_system']}' not found in input data."

print('Transforming transcripts coordinates', flush=True)
# Parquet partitions each start from index 0, causing duplicate index values in the
# combined dask DataFrame. sd.transform() internally builds pd.Series(..., index=transformed.index)
# which fails with "cannot reindex on an axis with duplicate labels".
# Fix: reset to a global monotonic index before transforming; restore attrs explicitly
# because reset_index() drops them, which would break spatialdata's PointsModel check.
# Each partition of a multi-partition parquet file starts with its own 0-based index, producing
# duplicate index values in the combined dask DataFrame. sd.transform() internally creates a
# pd.Series with index=transformed.index; computing that dask index triggers an assign
# expression that fails on duplicate/lazy indices. Fix: materialize to pandas and rebuild as a
# single dask partition with a clean RangeIndex before transforming.
# The original sdata[transcripts_key] is left unchanged so lines below remain consistent.
transcripts_input = sdata[par['transcripts_key']]
transcripts_reset = transcripts_input.reset_index(drop=True)
transcripts_reset = dd.from_pandas(transcripts_input.compute().reset_index(drop=True), npartitions=1)
transcripts_reset.attrs.update(transcripts_input.attrs)
transcripts = sd.transform(transcripts_reset, to_coordinate_system=par['coordinate_system'])

Expand Down
Loading