changeset 2:3282b4920a34 draft

planemo upload for repository https://github.com/npinter/ROIsplitter commit 918ae25f84e7042ed36461219ff068633c1c2427
author galaxyp
date Fri, 19 Jul 2024 14:33:33 +0000
parents f955895aed01
children 180b6dc5735a
files qupath_roi_splitter.py qupath_roi_splitter.xml
diffstat 2 files changed, 131 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/qupath_roi_splitter.py	Wed Jun 14 16:36:48 2023 +0000
+++ b/qupath_roi_splitter.py	Fri Jul 19 14:33:33 2024 +0000
@@ -6,10 +6,32 @@
 import pandas as pd
 
 
-def draw_poly(input_df, input_img, col=(0, 0, 0)):
-    s = np.array(input_df)
-    output_img = cv2.fillPoly(input_img, pts=np.int32([s]), color=col)
-    return output_img
+def collect_coords(input_coords, feature_index, coord_index=0):
+    coords_with_index = []
+    for coord in input_coords:
+        coords_with_index.append((coord[0], coord[1], feature_index, coord_index))
+        coord_index += 1
+    return coords_with_index
+
+
+def collect_roi_coords(input_roi, feature_index):
+    all_coords = []
+    if len(input_roi["geometry"]["coordinates"]) == 1:
+        # Polygon w/o holes
+        all_coords.extend(collect_coords(input_roi["geometry"]["coordinates"][0], feature_index))
+    else:
+        coord_index = 0
+        for sub_roi in input_roi["geometry"]["coordinates"]:
+            # Polygon with holes or MultiPolygon
+            if not isinstance(sub_roi[0][0], list):
+                all_coords.extend(collect_coords(sub_roi, feature_index, coord_index))
+                coord_index += len(sub_roi)
+            else:
+                # MultiPolygon with holes
+                for sub_coord in sub_roi:
+                    all_coords.extend(collect_coords(sub_coord, feature_index, coord_index))
+                    coord_index += len(sub_coord)
+    return all_coords
 
 
 def split_qupath_roi(in_roi):
@@ -17,66 +39,73 @@
         qupath_roi = geojson.load(file)
 
     # HE dimensions
-    dim_plt = [qupath_roi["dim"]["width"], qupath_roi["dim"]["height"]]
+    dim_plt = [int(qupath_roi["dim"]["width"]), int(qupath_roi["dim"]["height"])]
 
     tma_name = qupath_roi["name"]
     cell_types = [ct.rsplit(" - ", 1)[-1] for ct in qupath_roi["featureNames"]]
 
-    for cell_type in cell_types:
-        # create numpy array with white background
-        img = np.zeros((dim_plt[1], dim_plt[0], 3), dtype="uint8")
-        img.fill(255)
+    coords_by_cell_type = {ct: [] for ct in cell_types}
+    coords_by_cell_type['all'] = []  # For storing all coordinates if args.all is True
+
+    for feature_index, roi in enumerate(qupath_roi["features"]):
+        feature_coords = collect_roi_coords(roi, feature_index)
+
+        if args.all:
+            coords_by_cell_type['all'].extend(feature_coords)
+        elif "classification" in roi["properties"]:
+            cell_type = roi["properties"]["classification"]["name"]
+            if cell_type in cell_types:
+                coords_by_cell_type[cell_type].extend(feature_coords)
+
+    for cell_type, coords in coords_by_cell_type.items():
+        if coords:
+            # Generate image (white background)
+            img = np.ones((dim_plt[1], dim_plt[0]), dtype="uint8") * 255
+
+            # Convert to numpy array and ensure integer coordinates
+            coords_arr = np.array(coords).astype(int)
+
+            # Sort by feature_index first, then by coord_index
+            coords_arr = coords_arr[np.lexsort((coords_arr[:, 3], coords_arr[:, 2]))]
 
-        for i, roi in enumerate(qupath_roi["features"]):
-            if roi["properties"]["classification"]["name"] == cell_type:
-                if len(roi["geometry"]["coordinates"]) == 1:
-                    # Polygon w/o holes
-                    img = draw_poly(roi["geometry"]["coordinates"][0], img)
-                else:
-                    first_roi = True
-                    for sub_roi in roi["geometry"]["coordinates"]:
-                        # Polygon with holes
-                        if not isinstance(sub_roi[0][0], list):
-                            if first_roi:
-                                img = draw_poly(sub_roi, img)
-                                first_roi = False
-                            else:
-                                # holes in ROI
-                                img = draw_poly(sub_roi, img, col=(255, 255, 255))
-                        else:
-                            # MultiPolygon with holes
-                            for sub_coord in sub_roi:
-                                if first_roi:
-                                    img = draw_poly(sub_coord, img)
-                                    first_roi = False
-                                else:
-                                    # holes in ROI
-                                    img = draw_poly(sub_coord, img, col=(255, 255, 255))
+            # Get filled pixel coordinates
+            if args.fill:
+                filled_coords = np.column_stack(np.where(img == 0))
+                all_coords = np.unique(np.vstack((coords_arr[:, :2], filled_coords[:, ::-1])), axis=0)
+            else:
+                all_coords = coords_arr[:, :2]
+
+            # Save all coordinates to CSV
+            coords_df = pd.DataFrame(all_coords, columns=['x', 'y'], dtype=int)
+            coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False)
 
-        # get all black pixel
-        coords_arr = np.column_stack(np.where(img == (0, 0, 0)))
-
-        # remove duplicated rows
-        coords_arr_xy = coords_arr[coords_arr[:, 2] == 0]
-
-        # remove last column
-        coords_arr_xy = np.delete(coords_arr_xy, 2, axis=1)
+            # Generate image for visualization if --img is specified
+            if args.img:
+                # Group coordinates by feature_index
+                features = {}
+                for x, y, feature_index, coord_index in coords_arr:
+                    if feature_index not in features:
+                        features[feature_index] = []
+                    features[feature_index].append((x, y))
 
-        # to pandas and rename columns to x and y
-        coords_df = pd.DataFrame(coords_arr_xy, columns=['x', 'y'])
+                # Draw each feature separately
+                for feature_coords in features.values():
+                    pts = np.array(feature_coords, dtype=np.int32)
+                    if args.fill:
+                        cv2.fillPoly(img, [pts], color=0)  # Black fill
+                    else:
+                        cv2.polylines(img, [pts], isClosed=True, color=0, thickness=1)  # Black outline
 
-        # drop duplicates
-        coords_df = coords_df.drop_duplicates(
-            subset=['x', 'y'],
-            keep='last').reset_index(drop=True)
-
-        coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False)
+                cv2.imwrite("{}_{}.png".format(tma_name, cell_type), img)
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classfication)")
+    parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classification)")
     parser.add_argument("--qupath_roi", default=False, help="Input QuPath annotation (GeoJSON file)")
-    parser.add_argument('--version', action='version', version='%(prog)s 0.1.0')
+    parser.add_argument("--fill", action="store_true", required=False, help="Fill pixels in ROIs (order of coordinates will be lost)")
+    parser.add_argument('--version', action='version', version='%(prog)s 0.3.0')
+    parser.add_argument("--all", action="store_true", required=False, help="Extracts all ROIs")
+    parser.add_argument("--img", action="store_true", required=False, help="Generates image of ROIs")
     args = parser.parse_args()
 
     if args.qupath_roi:
--- a/qupath_roi_splitter.xml	Wed Jun 14 16:36:48 2023 +0000
+++ b/qupath_roi_splitter.xml	Fri Jul 19 14:33:33 2024 +0000
@@ -1,24 +1,42 @@
-<tool id="qupath_roi_splitter" name="QuPath ROI Splitter" version="0.1.0+galaxy1">
+<tool id="qupath_roi_splitter" name="QuPath ROI Splitter" version="@VERSION@+galaxy@VERSION_SUFFIX@">
     <description>Split ROI coordinates of QuPath TMA annotation by cell type (classification)</description>
+    <macros>
+        <token name="@VERSION@">0.3.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
     <requirements>
         <requirement type="package" version="3.0.1">geojson</requirement>
         <requirement type="package" version="1.24.2">numpy</requirement>
         <requirement type="package" version="4.7.0">opencv</requirement>
-        <requirement type="package" version="2.0.0">pandas</requirement>        
+        <requirement type="package" version="2.0.0">pandas</requirement>
     </requirements>
      <command detect_errors="exit_code"><![CDATA[
         #for $input in $input_collection
-            python3 '$__tool_directory__/qupath_roi_splitter.py' --qupath_roi '$input' &&
+            python3 '$__tool_directory__/qupath_roi_splitter.py'
+                --qupath_roi '$input'
+                $optional.fill
+                $optional.all
+                $optional.img
+             &&
         #end for
-        mkdir out &&
-        mv *.txt out/
+        mkdir out
+        && mv *.txt out/
+        && mv *.png out/
     ]]></command>
     <inputs>
         <param name="input_collection" type="data_collection" format="geojson" label="Input QuPath annotation" help="Collection containing GeoJSON files"/>
+        <section name="optional" title="Optional">
+            <param name="fill" type="boolean" truevalue="--fill" falsevalue="" checked="false" label="Fill ROIs" help="Fill pixels in ROIs"/>
+            <param name="all" type="boolean" truevalue="--all" falsevalue="" checked="false" label="Extract all" help="Extracts all ROIs"/>
+            <param name="img" type="boolean" truevalue="--img" falsevalue="" checked="false" label="Generate image" help="Generates image from ROIs"/>
+        </section>
     </inputs>
     <outputs>
         <collection name="output_txts" type="list" label="${tool.name} on ${on_string}: ROI data">
-            <discover_datasets pattern="__name_and_ext__" directory="out" visible="false" format="txt"/>
+            <discover_datasets pattern="(?P&lt;name&gt;.*\.txt)" directory="out" visible="false" ext="txt"/>
+        </collection>
+        <collection name="output_imgs" type="list" label="${tool.name} on ${on_string}: Images of ROIs">
+            <discover_datasets pattern="(?P&lt;name&gt;.*\.png)" directory="out" visible="false" ext="png"/>
         </collection>
     </outputs>
     <tests>
@@ -29,13 +47,24 @@
                     <element name="annotations_TMA_F-5.geojson" value="annotations_TMA_F-5.geojson" />
                 </collection>
             </param>
+            <section name="optional">
+                <param name="fill" value="true"/>
+                <param name="img" value="true"/>
+            </section>
             <output_collection name="output_txts" type="list" count="4">
-                <element name="F-5_Stroma">
+                <element name="F-5_Stroma.txt">
                     <assert_contents>
                         <has_text text="x"/>
                         <has_text text="y"/>
-                        <has_text text="15561"/>
-                        <has_text text="21160"/>
+                        <has_text text="21153"/>
+                        <has_text text="15570"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="output_imgs" type="list" count="4">
+                <element name="E-5_Tumor.png">
+                    <assert_contents>
+                        <has_size value="459919"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -52,4 +81,16 @@
 
         https://github.com/npinter/ROIsplitter
     ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @misc{
+                toolsGalaxyP,
+                author = {Pinter, N, Föll, MC },
+                title = {Galaxy Proteomics Tools},
+                publisher = {GitHub}, journal = {GitHub repository},
+                year = {2024},
+                url = {https://github.com/galaxyproteomics/tools-galaxyp}
+            }
+        </citation>
+    </citations>
 </tool>