Mercurial > repos > galaxyp > qupath_roi_splitter
changeset 2:3282b4920a34 draft
planemo upload for repository https://github.com/npinter/ROIsplitter commit 918ae25f84e7042ed36461219ff068633c1c2427
author | galaxyp |
---|---|
date | Fri, 19 Jul 2024 14:33:33 +0000 |
parents | f955895aed01 |
children | 180b6dc5735a |
files | qupath_roi_splitter.py qupath_roi_splitter.xml |
diffstat | 2 files changed, 131 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- a/qupath_roi_splitter.py Wed Jun 14 16:36:48 2023 +0000 +++ b/qupath_roi_splitter.py Fri Jul 19 14:33:33 2024 +0000 @@ -6,10 +6,32 @@ import pandas as pd -def draw_poly(input_df, input_img, col=(0, 0, 0)): - s = np.array(input_df) - output_img = cv2.fillPoly(input_img, pts=np.int32([s]), color=col) - return output_img +def collect_coords(input_coords, feature_index, coord_index=0): + coords_with_index = [] + for coord in input_coords: + coords_with_index.append((coord[0], coord[1], feature_index, coord_index)) + coord_index += 1 + return coords_with_index + + +def collect_roi_coords(input_roi, feature_index): + all_coords = [] + if len(input_roi["geometry"]["coordinates"]) == 1: + # Polygon w/o holes + all_coords.extend(collect_coords(input_roi["geometry"]["coordinates"][0], feature_index)) + else: + coord_index = 0 + for sub_roi in input_roi["geometry"]["coordinates"]: + # Polygon with holes or MultiPolygon + if not isinstance(sub_roi[0][0], list): + all_coords.extend(collect_coords(sub_roi, feature_index, coord_index)) + coord_index += len(sub_roi) + else: + # MultiPolygon with holes + for sub_coord in sub_roi: + all_coords.extend(collect_coords(sub_coord, feature_index, coord_index)) + coord_index += len(sub_coord) + return all_coords def split_qupath_roi(in_roi): @@ -17,66 +39,73 @@ qupath_roi = geojson.load(file) # HE dimensions - dim_plt = [qupath_roi["dim"]["width"], qupath_roi["dim"]["height"]] + dim_plt = [int(qupath_roi["dim"]["width"]), int(qupath_roi["dim"]["height"])] tma_name = qupath_roi["name"] cell_types = [ct.rsplit(" - ", 1)[-1] for ct in qupath_roi["featureNames"]] - for cell_type in cell_types: - # create numpy array with white background - img = np.zeros((dim_plt[1], dim_plt[0], 3), dtype="uint8") - img.fill(255) + coords_by_cell_type = {ct: [] for ct in cell_types} + coords_by_cell_type['all'] = [] # For storing all coordinates if args.all is True + + for feature_index, roi in enumerate(qupath_roi["features"]): + feature_coords = 
collect_roi_coords(roi, feature_index) + + if args.all: + coords_by_cell_type['all'].extend(feature_coords) + elif "classification" in roi["properties"]: + cell_type = roi["properties"]["classification"]["name"] + if cell_type in cell_types: + coords_by_cell_type[cell_type].extend(feature_coords) + + for cell_type, coords in coords_by_cell_type.items(): + if coords: + # Generate image (white background) + img = np.ones((dim_plt[1], dim_plt[0]), dtype="uint8") * 255 + + # Convert to numpy array and ensure integer coordinates + coords_arr = np.array(coords).astype(int) + + # Sort by feature_index first, then by coord_index + coords_arr = coords_arr[np.lexsort((coords_arr[:, 3], coords_arr[:, 2]))] - for i, roi in enumerate(qupath_roi["features"]): - if roi["properties"]["classification"]["name"] == cell_type: - if len(roi["geometry"]["coordinates"]) == 1: - # Polygon w/o holes - img = draw_poly(roi["geometry"]["coordinates"][0], img) - else: - first_roi = True - for sub_roi in roi["geometry"]["coordinates"]: - # Polygon with holes - if not isinstance(sub_roi[0][0], list): - if first_roi: - img = draw_poly(sub_roi, img) - first_roi = False - else: - # holes in ROI - img = draw_poly(sub_roi, img, col=(255, 255, 255)) - else: - # MultiPolygon with holes - for sub_coord in sub_roi: - if first_roi: - img = draw_poly(sub_coord, img) - first_roi = False - else: - # holes in ROI - img = draw_poly(sub_coord, img, col=(255, 255, 255)) + # Get filled pixel coordinates + if args.fill: + filled_coords = np.column_stack(np.where(img == 0)) + all_coords = np.unique(np.vstack((coords_arr[:, :2], filled_coords[:, ::-1])), axis=0) + else: + all_coords = coords_arr[:, :2] + + # Save all coordinates to CSV + coords_df = pd.DataFrame(all_coords, columns=['x', 'y'], dtype=int) + coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False) - # get all black pixel - coords_arr = np.column_stack(np.where(img == (0, 0, 0))) - - # remove duplicated rows - coords_arr_xy = 
coords_arr[coords_arr[:, 2] == 0] - - # remove last column - coords_arr_xy = np.delete(coords_arr_xy, 2, axis=1) + # Generate image for visualization if --img is specified + if args.img: + # Group coordinates by feature_index + features = {} + for x, y, feature_index, coord_index in coords_arr: + if feature_index not in features: + features[feature_index] = [] + features[feature_index].append((x, y)) - # to pandas and rename columns to x and y - coords_df = pd.DataFrame(coords_arr_xy, columns=['x', 'y']) + # Draw each feature separately + for feature_coords in features.values(): + pts = np.array(feature_coords, dtype=np.int32) + if args.fill: + cv2.fillPoly(img, [pts], color=0) # Black fill + else: + cv2.polylines(img, [pts], isClosed=True, color=0, thickness=1) # Black outline - # drop duplicates - coords_df = coords_df.drop_duplicates( - subset=['x', 'y'], - keep='last').reset_index(drop=True) - - coords_df.to_csv("{}_{}.txt".format(tma_name, cell_type), sep='\t', index=False) + cv2.imwrite("{}_{}.png".format(tma_name, cell_type), img) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classfication)") + parser = argparse.ArgumentParser(description="Split ROI coordinates of QuPath TMA annotation by cell type (classification)") parser.add_argument("--qupath_roi", default=False, help="Input QuPath annotation (GeoJSON file)") - parser.add_argument('--version', action='version', version='%(prog)s 0.1.0') + parser.add_argument("--fill", action="store_true", required=False, help="Fill pixels in ROIs (order of coordinates will be lost)") + parser.add_argument('--version', action='version', version='%(prog)s 0.3.0') + parser.add_argument("--all", action="store_true", required=False, help="Extracts all ROIs") + parser.add_argument("--img", action="store_true", required=False, help="Generates image of ROIs") args = parser.parse_args() if args.qupath_roi:
--- a/qupath_roi_splitter.xml Wed Jun 14 16:36:48 2023 +0000 +++ b/qupath_roi_splitter.xml Fri Jul 19 14:33:33 2024 +0000 @@ -1,24 +1,42 @@ -<tool id="qupath_roi_splitter" name="QuPath ROI Splitter" version="0.1.0+galaxy1"> +<tool id="qupath_roi_splitter" name="QuPath ROI Splitter" version="@VERSION@+galaxy@VERSION_SUFFIX@"> <description>Split ROI coordinates of QuPath TMA annotation by cell type (classification)</description> + <macros> + <token name="@VERSION@">0.3.0</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> <requirements> <requirement type="package" version="3.0.1">geojson</requirement> <requirement type="package" version="1.24.2">numpy</requirement> <requirement type="package" version="4.7.0">opencv</requirement> - <requirement type="package" version="2.0.0">pandas</requirement> + <requirement type="package" version="2.0.0">pandas</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ #for $input in $input_collection - python3 '$__tool_directory__/qupath_roi_splitter.py' --qupath_roi '$input' && + python3 '$__tool_directory__/qupath_roi_splitter.py' + --qupath_roi '$input' + $optional.fill + $optional.all + $optional.img + && #end for - mkdir out && - mv *.txt out/ + mkdir out + && mv *.txt out/ + && mv *.png out/ ]]></command> <inputs> <param name="input_collection" type="data_collection" format="geojson" label="Input QuPath annotation" help="Collection containing GeoJSON files"/> + <section name="optional" title="Optional"> + <param name="fill" type="boolean" truevalue="--fill" falsevalue="" checked="false" label="Fill ROIs" help="Fill pixels in ROIs"/> + <param name="all" type="boolean" truevalue="--all" falsevalue="" checked="false" label="Extract all" help="Extracts all ROIs"/> + <param name="img" type="boolean" truevalue="--img" falsevalue="" checked="false" label="Generate image" help="Generates image from ROIs"/> + </section> </inputs> <outputs> <collection name="output_txts" type="list" label="${tool.name} on 
${on_string}: ROI data"> - <discover_datasets pattern="__name_and_ext__" directory="out" visible="false" format="txt"/> + <discover_datasets pattern="(?P<name>.*\.txt)" directory="out" visible="false" ext="txt"/> + </collection> + <collection name="output_imgs" type="list" label="${tool.name} on ${on_string}: Images of ROIs"> + <discover_datasets pattern="(?P<name>.*\.png)" directory="out" visible="false" ext="png"/> </collection> </outputs> <tests> @@ -29,13 +47,24 @@ <element name="annotations_TMA_F-5.geojson" value="annotations_TMA_F-5.geojson" /> </collection> </param> + <section name="optional"> + <param name="fill" value="true"/> + <param name="img" value="true"/> + </section> <output_collection name="output_txts" type="list" count="4"> - <element name="F-5_Stroma"> + <element name="F-5_Stroma.txt"> <assert_contents> <has_text text="x"/> <has_text text="y"/> - <has_text text="15561"/> - <has_text text="21160"/> + <has_text text="21153"/> + <has_text text="15570"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="output_imgs" type="list" count="4"> + <element name="E-5_Tumor.png"> + <assert_contents> + <has_size value="459919"/> </assert_contents> </element> </output_collection> @@ -52,4 +81,16 @@ https://github.com/npinter/ROIsplitter ]]></help> + <citations> + <citation type="bibtex"> + @misc{ + toolsGalaxyP, + author = {Pinter, N, Föll, MC }, + title = {Galaxy Proteomics Tools}, + publisher = {GitHub}, journal = {GitHub repository}, + year = {2024}, + url = {https://github.com/galaxyproteomics/tools-galaxyp} + } + </citation> + </citations> </tool>