Mercurial > repos > imgteam > 2d_feature_extraction
diff 2d_feature_extraction.py @ 5:5530132d500e draft
planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/2d_feature_extraction/ commit bb2d58ed37d8eb09583b86e3cdd9f5d1b56c42a0
| author | imgteam |
|---|---|
| date | Sun, 04 Jan 2026 20:56:17 +0000 |
| parents | a4bc9dfde846 |
| children | 048545339ced |
line wrap: on
line diff
--- a/2d_feature_extraction.py Wed Dec 17 11:20:49 2025 +0000 +++ b/2d_feature_extraction.py Sun Jan 04 20:56:17 2026 +0000 @@ -1,127 +1,115 @@ -import argparse - -import giatools.io +import giatools import numpy as np import pandas as pd -import skimage.feature +import scipy.ndimage as ndi import skimage.measure -import skimage.morphology -import skimage.segmentation + +# Fail early if an optional backend is not available +giatools.require_backend('omezarr') + + +def surface(labels: np.ndarray, label: int) -> int: + """ + Ad-hoc implementation for computation of the "perimeter" of an object in 3D (that is a surface). + """ + assert labels.ndim == 3 # sanity check + + # Create 3-D structuring element with 4-connectivity + selem = np.zeros((3, 3, 3), bool) + for ijk in np.ndindex(*selem.shape): + if (np.array(ijk) == 1).sum() >= 2: + selem[*ijk] = True # noqa: E999 + assert selem.sum() == 7 # sanity check + + # Compute the area of the surface + cc = (labels == label) + cc_interior = ndi.binary_erosion(cc, selem) + surface = np.logical_xor(cc, cc_interior) + return surface.sum() # number of voxels on the surface of the object + + +def compute_if_dask(obj): + """ + Return the computed object or array if it is a Dask array or deferred computable Dask object. + """ + return obj.compute() if hasattr(obj, 'compute') else obj if __name__ == '__main__': + tool = giatools.ToolBaseplate() + tool.add_input_image('labels') + tool.add_input_image('intensities', required=False) + tool.parser.add_argument('--output', type=str) + tool.parse_args() - parser = argparse.ArgumentParser(description='Extract image features') + # Validate the input image + try: + label_image = tool.args.input_images['labels'] + if any(label_image.shape[label_image.axes.index(axis)] > 1 for axis in label_image.axes if axis not in 'ZYX'): + raise ValueError(f'This tool is not applicable to images with {label_image.original_axes} axes.') + + # Extract the image features + for section in tool.run('ZYX'): # the validation code above guarantees that we will have only a single iteration + df = pd.DataFrame() - # TODO create factory for boilerplate code - features = parser.add_argument_group('compute features') - features.add_argument('--all', dest='all_features', action='store_true') - features.add_argument('--label', dest='add_label', action='store_true') - features.add_argument('--patches', dest='add_roi_patches', action='store_true') - features.add_argument('--max_intensity', dest='max_intensity', action='store_true') - features.add_argument('--mean_intensity', dest='mean_intensity', action='store_true') - features.add_argument('--min_intensity', dest='min_intensity', action='store_true') - features.add_argument('--moments_hu', dest='moments_hu', action='store_true') - features.add_argument('--centroid', dest='centroid', action='store_true') - features.add_argument('--bbox', dest='bbox', action='store_true') - features.add_argument('--area', dest='area', action='store_true') - features.add_argument('--filled_area', dest='filled_area', action='store_true') - features.add_argument('--convex_area', dest='convex_area', action='store_true') - features.add_argument('--perimeter', dest='perimeter', action='store_true') - features.add_argument('--extent', dest='extent', action='store_true') - features.add_argument('--eccentricity', dest='eccentricity', action='store_true') - features.add_argument('--equivalent_diameter', dest='equivalent_diameter', action='store_true') - features.add_argument('--euler_number', dest='euler_number', action='store_true') - features.add_argument('--inertia_tensor_eigvals', dest='inertia_tensor_eigvals', action='store_true') - features.add_argument('--major_axis_length', dest='major_axis_length', action='store_true') - features.add_argument('--minor_axis_length', dest='minor_axis_length', action='store_true') - features.add_argument('--orientation', dest='orientation', action='store_true') - features.add_argument('--solidity', dest='solidity', action='store_true') - features.add_argument('--moments', dest='moments', action='store_true') - features.add_argument('--convexity', dest='convexity', action='store_true') + # Get the labels array and cast to `uint8` if it is `bool` (`skimage.measure.regionprops` refuses `bool` typed arrays) + labels_section_data = section['labels'].data.squeeze() + if np.issubdtype(labels_section_data.dtype, bool): + print('Convert labels from bool to uint8') + labels_section_data = labels_section_data.astype(np.uint8) - parser.add_argument('--label_file_binary', dest='label_file_binary', action='store_true') - - parser.add_argument('--raw', dest='raw_file', type=argparse.FileType('r'), - help='Original input file', required=False) - parser.add_argument('label_file', type=argparse.FileType('r'), - help='Label input file') - parser.add_argument('output_file', type=argparse.FileType('w'), - help='Tabular output file') - args = parser.parse_args() - - label_file_binary = args.label_file_binary - label_file = args.label_file.name - out_file = args.output_file.name - add_patch = args.add_roi_patches - - raw_image = None - if args.raw_file is not None: - raw_image = giatools.io.imread(args.raw_file.name) - - raw_label_image = giatools.io.imread(label_file) - - df = pd.DataFrame() - if label_file_binary: - raw_label_image = skimage.measure.label(raw_label_image) - regions = skimage.measure.regionprops(raw_label_image, intensity_image=raw_image) - - df['it'] = np.arange(len(regions)) + # Some features currently cannot be computed from Dask arrays + if any( + feature_name in tool.args.params['features'] for feature_name in ( + 'inertia_tensor_eigvals', + 'axis_major_length', + 'axis_minor_length', + 'eccentricity', + 'orientation', + 'moments_hu', + ) + ): + labels_section_data = compute_if_dask(labels_section_data) - if add_patch: - df['image'] = df['it'].map(lambda ait: regions[ait].image.astype(np.float).tolist()) - df['intensity_image'] = df['it'].map(lambda ait: regions[ait].intensity_image.astype(np.float).tolist()) - - # TODO no matrix features, but split in own rows? - if args.add_label or args.all_features: - df['label'] = df['it'].map(lambda ait: regions[ait].label) + # Compute the image features + if 'intensities' in tool.args.input_images: + regions = skimage.measure.regionprops(labels_section_data, intensity_image=section['intensities'].data.squeeze()) + else: + regions = skimage.measure.regionprops(labels_section_data, intensity_image=None) + df['it'] = np.arange(len(regions)) + for feature_name in tool.args.params['features']: - if raw_image is not None: - if args.max_intensity or args.all_features: - df['max_intensity'] = df['it'].map(lambda ait: regions[ait].max_intensity) - if args.mean_intensity or args.all_features: - df['mean_intensity'] = df['it'].map(lambda ait: regions[ait].mean_intensity) - if args.min_intensity or args.all_features: - df['min_intensity'] = df['it'].map(lambda ait: regions[ait].min_intensity) - if args.moments_hu or args.all_features: - df['moments_hu'] = df['it'].map(lambda ait: regions[ait].moments_hu) + # Add the object label + if feature_name == 'label': + df['label'] = df['it'].map(lambda ait: regions[ait].label) + + # Add the object perimeter/surface + elif feature_name == 'perimeter' and labels_section_data.ndim == 3: + df['perimeter'] = df['it'].map( + lambda ait: surface(labels_section_data, regions[ait].label), # `skimage.measure.regionprops` cannot compute perimeters for 3-D data + ) - if args.centroid or args.all_features: - df['centroid'] = df['it'].map(lambda ait: regions[ait].centroid) - if args.bbox or args.all_features: - df['bbox'] = df['it'].map(lambda ait: regions[ait].bbox) - if args.area or args.all_features: - df['area'] = df['it'].map(lambda ait: regions[ait].area) - if args.filled_area or args.all_features: - df['filled_area'] = df['it'].map(lambda ait: regions[ait].filled_area) - if args.convex_area or args.all_features: - df['convex_area'] = df['it'].map(lambda ait: regions[ait].convex_area) - if args.perimeter or args.all_features: - df['perimeter'] = df['it'].map(lambda ait: regions[ait].perimeter) - if args.extent or args.all_features: - df['extent'] = df['it'].map(lambda ait: regions[ait].extent) - if args.eccentricity or args.all_features: - df['eccentricity'] = df['it'].map(lambda ait: regions[ait].eccentricity) - if args.equivalent_diameter or args.all_features: - df['equivalent_diameter'] = df['it'].map(lambda ait: regions[ait].equivalent_diameter) - if args.euler_number or args.all_features: - df['euler_number'] = df['it'].map(lambda ait: regions[ait].euler_number) - if args.inertia_tensor_eigvals or args.all_features: - df['inertia_tensor_eigvals'] = df['it'].map(lambda ait: regions[ait].inertia_tensor_eigvals) - if args.major_axis_length or args.all_features: - df['major_axis_length'] = df['it'].map(lambda ait: regions[ait].major_axis_length) - if args.minor_axis_length or args.all_features: - df['minor_axis_length'] = df['it'].map(lambda ait: regions[ait].minor_axis_length) - if args.orientation or args.all_features: - df['orientation'] = df['it'].map(lambda ait: regions[ait].orientation) - if args.solidity or args.all_features: - df['solidity'] = df['it'].map(lambda ait: regions[ait].solidity) - if args.moments or args.all_features: - df['moments'] = df['it'].map(lambda ait: regions[ait].moments) - if args.convexity or args.all_features: - perimeter = df['it'].map(lambda ait: regions[ait].perimeter) - area = df['it'].map(lambda ait: regions[ait].area) - df['convexity'] = area / (perimeter * perimeter) + # Skip features that are not available when processing 3-D images + elif feature_name in ('eccentricity', 'moments_hu', 'orientation') and labels_section_data.ndim == 3: + print(f'Skip feature that is not available for 3-D images: "{feature_name}"') + + # Add another feature from `regions` that was computed via `skimage.measure.regionprops` + else: + try: + df[feature_name] = df['it'].map(lambda ait: getattr(regions[ait], feature_name)) + except TypeError: + raise ValueError(f'Unknown feature: "{feature_name}"') - del df['it'] - df.to_csv(out_file, sep='\t', lineterminator='\n', index=False) + # Resolve any remaining Dask objects to the actual values (e.g., when processing Zarrs) + df = df.map(compute_if_dask) + + # Convert lists/tuples/arrays to lists of plain Python numbers (e.g., float instead of np.float64) + df = df.map( + lambda obj: np.asarray(obj).tolist() if type(obj) in (list, tuple, np.ndarray) else obj, + ) + + del df['it'] + df.to_csv(tool.args.raw_args.output, sep='\t', lineterminator='\n', index=False) + + except ValueError as err: + exit(err.args[0])
