view generate_manifest.py @ 8:012191b79fda draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 6413a461059c4a421a7812a08f244c224cde8ee2
author galaxyp
date Fri, 17 Oct 2025 16:22:03 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python3

#
# Generates a FragPipe Manifest file.
#

import argparse
import csv

# Each scan file gets three manifest columns after its name:
# Experiment ('exp'), Bioreplicate ('bio'), and Data type ('type').
# These keys are also the prefixes of the per-column command-line options
# (e.g. --exp-consec, --bio-assign-all, --type-col).
column_types = ('exp', 'bio', 'type')
# Manifest path, relative to the working directory; main() appends to it.
output_filename = 'fp.manifest'


# Add column values to a list of rows for each scan file.
def add_column(column_type, args, rows):
    """Append one value for the given column to every row, in place.

    ``column_type`` is the option prefix (e.g. ``'exp'``); the value source
    is picked from the matching attributes on ``args``, in this priority:

    1. ``<column_type>_consec``     -> number the scan files 1..n
    2. ``<column_type>_assign_all`` -> the same value for every scan file
    3. ``<column_type>_col``        -> comma-delimited list, one per scan file
    4. none of the above            -> empty string for every scan file

    Raises ValueError when the comma-delimited list does not contain exactly
    one value per entry of ``args.scanfiles``.
    """
    file_count = len(args.scanfiles)

    def option(suffix):
        # Options are looked up lazily, so only the attributes that are
        # actually consulted need to exist on ``args``.
        return getattr(args, f'{column_type}_{suffix}')

    if option('consec'):
        # Consecutive numbering, starting at 1.
        column = range(1, file_count + 1)
    elif option('assign_all'):
        # One shared value repeated for every scan file.
        column = [option('assign_all')] * file_count
    elif option('col'):
        # Explicit per-file values supplied as "a,b,c".
        column = option('col').split(',')
        if len(column) != file_count:
            raise ValueError((f'Incorrect number of values entered for column {column_type}. '
                              'Exactly one value must be entered for each scan file.'))
    else:
        # Nothing requested: leave the column blank.
        column = [''] * file_count

    for position, row in enumerate(rows):
        row.append(column[position])


def main(argv=None):
    """Parse the command line and append one manifest row per scan file.

    For every entry in ``column_types`` three mutually prioritised options
    are registered (``--<col>-consec``, ``--<col>-assign-all``,
    ``--<col>-col``); ``add_column`` decides which one populates the column.
    The positional ``scanfiles`` arguments become the first field of each row.

    Rows are appended as tab-separated lines to ``output_filename``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]`` as before.
    """
    parser = argparse.ArgumentParser()

    # Each column has the same methods for populating
    for column_type in column_types:
        parser.add_argument(f'--{column_type}-consec', action='store_true')
        parser.add_argument(f'--{column_type}-assign-all')
        parser.add_argument(f'--{column_type}-col')

    # Scanfile names, which should be identical to history identifiers
    parser.add_argument('scanfiles', nargs='+')

    args = parser.parse_args(argv)

    # Create and populate data structure for tabular output
    rows = [[scanfile] for scanfile in args.scanfiles]
    for column_type in column_types:
        add_column(column_type, args, rows)

    # Write out manifest file.
    # Use mode=a as the script will be called once for each scan group.
    # newline='' is what the csv module requires of files handed to a writer:
    # without it, platforms that translate '\n' on write would turn the
    # writer's own line terminator into '\r\r\n'.
    with open(output_filename, mode='a', newline='') as outf:
        manifest_writer = csv.writer(outf, delimiter='\t')
        manifest_writer.writerows(rows)


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()