Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
view relabel_fasta.py @ 26:f0917c340f13 draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 34034189622f4cf14edd12a4de43739c37b50730-dirty
author | pjbriggs |
---|---|
date | Thu, 30 Aug 2018 08:41:11 -0400 |
parents | fe354f5dd0ee |
children |
line wrap: on
line source
#!/usr/bin/env python

DESCRIPTION = """Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
<PREFIX>3 ... (<PREFIX> is provided by the user via the command
line).

Can be used to label OTUs as OTU_1, OTU_2 etc.

This reimplements the functionality of the fasta_number.py utility
from https://drive5.com/python/fasta_number_py.html

"""

import argparse


def relabel_fasta(fp, prefix, include_size=False):
    """
    Relabel sequence records in a FASTA file

    Each header line (one starting with '>') is replaced with
    '><prefix><N>', where N counts header lines starting from 1.
    Blank lines are dropped and every other line is passed
    through unchanged (minus its line ending).

    Arguments:
      fp (File): file-like object (or any iterable of lines)
        to read the input FASTA data from
      prefix (str): prefix to use in new labels
      include_size (bool): if True then copy the 'size=...'
        field of each input label into the new label, giving
        '><prefix><N>;size=...' (default is not to copy the
        size)

    Yields:
      updated lines from the input FASTA, without trailing
      newlines.

    Raises:
      ValueError: if 'include_size' is set and a label has no
        ';'-delimited field starting with 'size='.

    """
    nlabel = 0
    for line in fp:
        # Strip the line ending; '\r' is included so that CRLF input
        # is handled even when the file was not opened with newline
        # translation
        line = line.rstrip('\r\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo to the output as-is
            yield line
            continue
        # Start of a new sequence record
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix, nlabel)
            continue
        # Locate the first 'size=...' field in the original label
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")), None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        yield ">%s%d;%s" % (prefix, nlabel, size)


def main(argv=None):
    """
    Command line entry point: relabel a FASTA file to stdout

    Arguments:
      argv (list): command line arguments to parse; if None
        then argparse falls back to 'sys.argv[1:]'

    """
    # Set up command line parser
    p = argparse.ArgumentParser(description=DESCRIPTION)
    p.add_argument("--needsize",
                   action="store_true",
                   help="include the size as part of the "
                   "output label ('size=...' must be present "
                   "in the input FASTA labels). "
                   "Output labels "
                   "will be '<PREFIX><NUMBER>;size=<SIZE>'")
    # --nosize is accepted but never read: omitting the size is
    # already the default behaviour
    p.add_argument("--nosize",
                   action="store_true",
                   help="don't include the size as part of "
                   "the output label (this is the default)")
    p.add_argument("fasta",
                   metavar="FASTA",
                   help="input FASTA file")
    p.add_argument("prefix",
                   metavar="PREFIX",
                   help="prefix to use for labels in output")
    # Process command line
    args = p.parse_args(argv)
    # Relabel FASTA
    # Plain 'r' rather than 'rU': the 'U' flag was removed in
    # Python 3.11, and Python 3 text mode already translates newlines
    with open(args.fasta, 'r') as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Single-argument call form works as both the Python 2
            # print statement and the Python 3 print function
            print(line)


if __name__ == "__main__":
    main()