Mercurial > repos > devteam > trimmer
diff trimmer.py @ 0:dec27ea206c3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/trimmer commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author | devteam |
---|---|
date | Mon, 09 Nov 2015 12:00:48 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trimmer.py Mon Nov 09 12:00:48 2015 -0500 @@ -0,0 +1,106 @@ +#!/usr/bin/env python + +import sys +import optparse + +def stop_err( msg ): + sys.stderr.write( msg ) + sys.exit() + +def main(): + usage = """%prog [options] + +options (listed below) default to 'None' if omitted + """ + parser = optparse.OptionParser(usage=usage) + + parser.add_option( + '-a','--ascii', + dest='ascii', + action='store_true', + default = False, + help='Use ascii codes to defined ignored beginnings instead of raw characters') + + parser.add_option( + '-q','--fastq', + dest='fastq', + action='store_true', + default = False, + help='The input data in fastq format. It selected the script skips every even line since they contain sequence ids') + + parser.add_option( + '-i','--ignore', + dest='ignore', + help='A comma separated list on ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "60,42") if option -a is enabled') + + parser.add_option( + '-s','--start', + dest='start', + default = '0', + help='Trim from beginning to here (1-based)') + + parser.add_option( + '-e','--end', + dest='end', + default = '0', + help='Trim from here to the ned (1-based)') + + parser.add_option( + '-f','--file', + dest='input_txt', + default = False, + help='Name of file to be chopped. STDIN is default') + + parser.add_option( + '-c','--column', + dest='col', + default = '0', + help='Column to chop. If 0 = chop the whole line') + + + options, args = parser.parse_args() + invalid_starts = [] + + if options.input_txt: + infile = open ( options.input_txt, 'r') + else: + infile = sys.stdin + + if options.ignore and options.ignore != "None": + invalid_starts = options.ignore.split(',') + + if options.ascii and options.ignore and options.ignore != "None": + for i, item in enumerate( invalid_starts ): + invalid_starts[i] = chr( int( item ) ) + + col = int( options.col ) + + for i, line in enumerate( infile ): + line = line.rstrip( '\r\n' ) + if line: + + if options.fastq and i % 2 == 0: + print line + continue + + + if line[0] not in invalid_starts: + if col == 0: + if int( options.end ) > 0: + line = line[ int( options.start )-1 : int( options.end ) ] + else: + line = line[ int( options.start )-1 : ] + else: + fields = line.split( '\t' ) + if col-1 > len( fields ): + stop_err('Column %d does not exist. Check input parameters\n' % col) + + if int( options.end ) > 0: + fields[col - 1] = fields[col - 1][ int( options.start )-1 : int( options.end ) ] + else: + fields[col - 1] = fields[col - 1][ int( options.start )-1 : ] + line = '\t'.join(fields) + print line + +if __name__ == "__main__": main() +