Mercurial > repos > jjohnson > find_in_reference
changeset 2:30975b3ff0dc
Allow user to add annotation columns from reference to found input entries
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 23 Jan 2014 10:52:30 -0600 |
parents | 856033fb26e8 |
children | fe044d480b3a |
files | find_in_reference.py find_in_reference.xml |
diffstat | 2 files changed, 48 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/find_in_reference.py Fri Jan 17 14:50:53 2014 -0600 +++ b/find_in_reference.py Thu Jan 23 10:52:30 2014 -0600 @@ -42,6 +42,9 @@ parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) + parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) + parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) + parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) (options, args) = parser.parse_args() # Input files @@ -85,8 +88,16 @@ refcol = -1 if options.reference_column and options.reference_column > 0: refcol = int(options.reference_column)-1 + if options.annotation_columns: + annotate = True + annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(',')] + else: + annotate = False refFile = None + num_found = 0 + num_novel = 0 for ln,line in enumerate(inputFile): + annotations = [] try: found = False search_string = line.split('\t')[incol].rstrip('\r\n') @@ -96,22 +107,31 @@ print >> sys.stderr, "search: %s" % (search_string) refFile = open(options.reference,'r') for tn,fline in enumerate(refFile): - target_string = fline.split('\t')[refcol] + fields = fline.split('\t') + target_string =fields[refcol] if options.ignore_case: target_string = target_string.upper() if options.debug: print >> sys.stderr, "in: %s %s %s" % (search_string,search_string in target_string,target_string) if search_string in target_string: found = True - break + if annotate: + annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) + annotations.append(annotation) + else: + break if found: + num_found += 1 + if annotate: + line = '%s\t%s\n' % (line.rstrip('\r\n'),options.annotation_separator.join(annotations)) if options.keep == True: if outFile: - outFile.write(line) + outFile.write(line) else: if filteredFile: filteredFile.write(line) else: + num_novel += 1 if options.keep == True: if filteredFile: filteredFile.write(line) @@ -123,6 +143,7 @@ finally: if refFile: refFile.close() + print >> sys.stdout, "found: %d novel: %d" % (num_found,num_novel) if __name__ == "__main__" : __main__()
--- a/find_in_reference.xml Fri Jan 17 14:50:53 2014 -0600 +++ b/find_in_reference.xml Thu Jan 23 10:52:30 2014 -0600 @@ -13,6 +13,15 @@ #end if #if 'found' in $outputs.__str__: --filtered "$found" + #if $annotate.from_ref == 'yes' and str($annotate.annotation_columns) != 'None': + --annotation_columns $annotate.annotation_columns + #if $annotate.annotation_separator != '': + --annotation_separator '$annotate.annotation_separator' + #end if + #if $annotate.annotation_col_sep != '': + --annotation_col_sep '$annotate.annotation_col_sep' + #end if + #end if #end if </command> <inputs> @@ -38,6 +47,21 @@ <option value="novel" selected="true">lines with no match in reference</option> <option value="found">lines with match in reference</option> </param> + <conditional name="annotate"> + <param name="from_ref" type="select" label="Annotate found input entries with columns from reference"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="annotation_columns" type="data_column" data_ref="reference" multiple="true" label="columns from reference to append to found input lines" + help=""/> + <param name="annotation_separator" type="text" value="" optional="true" label="separator to place between annotations from different reference lines" + help="defaults to ;"/> + <param name="annotation_col_sep" type="text" value="" optional="true" label="separator to place between annotation columns from the same reference line" + help="defaults to ,"/> + </when> + </conditional> </inputs> <stdio> <exit_code range="1:" level="fatal" description="Error" />