# HG changeset patch # User Jim Johnson # Date 1390495950 21600 # Node ID 30975b3ff0dc6159d1764af45c2fbacc04bd1aaa # Parent 856033fb26e83c5ad65f457b09544f8048608f32 Allow user to add annotation columns from reference to found input entries diff -r 856033fb26e8 -r 30975b3ff0dc find_in_reference.py --- a/find_in_reference.py Fri Jan 17 14:50:53 2014 -0600 +++ b/find_in_reference.py Thu Jan 23 10:52:30 2014 -0600 @@ -42,6 +42,9 @@ parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) + parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) + parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) + parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) (options, args) = parser.parse_args() # Input files @@ -85,8 +88,16 @@ refcol = -1 if options.reference_column and options.reference_column > 0: refcol = int(options.reference_column)-1 + if options.annotation_columns: + annotate = True + annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(',')] + else: + annotate = False refFile = None + num_found = 0 + num_novel = 0 for ln,line in enumerate(inputFile): + annotations = [] try: found = False search_string = line.split('\t')[incol].rstrip('\r\n') @@ -96,22 +107,31 @@ print >> sys.stderr, "search: %s" % (search_string) refFile = open(options.reference,'r') for tn,fline in enumerate(refFile): - target_string = fline.split('\t')[refcol] + fields = fline.split('\t') + target_string =fields[refcol] if options.ignore_case: target_string = target_string.upper() if options.debug: print >> sys.stderr, "in: %s %s %s" % (search_string,search_string in target_string,target_string) if search_string in target_string: found = True - break + if annotate: + annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) + annotations.append(annotation) + else: + break if found: + num_found += 1 + if annotate: + line = '%s\t%s\n' % (line.rstrip('\r\n'),options.annotation_separator.join(annotations)) if options.keep == True: if outFile: - outFile.write(line) + outFile.write(line) else: if filteredFile: filteredFile.write(line) else: + num_novel += 1 if options.keep == True: if filteredFile: filteredFile.write(line) @@ -123,6 +143,7 @@ finally: if refFile: refFile.close() + print >> sys.stdout, "found: %d novel: %d" % (num_found,num_novel) if __name__ == "__main__" : __main__() diff -r 856033fb26e8 -r 30975b3ff0dc find_in_reference.xml --- a/find_in_reference.xml Fri Jan 17 14:50:53 2014 -0600 +++ b/find_in_reference.xml Thu Jan 23 10:52:30 2014 -0600 @@ -13,6 +13,15 @@ #end if #if 'found' in $outputs.__str__: --filtered "$found" + #if $annotate.from_ref == 'yes' and str($annotate.annotation_columns) != 'None': + --annotation_columns $annotate.annotation_columns + #if $annotate.annotation_separator != '': + --annotation_separator '$annotate.annotation_separator' + #end if + #if $annotate.annotation_col_sep != '': + --annotation_col_sep '$annotate.annotation_col_sep' + #end if + #end if #end if @@ -38,6 +47,21 @@ + + + + + + + + + + + +