changeset 2:30975b3ff0dc

Allow user to add annotation columns from reference to found input entries
author Jim Johnson <jj@umn.edu>
date Thu, 23 Jan 2014 10:52:30 -0600
parents 856033fb26e8
children fe044d480b3a
files find_in_reference.py find_in_reference.xml
diffstat 2 files changed, 48 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/find_in_reference.py	Fri Jan 17 14:50:53 2014 -0600
+++ b/find_in_reference.py	Thu Jan 23 10:52:30 2014 -0600
@@ -42,6 +42,9 @@
   parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)')
   parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' )
   parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' )
+  parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' )
+  parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' )
+  parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' )
   parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout'  )
   (options, args) = parser.parse_args()
   # Input files
@@ -85,8 +88,16 @@
   refcol = -1
   if options.reference_column and options.reference_column > 0:
     refcol = int(options.reference_column)-1
+  if options.annotation_columns:
+    annotate = True
+    annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(',')]
+  else:
+    annotate = False
   refFile = None
+  num_found = 0
+  num_novel = 0
   for ln,line in enumerate(inputFile):
+    annotations = []
     try:
       found = False
       search_string = line.split('\t')[incol].rstrip('\r\n')
@@ -96,22 +107,31 @@
         print >> sys.stderr, "search: %s" % (search_string)
       refFile = open(options.reference,'r')
       for tn,fline in enumerate(refFile):
-        target_string = fline.split('\t')[refcol]
+        fields = fline.split('\t')
+        target_string =fields[refcol]
         if options.ignore_case:
           target_string = target_string.upper()
         if options.debug: 
           print >> sys.stderr, "in: %s %s %s" % (search_string,search_string in target_string,target_string)
         if search_string in target_string:
           found = True
-          break
+          if annotate:
+            annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns])
+            annotations.append(annotation)  
+          else:
+            break
       if found:
+        num_found += 1
+        if annotate:
+          line = '%s\t%s\n' % (line.rstrip('\r\n'),options.annotation_separator.join(annotations))
         if options.keep == True:
           if outFile:
-              outFile.write(line)
+            outFile.write(line)
         else:
           if filteredFile:
             filteredFile.write(line)
       else:
+        num_novel += 1
         if options.keep == True:
           if filteredFile:
             filteredFile.write(line)
@@ -123,6 +143,7 @@
     finally:
       if refFile:
         refFile.close()
+  print >> sys.stdout, "found: %d novel: %d" % (num_found,num_novel)
 
 if __name__ == "__main__" : __main__()
 
--- a/find_in_reference.xml	Fri Jan 17 14:50:53 2014 -0600
+++ b/find_in_reference.xml	Thu Jan 23 10:52:30 2014 -0600
@@ -13,6 +13,15 @@
   #end if
   #if 'found' in $outputs.__str__:
     --filtered "$found"
+    #if $annotate.from_ref == 'yes' and str($annotate.annotation_columns) != 'None':
+      --annotation_columns $annotate.annotation_columns
+      #if $annotate.annotation_separator != '':
+        --annotation_separator '$annotate.annotation_separator'
+      #end if
+      #if $annotate.annotation_col_sep != '':
+        --annotation_col_sep '$annotate.annotation_col_sep'
+      #end if
+    #end if
   #end if
   </command>
   <inputs>
@@ -38,6 +47,21 @@
       <option value="novel" selected="true">lines with no match in reference</option>
       <option value="found">lines with match in reference</option>
     </param>
+    <conditional name="annotate">
+      <param name="from_ref" type="select" label="Annotate found input entries with columns from reference">
+        <option value="no" selected="true">No</option>
+        <option value="yes">Yes</option>
+      </param>
+      <when value="no"/>
+      <when value="yes">
+        <param name="annotation_columns" type="data_column" data_ref="reference" multiple="true" label="columns from reference to append to found input lines" 
+           help=""/>
+        <param name="annotation_separator" type="text" value="" optional="true" label="separator to place between annotations from different reference lines"
+           help="defaults to ;"/>
+        <param name="annotation_col_sep" type="text" value="" optional="true" label="separator to place between annotation columns from the same reference line"
+           help="defaults to ,"/>
+      </when>
+    </conditional>
   </inputs>
   <stdio>
     <exit_code range="1:" level="fatal" description="Error" />