# HG changeset patch
# User dave
# Date 1374671679 14400
# Node ID 67e0d4c5ae5625475b6cb3cc0fdeac2745f8cd22
# Parent  0f35104b5effedcb39b08caf9784970c38a80976
Uploaded

diff -r 0f35104b5eff -r 67e0d4c5ae56 filtering.py
--- a/filtering.py	Fri Mar 29 07:47:17 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-#!/usr/bin/env python
-# This tool takes a tab-delimited text file as input and creates filters on columns based on certain properties.
-# The tool will skip over invalid lines within the file, informing the user about the number of lines skipped.
-
-from __future__ import division
-import sys, re, os.path
-from galaxy import eggs
-
-# Older py compatibility
-try:
-    set()
-except:
-    from sets import Set as set
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def get_operands( filter_condition ):
-    # Note that the order of all_operators is important
-    items_to_strip = ['+', '-', '**', '*', '//', '/', '%', '<<', '>>', '&', '|', '^', '~', '<=', '<', '>=', '>', '==', '!=', '<>', ' and ', ' or ', ' not ', ' is ', ' is not ', ' in ', ' not in ']
-    for item in items_to_strip:
-        if filter_condition.find( item ) >= 0:
-            filter_condition = filter_condition.replace( item, ' ' )
-    operands = set( filter_condition.split( ' ' ) )
-    return operands
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-in_fname = sys.argv[1]
-out_fname = sys.argv[2]
-cond_text = sys.argv[3]
-try:
-    in_columns = int( sys.argv[4] )
-    assert sys.argv[5] #check to see that the column types variable isn't null
-    in_column_types = sys.argv[5].split( ',' )
-except:
-    stop_err( "Data does not appear to be tabular. This tool can only be used with tab-delimited data." )
-
-# Unescape if input has been escaped
-mapped_str = {
-    '__lt__': '<',
-    '__le__': '<=',
-    '__eq__': '==',
-    '__ne__': '!=',
-    '__gt__': '>',
-    '__ge__': '>=',
-    '__sq__': '\'',
-    '__dq__': '"',
-}
-for key, value in mapped_str.items():
-    cond_text = cond_text.replace( key, value )
-
-# Attempt to determine if the condition includes executable stuff and, if so, exit
-secured = dir()
-operands = get_operands(cond_text)
-for operand in operands:
-    try:
-        check = int( operand )
-    except:
-        if operand in secured:
-            stop_err( "Illegal value '%s' in condition '%s'" % ( operand, cond_text ) )
-
-# Work out which columns are used in the filter (save using 1 based counting)
-used_cols = sorted(set(int(match.group()[1:]) \
-                       for match in re.finditer('c(\d)+', cond_text)))
-largest_col_index = max(used_cols)
-
-# Prepare the column variable names and wrappers for column data types. Only
-# cast columns used in the filter.
-cols, type_casts = [], []
-for col in range( 1, largest_col_index + 1 ):
-    col_name = "c%d" % col
-    cols.append( col_name )
-    col_type = in_column_types[ col - 1 ]
-    if col in used_cols:
-        type_cast = "%s(%s)" % ( col_type, col_name )
-    else:
-        #If we don't use this column, don't cast it.
-        #Otherwise we get errors on things like optional integer columns.
-        type_cast = col_name
-    type_casts.append( type_cast )
-
-col_str = ', '.join( cols ) # 'c1, c2, c3, c4'
-type_cast_str = ', '.join( type_casts ) # 'str(c1), int(c2), int(c3), str(c4)'
-assign = "%s, = line.split( '\\t' )[:%i]" % ( col_str, largest_col_index )
-wrap = "%s = %s" % ( col_str, type_cast_str )
-skipped_lines = 0
-invalid_lines = 0
-first_invalid_line = 0
-invalid_line = None
-lines_kept = 0
-total_lines = 0
-out = open( out_fname, 'wt' )
-
-# Read and filter input file, skipping invalid lines
-code = '''
-for i, line in enumerate( file( in_fname ) ):
-    total_lines += 1
-    line = line.rstrip( '\\r\\n' )
-    if not line or line.startswith( '#' ):
-        skipped_lines += 1
-        continue
-    try:
-        %s
-        %s
-        if %s:
-            lines_kept += 1
-            print >> out, line
-    except:
-        invalid_lines += 1
-        if not invalid_line:
-            first_invalid_line = i + 1
-            invalid_line = line
-''' % ( assign, wrap, cond_text )
-
-valid_filter = True
-try:
-    exec code
-except Exception, e:
-    out.close()
-    if str( e ).startswith( 'invalid syntax' ):
-        valid_filter = False
-        stop_err( 'Filter condition "%s" likely invalid. See tool tips, syntax and examples.' % cond_text )
-    else:
-        stop_err( str( e ) )
-
-if valid_filter:
-    out.close()
-    valid_lines = total_lines - skipped_lines
-    print 'Filtering with %s, ' % cond_text
-    if valid_lines > 0:
-        print 'kept %4.2f%% of %d valid lines (%d total lines).' % ( 100.0*lines_kept/valid_lines, valid_lines, total_lines )
-    else:
-        print 'Possible invalid filter condition "%s" or non-existent column referenced. See tool tips, syntax and examples.' % cond_text
-    if invalid_lines:
-        print 'Skipped %d invalid line(s) starting at line #%d: "%s"' % ( invalid_lines, first_invalid_line, invalid_line )
-    if skipped_lines:
-        print 'Skipped %i comment (starting with #) or blank line(s)' % skipped_lines
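Note on the deleted filtering.py: it never parses the condition itself. It assembles an unpacking statement for the referenced columns ("assign"), a type-cast statement driven by the column-types metadata ("wrap"), and the user-supplied condition, splices them into a read/filter/write loop, and exec's the generated code. The sketch below only illustrates what that generated loop does under assumed inputs: the condition, column types, and file name are hypothetical, and it uses a per-line eval() rather than the tool's single exec::

    # Illustrative sketch, not the tool itself; all inputs below are assumed.
    cond_text = "c1=='chr1' and c3-c2<1000"          # hypothetical filter condition
    kept, invalid_lines, skipped_lines = [], 0, 0
    with open('input.tsv') as handle:                # hypothetical tab-delimited input
        for line in handle:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                skipped_lines += 1                   # blank/comment lines are skipped
                continue
            try:
                c1, c2, c3 = line.split('\t')[:3]          # the generated "assign"
                c1, c2, c3 = str(c1), int(c2), int(c3)     # the generated "wrap" casts
                if eval(cond_text):                        # the user's condition
                    kept.append(line)
            except Exception:
                invalid_lines += 1                   # failed split, cast, or condition
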
diff -r 0f35104b5eff -r 67e0d4c5ae56 filtering.xml
--- a/filtering.xml	Fri Mar 29 07:47:17 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-
-  data on any column using simple expressions
-
-    filtering.py $input $out_file1 "$cond" ${input.metadata.columns} "${input.metadata.column_types}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. class:: warningmark
-
-Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**)
-
-.. class:: infomark
-
-**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings). If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition. The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue".
-
-.. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
-
------
-
-**Syntax**
-
-The filter tool allows you to restrict the dataset using simple conditional statements.
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-- Make sure that multi-character operators contain no white space ( e.g., **<=** is valid while **< =** is not valid )
-- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **c1=='chr1'** )
-- Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** )
-- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** )
-
------
-
-**Example**
-
-- **c1=='chr1'** selects lines in which the first column is chr1
-- **c3-c2<100*c4** selects lines where subtracting column 3 from column 2 is less than the value of column 4 times 100
-- **len(c2.split(',')) < 4** will select lines where the second column has less than four comma separated elements
-- **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1
-- Numbers should not contain commas - **c2<=44,554,350** will not work, but **c2<=44554350** will
-- Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** )
-
-
-
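For reference, the command template in filtering.xml feeds filtering.py five positional arguments: the input dataset, the output file, the condition string, the column count, and the comma-separated column types that drive the generated casts. A hypothetical invocation (file names and metadata values are made up; only the argument order comes from the command template and the sys.argv handling above) might look like this sketch::

    import subprocess

    # Hypothetical call; only the positional order matches the tool's command template.
    subprocess.run([
        'python', 'filtering.py',
        'input.tsv',                      # $input: the tab-delimited dataset
        'output.tsv',                     # $out_file1: where kept lines are written
        "c1=='chr1' and c3-c2<1000",      # $cond: the filter condition
        '6',                              # ${input.metadata.columns}: number of columns
        'str,int,int,str,str,str',        # ${input.metadata.column_types}: per-column types
    ], check=True)

When Galaxy runs the tool, the condition may arrive with comparison operators escaped (e.g. __eq__ for ==), which filtering.py maps back through mapped_str before evaluating anything.
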
diff -r 0f35104b5eff -r 67e0d4c5ae56 readme.txt
--- a/readme.txt	Fri Mar 29 07:47:17 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-This is a different readme file.
\ No newline at end of file
diff -r 0f35104b5eff -r 67e0d4c5ae56 tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Jul 24 09:14:39 2013 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+
+