changeset 1:4afa63644ac3 draft default tip

Uploaded
author saskia-hiltemann
date Mon, 09 Nov 2015 09:50:15 -0500
parents a8b089f5a429
children
files OTUtable_addblast.py OTUtable_addblast.xml r_wrapper.sh test-data/2.tabular test-data/XY_Plot_1_out.pdf tool_dependencies.xml tool_dependencies.xml~ xy_example.jpg xy_plot.xml
diffstat 9 files changed, 93 insertions(+), 380 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OTUtable_addblast.py	Mon Nov 09 09:50:15 2015 -0500
@@ -0,0 +1,71 @@
+import requests
+import time
+import sys
+
+baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi"
+
+OTUfile=sys.argv[1]    # argv[0] is the script path; the tool wrapper passes otufile, blastfile, oturepfile in that order
+BLASTfile=sys.argv[2]  # local BLAST output; one line is consumed per OTU table data row
+fastafile=sys.argv[3]  # OTU representative sequences (fasta) submitted to NCBI BLAST
+
+    
+def make_url(seq):  # Build the NCBI BLAST submission (CMD=Put) URL for one query sequence; seq is lower-cased, not URL-escaped
+    return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()
+
+def make_RIDlink(RID):  # Return an HTML anchor (opens in a new tab) pointing at the BLAST results page (CMD=Get) for request id RID
+    return "<a target=\"_blank\" href=\""+baseurl+"?CMD=Get&RID="+RID+"\">view results</a>"
+
+def make_rerun_link(seq):  # Return an HTML anchor that submits seq (lower-cased) as a new BLAST query. NOTE(review): parameters differ from make_url (no PERC_IDENT, only FILTER=L, EXCLUDE_SEQ_UNCULT=true) - confirm intended
+    return "<a target=\"_blank\" href=\""+baseurl+"?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()+"\">resubmit query</a>"
+    
+
+    
+### for each fasta sequence create blast search
+sequences = [line.rstrip('\n').replace('-','') for line in open(fastafile) if '>' not in line]  # one entry per line without '>' (headers skipped); '-' characters removed; every such line is treated as a whole sequence
+urls = [make_url(seq) for seq in sequences]  # one submission URL per sequence, same order as sequences
+
+RIDs = []  # request ids returned by the server; RIDs[i] belongs to sequences[i]
+for url in urls:
+    r=requests.get(url)    # submit the search; the response body is parsed for the request id below
+    RID = r.text[r.text.find("RID"):r.text.find("RTOE")]  # slice from the first "RID" up to the first "RTOE" in the response text
+    RID = RID[6:-3].lstrip().rstrip()  # drop the first 6 and last 3 characters, then trim whitespace. NOTE(review): offsets presumably match an "RID = <id>" response layout - confirm against the current NCBI response
+    RIDs.append(RID)
+    print "Submitted request, RID: "+ RID
+    time.sleep(3) # be nice to the server
+
+
+
+### Get top hits from local BLAST results file, add to OTUtable file
+blastf = open(BLASTfile, "r")
+otuf = open(OTUfile, "r")
+outfile = open("newtable.tsv","w+")  # intermediate table; re-read by the link-adding step that follows
+
+linenum=0    # 0 identifies the header row of the OTU table
+for line in otuf:
+   if linenum == 0:
+       outfile.write( line.rstrip()+"\tBLAST Top Hit\n" )  # header row: append the new column title
+   else: 
+       outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" )  # data row: append the last tab-separated field of the next BLAST line (files are read in lockstep; an exhausted BLAST file yields an empty field)
+   linenum +=1
+
+blastf.close()
+otuf.close()
+outfile.close()
+
+
+
+### Add RID link and rerun link to table
+otuf = open("newtable.tsv","r")  # intermediate table produced by the top-hit step
+outfile = open("newtable2.tsv","w+")  # final table; the tool XML collects this file via from_work_dir
+
+print len(sequences)
+print len (RIDs)
+linenum=-1    # -1 marks the header row so that data rows index RIDs/sequences starting at 0
+for line in otuf:
+   if linenum == -1:
+       outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" )  # header row: two new column titles
+   else: 
+       outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" )  # data row i gets the links for sequence i
+   linenum +=1
+otuf.close()  # close explicitly, matching the previous step, instead of relying on interpreter exit
+outfile.close()  # ensures newtable2.tsv is fully flushed to disk
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OTUtable_addblast.xml	Mon Nov 09 09:50:15 2015 -0500
@@ -0,0 +1,22 @@
+<tool id="mothur-otu-addblast" name="Mothur OTUtable addBLAST" version="1">
+
+	<description>for EMC workflow</description> 
+
+	<command interpreter="python">
+		OTUtable_addblast.py $otufile $blastfile $oturepfile
+	</command>
+
+	<inputs>
+            <param name="otufile" type="data" label="Select OTU table (taxonomy output from MOTHUR Classify.otu)" help="choose input file from history"/>
+            <param name="blastfile" type="data" label="Select BLAST output" help="choose input file from history"/>
+            <param name="oturepfile" type="data" label="Select OTU representatives fasta file" help="choose input file from history"/>            
+	</inputs>
+	
+	<outputs>
+		<data format="txt" name="outputfile" from_work_dir="newtable2.tsv" label="OTU table with BLAST on ${on_string}"/>
+	</outputs>
+
+	<help>
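+		Appends the top hit from a local BLAST result file to a Mothur OTU table, submits each OTU representative sequence to NCBI BLAST, and adds "view results" and "resubmit query" links for every OTU.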
+	</help>
+</tool>
\ No newline at end of file
--- a/r_wrapper.sh	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#!/bin/sh
-
-### Run R providing the R script in $1 as standard input and passing 
-### the remaining arguments on the command line
-
-# Function that writes a message to stderr and exits
-function fail
-{
-    echo "$@" >&2
-    exit 1
-}
-
-# Ensure R executable is found
-which R > /dev/null || fail "'R' is required by this tool but was not found on path" 
-
-# Extract first argument
-infile=$1; shift
-
-# Ensure the file exists
-test -f $infile || fail "R input file '$infile' does not exist"
-
-# Invoke R passing file named by first argument to stdin
-R --vanilla --slave $* < $infile
--- a/test-data/2.tabular	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-1	68	4.1
-2	71	4.6
-3	62	3.8
-4	75	4.4
-5	58	3.2
-6	60	3.1
-7	67	3.8
-8	68	4.1
-9	71	4.3
-10	69	3.7
--- a/test-data/XY_Plot_1_out.pdf	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-%PDF-1.4
-%ρ\r
-1 0 obj
-<<
-/CreationDate (D:20110308102307)
-/ModDate (D:20110308102307)
-/Title (R Graphics Output)
-/Producer (R 2.11.0)
-/Creator (R)
->>
-endobj
-2 0 obj
-<<
-/Type /Catalog
-/Pages 3 0 R
->>
-endobj
-5 0 obj
-<<
-/Type /Page
-/Parent 3 0 R
-/Contents 6 0 R
-/Resources 4 0 R
->>
-endobj
-6 0 obj
-<<
-/Length 7 0 R
->>
-stream
-1 J 1 j q
-Q q
-0.000 0.000 0.000 RG
-0.75 w
-[] 0 d
-1 J
-1 j
-10.00 M
-117.07 73.44 m 458.40 73.44 l S
-117.07 73.44 m 117.07 66.24 l S
-202.40 73.44 m 202.40 66.24 l S
-287.73 73.44 m 287.73 66.24 l S
-373.07 73.44 m 373.07 66.24 l S
-458.40 73.44 m 458.40 66.24 l S
-BT
-0.000 0.000 0.000 rg
-/F2 1 Tf 12.00 0.00 -0.00 12.00 113.73 47.52 Tm (2) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 199.06 47.52 Tm (4) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 284.40 47.52 Tm (6) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 369.73 47.52 Tm (8) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 451.73 47.52 Tm (10) Tj
-ET
-59.04 127.67 m 59.04 431.20 l S
-59.04 127.67 m 51.84 127.67 l S
-59.04 228.85 m 51.84 228.85 l S
-59.04 330.02 m 51.84 330.02 l S
-59.04 431.20 m 51.84 431.20 l S
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 121.00 Tm (60) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 222.18 Tm (65) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 323.35 Tm (70) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 424.53 Tm (75) Tj
-ET
-59.04 73.44 m
-473.76 73.44 l
-473.76 444.96 l
-59.04 444.96 l
-59.04 73.44 l
-S
-Q q
-BT
-0.000 0.000 0.000 rg
-/F3 1 Tf 14.00 0.00 -0.00 14.00 211.16 469.45 Tm (Example XY Plot) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 240.73 18.72 Tm (Column 1) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 233.53 Tm (Column 2) Tj
-ET
-Q q 59.04 73.44 414.72 371.52 re W n
-1.000 0.000 0.000 RG
-0.75 w
-[ 3.00 5.00] 0 d
-1 J
-1 j
-10.00 M
-74.40 289.55 m
-117.07 350.26 l
-159.73 168.14 l
-202.40 431.20 l
-245.07 87.20 l
-287.73 127.67 l
-330.40 269.32 l
-373.07 289.55 l
-415.73 350.26 l
-458.40 309.79 l
-S
-Q
-endstream
-endobj
-7 0 obj
-1565
-endobj
-3 0 obj
-<<
-/Type /Pages
-/Kids [
-5 0 R
-]
-/Count 1
-/MediaBox [0 0 504 504]
->>
-endobj
-4 0 obj
-<<
-/ProcSet [/PDF /Text]
-/Font <</F2 9 0 R /F3 10 0 R >>
-/ExtGState << >>
->>
-endobj
-8 0 obj
-<<
-/Type /Encoding
-/BaseEncoding /WinAnsiEncoding
-/Differences [ 45/minus 96/quoteleft
-144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
-/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
->>
-endobj
-9 0 obj <<
-/Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica
-/Encoding 8 0 R
->> endobj
-10 0 obj <<
-/Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding 8 0 R
->> endobj
-xref
-0 11
-0000000000 65535 f 
-0000000021 00000 n 
-0000000164 00000 n 
-0000001931 00000 n 
-0000002014 00000 n 
-0000000213 00000 n 
-0000000293 00000 n 
-0000001911 00000 n 
-0000002106 00000 n 
-0000002363 00000 n 
-0000002459 00000 n 
-trailer
-<<
-/Size 11
-/Info 1 0 R
-/Root 2 0 R
->>
-startxref
-2561
-%%EOF
--- a/tool_dependencies.xml	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="R" version="2.11.0">
-        <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="cairo" version="1.14.2">
-        <repository changeset_revision="931dda69e1b0" name="package_cairo_1_14_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
--- a/tool_dependencies.xml~	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="R" version="2.11.0">
-        <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
Binary file xy_example.jpg has changed
--- a/xy_plot.xml	Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-<tool id="XY_Plot_1" name="Plotting tool" version="1.0.1">
-  <description>for multiple series and graph types</description>
-  <requirements>
-    <requirement type="package" version="2.11.0">R</requirement>
-    <requirement type="package" version="1.14.2">cairo</requirement>
-  </requirements>
-  <command interpreter="bash">r_wrapper.sh $script_file</command>
-
-  <inputs>
-    <param name="main" type="text" value="" label="Plot Title"/>
-    <param name="xlab" type="text" value="" label="Label for x axis"/>
-    <param name="ylab" type="text" value="" label="Label for y axis"/>
-    <repeat name="series" title="Series">
-      <param name="input" type="data" format="tabular" label="Dataset"/>
-      <param name="xcol" type="data_column" data_ref="input" label="Column for x axis"/>
-      <param name="ycol" type="data_column" data_ref="input" label="Column for y axis"/>
-      <conditional name="series_type">
-        <param name="type" type="select" label="Series Type">
-          <option value="line" selected="true">Line</option>
-          <option value="points">Points</option>
-        </param>
-        <when value="line">
-          <param name="lty" type="select" label="Line Type">
-            <option value="1">Solid</option>
-            <option value="2">Dashed</option>
-            <option value="3">Dotted</option>
-          </param>
-          <param name="col" type="select" label="Line Color">
-            <option value="1">Black</option>
-            <option value="2">Red</option>
-            <option value="3">Green</option>
-            <option value="4">Blue</option>
-            <option value="5">Cyan</option>
-            <option value="6">Magenta</option>
-            <option value="7">Yellow</option>
-            <option value="8">Gray</option>
-          </param>
-          <param name="lwd" type="float" label="Line Width" value="1.0"/>
-        </when>
-        <when value="points">
-          <param name="pch" type="select" label="Point Type">
-            <option value="1">Circle (hollow)</option>
-            <option value="2">Triangle (hollow)</option>
-            <option value="3">Cross</option>
-            <option value="4">Diamond (hollow)</option>
-            <option value="15">Square (filled)</option>
-            <option value="16">Circle (filled)</option>
-            <option value="17">Triangle (filled)</option>  
-          </param>
-          <param name="col" type="select" label="Point Color">
-            <option value="1">Black</option>
-            <option value="2">Red</option>
-            <option value="3">Green</option>
-            <option value="4">Blue</option>
-            <option value="5">Cyan</option>
-            <option value="6">Magenta</option>
-            <option value="7">Yellow</option>
-            <option value="8">Gray</option>
-          </param>
-          <param name="cex" type="float" label="Point Scale" value="1.0"/>
-        </when>
-      </conditional>
-    </repeat>       
-  </inputs>
-
-  <configfiles>
-    <configfile name="script_file">
-      ## Setup R error handling to go to stderr
-      options( show.error.messages=F, 
-               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-      ## Determine range of all series in the plot
-      xrange = c( NULL, NULL )
-      yrange = c( NULL, NULL )
-      #for $i, $s in enumerate( $series )
-        s${i} = read.table( "${s.input.file_name}" )
-        x${i} = s${i}[,${s.xcol}]
-        y${i} = s${i}[,${s.ycol}]
-        xrange = range( x${i}, xrange )
-        yrange = range( y${i}, yrange )
-      #end for
-      ## Open output PDF file
-      png( "${out_file1}" )
-      ## Dummy plot for axis / labels
-      plot( NULL, type="n", xlim=xrange, ylim=yrange, main="${main}", xlab="${xlab}", ylab="${ylab}" )
-      ## Plot each series
-      #for $i, $s in enumerate( $series )
-        #if $s.series_type['type'] == "line"
-          lines( x${i}, y${i}, lty=${s.series_type.lty}, lwd=${s.series_type.lwd}, col=${s.series_type.col} )
-        #elif $s.series_type.type == "points"
-          points( x${i}, y${i}, pch=${s.series_type.pch}, cex=${s.series_type.cex}, col=${s.series_type.col} )
-        #end if
-      #end for    
-      ## Close the PDF file
-      devname = dev.off()
-    </configfile>
-  </configfiles>
-
-  <outputs>
-    <data format="png" name="out_file1" />
-  </outputs>
-
-    <tests>
-        <test>
-            <param name="main" value="Example XY Plot"/>
-            <param name="xlab" value="Column 1"/>
-            <param name="ylab" value="Column 2"/>
-            <param name="input" value="2.tabular" ftype="tabular"/>
-            <param name="xcol" value="1"/>
-            <param name="ycol" value="2"/>
-            <param name="type" value="line"/>
-            <param name="lty" value="2"/>
-            <param name="col" value="2"/>
-            <param name="lwd" value="1.0"/>
-            <output name="out_file1" file="XY_Plot_1_out.pdf"/>
-        </test>
-    </tests>
-<help>
-.. class:: infomark
-
-This tool allows you to plot values contained in columns of a dataset against each other and also allows you to have different series corresponding to the same or different datasets in one plot.
-
------
-
-.. class:: warningmark
-
-This tool throws an error if the columns selected for plotting are absent or are not numeric and also if the lengths of these columns differ.
-
------
-
-**Example**
-
-Input file::
-
-    1   68  4.1
-    2   71  4.6
-    3   62  3.8
-    4   75  4.4
-    5   58  3.2
-    6   60  3.1
-    7   67  3.8
-    8   68  4.1
-    9   71  4.3
-    10  69  3.7 
-
-Create a two series XY plot on the above data:
-
-- Series 1: Red Dashed-Line plot between columns 1 and 2
-- Series 2: Blue Circular-Point plot between columns 3 and 2 
-
-.. image:: xy_example.jpg
-</help>
-</tool>