# HG changeset patch
# User saskia-hiltemann
# Date 1447080615 18000
# Node ID 4afa63644ac3954d7aeaa0076489e82e5c8afe82
# Parent a8b089f5a4290c99facef67f692ce6f065ec0a5a
Uploaded
diff -r a8b089f5a429 -r 4afa63644ac3 OTUtable_addblast.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/OTUtable_addblast.py Mon Nov 09 09:50:15 2015 -0500
@@ -0,0 +1,71 @@
+import requests
+import time
+import sys
+
+baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi"  # BLAST CGI endpoint that every query URL is built on
+
+OTUfile=sys.argv[1]    # OTU table; sys.argv[0] is this script's own path, so the inputs start at index 1
+BLASTfile=sys.argv[2]  # local BLAST results, consumed one line per OTU table data row
+fastafile=sys.argv[3]  # FASTA file whose sequences are submitted to BLAST
+
+
+def make_url(seq):  # Build the BLAST submission (CMD=Put) URL for one query; seq is lower-cased and appended unescaped
+ return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()
+
+def make_RIDlink(RID):  # NOTE(review): RID is never used, so every row gets the same fixed text; presumably a link built from RID was intended -- confirm
+ return "view results"
+
+def make_rerun_link(seq):  # NOTE(review): seq is never used, so every row gets the same fixed text; presumably a link built from seq was intended -- confirm
+ return "resubmit query"
+
+
+
+### for each fasta sequence create blast search (one sequence per line is assumed; '>' header lines and empty lines are skipped, '-' characters removed)
+sequences = [s for s in (line.strip().replace('-','') for line in open(fastafile) if '>' not in line) if s]
+urls = [make_url(seq) for seq in sequences]
+
+RIDs = []  # one RID string per submitted URL, in the same order as `sequences`
+for url in urls:  # NOTE(review): a failed request or a response lacking the markers below is not detected (str.find returns -1)
+ r=requests.get(url)
+ RID = r.text[r.text.find("RID"):r.text.find("RTOE")]  # slice from the first "RID" up to the first "RTOE" in the response body
+ RID = RID[6:-3].lstrip().rstrip()  # drop the first 6 and last 3 characters of that slice, then trim whitespace
+ RIDs.append(RID)
+ print "Submitted request, RID: "+ RID
+ time.sleep(3) # be nice to the server
+
+
+
+### Get top hits from local BLAST results file, add to OTUtable file
+blastf = open(BLASTfile, "r")
+otuf = open(OTUfile, "r")
+outfile = open("newtable.tsv","w+")  # intermediate table; it is re-read when the link columns are added
+
+linenum=0
+for line in otuf:  # OTU rows and BLAST result lines are paired purely by position
+    if linenum == 0:  # first line of the OTU table is its header: append the new column title
+        outfile.write( line.rstrip()+"\tBLAST Top Hit\n" )
+    else:  # data row: append the last tab-separated field of the next BLAST line
+        outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" )
+    linenum +=1
+
+blastf.close()
+otuf.close()
+outfile.close()
+
+
+
+### Add RID link and rerun link to table
+otuf = open("newtable.tsv","r")
+outfile = open("newtable2.tsv","w+")  # table with the two link columns appended
+
+print len(sequences)
+print len (RIDs)
+linenum=-1  # -1 marks the header row, so the first data row maps to index 0 of RIDs/sequences
+for line in otuf:
+    if linenum == -1:
+        outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" )
+    else:  # assumes the table has no more data rows than submitted sequences (IndexError otherwise)
+        outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" )
+    linenum +=1
+otuf.close()  # close both handles so the output is flushed, matching the explicit closes of the top-hit step
+outfile.close()
diff -r a8b089f5a429 -r 4afa63644ac3 OTUtable_addblast.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/OTUtable_addblast.xml Mon Nov 09 09:50:15 2015 -0500
@@ -0,0 +1,22 @@
+
+
+ for EMC workflow
+
+
+ OTUtable_addblast.py $otufile $blastfile $oturepfile
+
+
+
+
+
+
+
+
+
+
+
+
+
+ description of tool for the users
+
+
\ No newline at end of file
diff -r a8b089f5a429 -r 4afa63644ac3 r_wrapper.sh
--- a/r_wrapper.sh Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-#!/bin/sh
-
-### Run R providing the R script in $1 as standard input and passing
-### the remaining arguments on the command line
-
-# Function that writes a message to stderr and exits
-function fail
-{
- echo "$@" >&2
- exit 1
-}
-
-# Ensure R executable is found
-which R > /dev/null || fail "'R' is required by this tool but was not found on path"
-
-# Extract first argument
-infile=$1; shift
-
-# Ensure the file exists
-test -f $infile || fail "R input file '$infile' does not exist"
-
-# Invoke R passing file named by first argument to stdin
-R --vanilla --slave $* < $infile
diff -r a8b089f5a429 -r 4afa63644ac3 test-data/2.tabular
--- a/test-data/2.tabular Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-1 68 4.1
-2 71 4.6
-3 62 3.8
-4 75 4.4
-5 58 3.2
-6 60 3.1
-7 67 3.8
-8 68 4.1
-9 71 4.3
-10 69 3.7
diff -r a8b089f5a429 -r 4afa63644ac3 test-data/XY_Plot_1_out.pdf
--- a/test-data/XY_Plot_1_out.pdf Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-%PDF-1.4
-%âãÏÓ\r
-1 0 obj
-<<
-/CreationDate (D:20110308102307)
-/ModDate (D:20110308102307)
-/Title (R Graphics Output)
-/Producer (R 2.11.0)
-/Creator (R)
->>
-endobj
-2 0 obj
-<<
-/Type /Catalog
-/Pages 3 0 R
->>
-endobj
-5 0 obj
-<<
-/Type /Page
-/Parent 3 0 R
-/Contents 6 0 R
-/Resources 4 0 R
->>
-endobj
-6 0 obj
-<<
-/Length 7 0 R
->>
-stream
-1 J 1 j q
-Q q
-0.000 0.000 0.000 RG
-0.75 w
-[] 0 d
-1 J
-1 j
-10.00 M
-117.07 73.44 m 458.40 73.44 l S
-117.07 73.44 m 117.07 66.24 l S
-202.40 73.44 m 202.40 66.24 l S
-287.73 73.44 m 287.73 66.24 l S
-373.07 73.44 m 373.07 66.24 l S
-458.40 73.44 m 458.40 66.24 l S
-BT
-0.000 0.000 0.000 rg
-/F2 1 Tf 12.00 0.00 -0.00 12.00 113.73 47.52 Tm (2) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 199.06 47.52 Tm (4) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 284.40 47.52 Tm (6) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 369.73 47.52 Tm (8) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 451.73 47.52 Tm (10) Tj
-ET
-59.04 127.67 m 59.04 431.20 l S
-59.04 127.67 m 51.84 127.67 l S
-59.04 228.85 m 51.84 228.85 l S
-59.04 330.02 m 51.84 330.02 l S
-59.04 431.20 m 51.84 431.20 l S
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 121.00 Tm (60) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 222.18 Tm (65) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 323.35 Tm (70) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 424.53 Tm (75) Tj
-ET
-59.04 73.44 m
-473.76 73.44 l
-473.76 444.96 l
-59.04 444.96 l
-59.04 73.44 l
-S
-Q q
-BT
-0.000 0.000 0.000 rg
-/F3 1 Tf 14.00 0.00 -0.00 14.00 211.16 469.45 Tm (Example XY Plot) Tj
-ET
-BT
-/F2 1 Tf 12.00 0.00 -0.00 12.00 240.73 18.72 Tm (Column 1) Tj
-ET
-BT
-/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 233.53 Tm (Column 2) Tj
-ET
-Q q 59.04 73.44 414.72 371.52 re W n
-1.000 0.000 0.000 RG
-0.75 w
-[ 3.00 5.00] 0 d
-1 J
-1 j
-10.00 M
-74.40 289.55 m
-117.07 350.26 l
-159.73 168.14 l
-202.40 431.20 l
-245.07 87.20 l
-287.73 127.67 l
-330.40 269.32 l
-373.07 289.55 l
-415.73 350.26 l
-458.40 309.79 l
-S
-Q
-endstream
-endobj
-7 0 obj
-1565
-endobj
-3 0 obj
-<<
-/Type /Pages
-/Kids [
-5 0 R
-]
-/Count 1
-/MediaBox [0 0 504 504]
->>
-endobj
-4 0 obj
-<<
-/ProcSet [/PDF /Text]
-/Font <>
-/ExtGState << >>
->>
-endobj
-8 0 obj
-<<
-/Type /Encoding
-/BaseEncoding /WinAnsiEncoding
-/Differences [ 45/minus 96/quoteleft
-144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
-/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
->>
-endobj
-9 0 obj <<
-/Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica
-/Encoding 8 0 R
->> endobj
-10 0 obj <<
-/Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding 8 0 R
->> endobj
-xref
-0 11
-0000000000 65535 f
-0000000021 00000 n
-0000000164 00000 n
-0000001931 00000 n
-0000002014 00000 n
-0000000213 00000 n
-0000000293 00000 n
-0000001911 00000 n
-0000002106 00000 n
-0000002363 00000 n
-0000002459 00000 n
-trailer
-<<
-/Size 11
-/Info 1 0 R
-/Root 2 0 R
->>
-startxref
-2561
-%%EOF
diff -r a8b089f5a429 -r 4afa63644ac3 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-
diff -r a8b089f5a429 -r 4afa63644ac3 tool_dependencies.xml~
--- a/tool_dependencies.xml~ Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-
-
-
-
-
-
diff -r a8b089f5a429 -r 4afa63644ac3 xy_example.jpg
Binary file xy_example.jpg has changed
diff -r a8b089f5a429 -r 4afa63644ac3 xy_plot.xml
--- a/xy_plot.xml Thu Oct 29 10:48:55 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-
- for multiple series and graph types
-
- R
- cairo
-
- r_wrapper.sh $script_file
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ## Setup R error handling to go to stderr
- options( show.error.messages=F,
- error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
- ## Determine range of all series in the plot
- xrange = c( NULL, NULL )
- yrange = c( NULL, NULL )
- #for $i, $s in enumerate( $series )
- s${i} = read.table( "${s.input.file_name}" )
- x${i} = s${i}[,${s.xcol}]
- y${i} = s${i}[,${s.ycol}]
- xrange = range( x${i}, xrange )
- yrange = range( y${i}, yrange )
- #end for
- ## Open output PDF file
- png( "${out_file1}" )
- ## Dummy plot for axis / labels
- plot( NULL, type="n", xlim=xrange, ylim=yrange, main="${main}", xlab="${xlab}", ylab="${ylab}" )
- ## Plot each series
- #for $i, $s in enumerate( $series )
- #if $s.series_type['type'] == "line"
- lines( x${i}, y${i}, lty=${s.series_type.lty}, lwd=${s.series_type.lwd}, col=${s.series_type.col} )
- #elif $s.series_type.type == "points"
- points( x${i}, y${i}, pch=${s.series_type.pch}, cex=${s.series_type.cex}, col=${s.series_type.col} )
- #end if
- #end for
- ## Close the PDF file
- devname = dev.off()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. class:: infomark
-
-This tool allows you to plot values contained in columns of a dataset against each other and also allows you to have different series corresponding to the same or different datasets in one plot.
-
------
-
-.. class:: warningmark
-
-This tool throws an error if the columns selected for plotting are absent or are not numeric and also if the lengths of these columns differ.
-
------
-
-**Example**
-
-Input file::
-
- 1 68 4.1
- 2 71 4.6
- 3 62 3.8
- 4 75 4.4
- 5 58 3.2
- 6 60 3.1
- 7 67 3.8
- 8 68 4.1
- 9 71 4.3
- 10 69 3.7
-
-Create a two series XY plot on the above data:
-
-- Series 1: Red Dashed-Line plot between columns 1 and 2
-- Series 2: Blue Circular-Point plot between columns 3 and 2
-
-.. image:: xy_example.jpg
-
-