# HG changeset patch # User saskia-hiltemann # Date 1447080615 18000 # Node ID 4afa63644ac3954d7aeaa0076489e82e5c8afe82 # Parent a8b089f5a4290c99facef67f692ce6f065ec0a5a Uploaded diff -r a8b089f5a429 -r 4afa63644ac3 OTUtable_addblast.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OTUtable_addblast.py Mon Nov 09 09:50:15 2015 -0500 @@ -0,0 +1,71 @@ +import requests +import time +import sys + +baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi" + +OTUfile=sys.argv[0] +BLASTfile=sys.argv[1] +fastafile=sys.argv[2] + + +def make_url(seq): + return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower() + +def make_RIDlink(RID): + return "view results" + +def make_rerun_link(seq): + return "resubmit query" + + + +### for each fasta sequence create blast search +sequences = [line.rstrip('\n').replace('-','') for line in open(fastafile) if '>' not in line] +urls = [make_url(seq) for seq in sequences] + +RIDs = [] +for url in urls: + r=requests.get(url) + RID = r.text[r.text.find("RID"):r.text.find("RTOE")] + RID = RID[6:-3].lstrip().rstrip() + RIDs.append(RID) + print "Submitted request, RID: "+ RID + time.sleep(3) # be nice to the server + + + +### Get top hits from local BLAST results file, add to OTUtable file +blastf = open(BLASTfile, "r") +otuf = open(OTUfile, "r") +outfile = open("newtable.tsv","w+") + +linenum=0 +for line in otuf: + if linenum == 0: + outfile.write( line.rstrip()+"\tBLAST Top Hit\n" ) + else: + outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" ) + linenum +=1 + +blastf.close() +otuf.close() +outfile.close() + + + +### Add RID link and rerun link to table +otuf = open("newtable.tsv","r") +outfile = open("newtable2.tsv","w+") + +print len(sequences) +print len (RIDs) +linenum=-1 +for line in otuf: + if linenum == -1: + outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" ) + else: + outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" ) + linenum +=1 + + diff -r a8b089f5a429 -r 4afa63644ac3 OTUtable_addblast.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OTUtable_addblast.xml Mon Nov 09 09:50:15 2015 -0500 @@ -0,0 +1,22 @@ + + + for EMC workflow + + + OTUtable_addblast.py $otufile $blastfile $oturepfile + + + + + + + + + + + + + + description of tool for the users + + \ No newline at end of file diff -r a8b089f5a429 -r 4afa63644ac3 r_wrapper.sh --- a/r_wrapper.sh Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#!/bin/sh - -### Run R providing the R script in $1 as standard input and passing -### the remaining arguments on the command line - -# Function that writes a message to stderr and exits -function fail -{ - echo "$@" >&2 - exit 1 -} - -# Ensure R executable is found -which R > /dev/null || fail "'R' is required by this tool but was not found on path" - -# Extract first argument -infile=$1; shift - -# Ensure the file exists -test -f $infile || fail "R input file '$infile' does not exist" - -# Invoke R passing file named by first argument to stdin -R --vanilla --slave $* < $infile diff -r a8b089f5a429 -r 4afa63644ac3 test-data/2.tabular --- a/test-data/2.tabular Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -1 68 4.1 -2 71 4.6 -3 62 3.8 -4 75 4.4 -5 58 3.2 -6 60 3.1 -7 67 3.8 -8 68 4.1 -9 71 4.3 -10 69 3.7 diff -r a8b089f5a429 -r 4afa63644ac3 test-data/XY_Plot_1_out.pdf --- a/test-data/XY_Plot_1_out.pdf Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ -%PDF-1.4 -%âãÏÓ\r -1 0 obj -<< -/CreationDate (D:20110308102307) -/ModDate (D:20110308102307) -/Title (R Graphics Output) -/Producer (R 2.11.0) -/Creator (R) ->> -endobj -2 0 obj -<< -/Type /Catalog -/Pages 3 0 R ->> -endobj -5 0 obj -<< -/Type /Page -/Parent 3 0 R -/Contents 6 0 R -/Resources 4 0 R ->> -endobj -6 0 obj -<< -/Length 7 0 R ->> -stream -1 J 1 j q -Q q -0.000 0.000 0.000 RG -0.75 w -[] 0 d -1 J -1 j -10.00 M -117.07 73.44 m 458.40 73.44 l S -117.07 73.44 m 117.07 66.24 l S -202.40 73.44 m 202.40 66.24 l S -287.73 73.44 m 287.73 66.24 l S -373.07 73.44 m 373.07 66.24 l S -458.40 73.44 m 458.40 66.24 l S -BT -0.000 0.000 0.000 rg -/F2 1 Tf 12.00 0.00 -0.00 12.00 113.73 47.52 Tm (2) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 199.06 47.52 Tm (4) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 284.40 47.52 Tm (6) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 369.73 47.52 Tm (8) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 451.73 47.52 Tm (10) Tj -ET -59.04 127.67 m 59.04 431.20 l S -59.04 127.67 m 51.84 127.67 l S -59.04 228.85 m 51.84 228.85 l S -59.04 330.02 m 51.84 330.02 l S -59.04 431.20 m 51.84 431.20 l S -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 121.00 Tm (60) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 222.18 Tm (65) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 323.35 Tm (70) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 424.53 Tm (75) Tj -ET -59.04 73.44 m -473.76 73.44 l -473.76 444.96 l -59.04 444.96 l -59.04 73.44 l -S -Q q -BT -0.000 0.000 0.000 rg -/F3 1 Tf 14.00 0.00 -0.00 14.00 211.16 469.45 Tm (Example XY Plot) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 240.73 18.72 Tm (Column 1) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 233.53 Tm (Column 2) Tj -ET -Q q 59.04 73.44 414.72 371.52 re W n -1.000 0.000 0.000 RG -0.75 w -[ 3.00 5.00] 0 d -1 J -1 j -10.00 M -74.40 289.55 m -117.07 350.26 l -159.73 168.14 l -202.40 431.20 l -245.07 87.20 l -287.73 127.67 l -330.40 269.32 l -373.07 289.55 l -415.73 350.26 l -458.40 309.79 l -S -Q -endstream -endobj -7 0 obj -1565 -endobj -3 0 obj -<< -/Type /Pages -/Kids [ -5 0 R -] -/Count 1 -/MediaBox [0 0 504 504] ->> -endobj -4 0 obj -<< -/ProcSet [/PDF /Text] -/Font <> -/ExtGState << >> ->> -endobj -8 0 obj -<< -/Type /Encoding -/BaseEncoding /WinAnsiEncoding -/Differences [ 45/minus 96/quoteleft -144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent -/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] ->> -endobj -9 0 obj << -/Type /Font -/Subtype /Type1 -/Name /F2 -/BaseFont /Helvetica -/Encoding 8 0 R ->> endobj -10 0 obj << -/Type /Font -/Subtype /Type1 -/Name /F3 -/BaseFont /Helvetica-Bold -/Encoding 8 0 R ->> endobj -xref -0 11 -0000000000 65535 f -0000000021 00000 n -0000000164 00000 n -0000001931 00000 n -0000002014 00000 n -0000000213 00000 n -0000000293 00000 n -0000001911 00000 n -0000002106 00000 n -0000002363 00000 n -0000002459 00000 n -trailer -<< -/Size 11 -/Info 1 0 R -/Root 2 0 R ->> -startxref -2561 -%%EOF diff -r a8b089f5a429 -r 4afa63644ac3 tool_dependencies.xml --- a/tool_dependencies.xml Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ - - - - - - - - - diff -r a8b089f5a429 -r 4afa63644ac3 tool_dependencies.xml~ --- a/tool_dependencies.xml~ Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - - diff -r a8b089f5a429 -r 4afa63644ac3 xy_example.jpg Binary file xy_example.jpg has changed diff -r a8b089f5a429 -r 4afa63644ac3 xy_plot.xml --- a/xy_plot.xml Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,152 +0,0 @@ - - for multiple series and graph types - - R - cairo - - r_wrapper.sh $script_file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Setup R error handling to go to stderr - options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) - ## Determine range of all series in the plot - xrange = c( NULL, NULL ) - yrange = c( NULL, NULL ) - #for $i, $s in enumerate( $series ) - s${i} = read.table( "${s.input.file_name}" ) - x${i} = s${i}[,${s.xcol}] - y${i} = s${i}[,${s.ycol}] - xrange = range( x${i}, xrange ) - yrange = range( y${i}, yrange ) - #end for - ## Open output PDF file - png( "${out_file1}" ) - ## Dummy plot for axis / labels - plot( NULL, type="n", xlim=xrange, ylim=yrange, main="${main}", xlab="${xlab}", ylab="${ylab}" ) - ## Plot each series - #for $i, $s in enumerate( $series ) - #if $s.series_type['type'] == "line" - lines( x${i}, y${i}, lty=${s.series_type.lty}, lwd=${s.series_type.lwd}, col=${s.series_type.col} ) - #elif $s.series_type.type == "points" - points( x${i}, y${i}, pch=${s.series_type.pch}, cex=${s.series_type.cex}, col=${s.series_type.col} ) - #end if - #end for - ## Close the PDF file - devname = dev.off() - - - - - - - - - - - - - - - - - - - - - - - -.. class:: infomark - -This tool allows you to plot values contained in columns of a dataset against each other and also allows you to have different series corresponding to the same or different datasets in one plot. - ------ - -.. class:: warningmark - -This tool throws an error if the columns selected for plotting are absent or are not numeric and also if the lengths of these columns differ. - ------ - -**Example** - -Input file:: - - 1 68 4.1 - 2 71 4.6 - 3 62 3.8 - 4 75 4.4 - 5 58 3.2 - 6 60 3.1 - 7 67 3.8 - 8 68 4.1 - 9 71 4.3 - 10 69 3.7 - -Create a two series XY plot on the above data: - -- Series 1: Red Dashed-Line plot between columns 1 and 2 -- Series 2: Blue Circular-Point plot between columns 3 and 2 - -.. image:: xy_example.jpg - -