Mercurial > repos > saskia-hiltemann > testrepo
changeset 1:4afa63644ac3 draft default tip
Uploaded
| author | saskia-hiltemann |
|---|---|
| date | Mon, 09 Nov 2015 09:50:15 -0500 |
| parents | a8b089f5a429 |
| children | |
| files | OTUtable_addblast.py OTUtable_addblast.xml r_wrapper.sh test-data/2.tabular test-data/XY_Plot_1_out.pdf tool_dependencies.xml tool_dependencies.xml~ xy_example.jpg xy_plot.xml |
| diffstat | 9 files changed, 93 insertions(+), 380 deletions(-) [+] |
line wrap: on
line diff
"""Annotate an OTU table with local BLAST top hits and NCBI BLAST links.

File: OTUtable_addblast.py

Usage:
    OTUtable_addblast.py <otu_table> <blast_results> <otu_representatives_fasta>

Steps performed, in order:
  1. Every non-header line of the FASTA file is submitted as a query to the
     NCBI BLAST web service and the returned request id (RID) is recorded.
  2. The last tab-separated field of each line of the local BLAST results
     file is appended to the matching OTU table row ("newtable.tsv").
  3. A "view results" link (built from the RID) and a "resubmit query" link
     are appended to each row ("newtable2.tsv").

Both output files are written to the current working directory.
"""
import sys
import time

baseurl = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi"


def make_url(seq):
    """Return the BLAST submission (CMD=Put) URL for the sequence `seq`."""
    return baseurl + "?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY=" + seq.lower()


def make_RIDlink(RID):
    """Return an HTML anchor that opens the BLAST results for request `RID`."""
    return "<a target=\"_blank\" href=\"" + baseurl + "?CMD=Get&RID=" + RID + "\">view results</a>"


def make_rerun_link(seq):
    """Return an HTML anchor that resubmits `seq` as a new BLAST query."""
    return "<a target=\"_blank\" href=\"" + baseurl + "?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY=" + seq.lower() + "\">resubmit query</a>"


def read_sequences(fasta_path):
    """Return the sequence lines of `fasta_path` with gap characters removed.

    Every line that does not contain '>' is treated as one sequence, so a
    record wrapped over several lines yields several entries.
    NOTE(review): presumably the input has one line per sequence -- confirm.
    """
    with open(fasta_path) as handle:
        return [line.rstrip('\n').replace('-', '')
                for line in handle if '>' not in line]


def extract_RID(text):
    """Return the request id found between the "RID" and "RTOE" markers.

    The text between the first "RID" and the first "RTOE" is taken, the
    first 6 and last 3 characters are dropped and the rest is stripped.
    Returns "" when either marker is missing, instead of slicing with the
    -1 that str.find() reports for "not found".
    NOTE(review): assumes the response contains "RID = <id>" followed by an
    "RTOE" line -- confirm against the current NCBI response format.
    """
    start = text.find("RID")
    end = text.find("RTOE")
    if start == -1 or end == -1:
        return ""
    return text[start:end][6:-3].strip()


def submit_queries(sequences, delay=3):
    """Submit each sequence to BLAST and return the list of RIDs, in order.

    `delay` is the number of seconds to sleep after each request.
    """
    if not sequences:
        return []
    # Imported here so the pure helpers above can be used (and tested)
    # without the third-party dependency installed.
    import requests

    RIDs = []
    for seq in sequences:
        r = requests.get(make_url(seq))
        RID = extract_RID(r.text)
        RIDs.append(RID)
        print("Submitted request, RID: " + RID)
        time.sleep(delay)  # be nice to the server
    return RIDs


def add_top_hits(otu_path, blast_path, out_path):
    """Copy the OTU table to `out_path`, adding a "BLAST Top Hit" column.

    The first line is treated as the header. Each following row receives the
    last tab-separated field of the next line read from `blast_path`; when
    the BLAST file has run out of lines, the added field is empty.
    """
    with open(blast_path, "r") as blastf, \
            open(otu_path, "r") as otuf, \
            open(out_path, "w") as outfile:
        for linenum, line in enumerate(otuf):
            if linenum == 0:
                outfile.write(line.rstrip() + "\tBLAST Top Hit\n")
            else:
                top_hit = blastf.readline().strip().split("\t")[-1]
                outfile.write(line.rstrip() + "\t" + top_hit + "\n")


def add_blast_links(in_path, out_path, sequences, RIDs):
    """Copy the table to `out_path`, adding result and resubmit link columns.

    Data row i (0-based, header excluded) is paired with RIDs[i] and
    sequences[i]; IndexError is raised if the table has more data rows than
    there are sequences or RIDs.
    """
    with open(in_path, "r") as otuf, open(out_path, "w") as outfile:
        for linenum, line in enumerate(otuf):
            if linenum == 0:
                outfile.write(line.rstrip() + "\tBLAST result\tBLAST resubmit\n")
            else:
                idx = linenum - 1
                outfile.write(line.rstrip() + "\t" + make_RIDlink(RIDs[idx])
                              + "\t" + make_rerun_link(sequences[idx]) + "\n")


def main(argv=None):
    """Run the full annotation; return a process exit status.

    `argv` defaults to sys.argv. argv[0] is the script path, so the three
    input files are read from argv[1], argv[2] and argv[3].
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write("usage: OTUtable_addblast.py <otu_table> "
                         "<blast_results> <otu_representatives_fasta>\n")
        return 2
    OTUfile, BLASTfile, fastafile = argv[1:4]

    ### for each fasta sequence create blast search
    sequences = read_sequences(fastafile)
    RIDs = submit_queries(sequences)

    ### Get top hits from local BLAST results file, add to OTUtable file
    add_top_hits(OTUfile, BLASTfile, "newtable.tsv")

    ### Add RID link and rerun link to table
    print(len(sequences))
    print(len(RIDs))
    add_blast_links("newtable.tsv", "newtable2.tsv", sequences, RIDs)
    return 0


if __name__ == "__main__":
    sys.exit(main())
<!-- File: OTUtable_addblast.xml - Galaxy tool wrapper for OTUtable_addblast.py -->
<tool id="mothur-otu-addblast" name="Mothur OTUtable addBLAST" version="1">

    <description>for EMC workflow</description>

    <!-- Arguments are positional: OTU table, BLAST results, OTU representatives FASTA. -->
    <command interpreter="python">
        OTUtable_addblast.py $otufile $blastfile $oturepfile
    </command>

    <inputs>
        <param name="otufile" type="data" label="Select OTU table (taxonomy output from MOTHUR Classify.otu)" help="choose input file from history"/>
        <param name="blastfile" type="data" label="Select BLAST output" help="choose input file from history"/>
        <param name="oturepfile" type="data" label="Select OTU representatives fasta file" help="choose input file from history"/>
    </inputs>

    <outputs>
        <!-- The script writes its final, tab-separated table to newtable2.tsv in the job working directory. -->
        <data format="tabular" name="outputfile" from_work_dir="newtable2.tsv" label="OTU table with BLAST on ${on_string}"/>
    </outputs>

    <help>
**What it does**

Adds three columns to an OTU table:

- **BLAST Top Hit**: the last tab-separated field of the corresponding line of the selected BLAST output.
- **BLAST result**: an HTML link ("view results") to the NCBI BLAST results for the OTU's representative sequence.
- **BLAST resubmit**: an HTML link ("resubmit query") that submits the same sequence to NCBI BLAST again.

The first line of the OTU table is treated as a header. Rows of the OTU table, lines of the BLAST output and sequences of the FASTA file are matched by position, so all three inputs must be in the same order.

-----

.. class:: warningmark

Each sequence in the FASTA file is submitted to the NCBI BLAST web service, so the tool needs network access and pauses for a few seconds after every sequence.
    </help>
</tool>
--- a/r_wrapper.sh Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#!/bin/sh - -### Run R providing the R script in $1 as standard input and passing -### the remaining arguments on the command line - -# Function that writes a message to stderr and exits -function fail -{ - echo "$@" >&2 - exit 1 -} - -# Ensure R executable is found -which R > /dev/null || fail "'R' is required by this tool but was not found on path" - -# Extract first argument -infile=$1; shift - -# Ensure the file exists -test -f $infile || fail "R input file '$infile' does not exist" - -# Invoke R passing file named by first argument to stdin -R --vanilla --slave $* < $infile
--- a/test-data/2.tabular Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -1 68 4.1 -2 71 4.6 -3 62 3.8 -4 75 4.4 -5 58 3.2 -6 60 3.1 -7 67 3.8 -8 68 4.1 -9 71 4.3 -10 69 3.7
--- a/test-data/XY_Plot_1_out.pdf Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ -%PDF-1.4 -%ρ\r -1 0 obj -<< -/CreationDate (D:20110308102307) -/ModDate (D:20110308102307) -/Title (R Graphics Output) -/Producer (R 2.11.0) -/Creator (R) ->> -endobj -2 0 obj -<< -/Type /Catalog -/Pages 3 0 R ->> -endobj -5 0 obj -<< -/Type /Page -/Parent 3 0 R -/Contents 6 0 R -/Resources 4 0 R ->> -endobj -6 0 obj -<< -/Length 7 0 R ->> -stream -1 J 1 j q -Q q -0.000 0.000 0.000 RG -0.75 w -[] 0 d -1 J -1 j -10.00 M -117.07 73.44 m 458.40 73.44 l S -117.07 73.44 m 117.07 66.24 l S -202.40 73.44 m 202.40 66.24 l S -287.73 73.44 m 287.73 66.24 l S -373.07 73.44 m 373.07 66.24 l S -458.40 73.44 m 458.40 66.24 l S -BT -0.000 0.000 0.000 rg -/F2 1 Tf 12.00 0.00 -0.00 12.00 113.73 47.52 Tm (2) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 199.06 47.52 Tm (4) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 284.40 47.52 Tm (6) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 369.73 47.52 Tm (8) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 451.73 47.52 Tm (10) Tj -ET -59.04 127.67 m 59.04 431.20 l S -59.04 127.67 m 51.84 127.67 l S -59.04 228.85 m 51.84 228.85 l S -59.04 330.02 m 51.84 330.02 l S -59.04 431.20 m 51.84 431.20 l S -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 121.00 Tm (60) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 222.18 Tm (65) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 323.35 Tm (70) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 424.53 Tm (75) Tj -ET -59.04 73.44 m -473.76 73.44 l -473.76 444.96 l -59.04 444.96 l -59.04 73.44 l -S -Q q -BT -0.000 0.000 0.000 rg -/F3 1 Tf 14.00 0.00 -0.00 14.00 211.16 469.45 Tm (Example XY Plot) Tj -ET -BT -/F2 1 Tf 12.00 0.00 -0.00 12.00 240.73 18.72 Tm (Column 1) Tj -ET -BT -/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 233.53 Tm (Column 2) Tj -ET -Q q 59.04 73.44 414.72 371.52 re W n -1.000 0.000 0.000 RG -0.75 w -[ 3.00 5.00] 0 d -1 J -1 j -10.00 M -74.40 289.55 m -117.07 350.26 l -159.73 
168.14 l -202.40 431.20 l -245.07 87.20 l -287.73 127.67 l -330.40 269.32 l -373.07 289.55 l -415.73 350.26 l -458.40 309.79 l -S -Q -endstream -endobj -7 0 obj -1565 -endobj -3 0 obj -<< -/Type /Pages -/Kids [ -5 0 R -] -/Count 1 -/MediaBox [0 0 504 504] ->> -endobj -4 0 obj -<< -/ProcSet [/PDF /Text] -/Font <</F2 9 0 R /F3 10 0 R >> -/ExtGState << >> ->> -endobj -8 0 obj -<< -/Type /Encoding -/BaseEncoding /WinAnsiEncoding -/Differences [ 45/minus 96/quoteleft -144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent -/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] ->> -endobj -9 0 obj << -/Type /Font -/Subtype /Type1 -/Name /F2 -/BaseFont /Helvetica -/Encoding 8 0 R ->> endobj -10 0 obj << -/Type /Font -/Subtype /Type1 -/Name /F3 -/BaseFont /Helvetica-Bold -/Encoding 8 0 R ->> endobj -xref -0 11 -0000000000 65535 f -0000000021 00000 n -0000000164 00000 n -0000001931 00000 n -0000002014 00000 n -0000000213 00000 n -0000000293 00000 n -0000001911 00000 n -0000002106 00000 n -0000002363 00000 n -0000002459 00000 n -trailer -<< -/Size 11 -/Info 1 0 R -/Root 2 0 R ->> -startxref -2561 -%%EOF
--- a/tool_dependencies.xml Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="R" version="2.11.0"> - <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="cairo" version="1.14.2"> - <repository changeset_revision="931dda69e1b0" name="package_cairo_1_14_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>
--- a/tool_dependencies.xml~ Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="R" version="2.11.0"> - <repository changeset_revision="5824d2b3bc8b" name="package_r_2_11_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>
--- a/xy_plot.xml Thu Oct 29 10:48:55 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,152 +0,0 @@ -<tool id="XY_Plot_1" name="Plotting tool" version="1.0.1"> - <description>for multiple series and graph types</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.14.2">cairo</requirement> - </requirements> - <command interpreter="bash">r_wrapper.sh $script_file</command> - - <inputs> - <param name="main" type="text" value="" label="Plot Title"/> - <param name="xlab" type="text" value="" label="Label for x axis"/> - <param name="ylab" type="text" value="" label="Label for y axis"/> - <repeat name="series" title="Series"> - <param name="input" type="data" format="tabular" label="Dataset"/> - <param name="xcol" type="data_column" data_ref="input" label="Column for x axis"/> - <param name="ycol" type="data_column" data_ref="input" label="Column for y axis"/> - <conditional name="series_type"> - <param name="type" type="select" label="Series Type"> - <option value="line" selected="true">Line</option> - <option value="points">Points</option> - </param> - <when value="line"> - <param name="lty" type="select" label="Line Type"> - <option value="1">Solid</option> - <option value="2">Dashed</option> - <option value="3">Dotted</option> - </param> - <param name="col" type="select" label="Line Color"> - <option value="1">Black</option> - <option value="2">Red</option> - <option value="3">Green</option> - <option value="4">Blue</option> - <option value="5">Cyan</option> - <option value="6">Magenta</option> - <option value="7">Yellow</option> - <option value="8">Gray</option> - </param> - <param name="lwd" type="float" label="Line Width" value="1.0"/> - </when> - <when value="points"> - <param name="pch" type="select" label="Point Type"> - <option value="1">Circle (hollow)</option> - <option value="2">Triangle (hollow)</option> - <option value="3">Cross</option> - <option 
value="4">Diamond (hollow)</option> - <option value="15">Square (filled)</option> - <option value="16">Circle (filled)</option> - <option value="17">Triangle (filled)</option> - </param> - <param name="col" type="select" label="Point Color"> - <option value="1">Black</option> - <option value="2">Red</option> - <option value="3">Green</option> - <option value="4">Blue</option> - <option value="5">Cyan</option> - <option value="6">Magenta</option> - <option value="7">Yellow</option> - <option value="8">Gray</option> - </param> - <param name="cex" type="float" label="Point Scale" value="1.0"/> - </when> - </conditional> - </repeat> - </inputs> - - <configfiles> - <configfile name="script_file"> - ## Setup R error handling to go to stderr - options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) - ## Determine range of all series in the plot - xrange = c( NULL, NULL ) - yrange = c( NULL, NULL ) - #for $i, $s in enumerate( $series ) - s${i} = read.table( "${s.input.file_name}" ) - x${i} = s${i}[,${s.xcol}] - y${i} = s${i}[,${s.ycol}] - xrange = range( x${i}, xrange ) - yrange = range( y${i}, yrange ) - #end for - ## Open output PDF file - png( "${out_file1}" ) - ## Dummy plot for axis / labels - plot( NULL, type="n", xlim=xrange, ylim=yrange, main="${main}", xlab="${xlab}", ylab="${ylab}" ) - ## Plot each series - #for $i, $s in enumerate( $series ) - #if $s.series_type['type'] == "line" - lines( x${i}, y${i}, lty=${s.series_type.lty}, lwd=${s.series_type.lwd}, col=${s.series_type.col} ) - #elif $s.series_type.type == "points" - points( x${i}, y${i}, pch=${s.series_type.pch}, cex=${s.series_type.cex}, col=${s.series_type.col} ) - #end if - #end for - ## Close the PDF file - devname = dev.off() - </configfile> - </configfiles> - - <outputs> - <data format="png" name="out_file1" /> - </outputs> - - <tests> - <test> - <param name="main" value="Example XY Plot"/> - <param name="xlab" value="Column 1"/> - <param 
name="ylab" value="Column 2"/> - <param name="input" value="2.tabular" ftype="tabular"/> - <param name="xcol" value="1"/> - <param name="ycol" value="2"/> - <param name="type" value="line"/> - <param name="lty" value="2"/> - <param name="col" value="2"/> - <param name="lwd" value="1.0"/> - <output name="out_file1" file="XY_Plot_1_out.pdf"/> - </test> - </tests> -<help> -.. class:: infomark - -This tool allows you to plot values contained in columns of a dataset against each other and also allows you to have different series corresponding to the same or different datasets in one plot. - ------ - -.. class:: warningmark - -This tool throws an error if the columns selected for plotting are absent or are not numeric and also if the lengths of these columns differ. - ------ - -**Example** - -Input file:: - - 1 68 4.1 - 2 71 4.6 - 3 62 3.8 - 4 75 4.4 - 5 58 3.2 - 6 60 3.1 - 7 67 3.8 - 8 68 4.1 - 9 71 4.3 - 10 69 3.7 - -Create a two series XY plot on the above data: - -- Series 1: Red Dashed-Line plot between columns 1 and 2 -- Series 2: Blue Circular-Point plot between columns 3 and 2 - -.. image:: xy_example.jpg -</help> -</tool>
