changeset 3:c7c7000de220 draft

Uploaded
author davidvanzessen
date Thu, 30 Jul 2015 09:31:38 -0400
parents d8d61e65dfd5
children 778c2fd5cea3
files igblast/igblast.r igblast/igblast.sh igblastn.xml tool_dependencies.xml
diffstat 4 files changed, 189 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblast/igblast.r	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,56 @@
+# Reformat the tab-separated table in <infile> into one output row per read, written to <outfile>.
+# Usage: Rscript igblast.r <infile> <outfile>
+args <- commandArgs(trailingOnly = TRUE)
+infile <- args[1]
+outfile <- args[2]
+
+blasted <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE,
+                      stringsAsFactors = FALSE, comment.char = "")
+
+# Fail early and name the missing input columns instead of erroring later on column selection.
+required <- c("X.reads_count", "inFrame", "noStop", "vSegment", "dSegment", "jSegment", "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt")
+missing_cols <- setdiff(required, names(blasted))
+if (length(missing_cols) > 0) {
+  stop("Missing input column(s): ", paste(missing_cols, collapse = ", "), call. = FALSE)
+}
+n_rows <- nrow(blasted)
+
+# read.table() converts an all-"true"/"false" column to logical, which never equals the
+# string "true"; compare on the lower-cased text instead. %in% also turns NA into FALSE.
+flag_is <- function(x, value) tolower(as.character(x)) %in% value
+in_frame <- flag_is(blasted$inFrame, "true")
+frame <- rep("Out-of-frame", n_rows)
+frame[in_frame & flag_is(blasted$noStop, "false")] <- "In-frame with stop codon"
+frame[in_frame & flag_is(blasted$noStop, "true")] <- "In-frame"
+
+# An empty or missing CDR3 nucleotide sequence is reported as NOT_FOUND.
+found_how <- rep("found", n_rows)
+found_how[is.na(blasted$cdr3nt) | blasted$cdr3nt == ""] <- "NOT_FOUND"
+
+# Output columns in final order; check.names = FALSE keeps the spaces in the headers.
+result <- data.frame(
+  `ID` = seq_len(n_rows),
+  `VDJ Frame` = frame,
+  `Top V Gene` = blasted$vSegment,
+  `Top D Gene` = blasted$dSegment,
+  `Top J Gene` = blasted$jSegment,
+  `CDR1 Seq` = blasted$cdr1aa,
+  `CDR1 Length` = nchar(blasted$cdr1aa),
+  `CDR2 Seq` = blasted$cdr2aa,
+  `CDR2 Length` = nchar(blasted$cdr2aa),
+  `CDR3 Seq` = blasted$cdr3aa,
+  `CDR3 Length` = nchar(blasted$cdr3aa),
+  `CDR3 Seq DNA` = blasted$cdr3nt,
+  `CDR3 Length DNA` = nchar(blasted$cdr3nt),
+  `Strand` = rep("+/-", n_rows),
+  `CDR3 Found How` = found_how,
+  `AA JUNCTION` = blasted$cdr3aa,
+  check.names = FALSE, stringsAsFactors = FALSE
+)
+
+# Repeat each row X.reads_count times, then renumber ID over the expanded table.
+result <- result[rep(seq_len(nrow(result)), blasted$X.reads_count), , drop = FALSE]
+result$ID <- seq_len(nrow(result))
+
+# Plain tab-separated output: header row, no quoting, no row names.
+write.table(result, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblast/igblast.sh	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,15 @@
+set -e
+
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+input=$1
+species=$2
+locus=$3
+output=$4
+
+
+echo "$input $species $locus $output"
+
+java -jar igblastwrp.jar -cf -S $species -R $locus ${input} $PWD/blasted_output
+
+Rscript --verbose $dir/igblast.r "$PWD/blasted_output.L2.txt" "$output" 2>&1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblastn.xml	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,98 @@
+<tool id="igblastn" name="igBLASTn" version="0.1.0">
+    <description> </description>
+    <command interpreter="bash">
+		igblast/igblast.sh $input $species $locus $output
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Fasta file"/>
+        <param name="species" type="select" label="Species">
+					<option value="human">Homo sapiens</option>
+					<option value="mouse">Mus musculus</option>
+					<option value="rat">Rattus norvegicus</option>
+					<option value="rabbit">Oryctolagus cuniculus</option>
+					<option value="rhesus_monkey">Macaca mulatta</option>
+				</param>
+        <param name="locus" type="select" label="Locus">
+					<option value="TRA">TRA</option>
+					<option value="TRB">TRB</option>
+					<option value="TRG">TRG</option>
+					<option value="TRD">TRD</option>	
+					<option value="IGH">IGH</option>
+					<option value="IGK">IGK</option>
+					<option value="IGL">IGL</option>					
+				</param>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" type="data" label="${input.name}-igBLASTn aligned"/>
+	<!--<data name="log" format="text" label="log"/>-->
+    </outputs>
+	<requirements>
+		<requirement type="package" version="0.6">igblastwrp</requirement>
+	</requirements>
+    <help>
+============
+igBLASTn
+============
+
+This tool uses the online igBLAST website hosted by NCBI to blast a FASTA file; it retrieves the result and generates a convenient tabular format for further processing.
+
+**NOTE**
+
+.. class:: warningmark
+
+- Everything goes through the servers of NCBI, so if you have sensitive data that isn't allowed to leave your local network, this isn't the tool to use.
+
+**USAGE**
+
+.. class:: infomark
+
+- This tool uses a free service provided by NCBI, and although there don't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers.
+
+
+**INPUT**
+
+This tool accepts FASTA files as input:
+
+::
+
+		>lcl|FLN1FA002RWEZA.1| 
+		ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
+		tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
+		gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
+		cag
+		>lcl|FLN1FA001BLION.1| 
+		aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat
+		taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg
+		cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame/Out-of-frame                        |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="igblastwrp" version="0.6">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">https://github.com/mikessh/higblast/releases/download/v0.6/igblastwrapper_linux64.tar.gz</action>
+                <action type="move_file">
+                    <source>igblastwrapper_linux64</source>
+                    <destination>$INSTALL_DIR/</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/igblastwrapper_linux64</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+Downloads https://github.com/mikessh/higblast/
+        </readme>
+    </package>
+</tool_dependency>