# HG changeset patch
# User davidvanzessen
# Date 1438263098 14400
# Node ID c7c7000de220458bb922bebeeddd12740fa0d061
# Parent  d8d61e65dfd52c0294caea6c0da2ed5a349567fc
Uploaded
diff -r d8d61e65dfd5 -r c7c7000de220 igblast/igblast.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblast/igblast.r	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,90 @@
+# Convert the tabular igblastwrp result into the table written to outfile.
+#
+# Usage: Rscript igblast.r <infile> <outfile>
+#   infile  - tab-separated table with a header row; it must provide the
+#             columns listed in required_input below.
+#   outfile - path of the tab-separated table to write; each input row is
+#             repeated X.reads_count times and the rows are renumbered.
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 2) {
+  stop("Usage: Rscript igblast.r <infile> <outfile>", call. = FALSE)
+}
+
+infile <- args[1]
+outfile <- args[2]
+
+# comment.char = "" stops read.table from treating "#" as a comment start;
+# check.names (on by default) turns header fields into syntactic names.
+blasted <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE,
+                      stringsAsFactors = FALSE, comment.char = "")
+
+required_input <- c("X.reads_count", "inFrame", "noStop", "vSegment",
+                    "dSegment", "jSegment", "cdr1aa", "cdr2aa", "cdr3aa",
+                    "cdr3nt")
+missing_input <- setdiff(required_input, names(blasted))
+if (length(missing_input) > 0) {
+  stop("Input is missing required column(s): ",
+       paste(missing_input, collapse = ", "), call. = FALSE)
+}
+
+# read.table converts a column holding only true/false into logical, so the
+# flags are compared as lower-cased text; %in% makes NA count as no match.
+flag_is <- function(x, value) {
+  tolower(as.character(x)) %in% value
+}
+
+# Sequence columns as character with NA replaced by "": an all-empty column
+# is read as logical NA, and nchar(NA) would report a length of 2.
+as_sequence <- function(x) {
+  x <- as.character(x)
+  x[is.na(x)] <- ""
+  x
+}
+
+n_rows <- nrow(blasted)
+
+in_frame <- flag_is(blasted$inFrame, "true")
+vdj_frame <- rep("Out-of-frame", n_rows)
+vdj_frame[in_frame & flag_is(blasted$noStop, "false")] <-
+  "In-frame with stop codon"
+vdj_frame[in_frame & flag_is(blasted$noStop, "true")] <- "In-frame"
+
+cdr1 <- as_sequence(blasted$cdr1aa)
+cdr2 <- as_sequence(blasted$cdr2aa)
+cdr3 <- as_sequence(blasted$cdr3aa)
+cdr3_dna <- as_sequence(blasted$cdr3nt)
+
+found_how <- rep("found", n_rows)
+found_how[cdr3_dna == ""] <- "NOT_FOUND"
+
+converted <- data.frame(
+  "ID" = seq_len(n_rows),
+  "VDJ Frame" = vdj_frame,
+  "Top V Gene" = blasted$vSegment,
+  "Top D Gene" = blasted$dSegment,
+  "Top J Gene" = blasted$jSegment,
+  "CDR1 Seq" = cdr1,
+  "CDR1 Length" = nchar(cdr1),
+  "CDR2 Seq" = cdr2,
+  "CDR2 Length" = nchar(cdr2),
+  "CDR3 Seq" = cdr3,
+  "CDR3 Length" = nchar(cdr3),
+  "CDR3 Seq DNA" = cdr3_dna,
+  "CDR3 Length DNA" = nchar(cdr3_dna),
+  "Strand" = rep("+/-", n_rows),
+  "CDR3 Found How" = found_how,
+  "AA JUNCTION" = cdr3,
+  check.names = FALSE,
+  stringsAsFactors = FALSE
+)
+
+# Repeat every row X.reads_count times, then renumber the rows.
+counts <- blasted$X.reads_count
+if (n_rows > 0 && (!is.numeric(counts) || anyNA(counts) || any(counts < 0))) {
+  stop("Column X.reads_count must hold non-negative numbers.", call. = FALSE)
+}
+expanded <- converted[rep(seq_len(n_rows), counts), , drop = FALSE]
+expanded$ID <- seq_len(nrow(expanded))
+
+write.table(expanded, outfile, quote = FALSE, sep = "\t", row.names = FALSE,
+            col.names = TRUE)
diff -r d8d61e65dfd5 -r c7c7000de220 igblast/igblast.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblast/igblast.sh	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,15 @@
+set -e
+
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+input=$1
+species=$2
+locus=$3
+output=$4
+
+
+echo "$input $species $locus $output"
+
+java -jar igblastwrp.jar -cf -S $species -R $locus ${input} $PWD/blasted_output
+
+Rscript --verbose $dir/igblast.r "$PWD/blasted_output.L2.txt" "$output" 2>&1
diff -r d8d61e65dfd5 -r c7c7000de220 igblastn.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igblastn.xml	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,98 @@
+
+     
+    
+		igblast/igblast.sh $input $species $locus $output
+    
+    
+        
+        
+					
+					
+					
+					
+					
+				
+        
+					
+					
+					
+						
+					
+					
+										
+				
+    
+    
+        
+	
+    
+	
+		igblastwrp
+	
+    
+============
+iReport
+============
+
+This tool uses the online igBLAST website hosted by NCBI to blast a FASTA file; it retrieves the result and converts it into a convenient tabular format for further processing.
+
+**NOTE**
+
+.. class:: warningmark
+
+- Everything goes through the servers of NCBI, so if you have sensitive data that isn't allowed to leave your local network, this isn't the tool to use.
+
+**USAGE**
+
+.. class:: infomark
+
+- This tool uses a free service provided by NCBI, and although there don't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers.
+
+
+**INPUT**
+
+This tool accepts FASTA files as input:
+
+::
+
+		>lcl|FLN1FA002RWEZA.1| 
+		ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
+		tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
+		gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
+		cag
+		>lcl|FLN1FA001BLION.1| 
+		aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat
+		taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg
+		cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag
+
+**OUTPUT**
+
+The following data is used for ARGalaxy
+
++-----------------+----------------------------------------------+
+| Column name     | Column contents                              |
++-----------------+----------------------------------------------+
+| ID              | The Sequence ID provided by the sequencer.   |
++-----------------+----------------------------------------------+
+| VDJ Frame       | In-frame (with stop codon) / Out-of-frame    |
++-----------------+----------------------------------------------+
+| Top V Gene      | The best matching V gene found.              |
++-----------------+----------------------------------------------+
+| Top D Gene      | The best matching D gene found.              |
++-----------------+----------------------------------------------+
+| Top J Gene      | The best matching J gene found.              |
++-----------------+----------------------------------------------+
+| CDR3 Seq        | The CDR3 region.                             |
++-----------------+----------------------------------------------+
+| CDR3 Length     | The length of the CDR3 region.               |
++-----------------+----------------------------------------------+
+| CDR3 Seq DNA    | The CDR3 sequence region.                    |
++-----------------+----------------------------------------------+
+| CDR3 Length DNA | The length of the CDR3 sequence region.      |
++-----------------+----------------------------------------------+
+| Functionality   | If sequence is productive/unproductive       |
++-----------------+----------------------------------------------+
+
+
+    
+
diff -r d8d61e65dfd5 -r c7c7000de220 tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jul 30 09:31:38 2015 -0400
@@ -0,0 +1,20 @@
+
+
+    
+        
+            
+                https://github.com/mikessh/higblast/releases/download/v0.6/igblastwrapper_linux64.tar.gz
+                
+                    igblastwrapper_linux64
+                    $INSTALL_DIR/
+                
+                
+                    $INSTALL_DIR/igblastwrapper_linux64
+                
+            
+        
+        
+Downloads https://github.com/mikessh/higblast/
+        
+    
+