Mercurial > repos > davidvanzessen > argalaxy_tools
changeset 3:c7c7000de220 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 30 Jul 2015 09:31:38 -0400 |
parents | d8d61e65dfd5 |
children | 778c2fd5cea3 |
files | igblast/igblast.r igblast/igblast.sh igblastn.xml tool_dependencies.xml |
diffstat | 4 files changed, 189 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Source: igblast/igblast.r (added in changeset 3:c7c7000de220)
#
# Converts the tab-separated table produced by the igblast wrapper (see
# igblast.sh) into the tabular layout emitted by the igBLASTn Galaxy tool.
#
# Usage: Rscript igblast.r <infile> <outfile>
#
#   infile   tab-separated table with a header row. The columns read here are
#            "#reads_count" (seen as X.reads_count after read.table's name
#            mangling), inFrame, noStop, vSegment, dSegment, jSegment,
#            cdr1aa, cdr2aa, cdr3aa and cdr3nt.
#   outfile  path of the tab-separated table to write. Every input row is
#            repeated X.reads_count times and IDs are renumbered afterwards.

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: Rscript igblast.r <infile> <outfile>")
}

infile <- args[1]
outfile <- args[2]

# NA-safe test of a true/false flag column against "true" or "false".
# read.table may hand the column back as logical instead of character
# (for example when every field is empty it is all NA), so compare on a
# lower-cased character rendering and treat NA as "no match".
is_flag <- function(x, value) {
  hit <- tolower(as.character(x)) == value
  !is.na(hit) & hit
}

# Number of characters per element, counting a missing value as length 0
# (plain nchar() reports 2 or NA for a missing value depending on its type).
seq_length <- function(x) {
  len <- nchar(as.character(x))
  len[is.na(x)] <- 0L
  len
}

# comment.char = "" keeps the leading '#' of the "#reads_count" header from
# being treated as a comment.
blasted <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE,
                      stringsAsFactors = FALSE, comment.char = "")

# Fail early with a readable message instead of an "undefined columns" error
# further down. inFrame/noStop are deliberately not required: when absent,
# every row is simply reported as "Out-of-frame".
required <- c("X.reads_count", "vSegment", "dSegment", "jSegment",
              "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt")
missing_cols <- required[!(required %in% names(blasted))]
if (length(missing_cols) > 0) {
  stop(paste("input is missing required column(s):",
             paste(missing_cols, collapse = ", ")))
}

n_rows <- nrow(blasted)

# Frame classification; anything that is not explicitly in frame stays
# "Out-of-frame".
frame <- rep("Out-of-frame", n_rows)
in_frame <- is_flag(blasted$inFrame, "true")
frame[in_frame & is_flag(blasted$noStop, "false")] <- "In-frame with stop codon"
frame[in_frame & is_flag(blasted$noStop, "true")] <- "In-frame"

# A CDR3 counts as not found when its nucleotide sequence is empty or missing.
found_how <- rep("found", n_rows)
found_how[is.na(blasted$cdr3nt) | blasted$cdr3nt == ""] <- "NOT_FOUND"

result <- data.frame(
  "ID" = seq_len(n_rows),
  "VDJ Frame" = frame,
  "Top V Gene" = blasted$vSegment,
  "Top D Gene" = blasted$dSegment,
  "Top J Gene" = blasted$jSegment,
  "CDR1 Seq" = blasted$cdr1aa,
  "CDR1 Length" = seq_length(blasted$cdr1aa),
  "CDR2 Seq" = blasted$cdr2aa,
  "CDR2 Length" = seq_length(blasted$cdr2aa),
  "CDR3 Seq" = blasted$cdr3aa,
  "CDR3 Length" = seq_length(blasted$cdr3aa),
  "CDR3 Seq DNA" = blasted$cdr3nt,
  "CDR3 Length DNA" = seq_length(blasted$cdr3nt),
  "Strand" = rep("+/-", n_rows),
  "CDR3 Found How" = found_how,
  "AA JUNCTION" = blasted$cdr3aa,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Duplicate rows based on the read count, then renumber the IDs so they are
# unique across the expanded table.
result <- result[rep(seq_len(n_rows), blasted$X.reads_count), , drop = FALSE]
result$ID <- seq_len(nrow(result))

write.table(result, outfile, quote = FALSE, sep = "\t",
            row.names = FALSE, col.names = TRUE)
#!/bin/bash
# Source: igblast/igblast.sh (added in changeset 3:c7c7000de220)
#
# Runs the igblast wrapper jar on a FASTA file and converts its result table
# with igblast.r.
#
# Usage: igblast.sh <input.fasta> <species> <locus> <output>
#
#   input    FASTA file to annotate
#   species  value passed to the wrapper's -S option
#   locus    value passed to the wrapper's -R option
#   output   path of the tabular file written by igblast.r

set -e

if [ "$#" -lt 4 ]; then
	echo "usage: $0 <input.fasta> <species> <locus> <output>" >&2
	exit 1
fi

# Directory holding this script; igblast.r is expected to sit next to it.
dir="$(cd "$(dirname "$0")" && pwd)"

input="$1"
species="$2"
locus="$3"
output="$4"

echo "$input $species $locus $output"

# NOTE(review): `java -jar` resolves igblastwrp.jar relative to the current
# working directory, not via PATH. tool_dependencies.xml only prepends the
# install directory to PATH - confirm the jar is actually reachable from the
# job's working directory.
java -jar igblastwrp.jar -cf -S "$species" -R "$locus" "$input" "$PWD/blasted_output"

# The R script reads the wrapper's "<prefix>.L2.txt" table.
Rscript --verbose "$dir/igblast.r" "$PWD/blasted_output.L2.txt" "$output" 2>&1
<!-- Source: igblastn.xml (added in changeset 3:c7c7000de220) -->
<!-- Galaxy tool definition: passes a FASTA dataset plus the selected species
     and locus to igblast/igblast.sh and collects one tabular output. -->
<tool id="igblastn" name="igBLASTn" version="0.1.0">
    <description> </description>
    <command interpreter="bash">
        igblast/igblast.sh "$input" "$species" "$locus" "$output"
    </command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Fasta file"/>
        <param name="species" type="select" label="Species">
            <option value="human">Homo sapiens</option>
            <option value="mouse">Mus musculus</option>
            <option value="rat">Rattus norvegicus</option>
            <option value="rabbit">Oryctolagus cuniculus</option>
            <option value="rhesus_monkey">Macaca mulatta</option>
        </param>
        <param name="locus" type="select" label="Locus">
            <option value="TRA">TRA</option>
            <option value="TRB">TRB</option>
            <option value="TRG">TRG</option>
            <option value="TRD">TRD</option>
            <option value="IGH">IGH</option>
            <option value="IGK">IGK</option>
            <option value="IGL">IGL</option>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="tabular" type="data" label="${input.name}-igBLASTn aligned"/>
        <!--<data name="log" format="text" label="log"/>-->
    </outputs>
    <requirements>
        <requirement type="package" version="0.6">igblastwrp</requirement>
    </requirements>
    <!-- NOTE(review): the help below describes an online NCBI service, but
         igblast/igblast.sh runs a local `java -jar igblastwrp.jar`; confirm
         whether the NCBI warnings still apply. The OUTPUT table also lists a
         "Functionality" column that igblast/igblast.r does not write, and it
         omits the CDR1/CDR2, Strand, CDR3 Found How and AA JUNCTION columns
         that the script does write. -->
    <help>
============
igBLASTn
============

This tool uses the online igBLAST website hosted by NCBI to blast a FASTA file; it retrieves the result and generates a convenient tabular format for further processing.

**NOTE**

.. class:: warningmark

- Everything goes through the servers of NCBI, so if you have sensitive data that isn't allowed to leave your local network, this isn't the tool to use.

**USAGE**

.. class:: infomark

- This tool uses a free service provided by NCBI, and although there don't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers.


**INPUT**

This tool accepts FASTA files as input:

::

    >lcl|FLN1FA002RWEZA.1|
    ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc
    tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc
    gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct
    cag
    >lcl|FLN1FA001BLION.1|
    aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat
    taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg
    cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag

**OUTPUT**

The following data is used for ARGalaxy

+-----------------+----------------------------------------------+
| Column name     | Column contents                              |
+-----------------+----------------------------------------------+
| ID              | The Sequence ID provided by the sequencer.   |
+-----------------+----------------------------------------------+
| VDJ Frame       | In-frame/Out-frame                           |
+-----------------+----------------------------------------------+
| Top V Gene      | The best matching V gene found.              |
+-----------------+----------------------------------------------+
| Top D Gene      | The best matching D gene found.              |
+-----------------+----------------------------------------------+
| Top J Gene      | The best matching J gene found.              |
+-----------------+----------------------------------------------+
| CDR3 Seq        | The CDR3 region.                             |
+-----------------+----------------------------------------------+
| CDR3 Length     | The length of the CDR3 region.               |
+-----------------+----------------------------------------------+
| CDR3 Seq DNA    | The CDR3 sequence region.                    |
+-----------------+----------------------------------------------+
| CDR3 Length DNA | The length of the CDR3 sequence region.      |
+-----------------+----------------------------------------------+
| Functionality   | If sequence is productive/unproductive       |
+-----------------+----------------------------------------------+



    </help>
</tool>
<?xml version="1.0"?>
<!-- Source: tool_dependencies.xml (added in changeset 3:c7c7000de220) -->
<!-- Declares the "igblastwrp" 0.6 package that igblastn.xml lists in its
     <requirements> section. -->
<tool_dependency>
    <package name="igblastwrp" version="0.6">
        <install version="1.0">
            <actions>
                <!-- Fetch the v0.6 release archive of the wrapper. -->
                <action type="download_by_url">https://github.com/mikessh/higblast/releases/download/v0.6/igblastwrapper_linux64.tar.gz</action>
                <!-- Place the igblastwrapper_linux64 entry under the install directory. -->
                <action type="move_file">
                    <source>igblastwrapper_linux64</source>
                    <destination>$INSTALL_DIR/</destination>
                </action>
                <!-- NOTE(review): only PATH is extended here, while
                     igblast/igblast.sh runs `java -jar igblastwrp.jar` with a
                     relative jar path; confirm the jar is found at run time. -->
                <action type="set_environment">
                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/igblastwrapper_linux64</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
Downloads https://github.com/mikessh/higblast/
        </readme>
    </package>
</tool_dependency>