Mercurial > repos > davidvanzessen > baseline_bayesian_estimation
changeset 3:38c038c17d0c draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 13 Aug 2014 09:07:54 -0400 |
parents | 94fada165724 |
children | 1726dc8f3e0a |
files | baseline.xml filter.r wrapper.sh |
diffstat | 3 files changed, 47 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/baseline.xml Wed Aug 13 07:32:38 2014 -0400 +++ b/baseline.xml Wed Aug 13 09:07:54 2014 -0400 @@ -11,7 +11,7 @@ ${input.id} #end for " - $reference $out_file + $reference $out_file "$selection" </command> <inputs> <repeat name="inputs" title="inputs" min="1" default="1"> @@ -50,6 +50,12 @@ <option value="1:26:38:55:65:104:-">IMGT® No CDR3</option> <option value="1:26:38:55:65:104:116">IMGT®</option> </param> + <param name="selection" type="select" label="Unique Selection Definition"> + <option value="VGene,AA.JUNCTION">VGene, AA CDR3</option> + <option value="VGene,JGene,AA.JUNCTION">VGene, JGene, AA CDR3</option> + <option value="VGene,DGene,JGene,CDR3.Seq">VGene, DGene, JGene, AA CDR3</option> + <option value="Sequence.ID">None</option> + </param> </inputs> <outputs> <data format="pdf" name="out_file" label = "Baseline on ${on_string}"/>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter.r Wed Aug 13 09:07:54 2014 -0400 @@ -0,0 +1,29 @@
#
# filter.r -- keep one row per unique "selection" key.
#
# Usage:
#   Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#
#   summaryfile  tab-separated table with a header; must provide the columns
#                Sequence.ID and AA.JUNCTION (names as produced by read.table's
#                header mangling).
#   gappedfile   tab-separated table with a header; must provide Sequence.ID
#                and the V/D/J ".GENE.and.allele" columns.
#   selection    comma-separated list of column names whose combined value
#                defines uniqueness (e.g. "VGene,AA.JUNCTION").
#   output       path of the tab-separated file to write.
#
# The two inputs are joined on Sequence.ID, the derived columns VGene, DGene
# and JGene are added, and only the first row for every distinct combination
# of the selected columns is written out. The helper column "past" (the
# ":"-joined key) is kept in the output.
#
# NOTE(review): merge() places the key column (Sequence.ID) first, so the
# column order of the output is not the column order of the gapped input.
# wrapper.sh slices the output with `cut -f2,4,7`; confirm those positions
# still select the intended columns.

# Read a headered, tab-separated file. Quote and comment handling are disabled
# so that a "'", '"' or "#" inside a field cannot swallow or truncate rows
# (with fill = TRUE such damage would otherwise go unnoticed).
read_tsv <- function(path) {
  read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    quote = "",
    comment.char = ""
  )
}

# Stop with a readable message when `dat` lacks any of the `needed` columns.
require_columns <- function(dat, needed, what) {
  absent <- setdiff(needed, names(dat))
  if (length(absent) > 0L) {
    stop(
      what, " is missing column(s): ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(TRUE)
}

# Reduce a "gene and allele" value to the bare gene name: drop a leading
# "Homsap " and everything from the first "*" onwards.
strip_allele <- function(x) {
  gsub("[*].*", "", gsub("^Homsap ", "", x))
}

# Run the filter.
#   arg: character vector c(summaryfile, gappedfile, selection, output).
# Returns the filtered data frame invisibly after writing it to `output`.
main <- function(arg) {
  if (length(arg) < 4L) {
    stop(
      "usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>",
      call. = FALSE
    )
  }
  summaryfile <- arg[1]
  gappedfile <- arg[2]
  selection <- arg[3]
  output <- arg[4]
  print(paste("-----", selection, "------"))

  summarydat <- read_tsv(summaryfile)
  require_columns(summarydat, c("Sequence.ID", "AA.JUNCTION"), "summary file")
  summarydat <- summarydat[, c("Sequence.ID", "AA.JUNCTION")]

  gappeddat <- read_tsv(gappedfile)
  require_columns(
    gappeddat,
    c("Sequence.ID", "V.GENE.and.allele", "D.GENE.and.allele",
      "J.GENE.and.allele"),
    "gapped file"
  )

  # Diagnostic previews for the tool log.
  print(head(summarydat))
  print(head(gappeddat))

  dat <- merge(gappeddat, summarydat, by = "Sequence.ID")
  print(head(dat))

  dat$VGene <- strip_allele(dat$V.GENE.and.allele)
  dat$DGene <- strip_allele(dat$D.GENE.and.allele)
  dat$JGene <- strip_allele(dat$J.GENE.and.allele)

  # Validate the requested key columns up front; otherwise an unknown name
  # only surfaces as "undefined columns selected".
  keys <- unlist(strsplit(selection, ","))
  if (length(keys) == 0L) {
    stop("selection must name at least one column", call. = FALSE)
  }
  require_columns(dat, keys, "merged data (requested by selection)")

  dat$past <- do.call(paste, c(dat[keys], sep = ":"))
  dat <- dat[!duplicated(dat$past), ]

  write.table(
    x = dat,
    file = output,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )
  invisible(dat)
}

# Execute only when run as a script (e.g. via Rscript), not when source()d.
if (sys.nframe() == 0L) {
  main(commandArgs(trailingOnly = TRUE))
}
--- a/wrapper.sh Wed Aug 13 07:32:38 2014 -0400 +++ b/wrapper.sh Wed Aug 13 09:07:54 2014 -0400 @@ -14,6 +14,7 @@ IDs=($IDs) ref=${10} output=${11} +selection=${12} outID="result" echo "testID = $testID" @@ -42,9 +43,16 @@ id=${IDs[$count]} echo "id=$id" unzip $current -d $PWD/$id/ >> $PWD/unziplog.log - filename="$PWD/gappednt_${id}.txt" - cat $PWD/$id/*/2_* | cut -f2,4,7 > $filename - python $dir/script_imgt.py --input $filename --ref $ref --output $fasta --id $id + summaryfile="$PWD/summary_${id}.txt" + gappedfile="$PWD/gappednt_${id}.txt" + filtered="$PWD/filtered_${id}.txt" + cat $PWD/$id/*/1_* > $summaryfile + cat $PWD/$id/*/2_* > $gappedfile + Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered + + final="$PWD/final_${id}.txt" + cat $filtered | cut -f2,4,7 > $final + python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id else python $dir/script_xlsx.py --input $current --ref $ref --output $fasta fi