# HG changeset patch
# User davidvanzessen
# Date 1407935274 14400
# Node ID 38c038c17d0c0007441936ba70a940e3e4eaecad
# Parent 94fada16572480fb394a7079150202510ad8809a
Uploaded
diff -r 94fada165724 -r 38c038c17d0c baseline.xml
--- a/baseline.xml Wed Aug 13 07:32:38 2014 -0400
+++ b/baseline.xml Wed Aug 13 09:07:54 2014 -0400
@@ -11,7 +11,7 @@
${input.id}
#end for
"
- $reference $out_file
+ $reference $out_file "$selection"
@@ -50,6 +50,12 @@
+
+
+
+
+
+
diff -r 94fada165724 -r 38c038c17d0c filter.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.r Wed Aug 13 09:07:54 2014 -0400
@@ -0,0 +1,62 @@
+# Merge a summary table with a gapped-sequence table on Sequence.ID and keep
+# one row per unique combination of the selected columns.
+#
+# Usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
+#   summaryfile  tab-separated table with Sequence.ID and AA.JUNCTION columns
+#   gappedfile   tab-separated table with Sequence.ID and the V/D/J
+#                "GENE.and.allele" columns
+#   selection    comma-separated names of the columns forming the unique key
+#   output       path of the tab-separated result file
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 4) {
+  stop("usage: filter.r <summaryfile> <gappedfile> <selection> <output>",
+       call. = FALSE)
+}
+summaryfile <- args[1]
+gappedfile <- args[2]
+selection <- args[3]
+output <- args[4]
+print(paste("-----", selection, "------"))
+
+# Only the join key and the junction column are taken from the summary file.
+summarydat <- read.table(summaryfile, header = TRUE, sep = "\t", fill = TRUE,
+                         stringsAsFactors = FALSE)
+summarydat <- summarydat[, c("Sequence.ID", "AA.JUNCTION")]
+gappeddat <- read.table(gappedfile, header = TRUE, sep = "\t", fill = TRUE,
+                        stringsAsFactors = FALSE)
+print(head(summarydat))
+print(head(gappeddat))
+
+dat <- merge(gappeddat, summarydat, by = "Sequence.ID")
+# merge() moves the key column to the front. Restore the gapped file's column
+# order so that position-based consumers (wrapper.sh runs `cut -f2,4,7` on the
+# output) select the same fields they would select from the gapped file.
+gapped_cols <- intersect(names(gappeddat), names(dat))
+dat <- dat[c(gapped_cols, setdiff(names(dat), gapped_cols))]
+print(head(dat))
+
+# Drop the "Homsap " species prefix and everything from the first "*" onwards,
+# leaving the gene name without its allele suffix.
+strip_allele <- function(x) {
+  sub("[*].*", "", sub("^Homsap ", "", x))
+}
+dat$VGene <- strip_allele(dat[["V.GENE.and.allele"]])
+dat$DGene <- strip_allele(dat[["D.GENE.and.allele"]])
+dat$JGene <- strip_allele(dat[["J.GENE.and.allele"]])
+
+# Validate the requested key columns up front so that a typo yields a clear
+# message instead of "undefined columns selected".
+key_cols <- gsub("^\\s+|\\s+$", "", unlist(strsplit(selection, ",")))
+key_cols <- key_cols[nzchar(key_cols)]
+unknown <- setdiff(key_cols, names(dat))
+if (length(key_cols) == 0 || length(unknown) > 0) {
+  stop("invalid selection '", selection, "'; unknown columns: ",
+       paste(unknown, collapse = ", "), call. = FALSE)
+}
+dat$past <- do.call(paste, c(dat[key_cols], sep = ":"))
+
+# Keep only the first row seen for each distinct key.
+dat <- dat[!duplicated(dat$past), ]
+
+write.table(x = dat, file = output, sep = "\t", quote = FALSE,
+            row.names = FALSE, col.names = TRUE)
diff -r 94fada165724 -r 38c038c17d0c wrapper.sh
--- a/wrapper.sh Wed Aug 13 07:32:38 2014 -0400
+++ b/wrapper.sh Wed Aug 13 09:07:54 2014 -0400
@@ -14,6 +14,7 @@
IDs=($IDs)
ref=${10}
output=${11}
+selection=${12} # comma-separated column names, passed to filter.r as its selection argument
outID="result"
echo "testID = $testID"
@@ -42,9 +43,16 @@
id=${IDs[$count]}
echo "id=$id"
unzip $current -d $PWD/$id/ >> $PWD/unziplog.log
- filename="$PWD/gappednt_${id}.txt"
- cat $PWD/$id/*/2_* | cut -f2,4,7 > $filename
- python $dir/script_imgt.py --input $filename --ref $ref --output $fasta --id $id
+ # Stage the 1_* and 2_* tables from the unzipped archive, then de-duplicate them with filter.r.
+ summaryfile="$PWD/summary_${id}.txt"
+ gappedfile="$PWD/gappednt_${id}.txt"
+ filtered="$PWD/filtered_${id}.txt"
+ cat "$PWD/$id"/*/1_* > "$summaryfile"
+ cat "$PWD/$id"/*/2_* > "$gappedfile"
+ Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered"
+ final="$PWD/final_${id}.txt" # holds only fields 2, 4 and 7 of the filtered table
+ cut -f2,4,7 "$filtered" > "$final"
+ python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
else
python $dir/script_xlsx.py --input $current --ref $ref --output $fasta
fi