changeset 5:d8de51314d3f draft

Uploaded
author davidvanzessen
date Thu, 14 Aug 2014 07:17:26 -0400
parents 1726dc8f3e0a
children 8a4183eed741
files Baseline_Main.r baseline.xml filter.r script_imgt.py wrapper.sh
diffstat 4 files changed, 23 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/Baseline_Main.r	Wed Aug 13 09:09:56 2014 -0400
+++ b/Baseline_Main.r	Thu Aug 14 07:17:26 2014 -0400
@@ -24,7 +24,6 @@
   library("multicore")
 }
 
-
 # Load functions and initialize global variables
 source("Baseline_Functions.r")
 
--- a/filter.r	Wed Aug 13 09:09:56 2014 -0400
+++ b/filter.r	Thu Aug 14 07:17:26 2014 -0400
@@ -1,17 +1,21 @@
-arg = commandArgs(TRUE)                       
+arg = commandArgs(TRUE)
 summaryfile = arg[1]
 gappedfile = arg[2]
 selection = arg[3]
 output = arg[4]
-print(paste("-----", selection, "------"))
+print(paste("selection = ", selection))
 
-summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)[,c("Sequence.ID", "AA.JUNCTION")]
+
+summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
-head(summarydat)
-head(gappeddat)
+
+
 
-dat = merge(gappeddat, summarydat, by="Sequence.ID")
-head(dat)
+#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
+
+dat = cbind(gappeddat, summarydat$AA.JUNCTION)
+
+colnames(dat)[length(dat)] = "AA.JUNCTION"
 
 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
 dat$VGene = gsub("[*].*", "", dat$VGene)
@@ -22,6 +26,8 @@
 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
 dat$JGene = gsub("[*].*", "", dat$JGene)
 
+print(str(dat))
+
 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
 
 dat = dat[!duplicated(dat$past), ]
--- a/script_imgt.py	Wed Aug 13 09:09:56 2014 -0400
+++ b/script_imgt.py	Thu Aug 14 07:17:26 2014 -0400
@@ -14,7 +14,7 @@
 with open(args.ref, 'r') as ref:
 	currentSeq = ""
 	currentId = ""
-	for line in ref.readlines():
+	for line in ref:
 		if line[0] is ">":
 			if currentSeq is not "" and currentId is not "":
 				refdic[currentId[1:]] = currentSeq
@@ -48,11 +48,15 @@
 
 currentSeq = ""
 currentId = ""
+first=True
 with open(args.input, 'r') as i:
 	with open(args.output, 'a') as o:
 		o.write(">>>" + args.id + "\n")
 		outputdic = dict()
-		for line in i.readlines()[1:]:
+		for line in i:
+			if first:
+				first = False
+				continue
 			linesplt = line.split("\t")
 			ref = filterGene(linesplt[1], vPattern)
 			if not ref or not linesplt[2].rstrip():
--- a/wrapper.sh	Wed Aug 13 09:09:56 2014 -0400
+++ b/wrapper.sh	Thu Aug 14 07:17:26 2014 -0400
@@ -1,3 +1,4 @@
+
 #!/bin/bash
 dir="$(cd "$(dirname "$0")" && pwd)"
 
@@ -48,7 +49,7 @@
 		filtered="$PWD/filtered_${id}.txt"
 		cat $PWD/$id/*/1_* > $summaryfile
 		cat $PWD/$id/*/2_* > $gappedfile
-		Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered
+		Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered 2>&1
 		
 		final="$PWD/final_${id}.txt"
 		cat $filtered | cut -f2,4,7 > $final
@@ -64,9 +65,9 @@
 
 echo "$workdir/${outID}.txt"
 
-rows=`tail -n +2 $workdir/${outID}.txt | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
+rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
 rows=($rows)
-unset rows[${#rows[@]}-1]
+#unset rows[${#rows[@]}-1]
 
 cd $dir
 Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1