Mercurial > repos > davidvanzessen > baseline_bayesian_estimation
changeset 5:d8de51314d3f draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 14 Aug 2014 07:17:26 -0400 |
parents | 1726dc8f3e0a |
children | 8a4183eed741 |
files | Baseline_Main.r baseline.xml filter.r script_imgt.py wrapper.sh |
diffstat | 4 files changed, 23 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/Baseline_Main.r Wed Aug 13 09:09:56 2014 -0400 +++ b/Baseline_Main.r Thu Aug 14 07:17:26 2014 -0400 @@ -24,7 +24,6 @@ library("multicore") } - # Load functions and initialize global variables source("Baseline_Functions.r")
--- a/filter.r Wed Aug 13 09:09:56 2014 -0400 +++ b/filter.r Thu Aug 14 07:17:26 2014 -0400 @@ -1,17 +1,21 @@ -arg = commandArgs(TRUE) +arg = commandArgs(TRUE) summaryfile = arg[1] gappedfile = arg[2] selection = arg[3] output = arg[4] -print(paste("-----", selection, "------")) +print(paste("selection = ", selection)) -summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)[,c("Sequence.ID", "AA.JUNCTION")] + +summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) -head(summarydat) -head(gappeddat) + + -dat = merge(gappeddat, summarydat, by="Sequence.ID") -head(dat) +#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) + +dat = cbind(gappeddat, summarydat$AA.JUNCTION) + +colnames(dat)[length(dat)] = "AA.JUNCTION" dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) dat$VGene = gsub("[*].*", "", dat$VGene) @@ -22,6 +26,8 @@ dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) dat$JGene = gsub("[*].*", "", dat$JGene) +print(str(dat)) + dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) dat = dat[!duplicated(dat$past), ]
--- a/script_imgt.py Wed Aug 13 09:09:56 2014 -0400 +++ b/script_imgt.py Thu Aug 14 07:17:26 2014 -0400 @@ -14,7 +14,7 @@ with open(args.ref, 'r') as ref: currentSeq = "" currentId = "" - for line in ref.readlines(): + for line in ref: if line[0] is ">": if currentSeq is not "" and currentId is not "": refdic[currentId[1:]] = currentSeq @@ -48,11 +48,15 @@ currentSeq = "" currentId = "" +first=True with open(args.input, 'r') as i: with open(args.output, 'a') as o: o.write(">>>" + args.id + "\n") outputdic = dict() - for line in i.readlines()[1:]: + for line in i: + if first: + first = False + continue linesplt = line.split("\t") ref = filterGene(linesplt[1], vPattern) if not ref or not linesplt[2].rstrip():
--- a/wrapper.sh Wed Aug 13 09:09:56 2014 -0400 +++ b/wrapper.sh Thu Aug 14 07:17:26 2014 -0400 @@ -1,3 +1,4 @@ + #!/bin/bash dir="$(cd "$(dirname "$0")" && pwd)" @@ -48,7 +49,7 @@ filtered="$PWD/filtered_${id}.txt" cat $PWD/$id/*/1_* > $summaryfile cat $PWD/$id/*/2_* > $gappedfile - Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered + Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered 2>&1 final="$PWD/final_${id}.txt" cat $filtered | cut -f2,4,7 > $final @@ -64,9 +65,9 @@ echo "$workdir/${outID}.txt" -rows=`tail -n +2 $workdir/${outID}.txt | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '` +rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '` rows=($rows) -unset rows[${#rows[@]}-1] +#unset rows[${#rows[@]}-1] cd $dir Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1