Mercurial > repos > davidvanzessen > mutation_analysis
changeset 76:becea91089ed draft
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Mon, 09 May 2016 03:45:39 -0400 | 
| parents | 14749ced7ff2 | 
| children | c5c86d15cb94 | 
| files | merge_and_filter.r mutation_analysis.py mutation_analysis.r sequence_overview.r wrapper.sh | 
| diffstat | 3 files changed, 103 insertions(+), 1 deletions(-) [+] | 
line wrap: on
 line diff
--- a/mutation_analysis.py Wed May 04 08:44:16 2016 -0400 +++ b/mutation_analysis.py Mon May 09 03:45:39 2016 -0400 @@ -57,7 +57,6 @@ linesplt = line.split("\t") ID = linesplt[IDIndex] genedic[ID] = linesplt[best_matchIndex] - print line try: mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sequence_overview.r Mon May 09 03:45:39 2016 -0400 @@ -0,0 +1,99 @@ +library(reshape2) + +args <- commandArgs(trailingOnly = TRUE) + +gene.matches = args[1] +sequence.file = args[2] +outputdir = args[3] +main.html = "index.html" + +setwd(outputdir) + +genes = read.table(gene.matches, header=T, sep="\t", fill=T) +sequences = read.table(sequence.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") + +dat = merge(sequences, genes, by="Sequence.ID") + +dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT) + +IDs = dat[,c("Sequence.ID", "seq_conc", "best_match")] +IDs$best_match = as.character(IDs$best_match) + +#dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) + +dat = data.frame(table(dat$best_match, dat$seq_conc)) + +dat = dat[dat$Freq > 1,] + +names(dat) = c("best_match", "seq_conc", "Freq") + +dat$seq_conc = factor(dat$seq_conc) + +dat = dat[order(nchar(as.character(dat$seq_conc))),] + +#writing html from R... +td = function(val) { paste("<td>", val, "</td>", sep="") } +tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } +make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } +tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } + +cat("<table border='1'>", file=main.html, append=F) +cat("<tr><th>Sequence</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T) + +for(i in 1:nrow(dat)){ + ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "ca1",] + ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "ca2",] + + cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg1",] + cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg2",] + cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg3",] + cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg4",] + + cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cm",] + + id = as.numeric(dat[i,"seq_conc"]) + + if(nrow(ca1) > 0){ + cat(tbl(ca1), file=paste("ca1_", id, ".html", sep="")) + } + + if(nrow(ca2) > 0){ + cat(tbl(ca2), file=paste("ca2_", id, ".html", sep="")) + } + + if(nrow(cg1) > 0){ + cat(tbl(cg1), file=paste("cg1_", id, ".html", sep="")) + } + + if(nrow(cg2) > 0){ + cat(tbl(cg2), file=paste("cg2_", id, ".html", sep="")) + } + + if(nrow(cg3) > 0){ + cat(tbl(cg3), file=paste("cg3_", id, ".html", sep="")) + } + + if(nrow(cg4) > 0){ + cat(tbl(cg4), file=paste("cg4_", id, ".html", sep="")) + } + + if(nrow(cm) > 0){ + cat(tbl(cm), file=paste("cm_", id, ".html", sep="")) + } + + ca1.html = make.link(id, "ca1", nrow(ca1)) + ca2.html = make.link(id, "ca2", nrow(ca2)) + + cg1.html = make.link(id, "cg1", nrow(cg1)) + cg2.html = make.link(id, "cg2", nrow(cg2)) + cg3.html = make.link(id, "cg3", nrow(cg3)) + cg4.html = make.link(id, "cg4", nrow(cg4)) + + cm.html = make.link(id, "cm", nrow(cm)) + + rw = c(as.character(dat[i,"seq_conc"]), ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html) + + cat(tr(rw), file=main.html, append=T) +} + +cat("</table>", file=main.html, append=T)
--- a/wrapper.sh Wed May 04 08:44:16 2016 -0400 +++ b/wrapper.sh Mon May 09 03:45:39 2016 -0400 @@ -229,7 +229,11 @@ Rscript $dir/naive_output.r $naive_output_ca $outdir/merged.txt $naive_output_ca $naive_output_cg $naive_output_cm 2>&1 fi +echo "---------------- sequence_overview.r ----------------" +mkdir $outdir/sequence_overview + +Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/sequence_overview 2>&1
