Mercurial > repos > davidvanzessen > report_clonality_igg
changeset 27:f919965e8816 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 06 Mar 2015 09:59:53 -0500 |
parents | 01f05258f672 |
children | 4f85b8ed0b2c |
files | RScript.r r_wrapper.sh report_clonality_igg.xml |
diffstat | 3 files changed, 145 insertions(+), 105 deletions(-) [+] |
line wrap: on
line diff
--- a/RScript.r Mon Feb 09 08:53:30 2015 -0500 +++ b/RScript.r Fri Mar 06 09:59:53 2015 -0500 @@ -23,6 +23,11 @@ } library(reshape2) +if (!("lymphclon" %in% rownames(installed.packages()))) { + install.packages("lymphclon", repos="http://cran.xl-mirror.nl/") +} +library(lymphclon) + # ---------------------- parameters ---------------------- args <- commandArgs(trailingOnly = TRUE) @@ -35,6 +40,7 @@ species = args[5] #human or mouse locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) +clonality_method = args[8] # ---------------------- Data preperation ---------------------- @@ -217,7 +223,6 @@ useD = FALSE cat("No D Genes in this species/locus") } - print(paste("useD:", useD)) # ---------------------- merge with the frequency count ---------------------- @@ -453,84 +458,96 @@ if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available { - write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) - - ClonalitySampleReplicatePrint <- function(dat){ - write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) - } - - clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) - #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) - - ClonalitySamplePrint <- function(dat){ - write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) - } - - clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) - #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) - - clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) - clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) - clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count - clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") - - ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') - tcct = textConnection(ct) - CT = read.table(tcct, sep="\t", header=TRUE) - close(tcct) - clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) - clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight - - ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")]) - ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) - clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) - ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads - - ReplicatePrint <- function(dat){ - write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) + if(clonality_method == "boyd"){ + samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) + + for (sample in samples){ + res = data.frame(paste=character(0)) + sample_id = unique(sample$Sample)[[1]] + for(replicate in unique(sample$Replicate)){ + tmp = sample[sample$Replicate == replicate,] + clone_table = data.frame(table(tmp$clonaltype)) + clone_col_name = paste("V", replicate, sep="") + colnames(clone_table) = c("paste", clone_col_name) + res = merge(res, clone_table, by="paste", all=T) + } + + res[is.na(res)] = 0 + infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) + + write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) + + res$type = rowSums(res[,2:ncol(res)]) + + coincidence.table = data.frame(table(res$type)) + colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") + write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) + } + } else { + write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) + + clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) + clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) + clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count + clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) + clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") + + ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') + tcct = textConnection(ct) + CT = read.table(tcct, sep="\t", header=TRUE) + close(tcct) + clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) + clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight + + ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) + ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) + clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) + ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads + + ReplicatePrint <- function(dat){ + write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) + } + + ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) + lapply(ReplicateSplit, FUN=ReplicatePrint) + + ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) + clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) + + ReplicateSumPrint <- function(dat){ + write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) + } + + ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) + lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) + + clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) + clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) + clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow + clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) + clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) + + ClonalityScorePrint <- function(dat){ + write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) + } + + clonalityScore = clonalFreqCount[c("Sample", "Result")] + clonalityScore = unique(clonalityScore) + + clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) + lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) + + clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] + + + + ClonalityOverviewPrint <- function(dat){ + write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) + } + + clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) + lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) } - - ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) - lapply(ReplicateSplit, FUN=ReplicatePrint) - - ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) - - - ReplicateSumPrint <- function(dat){ - write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) - lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) - - clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) - clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow - clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) - clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) - - ClonalityScorePrint <- function(dat){ - write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - clonalityScore = clonalFreqCount[c("Sample", "Result")] - clonalityScore = unique(clonalityScore) - - clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) - lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) - - clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] - - - - ClonalityOverviewPrint <- function(dat){ - write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) - lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) } imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
--- a/r_wrapper.sh Mon Feb 09 08:53:30 2015 -0500 +++ b/r_wrapper.sh Fri Mar 06 09:59:53 2015 -0500 @@ -7,6 +7,7 @@ species=$5 locus=$6 filterproductive=$7 +clonality_method=$8 dir="$(cd "$(dirname "$0")" && pwd)" useD="false" if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then @@ -22,9 +23,10 @@ useD="false" fi fi +useD="false" mkdir $3 cp $dir/genes.txt $outputDir -Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive 2>&1 +Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1 cp $dir/tabber.js $outputDir cp $dir/style.css $outputDir cp $dir/script.js $outputDir @@ -89,34 +91,49 @@ hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)" echo "$hasReplicateColumn" #if its a 'new' merged file with replicate info -if [[ "$hasReplicateColumn" == "Yes" ]] ; then +if [[ "$hasReplicateColumn" == "Yes" && "$clonalType" != "none" ]] ; then echo "<div class='tabbertab' title='Clonality'><div class='tabber'>" >> $outputFile for sample in $samples; do - clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)" - echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile - echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile + echo "${clonality_method}" + if [[ "${clonality_method}" == "old" ]] ; then + echo "in old" + clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)" + echo "<div class='tabbertab' title='$sample'><table border='1'>" >> $outputFile + echo "<tr><td colspan='4'>Clonality Score: $clonalityScore</td></tr>" >> $outputFile - #replicate,reads,squared - echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile - while IFS=, read replicate reads squared - do + #replicate,reads,squared + echo "<tr><td>Replicate ID</td><td>Number of Reads</td><td>Reads Squared</td><td></td></tr>" >> $outputFile + while IFS=, read replicate reads squared + do + + echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile + done < $outputDir/ReplicateReads_$sample.csv - echo "<tr><td>$replicate</td><td>$reads</td><td>$squared</td><td></td></tr>" >> $outputFile - done < $outputDir/ReplicateReads_$sample.csv - - #sum of reads and reads squared - while IFS=, read readsSum squaredSum + #sum of reads and reads squared + while IFS=, read readsSum squaredSum + do + echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile + done < $outputDir/ReplicateSumReads_$sample.csv + + #overview + echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile + while IFS=, read type count weight weightedCount do - echo "<tr><td>Sum</td><td>$readsSum</td><td>$squaredSum</td></tr>" >> $outputFile - done < $outputDir/ReplicateSumReads_$sample.csv - - #overview - echo "<tr><td>Coincidence Type</td><td>Raw Coincidence Freq</td><td>Coincidence Weight</td><td>Coincidences, Weighted</td></tr>" >> $outputFile - while IFS=, read type count weight weightedCount - do - echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile - done < $outputDir/ClonalityOverView_$sample.csv - echo "</table></div>" >> $outputFile + echo "<tr><td>$type</td><td>$count</td><td>$weight</td><td>$weightedCount</td></tr>" >> $outputFile + done < $outputDir/ClonalityOverView_$sample.csv + echo "</table></div>" >> $outputFile + else + echo "in new" + clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.csv)" + echo "<div class='tabbertab' title='$sample'>" >> $outputFile + echo "Lymphclon clonality score: <br />$clonalityScore<br /><br />" >> $outputFile + echo "<table border = 1>" >> $outputFile + while IFS=, read type count + do + echo "<tr><td>$type</td><td>$count</td></tr>" >> $outputFile + done < $outputDir/lymphclon_coincidences_$sample.csv + echo "</table></div>" >> $outputFile + fi done echo "</div></div>" >> $outputFile fi
--- a/report_clonality_igg.xml Mon Feb 09 08:53:30 2015 -0500 +++ b/report_clonality_igg.xml Fri Mar 06 09:59:53 2015 -0500 @@ -2,14 +2,14 @@ <description> </description> <command interpreter="bash"> #if $gene_selection.source == "imgtdb" - r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive + r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method #else - r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive + r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method #end if </command> <inputs> <param name="in_file" format="tabular" type="data" label="Data to Process" /> - <param name="clonaltype" type="select" label="Clonal Type Definition"> + <param name="clonaltype" type="select" label="Clonal Type Definition (Needed for clonality calculation)"> <option value="none">Dont remove duplicates based on clonaltype</option> <option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3.Seq</option> <option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3.Seq.DNA</option> @@ -89,6 +89,12 @@ <option value="yes">Yes</option> <option value="no">No</option> </param> + + <param name="clonality_method" type="select" label="Old clonality algorithm or the newer R package"> + <option value="old">Old</option> + <option value="boyd">R Package</option> + </param> + </inputs> <outputs> <data format="html" name="out_file" />