# HG changeset patch
# User stef
# Date 1410426671 14400
# Node ID 9285f791716dadd55e22e2781cec5fb27bc22598
# Parent 1b81dd6b68b55dc1c76421caabbd2bdcfb31b613
Uploaded
diff -r 1b81dd6b68b5 -r 9285f791716d README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,30 @@
+Falco Amplicon Analysis Pipeline & Galaxy wrapper
+========================================
+
+Text
+
+
+History
+=======
+
+============== ========================================================================
+Date Changes
+-------------- ------------------------------------------------------------------------
+September 2014 * first release in Tool Shed
+July 2014 * first release in Test Tool Shed
+============== ========================================================================
+
+Tool Versioning
+===============
+
+Tools will have versions of the form X.Y.Z. Versions
+differing only after the second decimal should be completely
+compatible with each other. Breaking changes should result in an
+increment of the number before and/or after the first decimal. All
+tools of version less than 1.0.0 should be considered beta.
+
+
+Bug Reports & other questions
+=============================
+
+Issues can be reported via the contact form at
diff -r 1b81dd6b68b5 -r 9285f791716d falco.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,138 @@
+#!/bin/bash
+TOOLDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+echo "Start falco sh"
+
+## ----------
+## Variables setup
+## ----------
+#bam_file=$1
+#bam_name=$2
+#html_out=$3
+#vcf_out=$4
+#out_path=$5
+source $1
+
+bam_base=`echo $bam_name | sed 's#.bam$##' - `
+
+## ----------
+## Create extra init script
+## ----------
+#if [ $design_file != 'None' && $design_file != '' ]; then BED_PATH=$design_file; fi
+#if [ $manifest_file != 'None' && $manifest_file != '' ]; then MAN_PATH=$manifest_file; fi
+if [ $design_file != 'None' ] # Galaxy default is "None"
+then
+ BED_PATH=$design_file
+else
+ BED_PATH="$TOOLDIR/tool-data/GALAXY_TSACP.bed"
+fi
+
+if [ $manifest_file != 'None' ] # Galaxy default is "None"
+then
+ MAN_PATH=$manifest_file
+else
+ MAN_PATH="$TOOLDIR/tool-data/GALAXY_TruSeq_Amplicon_Cancer_Panel_Manifest_AFP1_PN15032433.txt"
+fi
+
+echo "BED:"$design_file
+echo "MAN:"$manifest_file
+echo "BED chosen:"$BED_PATH
+echo "MAN chosen:"$MAN_PATH
+
+init_file="galaxy_init.sh"
+echo '#!/bin/bash' > $init_file
+
+## executables
+#echo "snpEff=/ccagc/lib/snpEff/snpEff_v3_4" >> $init_file
+echo "snpEff=$SNPEFF_JAR_PATH/" >> $init_file
+#echo "snpSift=/ccagc/lib/SnpSift" >> $init_file
+echo "snpSift=$SNPEFF_JAR_PATH/" >> $init_file
+#echo "varscan=/ccagc/lib/VarScan/VarScan.v2.2.11.jar" >> $init_file
+echo "varscan=$JAVA_JAR_PATH/VarScan.v2.3.5.jar" >> $init_file
+
+## files
+echo "targetBed=$BED_PATH" >> $init_file
+echo "manifest=$MAN_PATH" >> $init_file
+
+## ----------
+## Status / debug
+## ----------
+echo "BAM: "$bam_file
+echo "OUT: "$bam_name
+echo "BAMBASE: "$bam_base
+echo "OUTPATH: "$out_path
+echo "HTMLOUT: "$html_out
+echo "VCFOUT : "$vcf_out
+echo "INIT : "$init_file
+
+## ----------
+## create output files dir
+## ----------
+mkdir $out_path
+mkdir $out_path/$bam_base
+
+## ----------
+## running analysis
+## ----------
+echo "Starting VcAn"
+$TOOLDIR/falco_new/amplicon-VcAn-TSACP.sh $bam_file $bam_base
+#$TOOLDIR/falco/amplicon-AlVcAn-TSACP.sh ## this contains more
+
+echo "Starting amplicon-Report"
+$TOOLDIR/falco_new/amplicon-Report.sh $bam_base
+
+echo "Starting QC"
+$TOOLDIR/falco_new/amplicon-QC.sh $bam_file
+
+#echo "Starting plots.R"
+#$TOOLDIR/falco/misc/plots.R $bam_base.qc.ann.txt $bam_base.qc.targets.txt $bam_base.pdf
+
+echo "Starting mkHtmlReport"
+$TOOLDIR/falco_new/misc/mkHtmlReport.pl ./ ./
+
+
+
+## ----------
+## create index html for main galaxy output
+## ----------
+echo "" >> $html_out
+echo "" >> $html_out
+echo "
" >> $html_out
+echo "" >> $html_out
+echo "" >> $html_out
+echo "" >> $html_out
+echo " FALCO
" >> $html_out
+echo " HTML" >> $html_out
+echo " " >> $html_out
+#for file in *.vcf *.txt *.tsv
+for file in *
+do
+ lineCount=`wc -l $file | cut -f 1 -d " "`
+ echo " $file has $lineCount lines |
" >> $html_out
+ #echo " HEAD: " `head $file` " |
" >> $html_out
+done
+echo "
" >> $html_out
+echo "" >> $html_out
+echo "" >> $html_out
+
+## ----------
+## creating galaxy history outputs
+## ----------
+#cp 'index.html' $html_out # this is the overview of samples html
+cp $bam_base'.html' $out_path/'out.html' # this is the sample html
+cp $bam_base'.vcf' $vcf_out
+
+## ----------
+## copy files to keep to output path
+## ----------
+cp -r ./$bam_base/*png $out_path/$bam_base/
+cp -r ./* $out_path
+#cp *.vcf $out_path; cp *.tsv $out_path; cp *.txt $out_path; cp *.html $out_path
+
+## ----------
+echo "END falco sh"
+exit 0
\ No newline at end of file
diff -r 1b81dd6b68b5 -r 9285f791716d falco.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco.xml Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,120 @@
+
+
+
+
+ samtools
+ R
+ snpeff
+ VarScan
+
+ TOOL_DIR
+
+
+ FALCO: Amplicon Analysis Pipeline
+
+
+
+
+
+
+
+ falco.sh $falco_cfg;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ \S+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ design_file=$design_file
+ manifest_file=$manifest_file
+ bam_file=$bam
+ bam_name=$bam.name
+ html_out=$html_output
+ vcf_out=$vcf_output
+ out_path=${html_output.files_path}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Introduction**
+
+This tool is a wrapper for the Amplicon Analysis Pipeline (AAP) FALCO_
+
+.. _FALCO: https://bitbucket.org/daoud/falco
+
+Calls and annotates genomic variants for each amplicon in a design.
+
+-----
+
+**Citation**
+
+For the underlying tool please cite: Daoud Sie et al. Title TBA (submitted).
+
+
+
+
diff -r 1b81dd6b68b5 -r 9285f791716d falco/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/README Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,31 @@
+#
+# Author Daoud Sie
+# e-mail d.sie@vumc.nl
+#
+######
+
+Let's put some useful stuff here...
+
+The Amplicon Analysis Pipeline (AAP) takes fastq files as a starting point. However, each
+stage of the pipeline can be run separately.
+
+amplicon-Al-TSACP.sh R1.fq R2.fq output
+
+This will run the following:
+
+- flash read merging
+- adapter clipping
+- quality trimmer
+- alignemt using BWA
+
+- amplicon-VcAn-TSACP.sh
+-- variant calling and annotation:
+-- samtools variant caller
+-- custom variant caller
+-- snpEff effect prediction
+-- dbSNP annotations
+
+- amplicon-Report.sh
+-- filtering of variants
+-- report generation
+
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-Al-TSACP.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-Al-TSACP.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $DIR/init.sh
+
+TMP=./tmp
+if [ ! -e $TMP ]
+then
+ mkdir $TMP
+fi
+
+fq1=$1
+fq2=$2
+output=$3
+
+bfq1=`basename $fq1`
+bfq2=`basename $fq2`
+
+if [ ! -e $TMP/$bfq1.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP5 $fq1 > $TMP/$bfq1.trim"
+$adapt_clipper -a $illmP5 $fq1 > $TMP/$bfq1.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq1.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t30 -i $TMP/$bfq1.trim -o $TMP/$bfq1.qtrim -Q34"
+$qTrim -t30 -i $TMP/$bfq1.trim -o $TMP/$bfq1.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq2.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP7 $fq2 > $TMP/$bfq2.trim"
+$adapt_clipper -a $illmP7 $fq2 > $TMP/$bfq2.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq2.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t10 -i $TMP/$bfq2.trim -o $TMP/$bfq2.qtrim -Q34"
+$qTrim -t30 -i $TMP/$bfq2.trim -o $TMP/$bfq2.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq1.qtrim.paired ]
+then
+echo `date` " [$$] - perl $pairUp $TMP/$bfq1.qtrim $TMP/$bfq2.qtrim"
+perl $pairUp $TMP/$bfq1.qtrim $TMP/$bfq2.qtrim
+echo `date` " [$$] - Done!"
+fi
+
+# FLASH Fastq joiner
+if [ ! -e $TMP/$output.flashed.fq ]
+then
+
+$flash -r150 -f180 -c $TMP/$bfq1.qtrim.paired $TMP/$bfq2.qtrim.paired > $TMP/$output.flashed.fq
+
+fi
+
+#if [ ! -e $TMP/$output ]
+#then
+#echo `date` " [$$] - $stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions=\"$bwaref\" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$bfq1.qtrim.paired $TMP/$bfq2.qtrim.paired"
+#$stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions="$bwaref" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$bfq1.qtrim.paired $TMP/$bfq2.qtrim.paired
+#echo `date` " [$$] - Done!"
+#fi
+
+# Added -e 40 for maximum indel size of 40,...
+#if [ ! -e $TMP/$output ]
+#then
+#echo `date` " [$$] - $stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions=\"-e 40 $bwaref\" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$output.flashed.fq"
+#$stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions="-e 40 $bwaref" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$output.flashed.fq
+#echo `date` " [$$] - Done!"
+#fi
+
+if [ ! -e $output.bam ]
+then
+echo `date` " [$$] - $bwa aln -e 40 $bwaref $TMP/$output.flashed.fq > $TMP/$output.sai"
+$bwa aln -e 40 $bwaref $TMP/$output.flashed.fq > $TMP/$output.sai
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $bwa samse -r \"@RG\\tID:$output\\tSM:$output\" $bwaref $TMP/$output.sai $TMP/$output.flashed.fq > $TMP/$output.sam"
+$bwa samse -r "@RG\tID:$output\tSM:$output" $bwaref $TMP/$output.sai $TMP/$output.flashed.fq > $TMP/$output.sam
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools view -uS $TMP/$output.sam | $samtools sort - $output"
+$samtools view -uS $TMP/$output.sam | $samtools sort - $output
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools index $output.bam"
+$samtools index $output.bam
+echo `date` " [$$] - Done!"
+fi
+
+echo `date` "[$$] - $VcAn $output.bam $output"
+$VcAn $output.bam $output
+echo `date` "[$$] - Done!"
+
+echo `date` "[$$] - $report $output"
+$report $output
+echo `date` "[$$] - Done!"
+
+exit
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-AlVcAn-HaloPlex.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-AlVcAn-HaloPlex.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+index=/scratch/data/ref/hg19/stampy/hg19
+hash=/scratch/data/ref/hg19/stampy/hg19
+bwaref=/scratch/data/ref/hg19/bwa-5/hg19.fa
+
+adapt_clipper=/opt/cutadapt/cutadapt-1.1/bin/cutadapt
+illmP5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
+illmP7="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
+qTrim=/opt/bin/fastq_quality_trimmer
+pairUp=/storage/shark1/data/Git/ampliconPipeLine/pairUp.pl
+stampy=/opt/stampy/stampy-1.0.18/stampy.py
+samtools=/opt/samtools/samtools-0.1.18/samtools
+pileup2cov=/storage/shark1/data/Git/ampliconPipeLine/pileup2cov.pl
+attachManifest=/storage/shark1/data/Git/ampliconPipeLine/attachGff.pl
+manifest=/scratch/data/ref/manifests/HaloPlex/Analyzable-region-01355-1335878719.gff
+snpEff=/opt/snpEff/snpEff_3_0a
+
+plots=/storage/shark1/data/Git/ampliconPipeLine/plots.R
+
+fq1=$1
+fq2=$2
+output=$3
+
+if [ ! -e $fq1.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP5 $fq1 > $fq1.trim"
+$adapt_clipper -a $illmP5 $fq1 > $fq1.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $fq1.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t30 -i $fq1.trim -o $fq1.qtrim -Q34"
+$qTrim -t30 -i $fq1.trim -o $fq1.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $fq2.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP7 $fq2 > $fq2.trim"
+$adapt_clipper -a $illmP7 $fq2 > $fq2.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $fq2.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t30 -i $fq2.trim -o $fq2.qtrim -Q34"
+$qTrim -t30 -i $fq2.trim -o $fq2.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $fq1.qtrim.paired ]
+then
+echo `date` " [$$] - perl $pairUp $fq1.qtrim $fq2.qtrim"
+perl $pairUp $fq1.qtrim $fq2.qtrim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output ]
+then
+echo `date` " [$$] - $stampy -g $index -h $hash --bwaoptions=\"$bwaref\" -o $output -M $fq1.qtrim.paired $fq2.qtrim.paired"
+$stampy -g $index -h $hash --bwaoptions="$bwaref" -o $output -M $fq1.qtrim.paired $fq2.qtrim.paired
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.bam ]
+then
+echo `date` " [$$] - $samtools view -uS $output | $samtools sort - $output"
+$samtools view -uS $output | $samtools sort - $output
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools index $output.bam"
+$samtools index $output.bam
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.vcf ]
+then
+echo `date` " [$$] - $samtools mpileup -Euf $bwaref -L 1000000 -d 1000000 -F 0.01 -m 10 -o 20 $output.bam | bcftools view -bvcg - > $output.bcf"
+$samtools mpileup -Euf $bwaref -L 1000000 -d 1000000 -F 0.01 -m 10 -o 20 $output.bam | bcftools view -bvcg - > $output.bcf
+echo `date` " [$$] - Done!"
+echo `date` " [$$] - bcftools view $output.bcf > $output.vcf"
+bcftools view $output.bcf > $output.vcf
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.txt ]
+then
+echo `date` " [$$] - java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o txt -c $snpEff/snpEff.config $output.vcf > $output.snpeff.vcf"
+java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o txt -c $snpEff/snpEff.config $output.vcf > $output.snpeff.vcf
+echo `date` " [$$] - Done!"
+
+dbSnp=/scratch/data/ref/dbSNP/dbSnp_120616.vcf
+dbNSFP=/scratch/data/ref/dbNSFP/dbNSFP2.0b3.txt
+snpSift=/opt/SnpSift
+
+echo `date` " [$$] [$$] - java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+echo `date` " [$$] [$$] - Done!"
+
+echo `date` " [$$] [$$] - java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+echo `date` " [$$] [$$] - Done!"
+
+fi
+
+if [ ! -e $output.qc.txt ]
+then
+echo `date` " [$$] - $pileup2cov $bwaref $output.bam > $output.qc.txt"
+$samtools view -Xq10 $output.bam | $pileup2cov $bwaref > $output.qc.txt
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.qc.ann.txt ]
+then
+echo `date` " [$$] - $attachManifest $manifest $output.qc.txt $output.qc.targets.txt > $output.qc.ann.txt"
+$attachManifest $manifest $output.qc.txt $output.qc.targets.txt > $output.qc.ann.txt
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.pdf ]
+then
+echo `date` " [$$] - $plots $output.qc.ann.targets.txt $output.qc.targets.txt"
+$plots $output.qc.ann.txt $output.qc.targets.txt $output.pdf
+echo `date` " [$$] - Done!"
+fi
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-AlVcAn-TSACP.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-AlVcAn-TSACP.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,236 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+TMP=./tmp
+if [ ! -e $TMP ]
+then
+ mkdir $TMP
+fi
+
+index=$DIR/ref/hg19/stampy/hg19
+hash=$DIR/ref/hg19/stampy/hg19
+bwaref=$DIR/ref/hg19/bwa-5/hg19.fa
+manifest=$DIR/ref/manifests/TSACP/TruSeq_Amplicon_Cancer_Panel_Manifest_AFP1_PN15032433.txt
+
+illmP5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
+illmP7="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
+
+adapt_clipper=$DIR/bin/cutadapt/cutadapt-1.1/bin/cutadapt
+qTrim=$DIR/bin/fastq_quality_trimmer
+stampy=$DIR/bin/stampy/stampy-1.0.18/stampy.py
+samtools=$DIR/bin/samtools/samtools-0.1.18/samtools
+snpEff=$DIR/bin/snpEff/snpEff_3_0a
+varscan=$DIR/bin/VarScan/VarScan.v2.2.11.jar
+java=/usr/java/latest/bin/java
+bwa=$DIR/bin/bwa/bwa-0.5.9/bwa
+
+pairUp=$DIR/misc/pairUp.pl
+pileup2cov=$DIR/misc/pileup2cov.pl
+attachManifest=$DIR/misc/attachManifest.pl
+plots=$DIR/misc/plots.R
+VcAn=$DIR/amplicon-VcAn-TSACP.sh
+report=$DIR/amplicon-Report.sh
+
+fq1=$1
+fq2=$2
+output=$3
+
+bfq1=`basename $fq1`
+bfq2=`basename $fq2`
+
+if [ ! -e $TMP/$bfq1.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP5 $fq1 > $TMP/$bfq1.trim"
+$adapt_clipper -a $illmP5 $fq1 > $TMP/$bfq1.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq1.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t30 -i $TMP/$bfq1.trim -o $TMP/$bfq1.qtrim -Q34"
+$qTrim -t30 -i $TMP/$bfq1.trim -o $TMP/$bfq1.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq2.trim ]
+then
+echo `date` " [$$] - $adapt_clipper -a $illmP7 $fq2 > $TMP/$bfq2.trim"
+$adapt_clipper -a $illmP7 $fq2 > $TMP/$bfq2.trim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq2.qtrim ]
+then
+echo `date` " [$$] - $qTrim -t10 -i $TMP/$bfq2.trim -o $TMP/$bfq2.qtrim -Q34"
+$qTrim -t30 -i $TMP/$bfq2.trim -o $TMP/$bfq2.qtrim -Q34
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$bfq1.qtrim.paired ]
+then
+echo `date` " [$$] - perl $pairUp $TMP/$bfq1.qtrim $TMP/$bfq2.qtrim"
+perl $pairUp $TMP/$bfq1.qtrim $TMP/$bfq2.qtrim
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $TMP/$output ]
+then
+echo `date` " [$$] - $stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions=\"$bwaref\" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$bfq1.qtrim.paired $TMP/$bfq2.qtrim.paired"
+$stampy -g $index -h $hash --readgroup=ID:$output,SM:$output --gatkcigarworkaround --bwaoptions="$bwaref" --bwa="$bwa" --bwamark -o $TMP/$output -M $TMP/$bfq1.qtrim.paired $TMP/$bfq2.qtrim.paired
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.bam ]
+then
+echo `date` " [$$] - $samtools view -uS $TMP/$output | $samtools sort - $output"
+$samtools view -uS $TMP/$output | $samtools sort - $output
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools index $output.bam"
+$samtools index $output.bam
+echo `date` " [$$] - Done!"
+fi
+
+echo `date` "[$$] - $VcAn $output.bam $output"
+$VcAn $output.bam $output
+echo `date` "[$$] - Done!"
+
+echo `date` "[$$] - $report $output"
+$report $output
+echo `date` "[$$] - Done!"
+
+exit
+if [ ! -e $output.vcf ]
+then
+# Use Extended BAQ!
+# Tweak indel settings -m and -F
+echo `date` " [$$] - $samtools mpileup -Euf $bwaref -L 1000000 -d 1000000 -m 150 -F 0.01 -o 20 $output.bam | bcftools view -bvcg - > $output.bcf"
+$samtools mpileup -Euf $bwaref -L 1000000 -d 1000000 -m 150 -F 0.01 -o 20 $output.bam | bcftools view -bvcg - > $output.bcf
+echo `date` " [$$] - Done!"
+echo `date` " [$$] - bcftools view $output.bcf > $output.vcf"
+bcftools view $output.bcf > $output.vcf
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.vs.snp.vcf ]
+then
+# VarScan seems a better alternative for smaller VAF's Samtools seems to obscure some false negatives....
+echo `date` " [$$] - $samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2snp --min-var-freq 0.05 --output-vcf > $output.vs.snp.vcf"
+$samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2snp --min-var-freq 0.05 --output-vcf > $output.vs.snp.vcf
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.vs.snp.tsv ]
+then
+# VarScan seems a better alternative for smaller VAF's Samtools seems to obscure some false negatives....
+echo `date` " [$$] - $samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2snp --min-var-freq 0.05 > $output.vs.snp.tsv"
+$samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2snp --min-var-freq 0.05 > $output.vs.snp.tsv
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.vs.indel.vcf ]
+then
+echo `date` " [$$] - $samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2indel --min-var-freq 0.05 --output-vcf > $output.vs.indel.vcf"
+$samtools mpileup -Bf $bwaref $output.bam | $java -jar $varscan mpileup2indel --min-var-freq 0.05 --output-vcf > $output.vs.indel.vcf
+echo `date` " [$$] - Done!"
+fi
+
+# Annotation
+dbSnp=/scratch/data/ref/dbSNP/dbSnp_120616.vcf
+dbNSFP=/scratch/data/ref/dbNSFP/dbNSFP2.0b3.txt
+snpSift=/opt/SnpSift
+
+VcfIn=$output.vcf
+VcfOut=$output.samtools.vcf
+
+if [ ! -e $VcfOut ]
+then
+# Samtools annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+mv $output.snpeff.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+echo `date` " [$$] - Done!"
+fi
+
+VcfIn=$output.vs.snp.vcf
+VcfOut=$output.vs.snp.a.vcf
+
+if [ ! -e $VcfOut ]
+then
+# VarScan SNP annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+mv $output.snpeff.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+echo `date` " [$$] - Done!"
+fi
+
+VcfIn=$output.vs.indel.vcf
+VcfOut=$output.vs.indel.a.vcf
+
+if [ ! -e $VcfOut ]
+then
+# VarScan INDEL annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+mv $output.snpeff.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+echo `date` " [$$] - Done!"
+fi
+
+
+if [ ! -e $output.qc.txt ]
+then
+echo `date` " [$$] - $pileup2cov $bwaref $output.bam > $output.qc.txt"
+$samtools view -Xq10 $output.bam | $pileup2cov $bwaref > $output.qc.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools view -Xq10 -f0x0040 $output.bam | $pileup2cov $bwaref $output.bam > $output.R1.qc.txt"
+$samtools view -Xq10 -f0x0040 $output.bam | $pileup2cov $bwaref > $output.R1.qc.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $samtools view -Xq10 -f0x0080 $output.bam | $pileup2cov $bwaref $output.bam > $output.R2.qc.txt"
+$samtools view -Xq10 -f0x0080 $output.bam | $pileup2cov $bwaref > $output.R2.qc.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $attachManifest $manifest $output.qc.txt $output.qc.targets.txt > $output.qc.ann.txt"
+$attachManifest $manifest $output.qc.txt $output.qc.targets.txt > $output.qc.ann.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $attachManifest $manifest $output.R1.qc.txt $output.R1.qc.targets.txt > $output.R1.qc.ann.txt"
+$attachManifest $manifest $output.R1.qc.txt $output.R1.qc.targets.txt > $output.R1.qc.ann.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $attachManifest $manifest $output.R2.qc.txt $output.R2.qc.targets.txt > $output.R2.qc.ann.txt"
+$attachManifest $manifest $output.R2.qc.txt $output.R2.qc.targets.txt > $output.R2.qc.ann.txt
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $plots $output.qc.ann.targets.txt $output.qc.targets.txt"
+$plots $output.qc.ann.txt $output.qc.targets.txt $output.pdf
+echo `date` " [$$] - Done!"
+fi
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-QC.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-QC.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+bam=$1
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+TMP=./tmp
+if [ ! -e $TMP ]
+then
+ mkdir $TMP
+fi
+
+source $DIR/init.sh
+
+#targetBed=$DIR/ref/manifests/TSACP/TSACP.bed
+#samtools=$DIR/bin/samtools/samtools-0.1.18/samtools
+#alignmentStats=$DIR/misc/alignmentStats.pl
+#errorRate=$DIR/misc/errorRate.pl
+#plotQc=$DIR/misc/plotsQC.R
+
+#export PATH=$PATH:$DIR/bin/samtools/samtools-0.1.18/
+
+if [ ! -e alignmentStats.tsv ]
+then
+$alignmentStats $targetBed $bam > alignmentStats.tsv
+fi
+
+if [ ! -e errorStats.tsv ]
+then
+$errorRate *.qc.ann.txt > errorStats.tsv
+fi
+
+Rscript $plotQc alignmentStats.tsv errorStats.tsv
+
+
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-Report.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-Report.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $DIR/init.sh
+
+
+base=$1
+
+vcf2tsv=$DIR/misc/vcf2tsv.pl
+spliteff=$DIR/misc/splitEff.pl
+addQc=$DIR/misc/addQc.pl
+filterR=$DIR/misc/filter.R
+filter=$DIR/misc/filter.pl
+plotPng=$DIR/misc/plotsPng.R
+
+# convert to tsv
+
+echo `date` " [$$] - converting vcf to tsv"
+$vcf2tsv $base.samtools.vcf > $base.samtools.tsv
+#$vcf2tsv $base.vs.snp.a.vcf > $base.vs.snp.a.tsv
+$vcf2tsv $base.qc.ann.filt.a.vcf > $base.qc.ann.filt.a.tsv
+$vcf2tsv $base.qc.ann.clinvar.a.vcf > $base.qc.ann.clinvar.a.tsv
+#$vcf2tsv $base.qc.ann.2.a.vcf > $base.qc.ann.2.a.tsv
+
+# split eff columns
+echo `date` " [$$] - splitting vcf columns"
+$spliteff $base.samtools.tsv Samtools > $base.res.tsv
+#$spliteff $base.vs.snp.a.tsv >> $base.res.tsv
+$spliteff $base.qc.ann.filt.a.tsv Custom >> $base.res.tsv
+$spliteff $base.qc.ann.clinvar.a.tsv Clinvar >> $base.res.tsv
+#$spliteff $base.qc.ann.2.a.tsv Custom2 >> $base.res.tsv
+
+# add count data
+#$addQc $base.res.tmp $base.qc.ann.txt > $base.res.tsv
+
+# Filter
+echo `date` " [$$] - filtering data"
+#Rscript $filterR $base.res.tsv $base.qc.ann.txt 10 100 .95 $base
+$filter $base.res.tsv $canonicals > $base.res.filtered.tsv
+
+# Plot
+echo `date` " [$$] - plotting data"
+Rscript $plotPng $base.qc.ann.qual.txt $base.qc2.ann.txt $base.qc.targets.txt $base.res.filtered.tsv $base.qc.ann.qual.clinvar.txt $locifilt $base
diff -r 1b81dd6b68b5 -r 9285f791716d falco/amplicon-VcAn-TSACP.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/amplicon-VcAn-TSACP.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $DIR/init.sh
+
+TMP=./tmp
+if [ ! -e $TMP ]
+then
+ mkdir $TMP
+fi
+
+bam=$1
+output=$2
+
+if [ ! -e $output.vcf ]
+then
+# Use Extended BAQ!
+# Tweak indel settings -m and -F
+echo `date` " [$$] - $samtools mpileup -Euf $faref -L 1000000 -d 1000000 -m 150 -F 0.01 -o 20 $bam | bcftools view -bvcg - > $output.bcf"
+$samtools mpileup -Euf $faref -L 1000000 -d 1000000 -m 150 -F 0.01 -o 20 $bam | $bcftools view -bvcg - > $output.bcf
+echo `date` " [$$] - Done!"
+echo `date` " [$$] - bcftools view $output.bcf > $output.vcf"
+$bcftools view $output.bcf > $output.vcf
+echo `date` " [$$] - Done!"
+fi
+
+# Annotation
+VcfIn=$output.vcf
+VcfOut=$output.samtools.vcf
+
+if [ ! -e $VcfOut ]
+then
+# Samtools annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+$java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+mv $output.snpeff.2.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.qc.ann.txt ]
+then
+echo `date` " [$$] - $pileup2cov2 $bam $faref $manifest $output $samtools"
+$pileup2cov2 $bam $faref $manifest $output $samtools
+echo `date` " [$$] - Done!"
+fi
+
+if [ ! -e $output.qc.ann.qual.txt ]
+then
+echo `date` " [$$] - RScript $addQual $output.qc.ann.txt $output.qc.ann.qual.txt $DIR/misc/func.R $locifilt"
+Rscript $addQual $output.qc.ann.txt $output.qc.ann.qual.txt $DIR/misc/func.R $locifilt
+echo `date` " [$$] - Done!"
+fi
+
+echo `date` " [$$] - $qcFilt $output.qc.ann.qual.txt $clinvar $cosmic $cosmicNC $output"
+$qcFilt $output.qc.ann.qual.txt $clinvar $cosmic $cosmicNC $output
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $qc2vcf $output.qc.ann.qual.filt.txt > $output.qc.ann.filt.vcf"
+$qc2vcf $output.qc.ann.qual.filt.txt > $output.qc.ann.filt.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $qc2vcf $output.qc.ann.qual.clinvar.txt > $output.qc.ann.clinvar.vcf"
+$qc2vcf $output.qc.ann.qual.clinvar.txt > $output.qc.ann.clinvar.vcf
+echo `date` " [$$] - Done!"
+
+
+
+VcfIn=$output.qc.ann.filt.vcf
+VcfOut=$output.qc.ann.filt.a.vcf
+
+if [ ! -e $VcfOut ]
+then
+
+# qc vcf annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+$java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+mv $output.snpeff.2.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+#echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+#echo `date` " [$$] - Done!"
+fi
+
+VcfIn=$output.qc.ann.clinvar.vcf
+VcfOut=$output.qc.ann.clinvar.a.vcf
+
+if [ ! -e $VcfOut ]
+then
+
+# qc vcf annotation
+echo `date` " [$$] - $java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > tmp.snpeff.vcf"
+$java -jar $snpEff/snpEff.jar eff hg19 -noStats -noLog -o vcf -c $snpEff/snpEff.config $VcfIn > $output.snpeff.vcf
+echo `date` " [$$] - Done!"
+
+echo `date` " [$$] - $java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf"
+$java -jar $snpSift/SnpSift_latest.jar annotate -v $dbSnp $output.snpeff.vcf > $output.snpeff.2.vcf
+mv $output.snpeff.2.vcf $VcfOut
+echo `date` " [$$] - Done!"
+
+#echo `date` " [$$] - $java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf"
+#$java -jar /opt/SnpSift/SnpSift_latest.jar dbnsfp -v $dbNSFP $output.snpeff.2.vcf > $output.snpeff.3.vcf
+#mv $output.snpeff.3.vcf $VcfOut
+#echo `date` " [$$] - Done!"
+fi
diff -r 1b81dd6b68b5 -r 9285f791716d falco/bin/fastq_quality_trimmer
Binary file falco/bin/fastq_quality_trimmer has changed
diff -r 1b81dd6b68b5 -r 9285f791716d falco/init.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/init.sh Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+index=/ccagc/data/ref/hg19/stampy/hg19
+hash=/ccagc/data/ref/hg19/stampy/hg19
+faref=/ccagc/data/ref/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+bwaref=/ccagc/data/ref/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.5.x/genome.fa
+manifest=/ccagc/data/ref/manifests/TSACP/TruSeq_Amplicon_Cancer_Panel_Manifest_AFP1_PN15032433.txt
+targetBed=/ccagc/data/ref/manifests/TSACP/TSACP.bed
+
+dbSnp=/ccagc/data/ref/dbSNP/common_no_known_medical_impact_00-latest.f.vcf
+dbNSFP=/ccagc/data/ref/dbNSFP/dbNSFP2.0b3.txt
+clinvar=/ccagc/data/ref/dbSNP/clinvar_00-latest.f.vcf
+cosmic=/ccagc/data/ref/cosmic/CosmicCodingMuts_v64_26032013_noLimit_wgs.f.vcf
+cosmicNC=/ccagc/data/ref/cosmic/CosmicNonCodingVariants_v64_26032013_noLimit_wgs.f.vcf
+locifilt=/ccagc/data/ref/dbSNP/locifilt.tsv
+canonicals=/ccagc/data/ref/manifests/TSACP/canonicals.tsv
+
+illmP5="AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC"
+illmP7="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
+
+adapt_clipper=/ccagc/lib/cutadapt/cutadapt-1.1/bin/cutadapt
+qTrim=/ccagc/bin/fastq_quality_trimmer
+stampy=/ccagc/lib/stampy/stampy-1.0.18/stampy.py
+samtools=/ccagc/lib/samtools/samtools-0.1.18/samtools
+bcftools=/ccagc/lib/samtools/samtools-0.1.18/bcftools/bcftools
+#snpEff=/ccagc/lib/snpEff/snpEff_3_0a
+snpEff=/ccagc/lib/snpEff/snpEff_v3_4
+snpSift=/ccagc/lib/SnpSift
+
+varscan=/ccagc/lib/VarScan/VarScan.v2.2.11.jar
+java=/usr/bin/java
+bwa=/ccagc/lib/bwa/bwa-0.5.9/bwa
+flash=/ccagc/lib/flash/FLASH-1.2.2/flash
+
+pairUp=$DIR/misc/pairUp.pl
+pileup2cov=$DIR/misc/pileup2cov.pl
+pileup2cov2=$DIR/misc/perAmpliconAnalysis.pl
+addQual=$DIR/misc/addQual.R
+
+qc2vcf=$DIR/misc/qc2vcf.pl
+qcFilt=$DIR/misc/qcFilt.pl
+plots=$DIR/misc/plots.R
+plotsPng=$DIR/misc/plotsPng.R
+
+attachManifest=$DIR/misc/attachManifest.pl
+plots=$DIR/misc/plots.R
+VcAn=$DIR/amplicon-VcAn-TSACP.sh
+report=$DIR/amplicon-Report.sh
+
+alignmentStats=$DIR/misc/alignmentStats.pl
+errorRate=$DIR/misc/errorRate.pl
+plotQc=$DIR/misc/plotsQC.R
+
+export PATH=$PATH:/ccagc/lib/samtools/samtools-0.1.18/
+source ./galaxy_init.sh
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/addQc.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/addQc.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,74 @@
+#!/usr/bin/perl
+
+use strict;
+
+my $file = shift;
+my $qc = shift;
+my %crds = ();
+open F, "<$file";
+my $fhead = readline F;
+chomp $fhead;
+print $fhead . "\t" . join("\t", qw/dp dpref dpvar vaf/) . "\n";
+while () {
+ chomp;
+ my @row = split(/\t/, $_, -1);
+# print STDERR scalar(@row) . "\n";
+ push @{$crds{$row[0]}{$row[1]}}, [@row];
+}
+close F;
+
+# INDELS?
+my %ntcol = (A => 8, C => 9, G => 10, T => 11);
+open QC, "<$qc";
+while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ next if ($row[2] eq ".");
+ my $muts = $crds{$row[0]}{$row[1]} || undef;
+ next unless $muts;
+ foreach my $mut (@$muts) {
+ my $rl = length($mut->[3]);
+ my $vl = length($mut->[4]);
+
+ my $refDP = $row[$ntcol{uc($row[4])}];
+
+ if ($rl > $vl && $row[14] ne ".") { # Deletion
+ my %dels = split(/[\|\:]/, $row[14]);
+ my @del = sort { $dels{$b} <=> $dels{$a} } keys(%dels);
+ my $deldp = $dels{$del[0]};
+ print join("\t", @{$mut}, $row[3], $row[6] - $deldp, $deldp, $deldp / $row[3]) . "\n";
+
+ }
+ elsif ($rl < $vl && $row[13] ne ".") { # Insertion
+ my %inss = split(/[\|:]/, $row[13]);
+ my @ins = sort { $inss{$b} <=> $inss{$a} } keys(%inss);
+ my $insdp = $inss{$ins[0]};
+ print join("\t", @{$mut}, $row[3], $row[6] - $insdp, $insdp, $insdp / $row[3]) . "\n";
+ }
+ elsif ($rl == $vl) {
+ my @varnts = sort { $row[$ntcol{$b}] <=> $row[$ntcol{$a}] } split(/\//, $row[5]);
+ next if ($row[$ntcol{$varnts[0]}] == $row[$ntcol{$varnts[1]}]); # Equeal variant counts
+ my $vardp = $row[$ntcol{$varnts[0]}];
+ print join("\t", @{$mut}, $row[3], $row[6], $vardp, $vardp / ($row[6] + $vardp)) . "\n";
+ }
+
+ }
+ next;
+# if (exists($crds{$row[0]}{$row[1]}) && length($crds{$row[0]}{$row[1]}->[3]) > 1) { # In case of snp
+ if (exists($crds{$row[0]}{$row[1]})) { # && length($crds{$row[0]}{$row[1]}->[3]) > 1) { # In case of snp
+ print join("\t", @{$_}, $row[3], $row[6], $row[7], $row[7] / $row[3]) . "\n" foreach @{$crds{$row[0]}{$row[1]}};
+ }
+ if ($row[14] ne "." && exists($crds{$row[0]}{$row[1]})) { # In case of deletion!
+ my %dels = split(/[\|\:]/, $row[14]);
+ my @del = sort { $dels{$b} <=> $dels{$a} } keys(%dels);
+ $row[7] = $dels{$del[0]};
+ print join("\t", @{$_}, $row[3], $row[6] - $row[7], $row[7], $row[7] / $row[3]) . "\n" foreach @{$crds{$row[0]}{$row[1]}};
+ }
+ if ($row[13] ne "." && exists($crds{$row[0]}{$row[1]})) { # In case of insertion
+ my %inss = split(/[\|:]/, $row[13]);
+ my @ins = sort { $inss{$b} <=> $inss{$a} } keys(%inss);
+ $row[7] = $inss{$ins[0]};
+ print join("\t", @{$_}, $row[3], $row[6] - $row[7], $row[7], $row[7] / $row[3]) . "\n" foreach @{$crds{$row[0]}{$row[1]}};
+ }
+}
+close QC;
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/addQual.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/addQual.R Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/Rscript
+args <- commandArgs(T)
+qcFile <- args[1]
+output <- args[2]
+funcR <- args[3]
+lociFiltFile <- args[4]
+
+read.delim(lociFiltFile, header=F) -> lociFiltD
+lociFilt <- paste(paste("chr", lociFiltD[,1], sep=""), lociFiltD[,2], sep=":")
+
+source(funcR)
+read.delim(qcFile, header=T) -> qc
+
+#addQual(qc) -> qc.qual
+addQualTransCor(qc, lociFilt) -> qc.qual
+#addQualVarCor(qc) -> qc.qual
+
+write.table(qc.qual, output, row.names=F, col.names=T, quote=F, sep="\t")
+#write.table(qc[selection,], output, row.names=F, col.names=T, quote=F, sep="\t")
+
+
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/alignmentStats.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/alignmentStats.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $targetBed = shift;
+my @bams = @ARGV;
+$|= 1;
+
+foreach my $bam (@bams) {
+ my $file = `basename $bam`;
+ chomp $file;
+ my $res0 = `samtools idxstats $bam`;
+ my @Qdat = ();
+ my %Q = ();
+ my $mapped = 0;
+ my $ontarget = 0;
+ my $unmapped = 0;
+ while ($res0 =~ /(\S+)\s(\S+)\s(\S+)\s(\S+)\n/g) {
+ if ($1 ne "*") {
+ $mapped += $3;
+ $unmapped += $4;
+ }
+ else {
+ $unmapped += $3;
+ $unmapped += $4;
+ }
+ }
+ my %Qcon = map { chr($_ + 33) => $_ } (0 .. 40);
+ open PIPE, "samtools view -XF0x014 -L $targetBed $bam | cut -f11 |";
+ while () {
+ my @is = (0 .. (length($_) - 2));
+ $ontarget++;
+ foreach my $i (@is) {
+ $Qdat[$i][$Qcon{substr($_, $i, 1)}]++;
+ }
+ }
+
+ open PIPE, "samtools view -XF0x004 -f0x010 -L $targetBed $bam | cut -f11 |";
+ while () {
+ my @is = reverse(0 .. (length($_) - 2));
+ $ontarget++;
+ foreach my $i (@is) {
+ $Qdat[$i][$Qcon{substr($_, $i, 1)}]++;
+ }
+ }
+
+ open QUAL, ">$file.qual.tsv";
+ print QUAL join("\t", "pos", 0 .. $#Qdat) . "\n";
+ my $q0 = 0;
+ my $q10 = 0;
+ my $q20 = 0;
+ my $q30 = 0;
+ my $qtot = 0;
+ foreach my $p (0 .. $#Qdat) {
+ print QUAL join("\t", $p, map { $Qdat[$p][$_] || 0 } (0 .. $#{$Qdat[$p]})) . "\n";
+ foreach my $q (0 .. $#{$Qdat[$p]}) {
+ $qtot += $Qdat[$p][$q] || 0;
+ $q30 += $Qdat[$p][$q] || 0 if ($q >= 30);
+ $q20 += $Qdat[$p][$q] || 0 if ($q >= 20 && $q < 30);
+ $q10 += $Qdat[$p][$q] || 0 if ($q >= 10 && $q < 20);
+ $q0 += $Qdat[$p][$q] || 0 if ($q < 10);
+ }
+ }
+ close QUAL;
+
+ my $offtarget = $mapped - $ontarget;
+ print join("\t", $file, $mapped, $ontarget, $offtarget, $unmapped, $q30/$qtot, $q20/$qtot, $q10/$qtot, $q0/$qtot) . "\n";
+
+}
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/attachGff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/attachGff.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -w
+#
+# TODO: tiled targets give wrong length...
+use strict;
+
+my $gff = shift;
+my $covtable = shift;
+
+my $QCrep = shift || "qc.txt";
+
+my %targets = ();
+my %stats = ();
+my %manif = ();
+open M, "<$gff";
+while () {
+ if (/^track/) {
+ my $h = readline M;
+ chomp $h;
+ my @H = split(/\t/, $h);
+# print $_ . ":" . $H[$_] . "\n" foreach (0 .. $#H);
+ last;
+ }
+}
+while () {
+ chomp;
+ s/\s+$//;
+ my @row = split(/\t/, $_);
+ push @{$targets{$row[0]}{$_}}, $row[8] foreach ($row[3] .. $row[4]);
+ $manif{$row[8]} = [@row];
+ if (not exists($stats{$row[8]}{dat})) {
+ $stats{$row[8]}{dat} = [@row];
+ $stats{$row[8]}{ln} = $row[4] - $row[3] + 1;
+ $stats{$row[8]}{n} = 0;
+ $stats{$row[8]}{covs} = [];
+ }
+}
+close M;
+
+open C, "<$covtable";
+while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ $#row = 13;
+ if (exists($targets{$row[0]}{$row[1]})) {
+ foreach my $t (@{$targets{$row[0]}{$row[1]}}) {
+ $stats{$t}{n}++;
+ $stats{$t}{covSum} += $row[2];
+ push @{$stats{$t}{covs}}, $row[2];
+ }
+ }
+ print join("\t", @row[0 .. 1],
+ (join("|", @{$targets{$row[0]}{$row[1]} || ["."]} )),
+ (@row[2 .. 11]),
+ (map { $_ || "." } @row[12 .. $#row]),
+ ). "\n";
+# sleep 1;
+}
+close C;
+
+# Calc assay stats
+open QC, ">$QCrep";
+foreach my $t (sort keys %stats) {
+ my @covs = sort {$a <=> $b} @{$stats{$t}{covs}};
+ my $mean = ($stats{$t}{n} != 0)?$stats{$t}{covSum} / $stats{$t}{n}:0;
+ #print join("\t", @{$stats{$t}{dat}}, $mean, map { $stats{$t}{$_} } qw/ln n/). "\n";
+ print QC join("\t", $t, sprintf("%.2f", $mean), map { $stats{$t}{$_} } qw/ln n/). "\n";
+
+}
+close QC;
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/attachManifest.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/attachManifest.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,83 @@
+#!/usr/bin/perl -w
+#
+# TODO: tiled targets give wrong length...
+use strict;
+
+my $manifest = shift;
+my $covtable = shift;
+
+my $QCrep = shift || "qc.txt";
+
+my %targets = ();
+my %stats = ();
+my %manif = ();
+open M, "<$manifest";
+while () { last if (/\[Probes\]/) }
+readline(M);
+while () {
+ chomp;
+ if (/\[Targets\]/) {
+ my $h = readline M;
+ chomp $h;
+ my @H = split(/\t/, $h);
+# print $_ . ":" . $H[$_] . "\n" foreach (0 .. $#H);
+ last;
+ }
+ my @row = split(/\t/, $_);
+ $stats{$row[2]}{probe} = [@row];
+ $stats{$row[2]}{ULSO} = $row[9];
+ $stats{$row[2]}{DLSO} = $row[11];
+
+
+}
+while () {
+ chomp;
+ s/\s+$//;
+ my @row = split(/\t/, $_);
+ push @{$targets{$row[3]}{$_}}, $row[0] foreach ($row[4] .. $row[5]);
+ $manif{$row[0]} = [@row];
+ if (not exists($stats{$row[0]}{target})) {
+ $stats{$row[0]}{target} = [@row];
+ $stats{$row[0]}{ln} = $row[5] - $row[4] + 1;
+ $stats{$row[0]}{n} = 0;
+ $stats{$row[0]}{covs} = [];
+ $stats{$row[0]}{chr} = $row[3];
+ $stats{$row[0]}{start} = $row[4];
+ $stats{$row[0]}{end} = $row[5];
+ $stats{$row[0]}{assayStart} = $row[4] + length($stats{$row[0]}{ULSO});
+ $stats{$row[0]}{assayEnd} = $row[5] - length($stats{$row[0]}{DLSO});
+ }
+}
+close M;
+
+open C, "<$covtable";
+while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ $#row = 13;
+ if (exists($targets{$row[0]}{$row[1]})) {
+ foreach my $t (@{$targets{$row[0]}{$row[1]}}) {
+ $stats{$t}{n}++;
+ $stats{$t}{covSum} += $row[2];
+ push @{$stats{$t}{covs}}, $row[2];
+ }
+ }
+ print join("\t", @row[0 .. 1],
+ (join("|", @{$targets{$row[0]}{$row[1]} || ["."]} )),
+ (@row[2 .. 11]),
+ (map { $_ || "." } @row[12 .. $#row]),
+ ). "\n";
+# sleep 1;
+}
+close C;
+
+# Calc assay stats
+open QC, ">$QCrep";
+foreach my $t (sort keys %stats) {
+ my @covs = sort {$a <=> $b} @{$stats{$t}{covs} || []};
+ my $mean = ($stats{$t}{n} != 0)?$stats{$t}{covSum} / $stats{$t}{n}:0;
+ #print join("\t", @{$stats{$t}{dat}}, $mean, map { $stats{$t}{$_} } qw/ln n/). "\n";
+ print QC join("\t", $t, sprintf("%.2f", $mean), map { $stats{$t}{$_} } qw/ln n chr start end assayStart assayEnd/). "\n";
+
+}
+close QC;
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/checkRunComplete.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/checkRunComplete.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+
+use strict;
+
+# Check RTAcomplete.txt
+# Check existence in AnalisysOutput
+
+my $base = "/ccagc/lib/pipelines/TSACP/falco/misc";
+my $getData = "$base/getData.pl";
+my $startAn = "$base/startAnalysis.pl";
+
+my $Project = shift or die "No project specified!\n";
+my $AnalysisOutput = shift || "./"; #"/ccagc/data/SeqAnalysis/$Project";
+
+my $MiSeqOutput = shift || "/ccagc/data/processing/MiSeqOutput/";
+
+
+my $LOG = shift || "$AnalysisOutput/log/$Project.log";
+
+if (! -d "$AnalysisOutput/log") {
+ mkdir "$AnalysisOutput/log";
+}
+
+my %log = ();
+open LOG, "<$LOG";
+while () {
+ chomp;
+ # runDir date
+ my @row = split(/\t/, $_);
+ $log{$row[0]} = [@row];
+}
+close LOG;
+
+my @runs = `find $MiSeqOutput -maxdepth 2 -iname "RTAComplete.txt"`;
+my %runHsh = ();
+
+foreach my $r (@runs) {
+ chomp;
+ my $runDir = `dirname $r`;
+ chomp $runDir;
+ if (exists($log{$runDir})) {
+ print STDERR "$runDir previously checked\n";
+ next;
+ }
+ my @row = split(/\//, $runDir);
+ my @run = split(/\_/, $row[$#row]);
+ my $outDir = $AnalysisOutput . "/" . join("_", @run[0 .. 2]);
+ my $baseDir = $AnalysisOutput;
+ if (-e $outDir) {
+ print STDERR "$outDir exists, skipping\n";
+ } else {
+ print STDERR "perl $getData $runDir $Project\n";
+ system("perl $getData $runDir $Project $baseDir");
+ print STDERR "perl $startAn $outDir\n";
+ system("perl $startAn $outDir");
+ open LOG, ">>$LOG";
+ print LOG join("\t", $runDir, scalar(localtime())) . "\n";
+ close LOG;
+ }
+}
+
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/errorRate.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/errorRate.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+my @files = @ARGV;
+foreach my $file (@files) {
+my $ref = 0;
+my $var = 0;
+
+my $file = shift;
+
+open F, "<$file";
+my $header = readline F; # Skip header
+chomp $header;
+my @hrow = split(/\t/, $header);
+my $ci = 0;
+my %col = ();
+$col{$_} = $ci++ foreach (@hrow);
+
+
+while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ #next if ($row[2] eq ".");
+ #print join("\t", $row[6], $row[7]) . "\n"; sleep 1;
+ my $ntRef = uc($row[4]);
+ $ref += $row[$col{"n$ntRef"}];
+ $var += $row[$col{"n$_"}] foreach (grep { $_ ne $ntRef } qw/A C G T/);
+}
+
+close F;
+my @path = split(/\//, $file);
+my $rate = $var / ($ref + $var) * 100;
+#print STDERR "$var/$ref -> $rate%\n";
+print "$path[$#path]\t$var\t$ref\t$rate\n";
+}
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/filter.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/filter.R Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/Rscript
+args <- commandArgs(T)
+resFile <- args[1]
+qcFile <- args[2]
+varLim <- args[3]
+dpLim <- args[4]
+quants <- args[5]
+output <- args[6]
+
+read.delim(resFile, header=T) -> d
+read.delim(qcFile, header=T) -> qc
+
+# Calculate errorrate per amplicon
+amps <- unique(qc$X.amp)
+for (i in amps) {
+ sel <- qc$X.amp == amps
+ prob <- sum(qc$nVar[sel]) / (sum(qc$nRef) + sum(qc$nVar))
+ pees <- pbinom(qc$nVar[sel], qc$nRef + qc$nVar, prob, lower.tail=F)
+ Qscore <- -10 * log10(pees)
+
+}
+
+quantile(d$vaf, as.numeric(quants), na.rm=T) -> vafCut
+
+sel1 <- d$vaf > vafCut
+sel2 <- d$dp > dpLim
+sel3 <- d$dpvar > varLim
+
+sel <- sel1 & sel2 & sel3
+write.table(d[sel,], paste(output, "res.f1.tsv", sep="."), row.names=F, col.names=T, quote=F, sep="\t")
+
+dir.create(output)
+setwd(output)
+
+png(paste(output, "vafcut", "png", sep="."))
+plot(density(log10(d$vaf), na.rm=T), main=paste("VAF cutoff", vafCut, sep=" "))
+abline(v=log10(vafCut), col=2)
+dev.off()
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/filter.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/filter.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,100 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+
+my $file = shift;
+my $canonicals = shift;
+
+my %can = ();
+my %nm = ();
+
+open CAN, "<$canonicals";
+
+while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ $can{$row[0]} = @row[1 .. $#row];
+ my $pri = 0;
+ $nm{$_} = $pri++ foreach @row[1 .. $#row];
+# print STDERR join(":", @row[0,1]) . "\n"; sleep 1;
+}
+
+open F, "<$file";
+my $head = readline(F);
+chomp $head;
+print $head . "\tvaf\n";
+my @rowHead = split(/\t/, $head);
+my $colN = 0;
+my %col = map {
+# print STDERR $_ . " $colN\n";
+ $_ => $colN++ } @rowHead;
+
+while () {
+ chomp;
+ my $line = $_;
+ my @row = split(/\t/, $_);
+ #g12 t15
+ next unless (exists($can{$row[$col{Gene_Name}]}));
+ my $trans = $row[$col{Transcript_ID}];
+ $trans =~ s/\..*$//;
+
+ if (not exists($nm{$trans})) {
+ print STDERR "Unknown transcript: $row[$col{Gene_Name}] $trans\n";
+ next;
+ }
+ elsif ($nm{$trans} != 0) {
+ print STDERR "Non cannonical: $row[$col{Gene_Name}] $trans\n";
+ next;
+ }
+
+ #next unless ($nm{$trans} == 0);
+
+ # Do not filter out clinicvar loci
+ #print $_ . "\n" if ($row[$col{Tag}] eq "Clinvar");
+ next if ($row[$col{Tag}] eq "Clinvar");
+# print STDERR $row[15]; sleep 1;
+# print STDERR $nm{$row[15]}; sleep 1;
+
+#CODON_CHANGE_PLUS_CODON_DELETION
+#CODON_DELETION
+#DOWNSTREAM
+#EXON
+#FRAME_SHIFT
+#INTRON
+#NON_SYNONYMOUS_CODING
+#SPLICE_SITE_ACCEPTOR
+#SYNONYMOUS_CODING
+#UPSTREAM
+#UTR_3_PRIME
+
+ # Filter INTERGENIC
+ next if ($row[$col{Context}] eq "INTERGENIC");
+
+ # Filter SYNONYMOUS_CODING
+ # Dubbel mutatie BRAF
+# next if ($row[$col{Context}] eq "SYNONYMOUS_CODING");
+
+ # Filter INTRON
+ next if ($row[$col{Context}] eq "INTRON");
+
+ # Filter dbSNP
+# next if ($row[2] ne ".");
+
+ # Filter depth
+ next if ($row[$col{DP}] < 100);
+
+ my ($nref, $nvar) = split(/,/, $row[$col{AD}]);
+ # Filter var depth
+# print STDERR "DEBUG: " . $nvar . "\n"; sleep 1;
+ next if ($nvar < 10);
+
+ my $vaf = $nvar / ($nref + $nvar);
+# print STDERR "DEBUG: " . $vaf . "\n";
+ # Filter vaf
+ next if ($vaf < 0.01);
+
+
+ print $_ . "\t$vaf\n";
+}
+close F;
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/filterQC.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/filterQC.R Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,118 @@
+#!/usr/bin/Rscript
+args <- commandArgs(T)
+qcFile <- args[1]
+varLim <- as.numeric(args[2])
+dpLim <- as.numeric(args[3])
+quants <- as.numeric(args[4])
+output <- args[5]
+
+read.delim(qcFile, header=T) -> d
+
+vaf <- d$nVar / (d$nRef + d$nVar)
+
+sapply(1:nrow(d), function(x) {
+ if (d$del[x] != ".") {
+ dels <- matrix(unlist(strsplit(as.character(d$del[x]), c("[:|]"))), byrow=T, ncol=2)
+ delRanks <- rank(dels[,2])
+ delDp <- as.numeric(dels[rev(delRanks)[1],2])
+ if (delDp > varLim) {
+ return(T)
+ }
+ }
+ if (d$ins[x] != ".") {
+ ins <- matrix(unlist(strsplit(as.character(d$ins[x]), c("[:|]"))), byrow=T, ncol=2)
+ insRanks <- rank(ins[,2])
+ insDp <- as.numeric(ins[rev(insRanks)[1],2])
+ if (insDp > varLim) {
+ return(T)
+ }
+
+ }
+ return(F)
+}) -> indelSel
+
+
+#dels <- matrix(unlist(strsplit(d$del, ":|"))
+#inss <- strsplit(d$ins, ":|")
+#d$vafDel
+#d$vafIns
+
+quantile(vaf, as.numeric(quants), na.rm=T) -> vafCut
+
+sel1 <- vaf > vafCut
+sel2 <- d$dp > dpLim
+sel3 <- (d$nVar > varLim) | indelSel
+
+sel <- sel1 & sel2 & sel3
+write.table(d[sel,], paste(output, "res.sigfilt.tsv", sep="."), row.names=F, col.names=F, quote=F, sep="\t")
+
+dir.create(output)
+setwd(output)
+
+png(paste(output, "vafcut", "png", sep="."))
+plot(density(log10(vaf), na.rm=T), main=paste("VAF cutoff", vafCut, sep=" "))
+abline(v=log10(vafCut), col=2)
+dev.off()
+
+ampls <- unique(d$amp)
+
+pdf(paste(output, "pdf", sep="."), width=14)
+sapply(ampls, function(x) {
+ sel <- d[,3] == x
+ main = x
+# plot(density(as.numeric(as.vector(d[sel,8]))), main=main)
+# hist(as.numeric(as.vector(d[sel,8])), breaks=range(d[sel,8])[2], main=main)
+
+ # Get indels
+ dels <- (d[,15] != ".") & sel
+ ins <- (d[,14] != ".") & sel
+
+ pNoise <- sum(d$nVar[sel]) / (sum(d$nRef[sel]) + sum(d$nVar[sel]))
+ print(pNoise)
+ pVals <- ppois(d$nVar[sel], (d$nVar[sel] + d$nRef[sel]) * pNoise, lower.tail=F)
+ Qval <- -10*log10(pVals)
+ noiseThrs <- qpois(0.01, (d$nVar[sel] + d$nRef[sel]) * pNoise)
+
+ plot(d[sel,2], d[sel,4], pch=20, type="s", ylim=c(0, max(d[sel,4])), main=main)
+ lines(d[sel,2], d[sel,7], pch=20, type="s", col=2)
+ lines(d[sel,2], d[sel,8], pch=20, type="s",col=3)
+
+ ampMax <- max(d[sel,4])
+
+ for (idx in which(dels)) {
+ matrix(unlist(strsplit(as.character(as.vector(d[idx,15])), "[:|]")), ncol=2, byrow=T) -> delDat
+ nchar(delDat[,1]) -> lngts
+ dpths <- as.numeric(delDat[,2])
+ rect(d[idx,2] + 1, ampMax - dpths , d[idx,2] + lngts, ampMax, col="#00000040")
+ }
+
+ for (idx in which(ins)) {
+ matrix(unlist(strsplit(as.character(as.vector(d[idx,14])), "[:|]")), ncol=2, byrow=T) -> insDat
+ nchar(insDat[,1]) -> lngts
+ dpths <- as.numeric(insDat[,2])
+ for (i in 1:ncol(insDat)) {
+ polygon(
+ c(d[idx,2], d[idx,2] - (.5 * lngts[i]), d[idx, 2] + (.5 * lngts[i])),
+ c(ampMax, ampMax - dpths[i], ampMax - dpths[i]),
+ , col="#00000040")
+ }
+ }
+
+ #abline(v=d[dels,2], col=4)
+ #abline(v=d[ins,2], col=5)
+
+ vaf <- d[sel,8]/(d[sel,7] + d[sel,8])
+
+ plot(density(vaf), main=main)
+
+ hist(vaf, breaks=100, main=main)
+
+ plot(d[sel,2], d[sel,8]/(d[sel,7] + d[sel,8]), pch=20, cex=.5, ylim=c(0,1), main=main)
+ points(d[sel,2], noiseThrs / (d[sel,7] + d[sel,8]), col=2, pch=20)
+ plot(d[sel,2], log10(d[sel,8]/(d[sel,7] + d[sel,8])), pch=20, cex=.5, ylim=c(-4,0), main=main)
+ points(d[sel,2], log10(noiseThrs / (d[sel,7] + d[sel,8])), col=2, pch=20)
+ print(x)
+# print(sum(sel))
+ })
+
+dev.off()
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/func.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/func.R Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,569 @@
+#!/usr/bin/Rscript
+#args <- commandArgs(T)
+#qcFile <- args[1]
+#output <- args[2]
+
+#read.delim(qcFile, header=T) -> qc
+
+addQual <- function(qc) {
+
+qc$del <- as.character(as.vector(qc$del))
+qc$ins <- as.vector(qc$ins)
+qc$pVal <- 1
+qc$pValAdj <- 1
+#qc$pValPois <- 1
+qc$qScore <- 0
+qc$qScoreI <- 0
+qc$qScoreD <- 0
+
+pVal.I <- rep(1, nrow(qc))
+pVal.D <- rep(1, nrow(qc))
+
+delSel <- qc$del != "."
+data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[delSel]), "\\|")), ":")), ncol=2, byrow=T)) -> dels
+dels$ln <- nchar(as.vector(dels$X1))
+colnames(dels) <- c("seq", "occ", "ln")
+delPees <- ppois(1:50, mean(rep(dels$ln, dels$occ)), lower.tail=F)
+
+insSel <- qc$ins != "."
+data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[insSel]), "\\|")), ":")), ncol=2, byrow=T)) -> inss
+inss$ln <- nchar(as.vector(inss$X1))
+colnames(inss) <- c("seq", "occ", "ln")
+insPees <- ppois(1:50, mean(rep(dels$ln, dels$occ)), lower.tail=F)
+
+# Calculate errorrate per amplicon
+amps <- unique(qc$amp)
+for (i in amps) {
+ print(i)
+ sel <- qc$amp == i
+ prob <- sum(qc$nVar[sel]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel]))
+ #prob <- median(qc$nVar[sel] / (qc$nRef[sel] + qc$nVar[sel]))
+ pees <- pbinom(qc$nVar[sel], qc$nRef[sel] + qc$nVar[sel], prob, lower.tail=F)
+ qc$pVal[sel] <- pees
+# pois <- ppois(qc$nVar[sel], (qc$nRef + qc$nVar) * prob, lower.tail=F)
+# qc$pValPois[sel] <- pois
+
+ # Seperate for indel/deletion events
+ selDels <- delSel & sel
+ if (sum(selDels) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[selDels]), "\\|")), ":")), ncol=2, byrow=T)) -> dels2
+ dels2$ln <- nchar(as.vector(dels2$X1))
+ colnames(dels2) <- c("seq", "occ", "ln")
+ dels2$occ <- as.numeric(as.vector(dels2$occ))
+ }
+ for (j in which(selDels)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[j]), "\\|")), ":")), ncol=2, byrow=T)) -> dels
+ dels$ln <- nchar(as.vector(dels$X1))
+ colnames(dels) <- c("seq", "occ", "ln")
+ dels$occ <- as.numeric(as.vector(dels$occ))
+ #dels$pDel <- delPees[as.integer(dels$ln)]
+ #dels$pVal <- ppois(as.numeric(dels$occ), dels$pDel, lower.tail=F)
+ dels$pDel <- sapply(dels$ln, function(x) {sum(dels2$occ[dels2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ dels$pVal <- pbinom(dels$occ, qc$dp[j], dels$pDel, lower.tail=F)
+ dels$Q <- -10 * log10(dels$pVal)
+
+ high <- order(dels$occ, -dels$Q, decreasing=T)[1]
+ qc$del[j] <- paste(dels$seq[high], dels$occ[high], sep=":")
+ qc$qScoreD[j] <- dels$Q[high]
+ pVal.D[j] <- dels$pVal[high]
+ }
+ selIns <- insSel & sel
+ if (sum(selIns) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[selIns]), "\\|")), ":")), ncol=2, byrow=T)) -> inss2
+ inss2$ln <- nchar(as.vector(inss2$X1))
+ colnames(inss2) <- c("seq", "occ", "ln")
+ inss2$occ <- as.numeric(as.vector(inss2$occ))
+ }
+ for (j in which(selIns)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[j]), "\\|")), ":")), ncol=2, byrow=T)) -> inss
+ inss$ln <- nchar(as.vector(inss$X1))
+ colnames(inss) <- c("seq", "occ", "ln")
+ inss$occ <- as.numeric(as.vector(inss$occ))
+ #inss$pIns <- insPees[as.integer(inss$ln)]
+ #inss$pVal <- ppois(as.numeric(inss$occ), inss$pIns, lower.tail=F)
+ inss$pIns <- sapply(inss$ln, function(x) {sum(inss2$occ[inss2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ inss$pVal <- pbinom(inss$occ, qc$dp[j], inss$pIns, lower.tail=F)
+ inss$Q <- -10 * log10(inss$pVal)
+
+ high <- order(inss$occ, -inss$Q, decreasing=T)[1]
+ qc$ins[j] <- paste(inss$seq[high], inss$occ[high], sep=":")
+ qc$qScoreI[j] <- inss$Q[high]
+ pVal.I[j] <- inss$pVal[high]
+ }
+ print(sum(-10 * log10(pees) > 100))
+ print(sum(qc$qScoreI[selIns] > 100))
+ print(sum(qc$qScoreD[selDels] > 100))
+}
+
+p.adjust(qc$pVal) -> qc$pValAdj
+-10 * log(p.adjust(pVal.I)) -> qc$qScoreI
+-10 * log(p.adjust(pVal.D)) -> qc$qScoreD
+qc$qScore <- -10 * log10(qc$pValAdj)
+#qc$qScore[qc$pValAdj == 0] <- 1000
+qc$qScore[qc$qScore > 1000] <- 1000
+qc$qScoreI[qc$qScoreI > 1000] <- 1000
+qc$qScoreD[qc$qScoreD > 1000] <- 1000
+qsel <- (qc$qScore >= 100 & qc$nVar >= 10) | (qc$qScoreI >= 200) | (qc$qScoreD >= 200)
+dpfilt <- as.numeric(qc$dp) >= 100
+#varfilt <- as.numeric(qc$nVar) >= 10
+
+selection <- qsel & dpfilt
+
+print(paste("Filtered out: ", sum(!selection), sep=""))
+print(paste("Variants selected: ", sum(selection), sep=""))
+return(qc)
+}
+
+addQualTransCor <- function(qc, lociFilt) {
+
+
+# lociFilt contains snp, cosmic and clinvar positions
+
+qc$del <- as.character(as.vector(qc$del))
+#qc$del <- as.character(as.vector(qc$del))
+qc$ins <- as.character(as.vector(qc$ins))
+qc$pVal <- 1
+qc$pValAdj <- 1
+#qc$pValPois <- 1
+qc$qScore <- 0
+qc$qScoreI <- 0
+qc$qScoreD <- 0
+
+pVal.I <- rep(1, nrow(qc))
+pVal.D <- rep(1, nrow(qc))
+
+rle(as.vector(qc$ntRef)) -> srle
+hompols <- srle$lengths >= 5
+homPosS <- cumsum(srle$lengths)[hompols]
+homPosE <- cumsum(srle$lengths)[which(hompols) - 1]
+homFilt <- rep(F, nrow(qc))
+homFilt[homPosS] <- T
+homFilt[homPosE] <- T
+
+delSel <- qc$del != "." & !homFilt
+if (sum(delSel > 0)) {
+
+
+data.frame(matrix(unlist(strsplit(unlist(strsplit(qc$del[delSel], "\\|")), ":")), ncol=2, byrow=T)) -> dels
+dels$ln <- nchar(as.vector(dels$X1))
+colnames(dels) <- c("seq", "occ", "ln")
+delPees <- ppois(1:50, mean(rep(dels$ln, dels$occ)), lower.tail=F)
+}
+
+insSel <- qc$ins != "." & !homFilt
+if (sum(insSel) > 0) {
+data.frame(matrix(unlist(strsplit(unlist(strsplit(qc$ins[insSel], "\\|")), ":")), ncol=2, byrow=T)) -> inss
+inss$ln <- nchar(as.vector(inss$X1))
+colnames(inss) <- c("seq", "occ", "ln")
+insPees <- ppois(1:50, mean(rep(inss$ln, inss$occ)), lower.tail=F)
+}
+# Calculate transition bias
+
+
+#calcTransCor(qc) -> pCor
+globalProb <- sum(qc$nVar) / (sum(qc$nRef) + sum(qc$nVar))
+# Calculate errorrate per amplicon
+amps <- unique(qc$amp)
+for (i in amps) {
+ print(i)
+ sel <- qc$amp == i
+ prob1 <- sum(qc$nVar[sel]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel]))
+ prob <- prob1 * calcTransCor(qc[sel,], lociFilt)
+ pees <- pbinom(qc$nVar[sel], qc$nRef[sel] + qc$nVar[sel], prob , lower.tail=F)
+ qc$pVal[sel] <- pees
+# pois <- ppois(qc$nVar[sel], (qc$nRef + qc$nVar) * prob, lower.tail=F)
+# qc$pValPois[sel] <- pois
+
+ # Seperate for indel/deletion events
+ selDels <- delSel & sel
+ if (sum(selDels) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[selDels]), "\\|")), ":")), ncol=2, byrow=T)) -> dels2
+ dels2$ln <- nchar(as.vector(dels2$X1))
+ colnames(dels2) <- c("seq", "occ", "ln")
+ dels2$occ <- as.numeric(as.vector(dels2$occ))
+
+ for (j in which(selDels)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[j]), "\\|")), ":")), ncol=2, byrow=T)) -> dels
+ dels$ln <- nchar(as.vector(dels$X1))
+ colnames(dels) <- c("seq", "occ", "ln")
+ dels$occ <- as.numeric(as.vector(dels$occ))
+ #dels$pDel <- delPees[as.integer(dels$ln)]
+ #dels$pVal <- ppois(as.numeric(dels$occ), dels$pDel, lower.tail=F)
+ dels$pDel <- sapply(dels$ln, function(x) {sum(dels2$occ[dels2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ dels$pVal <- pbinom(dels$occ, qc$dp[j], dels$pDel, lower.tail=F)
+ dels$Q <- -10 * log10(dels$pVal)
+
+ high <- order(dels$occ, -dels$Q, decreasing=T)[1]
+ qc$del[j] <- paste(dels$seq[high], dels$occ[high], sep=":")
+ qc$qScoreD[j] <- dels$Q[high]
+ pVal.D[j] <- dels$pVal[high]
+ }
+ }
+ selIns <- insSel & sel
+ if (sum(selIns) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[selIns]), "\\|")), ":")), ncol=2, byrow=T)) -> inss2
+ inss2$ln <- nchar(as.vector(inss2$X1))
+ colnames(inss2) <- c("seq", "occ", "ln")
+ inss2$occ <- as.numeric(as.vector(inss2$occ))
+
+ for (j in which(selIns)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[j]), "\\|")), ":")), ncol=2, byrow=T)) -> inss
+ inss$ln <- nchar(as.vector(inss$X1))
+ colnames(inss) <- c("seq", "occ", "ln")
+ inss$occ <- as.numeric(as.vector(inss$occ))
+ #inss$pIns <- insPees[as.integer(inss$ln)]
+ #inss$pVal <- ppois(as.numeric(inss$occ), inss$pIns, lower.tail=F)
+ inss$pIns <- sapply(inss$ln, function(x) {sum(inss2$occ[inss2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ inss$pVal <- pbinom(inss$occ, qc$dp[j], inss$pIns, lower.tail=F)
+ inss$Q <- -10 * log10(inss$pVal)
+
+ high <- order(inss$occ, -inss$Q, decreasing=T)[1]
+ qc$ins[j] <- paste(inss$seq[high], inss$occ[high], sep=":")
+ qc$qScoreI[j] <- inss$Q[high]
+ pVal.I[j] <- inss$pVal[high]
+} }
+ print(sum(-10 * log10(pees) > 100))
+ print(sum(qc$qScoreI[selIns] > 100))
+ print(sum(qc$qScoreD[selDels] > 100))
+}
+
+p.adjust(qc$pVal) -> qc$pValAdj
+-10 * log(p.adjust(pVal.I)) -> qc$qScoreI
+-10 * log(p.adjust(pVal.D)) -> qc$qScoreD
+qc$qScore <- -10 * log10(qc$pValAdj)
+#qc$qScore[qc$pValAdj == 0] <- 1000
+qc$qScore[qc$qScore > 1000] <- 1000
+qc$qScoreI[qc$qScoreI > 1000] <- 1000
+qc$qScoreD[qc$qScoreD > 1000] <- 1000
+qsel <- (qc$qScore >= 100 & qc$nVar >= 10) | (qc$qScoreI >= 200) | (qc$qScoreD >= 200)
+dpfilt <- as.numeric(qc$dp) >= 100
+#varfilt <- as.numeric(qc$nVar) >= 10
+
+selection <- qsel & dpfilt
+
+print(paste("Filtered out: ", sum(!selection), sep=""))
+print(paste("Variants selected: ", sum(selection), sep=""))
+return(qc)
+}
+
+
+addQualVarCor <- function(qc) {
+# Corrected for variant occurence
+
+qc$del <- as.character(as.vector(qc$del))
+qc$ins <- as.vector(qc$ins)
+qc$pVal <- 1
+qc$pValAdj <- 1
+#qc$pValPois <- 1
+qc$qScore <- 0
+qc$qScoreI <- 0
+qc$qScoreD <- 0
+
+pVal.I <- rep(1, nrow(qc))
+pVal.D <- rep(1, nrow(qc))
+
+delSel <- qc$del != "."
+data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[delSel]), "\\|")), ":")), ncol=2, byrow=T)) -> dels
+dels$ln <- nchar(as.vector(dels$X1))
+colnames(dels) <- c("seq", "occ", "ln")
+delPees <- ppois(1:50, mean(rep(dels$ln, dels$occ)), lower.tail=F)
+
+insSel <- qc$ins != "."
+data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[insSel]), "\\|")), ":")), ncol=2, byrow=T)) -> inss
+inss$ln <- nchar(as.vector(inss$X1))
+colnames(inss) <- c("seq", "occ", "ln")
+insPees <- ppois(1:50, mean(rep(dels$ln, dels$occ)), lower.tail=F)
+
+# Calculate base bias per run
+sum(qc$nA[grepl("A", qc$ntRef, ignore.case=T)]) -> refA
+sum(qc$nA[!grepl("A", qc$ntRef, ignore.case=T)]) -> varA
+sum(qc$nC[grepl("C", qc$ntRef, ignore.case=T)]) -> refC
+sum(qc$nC[!grepl("C", qc$ntRef, ignore.case=T)]) ->varC
+sum(qc$nG[grepl("G", qc$ntRef, ignore.case=T)]) -> refG
+sum(qc$nG[!grepl("G", qc$ntRef, ignore.case=T)]) -> varG
+sum(qc$nT[grepl("T", qc$ntRef, ignore.case=T)]) -> refT
+sum(qc$nT[!grepl("T", qc$ntRef, ignore.case=T)]) -> varT
+
+varA / (refA + varA) -> errA
+varC / (refC + varC) -> errC
+varG / (refG + varG) -> errG
+varT / (refT + varT) -> errT
+c(errA, errC, errG, errT) / mean(c(errA, errC, errG, errT)) -> errACGT
+
+errMat <- qc[,9:12]
+errMat[grepl("A", qc$ntRef, ignore.case=T),1] <- 0
+errMat[grepl("C", qc$ntRef, ignore.case=T),2] <- 0
+errMat[grepl("G", qc$ntRef, ignore.case=T),3] <- 0
+errMat[grepl("T", qc$ntRef, ignore.case=T),4] <- 0
+
+pCor <- rep(1, nrow(qc))
+pCor[grepl("^A", qc$ntVar, ignore.case=T)] <- errACGT[1]
+pCor[grepl("^C", qc$ntVar, ignore.case=T)] <- errACGT[2]
+pCor[grepl("^G", qc$ntVar, ignore.case=T)] <- errACGT[3]
+pCor[grepl("^T", qc$ntVar, ignore.case=T)] <- errACGT[4]
+
+# Calculate errorrate per amplicon
+amps <- unique(qc$amp)
+for (i in amps) {
+ print(i)
+ sel <- qc$amp == i
+ prob <- sum(qc$nVar[sel]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel]))
+ #prob <- median(qc$nVar[sel] / (qc$nRef[sel] + qc$nVar[sel]))
+ pees <- pbinom(qc$nVar[sel], qc$nRef[sel] + qc$nVar[sel], prob * pCor[sel], lower.tail=F)
+ qc$pVal[sel] <- pees
+# pois <- ppois(qc$nVar[sel], (qc$nRef + qc$nVar) * prob, lower.tail=F)
+# qc$pValPois[sel] <- pois
+
+ # Seperate for indel/deletion events
+ selDels <- delSel & sel
+ if (sum(selDels) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[selDels]), "\\|")), ":")), ncol=2, byrow=T)) -> dels2
+ dels2$ln <- nchar(as.vector(dels2$X1))
+ colnames(dels2) <- c("seq", "occ", "ln")
+ dels2$occ <- as.numeric(as.vector(dels2$occ))
+ }
+ for (j in which(selDels)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$del[j]), "\\|")), ":")), ncol=2, byrow=T)) -> dels
+ dels$ln <- nchar(as.vector(dels$X1))
+ colnames(dels) <- c("seq", "occ", "ln")
+ dels$occ <- as.numeric(as.vector(dels$occ))
+ #dels$pDel <- delPees[as.integer(dels$ln)]
+ #dels$pVal <- ppois(as.numeric(dels$occ), dels$pDel, lower.tail=F)
+ dels$pDel <- sapply(dels$ln, function(x) {sum(dels2$occ[dels2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ dels$pVal <- pbinom(dels$occ, qc$dp[j], dels$pDel, lower.tail=F)
+ dels$Q <- -10 * log10(dels$pVal)
+
+ high <- order(dels$occ, -dels$Q, decreasing=T)[1]
+ qc$del[j] <- paste(dels$seq[high], dels$occ[high], sep=":")
+ qc$qScoreD[j] <- dels$Q[high]
+ pVal.D[j] <- dels$pVal[high]
+ }
+ selIns <- insSel & sel
+ if (sum(selIns) > 0) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[selIns]), "\\|")), ":")), ncol=2, byrow=T)) -> inss2
+ inss2$ln <- nchar(as.vector(inss2$X1))
+ colnames(inss2) <- c("seq", "occ", "ln")
+ inss2$occ <- as.numeric(as.vector(inss2$occ))
+ }
+ for (j in which(selIns)) {
+ data.frame(matrix(unlist(strsplit(unlist(strsplit(as.vector(qc$ins[j]), "\\|")), ":")), ncol=2, byrow=T)) -> inss
+ inss$ln <- nchar(as.vector(inss$X1))
+ colnames(inss) <- c("seq", "occ", "ln")
+ inss$occ <- as.numeric(as.vector(inss$occ))
+ #inss$pIns <- insPees[as.integer(inss$ln)]
+ #inss$pVal <- ppois(as.numeric(inss$occ), inss$pIns, lower.tail=F)
+ inss$pIns <- sapply(inss$ln, function(x) {sum(inss2$occ[inss2$ln == x]) / (sum(qc$nRef[sel]) + sum(qc$nVar[sel])) })
+ inss$pVal <- pbinom(inss$occ, qc$dp[j], inss$pIns, lower.tail=F)
+ inss$Q <- -10 * log10(inss$pVal)
+
+ high <- order(inss$occ, -inss$Q, decreasing=T)[1]
+ qc$ins[j] <- paste(inss$seq[high], inss$occ[high], sep=":")
+ qc$qScoreI[j] <- inss$Q[high]
+ pVal.I[j] <- inss$pVal[high]
+ }
+ print(sum(-10 * log10(pees) > 100))
+ print(sum(qc$qScoreI[selIns] > 100))
+ print(sum(qc$qScoreD[selDels] > 100))
+}
+
+p.adjust(qc$pVal) -> qc$pValAdj
+-10 * log(p.adjust(pVal.I)) -> qc$qScoreI
+-10 * log(p.adjust(pVal.D)) -> qc$qScoreD
+qc$qScore <- -10 * log10(qc$pValAdj)
+#qc$qScore[qc$pValAdj == 0] <- 1000
+qc$qScore[qc$qScore > 1000] <- 1000
+qc$qScoreI[qc$qScoreI > 1000] <- 1000
+qc$qScoreD[qc$qScoreD > 1000] <- 1000
+qsel <- (qc$qScore >= 100 & qc$nVar >= 10) | (qc$qScoreI >= 200) | (qc$qScoreD >= 200)
+dpfilt <- as.numeric(qc$dp) >= 100
+#varfilt <- as.numeric(qc$nVar) >= 10
+
+selection <- qsel & dpfilt
+
+print(paste("Filtered out: ", sum(!selection), sep=""))
+print(paste("Variants selected: ", sum(selection), sep=""))
+return(qc)
+}
+
+#addQual(qc) -> qc
+#write.table(qc, output, row.names=F, col.names=T, quote=F, sep="\t")
+#write.table(qc[selection,], output, row.names=F, col.names=T, quote=F, sep="\t")
+
+calcTrans <- function(qcRaw, lociFilt = NULL) {
+
+sel <- rep(T, nrow(qcRaw))
+if (!is.null(lociFilt)) {
+paste(qcRaw$X.chr, qcRaw$pos, sep=":") -> qcpos
+sel <- ! qcpos %in% lociFilt
+}
+qc <- qcRaw[sel,]
+
+print(sum(sel))
+
+# Calculate transition bias
+refA <- grepl("A", qc$ntRef, ignore.case=T)
+refC <- grepl("C", qc$ntRef, ignore.case=T)
+refG <- grepl("G", qc$ntRef, ignore.case=T)
+refT <- grepl("T", qc$ntRef, ignore.case=T)
+varA <- grepl("A", qc$ntVar, ignore.case=T)
+varC <- grepl("C", qc$ntVar, ignore.case=T)
+varG <- grepl("G", qc$ntVar, ignore.case=T)
+varT <- grepl("T", qc$ntVar, ignore.case=T)
+varN <- grepl(".", qc$ntVar, ignore.case=T)
+callA <- grepl("^A", qc$ntVar, ignore.case=T)
+callC <- grepl("^C", qc$ntVar, ignore.case=T)
+callG <- grepl("^G", qc$ntVar, ignore.case=T)
+callT <- grepl("^T", qc$ntVar, ignore.case=T)
+
+callList <- list(callA, callC, callG, callT)
+varList <- list(varA, varC, varG, varT)
+refList <- list(refA, refC, refG, refT)
+
+#tots <- c(
+#rep(sum(qc$nRef[refA], qc$nVar[refA]), 4),
+#rep(sum(qc$nRef[refC], qc$nVar[refC]), 4),
+#rep(sum(qc$nRef[refG], qc$nVar[refG]), 4),
+#rep(sum(qc$nRef[refT], qc$nVar[refT]), 4))
+
+nvar <- c(
+rep(sum(qc$nC[refA], qc$nG[refA], qc$nT[refA]), 4),
+rep(sum(qc$nA[refC], qc$nG[refC], qc$nT[refC]), 4),
+rep(sum(qc$nA[refG], qc$nC[refG], qc$nT[refG]), 4),
+rep(sum(qc$nA[refT], qc$nC[refT], qc$nG[refT]), 4))
+
+ncall <- c(
+rep(sum(qc$nVar[refA]), 4),
+rep(sum(qc$nVar[refC]), 4),
+rep(sum(qc$nVar[refG]), 4),
+rep(sum(qc$nVar[refT]), 4))
+
+nref <- c(
+rep(sum(qc$nRef[refA]), 4),
+rep(sum(qc$nRef[refC]), 4),
+rep(sum(qc$nRef[refG]), 4),
+rep(sum(qc$nRef[refT]), 4))
+
+tots <- nref + nvar
+
+nts <- c("A", "C", "G", "T")
+sumTrans <- data.frame(trans=paste(rep(nts, each=4), rep(nts, 4), sep=":"), cnt=rep(0, 16), tot=tots, nvar=nvar, nref=nref, ncall=ncall)
+
+for (i in 1:4) {
+ pos <- (i - 1) * 4
+for (j in 1:4) {
+# print(sum(refList[[i]] & varList[[j]]))
+
+ cnt <- sum(qc[refList[[i]] & varList[[j]], 8 + j])
+ if (i == j) {
+# cnt <- sum(qc[refList[[i]] & varN, 8 + j])
+ cnt <- NA
+ }
+ sumTrans$cnt[pos + j] <- cnt
+ print(cnt)
+}}
+
+#sumTrans$cnt / sumTrans$nvar / sumTrans$tot * 1000000 -> tmp
+sumTrans$cnt / sumTrans$tot * 1000000 -> tmp
+
+#totalNT <- sum(qc$nVar) + sum(qc$nRef)
+#sumTrans$cnt / median(sumTrans$cnt, na.rm=T) -> sumTrans$cor
+sumTrans$cnt / median(sumTrans$cnt, na.rm=T) -> sumTrans$cor
+tmp / median(tmp, na.rm=T) -> sumTrans$cor2
+#sumTrans$cnt / totalNT -> sumTrans$cor
+#sumTrans$cnt / min(sumTrans$cnt, na.rm=T) -> sumTrans$cor
+
+
+return(sumTrans)
+
+}
+
+calcTransCor <- function(qcRaw, lociFilt = NULL) {
+
+calcTrans(qcRaw, lociFilt) -> sumTrans
+
+pCor <- rep(1, nrow(qcRaw))
+paste(qcRaw$ntRef, substr(qcRaw$ntVar, 1, 1), sep=":") -> qcTrans
+
+for (i in c("A", "C", "G", "T")) {
+for (j in c("A", "C", "G", "T")) {
+ if (i != j) {
+ tTrans <- paste(i, j, sep=":")
+ pCor[grepl(tTrans, qcTrans, ignore.case=T)] <- sumTrans$cor2[sumTrans$trans == tTrans]
+ }
+
+}}
+return(pCor)
+}
+
+
+calcTransCorOld <- function(qc) {
+# Calculate transition bias
+refA <- grepl("A", qc$ntRef, ignore.case=T)
+refC <- grepl("C", qc$ntRef, ignore.case=T)
+refG <- grepl("G", qc$ntRef, ignore.case=T)
+refT <- grepl("T", qc$ntRef, ignore.case=T)
+varA <- grepl("A", qc$ntVar, ignore.case=T)
+varC <- grepl("C", qc$ntVar, ignore.case=T)
+varG <- grepl("G", qc$ntVar, ignore.case=T)
+varT <- grepl("T", qc$ntVar, ignore.case=T)
+varN <- grepl(".", qc$ntVar, ignore.case=T)
+callA <- grepl("^A", qc$ntVar, ignore.case=T)
+callC <- grepl("^C", qc$ntVar, ignore.case=T)
+callG <- grepl("^G", qc$ntVar, ignore.case=T)
+callT <- grepl("^T", qc$ntVar, ignore.case=T)
+
+callList <- list(callA, callC, callG, callT)
+varList <- list(varA, varC, varG, varT)
+refList <- list(refA, refC, refG, refT)
+
+tots <- c(
+rep(sum(qc$nRef[refA], qc$nVar[refA]), 4),
+rep(sum(qc$nRef[refC], qc$nVar[refC]), 4),
+rep(sum(qc$nRef[refG], qc$nVar[refG]), 4),
+rep(sum(qc$nRef[refT], qc$nVar[refT]), 4))
+
+nvar <- c(
+rep(sum(qc$nVar[refA]), 4),
+rep(sum(qc$nVar[refC]), 4),
+rep(sum(qc$nVar[refG]), 4),
+rep(sum(qc$nVar[refT]), 4))
+
+nref <- c(
+rep(sum(qc$nRef[refA]), 4),
+rep(sum(qc$nRef[refC]), 4),
+rep(sum(qc$nRef[refG]), 4),
+rep(sum(qc$nRef[refT]), 4))
+
+nts <- c("A", "C", "G", "T")
+sumTrans <- data.frame(trans=paste(rep(nts, each=4), rep(nts, 4), sep=":"), cnt=rep(0, 16), tot=tots, nvar=nvar, nref=nref)
+
+for (i in 1:4) {
+ pos <- (i - 1) * 4
+for (j in 1:4) {
+# print(sum(refList[[i]] & varList[[j]]))
+
+ cnt <- sum(qc[refList[[i]] & varList[[j]], 8 + j])
+ if (i == j) {
+# cnt <- sum(qc[refList[[i]] & varN, 8 + j])
+ cnt <- NA
+ }
+ sumTrans$cnt[pos + j] <- cnt
+ print(cnt)
+}}
+#totalNT <- sum(qc$nVar) + sum(qc$nRef)
+sumTrans$cnt / median(sumTrans$cnt, na.rm=T) -> sumTrans$cor
+#sumTrans$cnt / totalNT -> sumTrans$cor
+#sumTrans$cnt / min(sumTrans$cnt, na.rm=T) -> sumTrans$cor
+print(sumTrans)
+
+pCor <- rep(1, nrow(qc))
+for (i in 1:4) {
+ pos <- (i - 1) * 4
+for (j in 1:4) {
+ if (i != j) {
+ pCor[refList[[i]] & callList[[j]]] <- sumTrans$cor[pos + j]
+ }
+
+}}
+return(pCor)
+}
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/getData.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/getData.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+
+use strict;
+
+# Read in path(s) and run bcl2fastq to fetch data into appropriate directories
+
+my $runDir = shift;
+my $project = shift || die "Project required";
+my $baseDir = shift || "/ccagc/data/SeqAnalysis/$project";
+my $dry = 0;
+my $outDir = undef;
+(my $FCID = $runDir) =~ s/.*_(.*?)\/?$/$1/;
+
+if (not defined($outDir)) {
+ ($outDir = $runDir) =~ s/.*\/(.*_.*_.*)_.*/$1/;
+ $outDir = $baseDir ."/". $outDir;
+}
+
+print STDERR localtime() . " : PATH: " . $runDir . "\n";
+print STDERR localtime() . " : BASEDIR: " . $baseDir . "\n";
+print STDERR localtime() . " : OUTDIR: " . $outDir . "\n";
+print STDERR localtime() . " : FCID: " . $FCID . "\n";
+
+# open samplesheet
+# Check run type and convert for bcl2fastq
+my $col = 0;
+
+my @headOut = qw/FCID Lane SampleID SampleRef Index Description Control Recipe Operator SampleProject/;
+my $Lane = 1;
+my $SampleRef = "human";
+my $Control = "N";
+my $SampleProject = $project;
+my %header = ();
+open SAMPLE, "<$runDir/SampleSheet.csv" or die;
+while () {
+ chomp;
+ s/\s+$//;
+ my @row = split(/,/, $_);
+# if (/^(.*),(.*)$/) {
+# $header{$1} = $2;
+# }
+ $header{$row[0]} = $row[1];
+ last if (/\[Data\]/);
+}
+
+if ($header{Assay} !~ /Amplicon/) {
+ print STDERR localtime() . " : Wrong assay, exiting.\n";
+ exit;
+}
+
+# Skipping project name check for now
+#if ($header{"Project Name"} !~ /$project/) {
+# print STDERR localtime() . " : Wrong project, exiting.\n";
+# exit;
+#}
+
+my $Operator = $header{"Investigator Name"};
+my $Recipe = $header{"Workflow"};
+
+my $head = readline(SAMPLE);
+chomp $head;
+my %headIn = map { $_ => $col++ } split(/,/, $head);#"Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description,Manifest,GenomeFolder");
+
+my @samples = ();
+my @ctrls = ();
+while () {
+ chomp;
+ my @row = split(/,/, $_);
+ print STDERR localtime() . " : " . join(":", @row) . "\n";
+ my $bc = substr($row[$headIn{index}],0,7) ."-". substr($row[$headIn{index2}],0,7);
+ if ($row[$headIn{Sample_Project}] != /$SampleProject/) {
+ push @samples, join(",", $FCID, $Lane, $row[$headIn{Sample_ID}], $SampleRef, $bc, $row[$headIn{Description}], $Control, $Recipe, $Operator, $SampleProject) . "\n";
+ }
+ elsif ($row[$headIn{Sample_Project}] != /CONTROL/i) {
+ push @ctrls, join(",", $FCID, $Lane, $row[$headIn{Sample_ID}], $SampleRef, $bc, $row[$headIn{Description}], $Control, $Recipe, $Operator, $SampleProject) . "\n";
+ }
+
+}
+close SAMPLE;
+
+if ($dry == 1) {
+ print STDERR localtime() . " : mkdir $outDir\n";
+ print STDERR localtime() . " : " . join(",", @headOut) . "\n";
+ print STDERR localtime() . " : " . join("", @samples);
+}
+elsif (-e $outDir) {
+ print STDERR localtime() . " : Folder $outDir exists, exiting\n";
+ exit;
+}
+elsif ($#samples >= 0) {
+ print STDERR localtime() . " : mkdir $outDir\n";
+ system("mkdir -p $outDir");
+ open SAMPLEOUT, ">$outDir/SampleSheet.csv";
+ print SAMPLEOUT join(",", @headOut) . "\n";
+ print STDERR localtime() . " : " . join(",", @headOut) . "\n";
+ print SAMPLEOUT join("", @samples, @ctrls);
+ print STDERR localtime() . " : " . join("", @samples, @ctrls);
+ close SAMPLEOUT;
+}
+else {
+ # Nothing to do exit!
+ print STDERR localtime() . " : Nothing to do for $SampleProject, exiting!";
+ exit;
+}
+
+
+# run configureBCL2fastq
+my $bcl2fqbin = "/usr/local/bin/configureBclToFastq.pl";
+my $bcl2fq = "$bcl2fqbin --no-eamss --input-dir $runDir/Data/Intensities/BaseCalls --output-dir $outDir/Unaligned --sample-sheet $outDir/SampleSheet.csv --mismatches 0 --fastq-cluster-count 0";
+my $make = "cd $outDir/Unaligned && nohup make && touch bcl2fq.done && cd ..";
+
+print STDERR localtime() . " : $bcl2fq\n";
+print STDERR localtime() . " : $make\n";
+if ($dry == 1) {
+ exit;
+}
+
+my $pid1 = open BCL2FQ, "$bcl2fq 2>&1 |";
+while () {
+ print STDERR localtime() . " - $_";
+}
+close BCL2FQ;
+
+my $pid2 = open MAKE, "$make 2>&1 |";
+while () {
+ print STDERR localtime() . " - $_";
+}
+close MAKE;
+
+
+#my $rc1 = 0; #system($bcl2fq);
+# run make
+#if ($rc1 == 0) {
+# my $rc2 = system($make);
+#}
+#else {
+# print STDERR localtime() . " : Error or warning encountered :$rc1\n";
+# print STDERR localtime() . " : Run make manually\n";
+# print STDERR localtime() . " : $make\n";
+#}
diff -r 1b81dd6b68b5 -r 9285f791716d falco/misc/mkHtmlReport.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/falco/misc/mkHtmlReport.pl Thu Sep 11 05:11:11 2014 -0400
@@ -0,0 +1,201 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Spreadsheet::WriteExcel;
+
+$| = 1;
+my $dir = shift;
+my $outdir = shift || "./";
+my $pat = shift || "";
+
+# QC
+# Results
+my @samples = ();
+open INDEX, ">$outdir/index.html";
+my $htmlHead = qq(
+
+
+
+
+
+
+);
+print INDEX $htmlHead;
+opendir DIR, "$dir";
+while (my $cd = readdir DIR) {
+ if ($cd =~ /(.*$pat)\.qc\.targets\.txt$/) {
+ my $sam = $1;
+# next if ($sam =~ /R[12]/);
+ print STDERR $1 . "\n";
+ push @samples, $1;
+ }
+}
+close DIR;
+
+print INDEX "";
+print INDEX "Sample | BAM | snp | indel | readCnt | Amp > 100 | \n";
+#foreach my $sam (sort @samples) {
+# print INDEX "
---|
$sam | BAM | BAI |
\n";
+
+#}
+my %link = ();
+
+foreach my $sample (sort @samples) {
+ print STDERR "Processing $sample\n";
+# next if ($sample =~ /R[12]/);
+
+ my $readCnt = 0;
+ my $amp100 = 0;
+
+ open OUT, ">$outdir/$sample.html";
+ open OUT2, ">$outdir/$sample.tsv";
+ my $excelBook = Spreadsheet::WriteExcel->new("$outdir/$sample.xls");
+ my $excel = $excelBook->add_worksheet();
+ print OUT $htmlHead;
+ my %QC = ();
+ open QC, "<$dir/$sample.qc.targets.txt";
+ readline QC;
+ print STDERR "Reading in $sample.qc.targets.txt\n";
+ while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ my @id = split(/[\_\.\-:]/, $row[0]);
+ $readCnt += $row[-1]; # DP
+ $amp100++ if ($row[-1] >= 100); #
+ if ($#id != 10) {
+ $id[0] =~ /(\D+)(\d+)/;
+ $id[0] = $2;
+ unshift @id, $1;
+ }
+ $QC{$row[0]}{QC} = [@id, @row];# if ($id[0]);
+ foreach my $c ($row[4] .. $row[5]) {
+ $link{$id[-3] . ":" . $c}{$row[0]} = "Assay";
+ }
+ foreach my $c ($row[2] .. $row[4], $row[5] .. $row[3]) {
+ $link{$id[-3] . ":" . $c}{$row[0]} = "LSO";
+ }
+ }
+ close QC;
+
+ open RES, "<$dir/$sample.res.filtered.tsv" or die "Unable to open $dir/$sample\n";
+ my %uniq = ();
+ my $colCnt = 0;
+ my $resHead = readline(RES);
+ chomp $resHead;
+ $resHead =~ s/^#//;
+ $resHead =~ s/\s+$//;
+ my %resCol = map { $_ => $colCnt++ } split(/\t/, $resHead);
+ my @keyColsN = qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Gene_Coding Transcript_ID Exon_Rank/;
+ #my @keyColsN = qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Coding Transcript Exon/;
+# my @keyColsN = qw/CHROM POS ID REF ALT QUAL DP AD vaf Context Effect_Impact Functional_Class Codon_Change Amino_Acid_change Amino_Acid_length Gene_Name Coding Transcript Exon Tag/;
+ my @keyColsI = map { $resCol{$_} } @keyColsN;
+# print STDERR join(":", @keyColsN) . "\n";
+# print STDERR join(":", @keyColsI) . "\n";
+# sleep 10;
+ print STDERR "Processing results\n";
+ while () {
+ chomp;
+ my @row = split(/\t/, $_);
+ my $cpos = join(":", @row[0, 1]);
+ if (exists $link{$cpos}) {
+# my $key = join(":", @row[0 .. 4,6, 9,10,11,12,14 .. 21]);
+ my $key = join(":", @row[@keyColsI]);
+ #print STDERR join(":", @keyColsI) . "\n";
+# print STDERR join(":", @row) . "\n"; sleep 1;
+ if (not exists($uniq{$key})) {
+ foreach my $locus (keys(%{$link{$cpos}})) {
+ next if ($link{$cpos}{$locus} eq "LSO");
+ push @{$QC{$locus}{RES}}, [@row, $link{$cpos}{$locus}];
+ # print STDERR "Adding $key to $locus\n\n"; #sleep 1;
+ }
+ $uniq{$key} = 0;
+ }
+ else {
+ # print STDERR $key . " : Exists\n\n"; #sleep 1;
+ }
+ }
+ }
+ close RES;
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ print OUT "
";
+ #print OUT "
";
+ print OUT "Download: | TSV | ";
+ print OUT "XLS |
---|
";
+ print OUT "\n";
+# my @colnames = qw/depth chr pos id ref var qual DP AD vaf context impact effectClass codonChange AAChange RefSeqLength geneName RefSeqClass RefSeqID Exon Tag/;
+ my @colnames = ("depth", @keyColsN);
+ print OUT "" . join("", map { "$_ | " } ("Amplicon", "c","c2", "b", @colnames)) . "
";
+ print OUT2 join("\t", "Amplicon", @colnames) . "\n";
+ my $excelAref = [["Amplicon", @colnames]];
+ my %rescnt = ();
+ foreach my $locus (sort keys(%QC)) {
+ # my @targets = keys(%{$QC{$locus}{QC}});
+# print OUT "";
+ my $nres = 1;
+ $nres = scalar(@{$QC{$locus}{RES}}) if ($QC{$locus}{RES});
+
+ print OUT "$locus | \n";
+ print OUT "c | \n";
+ print OUT "c2 | \n";
+ print OUT "b | \n";
+ print OUT "$QC{$locus}{QC}->[-1] | \n"; #";
+
+ foreach my $res (@{$QC{$locus}{RES}}) {
+ print OUT " | \n";
+ print OUT join(" | \n", @{$res}[@keyColsI]);
+ print OUT " |
\n";
+ print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]) . "\n";
+ push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]];
+ my $pl = $res->[$keyColsI[7]];
+ my $ref = $res->[$keyColsI[11]];
+ my $var = $res->[$keyColsI[12]];
+# print STDERR "$pl $ref $var\n"; sleep 1;
+ if (length($ref) == length($var)) {
+ $rescnt{$pl . "snp"}++;
+ }
+ else {
+ $rescnt{$pl . "indel"}++;
+ }
+# $rescnt++;
+ }
+ print STDERR $locus . ":" . $nres . "\n";
+ print STDERR $locus . ":" . join("-", @{$QC{$locus}{RES}}) . "\n";
+ if (scalar(@{$QC{$locus}{RES}}) == 0) { #$nres == 0) {
+ print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)) . "\n";
+ push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)];
+ }
+ print OUT "
\n";
+ }
+ print OUT "
\n";
+ print OUT "\n";
+ close OUT;
+ close OUT2;
+ $excel->write_col(0,0, $excelAref);
+ $excelBook->close();
+ my @resCnt = map { $rescnt{$_} || 0 } qw/Customsnp Customindel/;
+ my $pctGood = sprintf("%.2f", $amp100 / scalar(keys(%QC)) * 100);
+ print INDEX "$sample | BAM | $resCnt[0] | $resCnt[1] | $readCnt | $pctGood |
\n";
+
+}
+
+print INDEX "
";
+print INDEX "
";
+print INDEX "
";
+print INDEX "
";
+print INDEX "