diff scripts/to_get_vcf_files.sh @ 9:695d28139f3e

toolshed8
author biomonika <biomonika@psu.edu>
date Tue, 09 Sep 2014 14:31:02 -0400
parents 1955f03f092e
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/to_get_vcf_files.sh	Tue Sep 09 14:31:02 2014 -0400
@@ -0,0 +1,113 @@
+#!/bin/bash
+set -e;
+
+#read config variables
+echo "Reading config...." >&2
+#echo `ls -l`
+computer_name=`hostname`;
+if [ $computer_name == "misa" ]; then
+	#local_computer
+	source config_devel;
+else 
+	#metacentrum machine
+	source config_meta;
+fi
+
+#files needed: reference.fasta $reads_1 $reads_2
+
+reference=$1;
+reads_1="input/$2"; echo "reads_1: " $reads_1;
+reads_2="input/$3"; echo "reads_2: " $reads_2;
+name=$4;
+fragments=$5;
+
+echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name;
+
+#INDEXING FILES
+
+if [ -e "references/reference.dict" ];
+then
+	echo "Picard reference exists. Not being created again."
+else
+	echo "seconds of run: $SECONDS"
+	echo "reference:"$reference;
+	java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile
+	echo "0/9 INDEXING FILES successful"
+	echo "seconds of run: $SECONDS"
+fi
+
+if [ -e "references/reference.fai" ];
+then
+	echo "Samtools reference exists. Not being created again."
+else
+	echo "seconds of run: $SECONDS"
+	echo "reference:"$reference;
+	samtools faidx $reference 2>>errlogfile;
+	echo "0/9 INDEXING FILES successful"
+	echo "seconds of run: $SECONDS"
+fi
+
+#1 MAPPING WITH BWA
+bwa index $reference >>mlogfile 2>>errlogfile
+
+#differs based on the fact wheter reads are single or paired-end
+bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile
+wait;
+
+if [[ $fragments =~ (single) ]]; then
+    #single-end reads
+    bwa samse  $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile
+    echo "bwa samse  $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile";
+    wait;
+else
+    #pair-end reads
+    bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile
+    echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile";
+    wait;
+    bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile
+    echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile";
+fi
+
+wait;
+rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai 
+
+echo "1/7 MAPPING WITH BWA successful"
+echo "seconds of run: $SECONDS"
+
+#2 CLEANING FILES - adjust MAPQ scores
+echo "$SECONDS"
+java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null
+echo "2/7 CLEANING FILES - adjust MAPQ scores successful"
+echo "seconds of run: $SECONDS"
+wait
+
+#3 CONVERTING TO BAM FILE
+samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile
+echo "3/7 CONVERTING TO BAM FILE successful"
+echo "seconds of run: $SECONDS"
+wait
+
+#4 SORTING BAM FILE
+samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile
+echo "4/7 SORTING BAM FILE successful"
+echo "seconds of run: $SECONDS"
+wait
+#rm aln.bam aln_cleaned.bam
+
+#5 REMOVING DUPLICATES
+java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null
+echo "5/7 REMOVING DUPLICATES successful"
+echo "seconds of run: $SECONDS"
+wait;
+
+#6 SNP AND VARIANT CALLING
+samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile
+echo "6/7 SNP AND VARIANT CALLING successful"
+echo "seconds of run: $SECONDS"
+
+#7 REMOVING FILES
+rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile
+echo "7/7 REMOVING FILES successful"
+echo "seconds of run: $SECONDS"
+echo "DONE"
+