view scripts/to_get_vcf_files.sh @ 9:695d28139f3e

toolshed8
author biomonika <biomonika@psu.edu>
date Tue, 09 Sep 2014 14:31:02 -0400
parents 1955f03f092e
children
line wrap: on
line source

#!/bin/bash
set -e;

#read config variables
echo "Reading config...." >&2
#echo `ls -l`
computer_name=`hostname`;
if [ $computer_name == "misa" ]; then
	#local_computer
	source config_devel;
else 
	#metacentrum machine
	source config_meta;
fi

#files needed: reference.fasta $reads_1 $reads_2

reference=$1;
reads_1="input/$2"; echo "reads_1: " $reads_1;
reads_2="input/$3"; echo "reads_2: " $reads_2;
name=$4;
fragments=$5;

echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name;

#INDEXING FILES

if [ -e "references/reference.dict" ];
then
	echo "Picard reference exists. Not being created again."
else
	echo "seconds of run: $SECONDS"
	echo "reference:"$reference;
	java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile
	echo "0/9 INDEXING FILES successful"
	echo "seconds of run: $SECONDS"
fi

if [ -e "references/reference.fai" ];
then
	echo "Samtools reference exists. Not being created again."
else
	echo "seconds of run: $SECONDS"
	echo "reference:"$reference;
	samtools faidx $reference 2>>errlogfile;
	echo "0/9 INDEXING FILES successful"
	echo "seconds of run: $SECONDS"
fi

#1 MAPPING WITH BWA
bwa index $reference >>mlogfile 2>>errlogfile

#differs based on the fact wheter reads are single or paired-end
bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile
wait;

if [[ $fragments =~ (single) ]]; then
    #single-end reads
    bwa samse  $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile
    echo "bwa samse  $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile";
    wait;
else
    #pair-end reads
    bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile
    echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile";
    wait;
    bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile
    echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile";
fi

wait;
rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai 

echo "1/7 MAPPING WITH BWA successful"
echo "seconds of run: $SECONDS"

#2 CLEANING FILES - adjust MAPQ scores
echo "$SECONDS"
java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null
echo "2/7 CLEANING FILES - adjust MAPQ scores successful"
echo "seconds of run: $SECONDS"
wait

#3 CONVERTING TO BAM FILE
samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile
echo "3/7 CONVERTING TO BAM FILE successful"
echo "seconds of run: $SECONDS"
wait

#4 SORTING BAM FILE
samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile
echo "4/7 SORTING BAM FILE successful"
echo "seconds of run: $SECONDS"
wait
#rm aln.bam aln_cleaned.bam

#5 REMOVING DUPLICATES
java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null
echo "5/7 REMOVING DUPLICATES successful"
echo "seconds of run: $SECONDS"
wait;

#6 SNP AND VARIANT CALLING
samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile
echo "6/7 SNP AND VARIANT CALLING successful"
echo "seconds of run: $SECONDS"

#7 REMOVING FILES
rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile
echo "7/7 REMOVING FILES successful"
echo "seconds of run: $SECONDS"
echo "DONE"