Mercurial > repos > biomonika > linkyx
diff scripts/to_get_vcf_files.sh @ 0:1955f03f092e
initial commit
| author | biomonika <biomonika@psu.edu> |
|---|---|
| date | Tue, 02 Sep 2014 14:16:04 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/to_get_vcf_files.sh Tue Sep 02 14:16:04 2014 -0400 @@ -0,0 +1,113 @@ +#!/bin/bash +set -e; + +#read config variables +echo "Reading config...." >&2 +#echo `ls -l` +computer_name=`hostname`; +if [ $computer_name == "misa" ]; then + #local_computer + source config_devel; +else + #metacentrum machine + source config_meta; +fi + +#files needed: reference.fasta $reads_1 $reads_2 + +reference=$1; +reads_1="input/$2"; echo "reads_1: " $reads_1; +reads_2="input/$3"; echo "reads_2: " $reads_2; +name=$4; +fragments=$5; + +echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name; + +#INDEXING FILES + +if [ -e "references/reference.dict" ]; +then + echo "Picard reference exists. Not being created again." +else + echo "seconds of run: $SECONDS" + echo "reference:"$reference; + java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile + echo "0/9 INDEXING FILES successful" + echo "seconds of run: $SECONDS" +fi + +if [ -e "references/reference.fai" ]; +then + echo "Samtools reference exists. Not being created again." +else + echo "seconds of run: $SECONDS" + echo "reference:"$reference; + samtools faidx $reference 2>>errlogfile; + echo "0/9 INDEXING FILES successful" + echo "seconds of run: $SECONDS" +fi + +#1 MAPPING WITH BWA +bwa index $reference >>mlogfile 2>>errlogfile + +#differs based on the fact wheter reads are single or paired-end +bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile +wait; + +if [[ $fragments =~ (single) ]]; then + #single-end reads + bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile + echo "bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile"; + wait; +else + #pair-end reads + bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile + echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile"; + wait; + bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile + echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile"; +fi + +wait; +rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai + +echo "1/7 MAPPING WITH BWA successful" +echo "seconds of run: $SECONDS" + +#2 CLEANING FILES - adjust MAPQ scores +echo "$SECONDS" +java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null +echo "2/7 CLEANING FILES - adjust MAPQ scores successful" +echo "seconds of run: $SECONDS" +wait + +#3 CONVERTING TO BAM FILE +samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile +echo "3/7 CONVERTING TO BAM FILE successful" +echo "seconds of run: $SECONDS" +wait + +#4 SORTING BAM FILE +samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile +echo "4/7 SORTING BAM FILE successful" +echo "seconds of run: $SECONDS" +wait +#rm aln.bam aln_cleaned.bam + +#5 REMOVING DUPLICATES +java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null +echo "5/7 REMOVING DUPLICATES successful" +echo "seconds of run: $SECONDS" +wait; + +#6 SNP AND VARIANT CALLING +samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile +echo "6/7 SNP AND VARIANT CALLING successful" +echo "seconds of run: $SECONDS" + +#7 REMOVING FILES +rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile +echo "7/7 REMOVING FILES successful" +echo "seconds of run: $SECONDS" +echo "DONE" +
