Mercurial > repos > biomonika > linkyx
view scripts/to_get_vcf_files.sh @ 9:695d28139f3e
toolshed8
| author | biomonika <biomonika@psu.edu> |
|---|---|
| date | Tue, 09 Sep 2014 14:31:02 -0400 |
| parents | 1955f03f092e |
| children |
line wrap: on
line source
#!/bin/bash set -e; #read config variables echo "Reading config...." >&2 #echo `ls -l` computer_name=`hostname`; if [ $computer_name == "misa" ]; then #local_computer source config_devel; else #metacentrum machine source config_meta; fi #files needed: reference.fasta $reads_1 $reads_2 reference=$1; reads_1="input/$2"; echo "reads_1: " $reads_1; reads_2="input/$3"; echo "reads_2: " $reads_2; name=$4; fragments=$5; echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name; #INDEXING FILES if [ -e "references/reference.dict" ]; then echo "Picard reference exists. Not being created again." else echo "seconds of run: $SECONDS" echo "reference:"$reference; java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile echo "0/9 INDEXING FILES successful" echo "seconds of run: $SECONDS" fi if [ -e "references/reference.fai" ]; then echo "Samtools reference exists. Not being created again." else echo "seconds of run: $SECONDS" echo "reference:"$reference; samtools faidx $reference 2>>errlogfile; echo "0/9 INDEXING FILES successful" echo "seconds of run: $SECONDS" fi #1 MAPPING WITH BWA bwa index $reference >>mlogfile 2>>errlogfile #differs based on the fact wheter reads are single or paired-end bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile wait; if [[ $fragments =~ (single) ]]; then #single-end reads bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile echo "bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile"; wait; else #pair-end reads bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile"; wait; bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile"; fi wait; rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai echo "1/7 MAPPING WITH BWA successful" echo "seconds of run: $SECONDS" #2 CLEANING FILES - adjust MAPQ scores echo "$SECONDS" java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null echo "2/7 CLEANING FILES - adjust MAPQ scores successful" echo "seconds of run: $SECONDS" wait #3 CONVERTING TO BAM FILE samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile echo "3/7 CONVERTING TO BAM FILE successful" echo "seconds of run: $SECONDS" wait #4 SORTING BAM FILE samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile echo "4/7 SORTING BAM FILE successful" echo "seconds of run: $SECONDS" wait #rm aln.bam aln_cleaned.bam #5 REMOVING DUPLICATES java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null echo "5/7 REMOVING DUPLICATES successful" echo "seconds of run: $SECONDS" wait; #6 SNP AND VARIANT CALLING samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile echo "6/7 SNP AND VARIANT CALLING successful" echo "seconds of run: $SECONDS" #7 REMOVING FILES rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile echo "7/7 REMOVING FILES successful" echo "seconds of run: $SECONDS" echo "DONE"
