Mercurial > repos > biomonika > linkyx
comparison scripts/to_get_vcf_files.sh @ 9:695d28139f3e
toolshed8
| author | biomonika <biomonika@psu.edu> |
|---|---|
| date | Tue, 09 Sep 2014 14:31:02 -0400 |
| parents | 1955f03f092e |
| children |
comparison
equal
deleted
inserted
replaced
| 8:983278b1fdb2 | 9:695d28139f3e |
|---|---|
| 1 #!/bin/bash | |
| 2 set -e; | |
| 3 | |
| 4 #read config variables | |
| 5 echo "Reading config...." >&2 | |
| 6 #echo `ls -l` | |
| 7 computer_name=`hostname`; | |
| 8 if [ $computer_name == "misa" ]; then | |
| 9 #local_computer | |
| 10 source config_devel; | |
| 11 else | |
| 12 #metacentrum machine | |
| 13 source config_meta; | |
| 14 fi | |
| 15 | |
| 16 #files needed: reference.fasta $reads_1 $reads_2 | |
| 17 | |
| 18 reference=$1; | |
| 19 reads_1="input/$2"; echo "reads_1: " $reads_1; | |
| 20 reads_2="input/$3"; echo "reads_2: " $reads_2; | |
| 21 name=$4; | |
| 22 fragments=$5; | |
| 23 | |
| 24 echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name; | |
| 25 | |
| 26 #INDEXING FILES | |
| 27 | |
| 28 if [ -e "references/reference.dict" ]; | |
| 29 then | |
| 30 echo "Picard reference exists. Not being created again." | |
| 31 else | |
| 32 echo "seconds of run: $SECONDS" | |
| 33 echo "reference:"$reference; | |
| 34 java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile | |
| 35 echo "0/9 INDEXING FILES successful" | |
| 36 echo "seconds of run: $SECONDS" | |
| 37 fi | |
| 38 | |
| 39 if [ -e "references/reference.fai" ]; | |
| 40 then | |
| 41 echo "Samtools reference exists. Not being created again." | |
| 42 else | |
| 43 echo "seconds of run: $SECONDS" | |
| 44 echo "reference:"$reference; | |
| 45 samtools faidx $reference 2>>errlogfile; | |
| 46 echo "0/9 INDEXING FILES successful" | |
| 47 echo "seconds of run: $SECONDS" | |
| 48 fi | |
| 49 | |
| 50 #1 MAPPING WITH BWA | |
| 51 bwa index $reference >>mlogfile 2>>errlogfile | |
| 52 | |
| 53 #differs based on the fact wheter reads are single or paired-end | |
| 54 bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile | |
| 55 wait; | |
| 56 | |
| 57 if [[ $fragments =~ (single) ]]; then | |
| 58 #single-end reads | |
| 59 bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile | |
| 60 echo "bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile"; | |
| 61 wait; | |
| 62 else | |
| 63 #pair-end reads | |
| 64 bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile | |
| 65 echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile"; | |
| 66 wait; | |
| 67 bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile | |
| 68 echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile"; | |
| 69 fi | |
| 70 | |
| 71 wait; | |
| 72 rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai | |
| 73 | |
| 74 echo "1/7 MAPPING WITH BWA successful" | |
| 75 echo "seconds of run: $SECONDS" | |
| 76 | |
| 77 #2 CLEANING FILES - adjust MAPQ scores | |
| 78 echo "$SECONDS" | |
| 79 java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null | |
| 80 echo "2/7 CLEANING FILES - adjust MAPQ scores successful" | |
| 81 echo "seconds of run: $SECONDS" | |
| 82 wait | |
| 83 | |
| 84 #3 CONVERTING TO BAM FILE | |
| 85 samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile | |
| 86 echo "3/7 CONVERTING TO BAM FILE successful" | |
| 87 echo "seconds of run: $SECONDS" | |
| 88 wait | |
| 89 | |
| 90 #4 SORTING BAM FILE | |
| 91 samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile | |
| 92 echo "4/7 SORTING BAM FILE successful" | |
| 93 echo "seconds of run: $SECONDS" | |
| 94 wait | |
| 95 #rm aln.bam aln_cleaned.bam | |
| 96 | |
| 97 #5 REMOVING DUPLICATES | |
| 98 java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null | |
| 99 echo "5/7 REMOVING DUPLICATES successful" | |
| 100 echo "seconds of run: $SECONDS" | |
| 101 wait; | |
| 102 | |
| 103 #6 SNP AND VARIANT CALLING | |
| 104 samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile | |
| 105 echo "6/7 SNP AND VARIANT CALLING successful" | |
| 106 echo "seconds of run: $SECONDS" | |
| 107 | |
| 108 #7 REMOVING FILES | |
| 109 rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile | |
| 110 echo "7/7 REMOVING FILES successful" | |
| 111 echo "seconds of run: $SECONDS" | |
| 112 echo "DONE" | |
| 113 |
