|
0
|
1 #!/bin/bash
|
|
|
2 set -e;
|
|
|
3
|
|
|
4 #read config variables
|
|
|
5 echo "Reading config...." >&2
|
|
|
6 #echo `ls -l`
|
|
|
7 computer_name=`hostname`;
|
|
|
8 if [ $computer_name == "misa" ]; then
|
|
|
9 #local_computer
|
|
|
10 source config_devel;
|
|
|
11 else
|
|
|
12 #metacentrum machine
|
|
|
13 source config_meta;
|
|
|
14 fi
|
|
|
15
|
|
|
16 #files needed: reference.fasta $reads_1 $reads_2
|
|
|
17
|
|
|
18 reference=$1;
|
|
|
19 reads_1="input/$2"; echo "reads_1: " $reads_1;
|
|
|
20 reads_2="input/$3"; echo "reads_2: " $reads_2;
|
|
|
21 name=$4;
|
|
|
22 fragments=$5;
|
|
|
23
|
|
|
24 echo "reference:" $reference "reads 1:" $reads_1 "reads 2:" $reads_2 "name:" $name;
|
|
|
25
|
|
|
26 #INDEXING FILES
|
|
|
27
|
|
|
28 if [ -e "references/reference.dict" ];
|
|
|
29 then
|
|
|
30 echo "Picard reference exists. Not being created again."
|
|
|
31 else
|
|
|
32 echo "seconds of run: $SECONDS"
|
|
|
33 echo "reference:"$reference;
|
|
|
34 java -jar $CreateSequenceDictionary R=$reference O=references/reference.dict >mlogfile 2>>errlogfile
|
|
|
35 echo "0/9 INDEXING FILES successful"
|
|
|
36 echo "seconds of run: $SECONDS"
|
|
|
37 fi
|
|
|
38
|
|
|
39 if [ -e "references/reference.fai" ];
|
|
|
40 then
|
|
|
41 echo "Samtools reference exists. Not being created again."
|
|
|
42 else
|
|
|
43 echo "seconds of run: $SECONDS"
|
|
|
44 echo "reference:"$reference;
|
|
|
45 samtools faidx $reference 2>>errlogfile;
|
|
|
46 echo "0/9 INDEXING FILES successful"
|
|
|
47 echo "seconds of run: $SECONDS"
|
|
|
48 fi
|
|
|
49
|
|
|
50 #1 MAPPING WITH BWA
|
|
|
51 bwa index $reference >>mlogfile 2>>errlogfile
|
|
|
52
|
|
|
53 #differs based on the fact wheter reads are single or paired-end
|
|
|
54 bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_$name.sai & 2>>errlogfile
|
|
|
55 wait;
|
|
|
56
|
|
|
57 if [[ $fragments =~ (single) ]]; then
|
|
|
58 #single-end reads
|
|
|
59 bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile
|
|
|
60 echo "bwa samse $reference bam/aln_sa1_$name.sai $reads_1 > bam/aln_$name.sam 2>>errlogfile";
|
|
|
61 wait;
|
|
|
62 else
|
|
|
63 #pair-end reads
|
|
|
64 bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile
|
|
|
65 echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_$name.sai & 2>>errlogfile";
|
|
|
66 wait;
|
|
|
67 bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile
|
|
|
68 echo "bwa sampe $reference bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai $reads_1 $reads_2 > bam/aln_$name.sam 2>>errlogfile";
|
|
|
69 fi
|
|
|
70
|
|
|
71 wait;
|
|
|
72 rm -f bam/aln_sa1_$name.sai bam/aln_sa2_$name.sai
|
|
|
73
|
|
|
74 echo "1/7 MAPPING WITH BWA successful"
|
|
|
75 echo "seconds of run: $SECONDS"
|
|
|
76
|
|
|
77 #2 CLEANING FILES - adjust MAPQ scores
|
|
|
78 echo "$SECONDS"
|
|
|
79 java -jar $CleanSam I=bam/aln_$name.sam O=bam/aln_cleaned_$name.sam 2>>/dev/null
|
|
|
80 echo "2/7 CLEANING FILES - adjust MAPQ scores successful"
|
|
|
81 echo "seconds of run: $SECONDS"
|
|
|
82 wait
|
|
|
83
|
|
|
84 #3 CONVERTING TO BAM FILE
|
|
|
85 samtools view -bS bam/aln_cleaned_$name.sam > bam/aln_cleaned_$name.bam 2>>errlogfile
|
|
|
86 echo "3/7 CONVERTING TO BAM FILE successful"
|
|
|
87 echo "seconds of run: $SECONDS"
|
|
|
88 wait
|
|
|
89
|
|
|
90 #4 SORTING BAM FILE
|
|
|
91 samtools sort bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name >mlogfile 2>>errlogfile
|
|
|
92 echo "4/7 SORTING BAM FILE successful"
|
|
|
93 echo "seconds of run: $SECONDS"
|
|
|
94 wait
|
|
|
95 #rm aln.bam aln_cleaned.bam
|
|
|
96
|
|
|
97 #5 REMOVING DUPLICATES
|
|
|
98 java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_$name.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_$name.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null
|
|
|
99 echo "5/7 REMOVING DUPLICATES successful"
|
|
|
100 echo "seconds of run: $SECONDS"
|
|
|
101 wait;
|
|
|
102
|
|
|
103 #6 SNP AND VARIANT CALLING
|
|
|
104 samtools mpileup -uf $reference bam/aln_cleaned_sorted_deduplicated_$name.bam | $bcftools view -p 0.85 -cgv - | $vcfutils varFilter >vcf/aln_0.85_$name.vcf 2>>errlogfile
|
|
|
105 echo "6/7 SNP AND VARIANT CALLING successful"
|
|
|
106 echo "seconds of run: $SECONDS"
|
|
|
107
|
|
|
108 #7 REMOVING FILES
|
|
|
109 rm -f bam/aln_$name.sam bam/aln_cleaned_$name.sam bam/aln_cleaned_$name.bam bam/aln_cleaned_sorted_$name.bam >>mlogfile 2>>errlogfile
|
|
|
110 echo "7/7 REMOVING FILES successful"
|
|
|
111 echo "seconds of run: $SECONDS"
|
|
|
112 echo "DONE"
|
|
|
113
|