annotate scripts/to_get_Y_variants.sh @ 9:695d28139f3e

toolshed8
author biomonika <biomonika@psu.edu>
date Tue, 09 Sep 2014 14:31:02 -0400
parents 1955f03f092e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1955f03f092e initial commit
biomonika <biomonika@psu.edu>
parents:
diff changeset
#!/bin/bash
# Maps reads against a reference with BWA and post-processes the alignment
# (clean, convert to BAM, sort, remove duplicates).
# Positional arguments: reference, reads_1, reads_2, name, fragments.
set -e

# Read config variables. The config file is picked by host name: "misa" is the
# local development machine, every other host gets the MetaCentrum config.
echo "Reading config...." >&2
computer_name=$(hostname)
if [[ "$computer_name" == "misa" ]]; then
  # local computer
  source config_devel
else
  # metacentrum machine
  source config_meta
fi
# Files needed: the reference FASTA ($1) and the read files ($2, $3); the read
# files are looked up inside the input/ directory.
reference=$1
reads_1="input/$2"
echo "reads_1: " "$reads_1"
reads_2="input/$3"
echo "reads_2: " "$reads_2"
# Label embedded in the names of the files written under bam/.
name=$4
# Read layout; a value containing "single" selects the single-end branch.
fragments=$5
#INDEXING FILES
# Create the sequence dictionary only when it is not already on disk.
if [[ -e "references/reference_male.dict" ]]; then
  echo "Reference exists. Not being created again."
else
  echo "seconds of run: $SECONDS"
  # $CreateSequenceDictionary is not set in this script (presumably it comes
  # from the sourced config). It stays unquoted in case the config defines it
  # as more than one word, e.g. a jar path followed by a tool name.
  # Note that ">mlogfile" truncates the log, while stderr is appended.
  # shellcheck disable=SC2086
  java -jar $CreateSequenceDictionary R="$reference" O=references/reference_male.dict >mlogfile 2>>errlogfile
  echo "0/9 INDEXING FILES successful"
  echo "seconds of run: $SECONDS"
fi
# Build the samtools FASTA index unless one is already present.
# "samtools faidx <ref>" writes its index next to the reference as <ref>.fai,
# so that path is accepted in addition to references/reference_male.fai;
# checking only the latter can re-index the reference on every run.
if [[ -e "references/reference_male.fai" || -e "${reference}.fai" ]]; then
  echo "Samtools reference exists. Not being created again."
else
  echo "seconds of run: $SECONDS"
  echo "reference:$reference"
  samtools faidx "$reference" 2>>errlogfile
  echo "0/9 INDEXING FILES successful"
  echo "seconds of run: $SECONDS"
fi
#1 MAPPING WITH BWA
bwa index "$reference" >>mlogfile 2>>errlogfile

# Intermediate and output files of the mapping step.
sai_1="bam/aln_sa1_${name}_male.sai"
sai_2="bam/aln_sa2_${name}_male.sai"
sam_out="bam/aln_${name}_male.sam"

# The alignment differs depending on whether reads are single or paired-end.
# bwa aln runs in the foreground with the redirect attached to it: writing
# "cmd > out & 2>>errlogfile" ends the command at "&", so stderr was never
# logged, and a bare "wait" hides a failed background job from "set -e".
bwa aln -t 2 "$reference" "$reads_1" > "$sai_1" 2>>errlogfile

if [[ $fragments =~ (single) ]]; then
  # single end reads
  bwa samse "$reference" "$sai_1" "$reads_1" > "$sam_out" 2>>errlogfile
  echo "bwa samse $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile"
else
  # pair-end reads
  bwa aln -t 2 "$reference" "$reads_2" > "$sai_2" 2>>errlogfile
  echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai 2>>errlogfile"
  bwa sampe "$reference" "$sai_1" "$sai_2" "$reads_1" "$reads_2" > "$sam_out" 2>>errlogfile
  echo "bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile"
fi

# The .sai files are only needed to produce the SAM file.
rm -f "$sai_1" "$sai_2"

echo "1/6 MAPPING WITH BWA successful"
echo "seconds of run: $SECONDS"
#2 CLEANING FILES - adjust MAPQ scores
echo "$SECONDS"
# $CleanSam is not set in this script (presumably it comes from the sourced
# config); it stays unquoted in case it is defined as more than one word.
# NOTE(review): stderr is discarded here, unlike the other steps, which append
# to errlogfile - confirm this is intentional.
# shellcheck disable=SC2086
java -jar $CleanSam I="bam/aln_${name}_male.sam" O="bam/aln_cleaned_${name}_male.sam" 2>>/dev/null
echo "2/6 CLEANING FILES - adjust MAPQ scores successful"
echo "seconds of run: $SECONDS"
# The trailing "wait" was dropped: this step starts no background job.
#3 CONVERTING TO BAM FILE
# -b writes BAM output and -S marks the input as SAM; the BAM is taken from
# stdout, while diagnostics are appended to errlogfile.
samtools view -bS "bam/aln_cleaned_${name}_male.sam" > "bam/aln_cleaned_${name}_male.bam" 2>>errlogfile
echo "3/6 CONVERTING TO BAM FILE successful"
echo "seconds of run: $SECONDS"
# The trailing "wait" was dropped: this step starts no background job.
#4 SORTING BAM FILE
# The second argument is an output prefix; the next step reads the result as
# bam/aln_cleaned_sorted_${name}_male.bam.
# NOTE(review): "samtools sort <in.bam> <out.prefix>" is the legacy calling
# form; newer samtools releases expect "-o <out.bam>" - confirm the installed
# version before changing it.
# stdout is appended (">>"), since ">" would wipe what earlier steps logged.
samtools sort "bam/aln_cleaned_${name}_male.bam" "bam/aln_cleaned_sorted_${name}_male" >>mlogfile 2>>errlogfile
echo "4/6 SORTING BAM FILE successful"
echo "seconds of run: $SECONDS"
# The trailing "wait" was dropped: this step starts no background job.
#5 REMOVING DUPLICATES
# REMOVE_DUPLICATES=true asks for duplicates to be dropped from the output;
# the metrics go to picard_info.txt in the working directory.
# $MarkDuplicates is not set in this script (presumably it comes from the
# sourced config); it stays unquoted in case it is defined as several words.
# NOTE(review): stderr is discarded here, unlike the other steps, which append
# to errlogfile - confirm this is intentional.
# shellcheck disable=SC2086
java -jar $MarkDuplicates \
  INPUT="bam/aln_cleaned_sorted_${name}_male.bam" \
  OUTPUT="bam/aln_cleaned_sorted_deduplicated_${name}_male.bam" \
  METRICS_FILE=picard_info.txt \
  REMOVE_DUPLICATES=true \
  ASSUME_SORTED=true \
  VALIDATION_STRINGENCY=LENIENT \
  MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null
echo "5/6 REMOVING DUPLICATES successful"
echo "seconds of run: $SECONDS"
# The trailing "wait" was dropped: this step starts no background job.
#6 REMOVING FILES
# Delete the intermediates; only the deduplicated BAM is kept. -f keeps this
# step from failing when one of the files is already gone.
rm -f \
  "bam/aln_${name}_male.sam" \
  "bam/aln_cleaned_${name}_male.sam" \
  "bam/aln_cleaned_${name}_male.bam" \
  "bam/aln_cleaned_sorted_${name}_male.bam" >>mlogfile 2>>errlogfile
echo "6/6 REMOVING FILES successful"
echo "seconds of run: $SECONDS"
echo "DONE"