diff scripts/to_get_Y_variants.sh @ 9:695d28139f3e

toolshed8
author biomonika <biomonika@psu.edu>
date Tue, 09 Sep 2014 14:31:02 -0400
parents 1955f03f092e
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/to_get_Y_variants.sh	Tue Sep 09 14:31:02 2014 -0400
@@ -0,0 +1,104 @@
+#!/bin/bash
+set -e;
+
+#read config variables
+echo "Reading config...." >&2
+#echo `ls -l`
+computer_name=`hostname`;
+if [ $computer_name == "misa" ]; then
+	#local_computer
+	source config_devel;
+else 
+	#metacentrum machine
+	source config_meta;
+fi
+
+#files needed: reference_male.fasta $reads_1 $reads_2
+
+reference=$1;
+reads_1="input/$2"; echo "reads_1: " $reads_1;
+reads_2="input/$3"; echo "reads_2: " $reads_2;
+name=$4;
+fragments=$5;
+
+#INDEXING FILES
+
+if [ -e "references/reference_male.dict" ];
+then
+	echo "Reference exists. Not being created again.";
+else
+	echo "seconds of run: $SECONDS";
+	java -jar $CreateSequenceDictionary R=$reference O=references/reference_male.dict >mlogfile 2>>errlogfile;
+	echo "0/9 INDEXING FILES successful";
+	echo "seconds of run: $SECONDS";
+fi
+
+if [ -e "references/reference_male.fai" ];
+then
+	echo "Samtools reference exists. Not being created again.";
+else
+	echo "seconds of run: $SECONDS";
+	echo "reference:"$reference;
+	samtools faidx $reference 2>>errlogfile;
+	echo "0/9 INDEXING FILES successful";
+	echo "seconds of run: $SECONDS";
+fi
+
+#1 MAPPING WITH BWA
+bwa index $reference >>mlogfile 2>>errlogfile;
+
+#differs based on the fact whether reads are single or paired-end
+bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_${name}_male.sai & 2>>errlogfile;
+wait;
+
+if [[ $fragments =~ (single) ]]; then
+    #single end reads
+    bwa samse  $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile;
+    echo "bwa samse  $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile";
+else
+    #pair-end reads
+    bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile;
+    echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile";
+    wait;
+    bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile;
+    echo "bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile";
+fi
+
+wait;
+rm -f bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai 
+
+
+echo "1/6 MAPPING WITH BWA successful";
+echo "seconds of run: $SECONDS";
+
+#2 CLEANING FILES - adjust MAPQ scores
+echo "$SECONDS";
+java -jar $CleanSam I=bam/aln_${name}_male.sam O=bam/aln_cleaned_${name}_male.sam 2>>/dev/null;
+echo "2/6 CLEANING FILES - adjust MAPQ scores successful";
+echo "seconds of run: $SECONDS";
+wait
+
+#3 CONVERTING TO BAM FILE
+samtools view -bS bam/aln_cleaned_${name}_male.sam > bam/aln_cleaned_${name}_male.bam 2>>errlogfile;
+echo "3/6 CONVERTING TO BAM FILE successful";
+echo "seconds of run: $SECONDS";
+wait
+
+#4 SORTING BAM FILE
+samtools sort bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male >mlogfile 2>>errlogfile;
+echo "4/6 SORTING BAM FILE successful";
+echo "seconds of run: $SECONDS";
+wait;
+
+#5 REMOVING DUPLICATES
+java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_${name}_male.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_${name}_male.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null;
+echo "5/6 REMOVING DUPLICATES successful";
+echo "seconds of run: $SECONDS";
+wait;
+
+#9 REMOVING FILES
+rm -f bam/aln_${name}_male.sam bam/aln_cleaned_${name}_male.sam bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male.bam >>mlogfile 2>>errlogfile;
+echo "6/6 REMOVING FILES successful";
+echo "seconds of run: $SECONDS";
+echo "DONE";
+