Mercurial > repos > biomonika > linkyx
diff scripts/to_get_Y_variants.sh @ 0:1955f03f092e
initial commit
| author | biomonika <biomonika@psu.edu> |
|---|---|
| date | Tue, 02 Sep 2014 14:16:04 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/to_get_Y_variants.sh Tue Sep 02 14:16:04 2014 -0400 @@ -0,0 +1,104 @@ +#!/bin/bash +set -e; + +#read config variables +echo "Reading config...." >&2 +#echo `ls -l` +computer_name=`hostname`; +if [ $computer_name == "misa" ]; then + #local_computer + source config_devel; +else + #metacentrum machine + source config_meta; +fi + +#files needed: reference_male.fasta $reads_1 $reads_2 + +reference=$1; +reads_1="input/$2"; echo "reads_1: " $reads_1; +reads_2="input/$3"; echo "reads_2: " $reads_2; +name=$4; +fragments=$5; + +#INDEXING FILES + +if [ -e "references/reference_male.dict" ]; +then + echo "Reference exists. Not being created again."; +else + echo "seconds of run: $SECONDS"; + java -jar $CreateSequenceDictionary R=$reference O=references/reference_male.dict >mlogfile 2>>errlogfile; + echo "0/9 INDEXING FILES successful"; + echo "seconds of run: $SECONDS"; +fi + +if [ -e "references/reference_male.fai" ]; +then + echo "Samtools reference exists. Not being created again."; +else + echo "seconds of run: $SECONDS"; + echo "reference:"$reference; + samtools faidx $reference 2>>errlogfile; + echo "0/9 INDEXING FILES successful"; + echo "seconds of run: $SECONDS"; +fi + +#1 MAPPING WITH BWA +bwa index $reference >>mlogfile 2>>errlogfile; + +#differs based on the fact whether reads are single or paired-end +bwa aln -t 2 $reference $reads_1 > bam/aln_sa1_${name}_male.sai & 2>>errlogfile; +wait; + +if [[ $fragments =~ (single) ]]; then + #single end reads + bwa samse $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile; + echo "bwa samse $reference bam/aln_sa1_${name}_male.sai $reads_1 > bam/aln_${name}_male.sam 2>>errlogfile"; +else + #pair-end reads + bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile; + echo "bwa aln -t 2 $reference $reads_2 > bam/aln_sa2_${name}_male.sai & 2>>errlogfile"; + wait; + bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile; + echo "bwa sampe $reference bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai $reads_1 $reads_2 > bam/aln_${name}_male.sam 2>>errlogfile"; +fi + +wait; +rm -f bam/aln_sa1_${name}_male.sai bam/aln_sa2_${name}_male.sai + + +echo "1/6 MAPPING WITH BWA successful"; +echo "seconds of run: $SECONDS"; + +#2 CLEANING FILES - adjust MAPQ scores +echo "$SECONDS"; +java -jar $CleanSam I=bam/aln_${name}_male.sam O=bam/aln_cleaned_${name}_male.sam 2>>/dev/null; +echo "2/6 CLEANING FILES - adjust MAPQ scores successful"; +echo "seconds of run: $SECONDS"; +wait + +#3 CONVERTING TO BAM FILE +samtools view -bS bam/aln_cleaned_${name}_male.sam > bam/aln_cleaned_${name}_male.bam 2>>errlogfile; +echo "3/6 CONVERTING TO BAM FILE successful"; +echo "seconds of run: $SECONDS"; +wait + +#4 SORTING BAM FILE +samtools sort bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male >mlogfile 2>>errlogfile; +echo "4/6 SORTING BAM FILE successful"; +echo "seconds of run: $SECONDS"; +wait; + +#5 REMOVING DUPLICATES +java -jar $MarkDuplicates INPUT=bam/aln_cleaned_sorted_${name}_male.bam OUTPUT=bam/aln_cleaned_sorted_deduplicated_${name}_male.bam METRICS_FILE=picard_info.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 2>>/dev/null; +echo "5/6 REMOVING DUPLICATES successful"; +echo "seconds of run: $SECONDS"; +wait; + +#9 REMOVING FILES +rm -f bam/aln_${name}_male.sam bam/aln_cleaned_${name}_male.sam bam/aln_cleaned_${name}_male.bam bam/aln_cleaned_sorted_${name}_male.bam >>mlogfile 2>>errlogfile; +echo "6/6 REMOVING FILES successful"; +echo "seconds of run: $SECONDS"; +echo "DONE"; +
