Mercurial > repos > bgruening > bismark

--- a/README	Sun Feb 24 14:49:36 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-Bismark uses Bowtie or Bowtie2 to map the reads.
-
-Create your reference index with bismark_genome_preparation in your normal Bowtie2/Botwie index directory.
-bismark_genome_preparation will create a Bisulfite_Genome folder directly in your Bowtie2/Bowtie index directory. If you follow that approach you do not need to specify or modify an extra *.loc file.
-That wrapper will extract the path to the Bisulfite_Genome folder from ./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc.
--- a/bismark	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark	Mon Apr 14 16:42:38 2014 -0400
@@ -7,7 +7,7 @@
 use Getopt::Long;


-## This program is Copyright (C) 2010-12, Felix Krueger (felix.krueger@babraham.ac.uk)
+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)

 ## This program is free software: you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 @@


 my $parent_dir = getcwd;
-my $bismark_version = 'v0.7.7';
+my $bismark_version = 'v0.10.0';
 my $command_line = join (" ",@ARGV);

 ### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the '.' in the option name will cause Getopt::Long to fail
@@ -35,7 +35,7 @@
 }
 my @filenames;   # will be populated by processing the command line

-my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir) = process_command_line();
+my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag) = process_command_line();

 my @fhs;         # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment
 my %chromosomes; # stores the chromosome sequences of the mouse genome
@@ -59,17 +59,17 @@
     $fhs[2]->{name} = 'GAread1CTread2CTgenome';
     $fhs[3]->{name} = 'CTread1GAread2GAgenome';

-    print "\nPaired-end alignments will be performed\n",'='x39,"\n\n";
+    warn "\nPaired-end alignments will be performed\n",'='x39,"\n\n";

     my ($filename_1,$filename_2) = (split (/,/,$filename));
-    print "The provided filenames for paired-end alignments are $filename_1 and $filename_2\n";
+    warn "The provided filenames for paired-end alignments are $filename_1 and $filename_2\n";

     ### additional variables only for paired-end alignments
     my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file

     ### FastA format
     if ($sequence_file_format eq 'FASTA'){
-      print "Input files are in FastA format\n";
+      warn "Input files are in FastA format\n";

       if ($directional){
 	($C_to_T_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number
@@ -108,39 +108,110 @@

     ### FastQ format
     else{
-      print "Input files are in FastQ format\n";
+      warn "Input files are in FastQ format\n";
       if ($directional){
-	($C_to_T_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
-	($G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+	if ($bowtie2){
+	  ($C_to_T_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+	  ($G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+
+	  $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+	  $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+	  $fhs[1]->{inputfile_1} = undef;
+	  $fhs[1]->{inputfile_2} = undef;
+	  $fhs[2]->{inputfile_1} = undef;
+	  $fhs[2]->{inputfile_2} = undef;
+	  $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+	  $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+	}
+	else{ # Bowtie 1 alignments
+	  if ($gzip){
+	    ($C_to_T_infile_1) = biTransformFastQFiles_paired_end_bowtie1_gzip ($filename_1,$filename_2); # passing both reads at the same time

-	$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
-	$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
-	$fhs[1]->{inputfile_1} = undef;
-	$fhs[1]->{inputfile_2} = undef;
-	$fhs[2]->{inputfile_1} = undef;
-	$fhs[2]->{inputfile_2} = undef;
-	$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
-	$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
-      }
-      else{
+	    $fhs[0]->{inputfile_1} = $C_to_T_infile_1; # this file contains both read 1 and read 2 in tab delimited format
+	    $fhs[0]->{inputfile_2} = undef; # no longer needed
+	    $fhs[1]->{inputfile_1} = undef;
+	    $fhs[1]->{inputfile_2} = undef;
+	    $fhs[2]->{inputfile_1} = undef;
+	    $fhs[2]->{inputfile_2} = undef;
+	    $fhs[3]->{inputfile_1} = $C_to_T_infile_1; # this file contains both read 1 and read 2 in tab delimited format
+	    $fhs[3]->{inputfile_2} = undef; # no longer needed
+	  }
+	  else{
+	    ($C_to_T_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+	    ($G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+
+	    $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+	    $fhs[1]->{inputfile_1} = undef;
+	    $fhs[1]->{inputfile_2} = undef;
+	    $fhs[2]->{inputfile_1} = undef;
+	    $fhs[2]->{inputfile_2} = undef;
+	    $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+	  }
+	}
+      }
+      elsif($pbat){ # PBAT-Seq
+	### At the moment we are only performing uncompressed FastQ alignments with Bowtie1
 	($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
 	($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
-
-	$fhs[0]->{inputfile_1} = $C_to_T_infile_1;
-	$fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+
+	$fhs[0]->{inputfile_1} = undef;
+	$fhs[0]->{inputfile_2} = undef;
 	$fhs[1]->{inputfile_1} = $G_to_A_infile_1;
 	$fhs[1]->{inputfile_2} = $C_to_T_infile_2;
 	$fhs[2]->{inputfile_1} = $G_to_A_infile_1;
 	$fhs[2]->{inputfile_2} = $C_to_T_infile_2;
-	$fhs[3]->{inputfile_1} = $C_to_T_infile_1;
-	$fhs[3]->{inputfile_2} = $G_to_A_infile_2;
-      }
-
+	$fhs[3]->{inputfile_1} = undef;
+	$fhs[3]->{inputfile_2} = undef;
+      }
+      else{
+	if ($bowtie2){
+	  ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+	  ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+
+	  $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+	  $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+	  $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+	  $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+	  $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+	  $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+	  $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+	  $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+	}
+	else{ # Bowtie 1 alignments
+	  if ($gzip){
+	    ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end_bowtie1_gzip ($filename_1,$filename_2); # passing both reads at the same time
+
+	    $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[0]->{inputfile_2} = undef; # not needed for compressed temp files
+	    $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+	    $fhs[1]->{inputfile_2} = undef;
+	    $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+	    $fhs[2]->{inputfile_2} = undef;
+	    $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[3]->{inputfile_2} = undef; # not needed for compressed temp files
+	  }
+	  else{ #uncompressed temp files
+	    ($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastQFiles_paired_end ($filename_1,1); # also passing the read number
+	    ($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastQFiles_paired_end ($filename_2,2);
+
+	    $fhs[0]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[0]->{inputfile_2} = $G_to_A_infile_2;
+	    $fhs[1]->{inputfile_1} = $G_to_A_infile_1;
+	    $fhs[1]->{inputfile_2} = $C_to_T_infile_2;
+	    $fhs[2]->{inputfile_1} = $G_to_A_infile_1;
+	    $fhs[2]->{inputfile_2} = $C_to_T_infile_2;
+	    $fhs[3]->{inputfile_1} = $C_to_T_infile_1;
+	    $fhs[3]->{inputfile_2} = $G_to_A_infile_2;
+	  }
+	}
+      }
       if ($bowtie2){
 	paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
       }
       else{
-	paired_end_align_fragments_to_bisulfite_genome_fastQ ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
+       	paired_end_align_fragments_to_bisulfite_genome_fastQ ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
       }
     }
     start_methylation_call_procedure_paired_ends($filename_1,$filename_2,$C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);
@@ -148,14 +219,14 @@

   ### Else we are performing SINGLE-END ALIGNMENTS
   else{
-    print "\nSingle-end alignments will be performed\n",'='x39,"\n\n";
+    warn "\nSingle-end alignments will be performed\n",'='x39,"\n\n";
     ### Initialising bisulfite conversion filenames
     my ($C_to_T_infile,$G_to_A_infile);


     ### FastA format
     if ($sequence_file_format eq 'FASTA'){
-      print "Inut file is in FastA format\n";
+      warn "Inut file is in FastA format\n";
       if ($directional){
 	($C_to_T_infile) = biTransformFastAFiles ($filename);
 	$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
@@ -177,21 +248,28 @@

     ## FastQ format
     else{
-      print "Input file is in FastQ format\n";
+      warn "Input file is in FastQ format\n";
       if ($directional){
 	($C_to_T_infile) = biTransformFastQFiles ($filename);
 	$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
       }
+      elsif($pbat){
+	($G_to_A_infile) = biTransformFastQFiles ($filename);
+	$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $G_to_A_infile; # PBAT-Seq only uses the G to A converted files
+      }
       else{
 	($C_to_T_infile,$G_to_A_infile) = biTransformFastQFiles ($filename);
 	$fhs[0]->{inputfile} = $fhs[1]->{inputfile} = $C_to_T_infile;
 	$fhs[2]->{inputfile} = $fhs[3]->{inputfile} = $G_to_A_infile;
       }

-      ### Creating 4 different bowtie filehandles and storing the first entry
+      ### Creating up to 4 different bowtie filehandles and storing the first entry
       if ($bowtie2){
 	single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 ($C_to_T_infile,$G_to_A_infile);
       }
+      elsif ($pbat){
+	single_end_align_fragments_to_bisulfite_genome_fastQ (undef,$G_to_A_infile);
+      }
       else{
 	single_end_align_fragments_to_bisulfite_genome_fastQ ($C_to_T_infile,$G_to_A_infile);
       }
@@ -215,9 +293,13 @@

   ### printing all alignments to a results file
   my $outfile = $filename;
+  if ($prefix){
+    $outfile = "$prefix.$outfile";
+  }
+

   if ($bowtie2){ # SAM format is the default for Bowtie 2
-    $outfile =~ s/$/_bt2_bismark.sam/;
+    $outfile =~ s/$/_bismark_bt2.sam/;
   }
   elsif ($vanilla){ # vanilla custom Bismark output single-end output (like Bismark versions 0.5.X)
     $outfile =~ s/$/_bismark.txt/;
@@ -225,19 +307,39 @@
   else{ # SAM is the default output
     $outfile =~ s/$/_bismark.sam/;
   }
-  print "Writing bisulfite mapping results to $output_dir$outfile\n\n";
-  open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+
+  $bam = 0 unless (defined $bam);
+
+  if ($bam == 1){ ### Samtools is installed, writing out BAM directly
+    $outfile =~ s/sam/bam/;
+    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  elsif($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
+    $outfile .= '.gz';
+    open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  else{ # uncompressed output, default
+    open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+
+  warn "\n>>> Writing bisulfite mapping results to $output_dir$outfile <<<\n\n";
+  sleep(1);
+
   if ($vanilla){
     print OUT "Bismark version: $bismark_version\n";
   }

   ### printing alignment and methylation call summary to a report file
   my $reportfile = $filename;
+  if ($prefix){
+    $reportfile = "$prefix.$reportfile";
+  }
+
   if ($bowtie2){
-    $reportfile =~ s/$/_bt2_Bismark_mapping_report.txt/;
+    $reportfile =~ s/$/_bismark_bt2_SE_report.txt/;
   }
   else{
-    $reportfile =~ s/$/_Bismark_mapping_report.txt/;
+    $reportfile =~ s/$/_bismark_SE_report.txt/;
   }

   open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
@@ -245,12 +347,19 @@

   if ($unmapped){
     my $unmapped_file = $filename;
+    if ($prefix){
+      $unmapped_file = "$prefix.$unmapped_file";
+    }
+
     $unmapped_file =~ s/$/_unmapped_reads.txt/;
     open (UNMAPPED,'>',"$output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\n";
     print "Unmapped sequences will be written to $output_dir$unmapped_file\n";
   }
   if ($ambiguous){
     my $ambiguous_file = $filename;
+    if ($prefix){
+      $ambiguous_file = "$prefix.$ambiguous_file";
+    }
     $ambiguous_file =~ s/$/_ambiguous_reads.txt/;
     open (AMBIG,'>',"$output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";
     print "Ambiguously mapping sequences will be written to $output_dir$ambiguous_file\n";
@@ -305,7 +414,12 @@
   }

   ### printing all alignments to a results file
-  my $outfile = $filename_1;
+  my $outfile = $filename_1;
+
+  if ($prefix){
+    $outfile = "$prefix.$outfile";
+  }
+
   if ($bowtie2){ # SAM format is the default Bowtie 2 output
     $outfile =~ s/$/_bismark_bt2_pe.sam/;
   }
@@ -316,19 +430,38 @@
     $outfile =~ s/$/_bismark_pe.sam/;
   }

-  print "Writing bisulfite mapping results to $outfile\n\n";
-  open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!";
+  $bam = 0 unless (defined $bam);
+
+  if ($bam == 1){ ### Samtools is installed, writing out BAM directly
+    $outfile =~ s/sam/bam/;
+    open (OUT,"| $samtools_path view -bSh 2>/dev/null - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  elsif($bam == 2){ ### no Samtools found on system. Using GZIP compression instead
+    $outfile .= '.gz';
+    open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+  else{ # uncompressed output, default
+    open (OUT,'>',"$output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+  }
+
+  warn "\n>>> Writing bisulfite mapping results to $outfile <<<\n\n";
+  sleep(1);
+
   if ($vanilla){
     print OUT "Bismark version: $bismark_version\n";
   }

   ### printing alignment and methylation call summary to a report file
   my $reportfile = $filename_1;
+  if ($prefix){
+    $reportfile = "$prefix.$reportfile";
+  }
+
   if ($bowtie2){
-    $reportfile =~ s/$/_Bismark_bt2_paired-end_mapping_report.txt/;
+    $reportfile =~ s/$/_bismark_bt2_PE_report.txt/;
   }
   else{
-    $reportfile =~ s/$/_Bismark_paired-end_mapping_report.txt/;
+    $reportfile =~ s/$/_bismark_PE_report.txt/;
   }

   open (REPORT,'>',"$output_dir$reportfile") or die "Failed to write to $reportfile: $!\n";
@@ -340,6 +473,10 @@
   if ($unmapped){
     my $unmapped_1 = $filename_1;
     my $unmapped_2 = $filename_2;
+    if ($prefix){
+      $unmapped_1 = "$prefix.$unmapped_1";
+      $unmapped_2 = "$prefix.$unmapped_2";
+    }
     $unmapped_1 =~ s/$/_unmapped_reads_1.txt/;
     $unmapped_2 =~ s/$/_unmapped_reads_2.txt/;
     open (UNMAPPED_1,'>',"$output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\n";
@@ -350,6 +487,11 @@
   if ($ambiguous){
     my $amb_1 = $filename_1;
     my $amb_2 = $filename_2;
+    if ($prefix){
+      $amb_1 = "$prefix.$amb_1";
+      $amb_2 = "$prefix.$amb_2";
+    }
+
     $amb_1 =~ s/$/_ambiguous_reads_1.txt/;
     $amb_2 =~ s/$/_ambiguous_reads_2.txt/;
     open (AMBIG_1,'>',"$output_dir$amb_1") or die "Failed to write to $amb_1: $!\n";
@@ -396,7 +538,15 @@
       warn "Could not delete temporary file $C_to_T_infile properly $!\n";
     }
   }
-
+  elsif ($pbat){
+    my $deletion_successful =  unlink "$temp_dir$G_to_A_infile";
+    if ($deletion_successful == 1){
+      warn "\nSuccessfully deleted the temporary file $temp_dir$G_to_A_infile\n\n";
+    }
+    else{
+      warn "Could not delete temporary file $G_to_A_infile properly $!\n";
+    }
+  }
   else{
     my $deletion_successful =  unlink "$temp_dir$C_to_T_infile","$temp_dir$G_to_A_infile";
     if ($deletion_successful == 2){
@@ -409,7 +559,7 @@

   ### printing a final report for the alignment procedure
   print REPORT "Final Alignment report\n",'='x22,"\n";
-  print "Final Alignment report\n",'='x22,"\n";
+  warn "Final Alignment report\n",'='x22,"\n";
   #  foreach my $index (0..$#fhs){
   #    print "$fhs[$index]->{name}\n";
   #    print "$fhs[$index]->{seen}\talignments on the correct strand in total\n";
@@ -461,19 +611,38 @@
   warn "Total number of C's analysed:\t$total_number_of_C\n\n";
   warn "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
   warn "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
-  warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
-  warn "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
-  warn "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
-  warn "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n";
+  if ($bowtie2){
+    warn "Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n";
+  }
+  warn "\n";
+
+  warn "Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  warn "Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  warn "Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n";
+  if ($bowtie2){
+    warn "Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n";
+  }
+  warn "\n";

   print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";
   print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
-  print REPORT "Total methylated C's in CpG context:\t $counting{total_meCpG_count}\n";
+
+  print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
   print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
-  print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
-  print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
-  print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
-  print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n";
+  if ($bowtie2){
+    print REPORT "Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n";
+  }
+  print REPORT "\n";
+
+  print REPORT "Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  print REPORT "Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  print REPORT "Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n";
+  if ($bowtie2){
+    print REPORT "Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n";
+  }
+  print REPORT "\n";

   my $percent_meCHG;
   if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
@@ -490,6 +659,12 @@
     $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
   }

+  my $percent_meC_unknown;
+  if (($counting{total_meC_unknown_count}+$counting{total_unmethylated_C_unknown_count}) > 0){
+    $percent_meC_unknown = sprintf("%.1f",100*$counting{total_meC_unknown_count}/($counting{total_meC_unknown_count}+$counting{total_unmethylated_C_unknown_count}));
+  }
+
+
   ### printing methylated CpG percentage if applicable
   if ($percent_meCpG){
     warn "C methylated in CpG context:\t${percent_meCpG}%\n";
@@ -512,39 +687,158 @@

   ### printing methylated C percentage (CHH context) if applicable
   if ($percent_meCHH){
-    warn "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
-    print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+    warn "C methylated in CHH context:\t${percent_meCHH}%\n";
+    print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n";
   }
   else{
-    warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
-    print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
-  }
+    warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n";
+    print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n";
+  }
+
+  ### printing methylated C percentage (Unknown C context) if applicable
+  if ($bowtie2){
+    if ($percent_meC_unknown){
+      warn "C methylated in Unknown context (CN or CHN):\t${percent_meC_unknown}%\n";
+      print REPORT "C methylated in Unknown context (CN or CHN):\t${percent_meC_unknown}%\n";
+    }
+    else{
+      warn "Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0\n";
+      print REPORT "Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0\n";
+    }
+  }
+  print REPORT "\n\n";
+  warn "\n\n";

   if ($seqID_contains_tabs){
     warn "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
     print REPORT "The sequence IDs in the provided file contain tab-stops which might prevent sequence alignments. If this happened, please replace all tab characters within the seqID field with spaces before running Bismark.\n\n";
   }
+
+
+  ###########################################################################################################################################
+  ### create pie-chart with mapping stats
+  ###########################################################################################################################################
+
+
+  my $filename;
+  if ($pbat){
+    $filename = $G_to_A_infile;
+  }
+  else{
+    $filename = $C_to_T_infile;
+  }
+
+  my $pie_chart = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
+  $pie_chart =~ s/gz$//;
+  $pie_chart =~ s/_C_to_T\.fastq$//;
+  $pie_chart =~ s/_G_to_A\.fastq$//;
+
+  #  if ($prefix){
+  #    $pie_chart = "$prefix.$pie_chart"; # this is now being taken care of in file transformation
+  # }
+  $pie_chart = "${output_dir}${pie_chart}_bismark_SE.alignment_overview.png";
+
+
+  #Check whether the module GD::Graph is installed
+  my $gd_graph_installed = 0;
+  eval{
+    require GD::Graph::pie;
+    GD::Graph::pie->import();
+  };
+
+  unless($@) {
+    $gd_graph_installed = 1;
+  }
+  else{
+    warn "Perl module GD::Graph::pie is not installed, skipping graphical alignment summary\n";
+    sleep(2);
+  }
+
+  if ($gd_graph_installed){
+    warn "Generating pie chart\n\n";
+    sleep(1);
+    my $graph = GD::Graph::pie->new(600,600);
+
+    my $percent_unaligned;
+    my $percent_multiple;
+    my $percent_unextractable;
+
+    if ($counting{sequences_count}){
+      $percent_unaligned = sprintf ("%.1f",$counting{no_single_alignment_found}*100/$counting{sequences_count});
+      $percent_multiple = sprintf ("%.1f",$counting{unsuitable_sequence_count}*100/$counting{sequences_count});
+      $percent_unextractable = sprintf ("%.1f",$counting{genomic_sequence_could_not_be_extracted_count}*100/$counting{sequences_count});
+    }
+    else{
+      $percent_unaligned = $percent_multiple = $percent_unextractable = 'N/A';
+    }
+
+    my @aln_stats = (
+		     ["Uniquely aligned $percent_alignable_sequences%","Unaligned $percent_unaligned%","Multiple alignments $percent_multiple%","sequence unextractable $percent_unextractable%"],
+		     [$counting{unique_best_alignment_count},$counting{no_single_alignment_found},$counting{unsuitable_sequence_count},$counting{genomic_sequence_could_not_be_extracted_count}],
+		    );
+
+    $graph->set(
+		start_angle => 180,
+		'3d' => 0,
+		label => 'Alignment stats (single-end)',
+		suppress_angle => 2,    # Only label slices of sufficient size
+		transparent => 0,
+		dclrs => [ qw(red lorange dgreen cyan) ],
+	       ) or die $graph->error;
+
+    my $gd = $graph->plot(\@aln_stats) or die $graph->error;
+
+    open (PIE,'>',$pie_chart) or die "Failed to write to file for alignments pie chart: $!\n\n";
+    binmode PIE;
+    print PIE $gd->png;
+  }
+
+  warn "====================\nBismark run complete\n====================\n\n";
+
 }

+
 sub print_final_analysis_report_paired_ends{
   my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
   ### All sequences from the original sequence file have been analysed now, therefore deleting temporary C->T or G->A infiles
   if ($directional){
-    my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_2";
-    if ($deletion_successful == 2){
-      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2\n\n";
-    }
-    else{
-      warn "Could not delete temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2 properly: $!\n";
+    if ($G_to_A_infile_2){
+      my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_2";
+      if ($deletion_successful == 2){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2\n\n";
+      }
+      else{
+	warn "Could not delete temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_2 properly: $!\n";
+      }
+    }
+    else{ # for paired-end FastQ infiles with Bowtie1 there is only one file to delete
+      my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1";
+      if ($deletion_successful == 1){
+	warn "\nSuccessfully deleted the temporary file $temp_dir$C_to_T_infile_1\n\n";
+      }
+      else{
+	warn "Could not delete temporary file $temp_dir$C_to_T_infile_1 properly: $!\n";
+      }
     }
   }
   else{
-    my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2","$temp_dir$G_to_A_infile_2";
-    if ($deletion_successful == 4){
-      warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1, $temp_dir$G_to_A_infile_1, $temp_dir$C_to_T_infile_2 and $temp_dir$G_to_A_infile_2\n\n";
-    }
-    else{
-      warn "Could not delete temporary files properly: $!\n";
+    if ($G_to_A_infile_2 and $C_to_T_infile_2){
+      my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1","$temp_dir$C_to_T_infile_2","$temp_dir$G_to_A_infile_2";
+      if ($deletion_successful == 4){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1, $temp_dir$G_to_A_infile_1, $temp_dir$C_to_T_infile_2 and $temp_dir$G_to_A_infile_2\n\n";
+      }
+      else{
+	warn "Could not delete temporary files properly: $!\n";
+      }
+    }
+    else{ # for paired-end FastQ infiles with Bowtie1 there are only two files to delete
+      my $deletion_successful =  unlink "$temp_dir$C_to_T_infile_1","$temp_dir$G_to_A_infile_1";
+      if ($deletion_successful == 2){
+	warn "\nSuccessfully deleted the temporary files $temp_dir$C_to_T_infile_1 and $temp_dir$G_to_A_infile_1\n\n";
+      }
+      else{
+	warn "Could not delete temporary files properly: $!\n";
+      }
     }
   }

@@ -597,18 +891,36 @@
   warn "Total number of C's analysed:\t$total_number_of_C\n\n";
   warn "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
   warn "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
-  warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
-  warn "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
-  warn "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
-  warn "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  warn "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n";
+  if ($bowtie2){
+    warn "Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n";
+  }
+  warn "\n";
+
+  warn "Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  warn "Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  warn "Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n";
+  if ($bowtie2){
+    warn "Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n";
+  }
+  warn "\n";

   print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";
   print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
   print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
-  print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";
-  print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
-  print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
-  print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";
+  print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n";
+  if ($bowtie2){
+    print REPORT "Total methylated C's in Unknown context:\t$counting{total_meC_unknown_count}\n\n";
+  }
+  print REPORT "\n";
+
+  print REPORT "Total unmethylated C's in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
+  print REPORT "Total unmethylated C's in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
+  print REPORT "Total unmethylated C's in CHH context:\t$counting{total_unmethylated_CHH_count}\n";
+  if ($bowtie2){
+    print REPORT "Total unmethylated C's in Unknown context:\t$counting{total_unmethylated_C_unknown_count}\n\n";
+  }
+  print REPORT "\n";

   my $percent_meCHG;
   if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
@@ -625,6 +937,12 @@
     $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
   }

+  my $percent_meC_unknown;
+  if (($counting{total_meC_unknown_count}+$counting{total_unmethylated_C_unknown_count}) > 0){
+    $percent_meC_unknown = sprintf("%.1f",100*$counting{total_meC_unknown_count}/($counting{total_meC_unknown_count}+$counting{total_unmethylated_C_unknown_count}));
+  }
+
+
   ### printing methylated CpG percentage if applicable
   if ($percent_meCpG){
     warn "C methylated in CpG context:\t${percent_meCpG}%\n";
@@ -647,13 +965,112 @@

   ### printing methylated C percentage in CHH context if applicable
   if ($percent_meCHH){
-    warn "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
-    print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
+    warn "C methylated in CHH context:\t${percent_meCHH}%\n";
+    print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n";
+  }
+  else{
+    warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n";
+    print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n";
+  }
+
+  ### printing methylated C percentage (Unknown C context) if applicable
+  if ($bowtie2){
+    if ($percent_meC_unknown){
+      warn "C methylated in unknown context (CN or CHN):\t${percent_meC_unknown}%\n";
+      print REPORT "C methylated in unknown context (CN or CHN):\t${percent_meC_unknown}%\n";
+    }
+    else{
+      warn "Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0\n";
+      print REPORT "Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0\n";
+    }
+  }
+  print REPORT "\n\n";
+  warn "\n\n";
+
+
+  ############################################################################################################################################
+  ### create pie-chart with mapping stats
+  ###########################################################################################################################################
+
+  my $filename;
+  if ($pbat){
+    $filename = $G_to_A_infile_1;
   }
   else{
-    warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
-    print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
-  }
+    $filename = $C_to_T_infile_1;
+  }
+
+  my $pie_chart = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
+  $pie_chart =~ s/gz$//;
+  $pie_chart =~ s/_C_to_T.fastq$//;
+  $pie_chart =~ s/_G_to_A.fastq$//;
+  ### special format for gzipped PE Bowtie1 files
+  $pie_chart =~ s/\.CT_plus_GA\.fastq\.$//;
+  $pie_chart =~ s/\.GA_plus_CT\.fastq\.$//;
+
+  if ($prefix){
+    # prefix is now being prepended to the temp files already
+    # $pie_chart = "$prefix.$pie_chart";
+  }
+  $pie_chart = "${output_dir}${pie_chart}_bismark_PE.alignment_overview.png";
+
+  #Check whether the module GD::Graph is installed
+  my $gd_graph_installed = 0;
+  eval{
+    require GD::Graph::pie;
+    GD::Graph::pie->import();
+  };
+
+  unless($@) {
+    $gd_graph_installed = 1;
+  }
+  else{
+    warn "Perl module GD::Graph::pie is not installed, skipping graphical alignment summary\n";
+    sleep(2);
+  }
+
+  if ($gd_graph_installed){
+    warn "Generating pie chart\n\n";
+    sleep(1);
+    my $graph = GD::Graph::pie->new(600,600);
+
+    my $percent_unaligned;
+    my $percent_multiple;
+    my $percent_unextractable;
+
+    if ($counting{sequences_count}){
+      $percent_unaligned = sprintf ("%.1f",$counting{no_single_alignment_found}*100/$counting{sequences_count});
+      $percent_multiple = sprintf ("%.1f",$counting{unsuitable_sequence_count}*100/$counting{sequences_count});
+      $percent_unextractable = sprintf ("%.1f",$counting{genomic_sequence_could_not_be_extracted_count}*100/$counting{sequences_count});
+    }
+    else{
+      $percent_unaligned = $percent_multiple = $percent_unextractable = 'N/A';
+    }
+
+    my @aln_stats = (
+		     ["Uniquely aligned pairs $percent_alignable_sequence_pairs%","Unaligned $percent_unaligned%","Multiple alignments $percent_multiple%","sequence unextractable $percent_unextractable%"],
+		     [$counting{unique_best_alignment_count},$counting{no_single_alignment_found},$counting{unsuitable_sequence_count},$counting{genomic_sequence_could_not_be_extracted_count}],
+		    );
+
+    # push @{$mbias_read1[0]},$pos;
+
+    $graph->set(
+		start_angle => 180,
+		'3d' => 0,
+		label => 'Alignment stats (paired-end)',
+		suppress_angle => 2,    # Only label slices of sufficient size
+		transparent => 0,
+		dclrs => [ qw(red lorange dgreen cyan) ],
+	       ) or die $graph->error;
+
+    my $gd = $graph->plot(\@aln_stats) or die $graph->error;
+
+    open (PIE,'>',$pie_chart) or die "Failed to write to file for alignments pie chart: $!\n\n";
+    binmode PIE;
+    print PIE $gd->png;
+  }
+
+  warn "====================\nBismark run complete\n====================\n\n";

 }

@@ -692,7 +1109,7 @@
     }

     $counting{sequences_count}++;
-    if ($counting{sequences_count}%100000==0) {
+    if ($counting{sequences_count}%1000000==0) {
       warn "Processed $counting{sequences_count} sequences so far\n";
     }
     chomp $sequence;
@@ -854,8 +1271,8 @@
     }

     $counting{sequences_count}++;
-    if ($counting{sequences_count}%100000==0) {
-      warn "Processed $counting{sequences_count} sequences so far\n";
+    if ($counting{sequences_count}%1000000==0) {
+      warn "Processed $counting{sequences_count} sequence pairs so far\n";
     }
     my $orig_identifier_1 = $identifier_1;
     my $orig_identifier_2 = $identifier_2;
@@ -896,7 +1313,9 @@
     }
   }

-  print "Processed $counting{sequences_count} sequences in total\n\n";
+  warn "Processed $counting{sequences_count} sequences in total\n\n";
+
+  close OUT or die $!;

   print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);

@@ -949,8 +1368,8 @@
     }

     $counting{sequences_count}++;
-    if ($counting{sequences_count}%100000==0) {
-      warn "Processed $counting{sequences_count} sequences so far\n";
+    if ($counting{sequences_count}%1000000==0) {
+      warn "Processed $counting{sequences_count} sequence pairs so far\n";
     }

     my $orig_identifier_1 = $identifier_1;
@@ -1006,7 +1425,9 @@
     }
   }

-  print "Processed $counting{sequences_count} sequences in total\n\n";
+  warn "Processed $counting{sequences_count} sequences in total\n\n";
+
+  close OUT or die $!;

   print_final_analysis_report_paired_ends($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2);

@@ -1297,7 +1718,13 @@

   ### If the sequence has not been rejected so far it will have a unique best alignment
   $counting{unique_best_alignment_count}++;
-  extract_corresponding_genomic_sequence_single_end($identifier,$methylation_call_params);
+  if ($pbat){
+    extract_corresponding_genomic_sequence_single_end_pbat($identifier,$methylation_call_params);
+  }
+  else{
+    extract_corresponding_genomic_sequence_single_end($identifier,$methylation_call_params);
+  }
+
   ### check test to see if the genomic sequence we extracted has the same length as the observed sequence+2, and only then we perform the methylation call
   if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence}) != length($sequence)+2){
     warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position}\n";
@@ -1315,12 +1742,14 @@
 sub check_bowtie_results_single_end_bowtie2{
   my ($sequence,$identifier,$quality_value) = @_;

+
   unless ($quality_value){ # FastA sequences get assigned a quality value of Phred 40 throughout
     $quality_value = 'I'x(length$sequence);
   }

   # as of version Bowtie 2 2.0.0 beta7, when input reads are unpaired, Bowtie 2 no longer removes the trailing /1 or /2 from the read name.
   # $identifier =~ s/\/[1234567890]+$//; # some sequencers don't just have /1 or /2 at the end of read IDs
+  # print "sequence $sequence\nid $identifier\nquality: '$quality_value'\n";

   my $alignment_ambiguous = 0;

@@ -1328,18 +1757,17 @@

   ### reading from the Bowtie 2 output filehandles
   foreach my $index (0..$#fhs){
-    # print "Index: $index\n";
-    # print "$fhs[$index]->{last_line}\n";
-    # print "$fhs[$index]->{last_seq_id}\n\n";
-
+    #  print "Index: $index\n";
+    #   print "$fhs[$index]->{last_line}\n";
+    #   print "$fhs[$index]->{last_seq_id}\n";
+    # sleep (1);
     ### skipping this index if the last alignment has been set to undefined already (i.e. end of bowtie output)
     next unless ($fhs[$index]->{last_line} and defined $fhs[$index]->{last_seq_id});

     ### if the sequence we are currently looking at produced an alignment we are doing various things with it
     # print "last seq id: $fhs[$index]->{last_seq_id} and identifier: $identifier\n";

-   if ($fhs[$index]->{last_seq_id} eq $identifier) {
-
+    if ($fhs[$index]->{last_seq_id} eq $identifier) {
       #  SAM format specifications for Bowtie 2
       #  (1) Name of read that aligned
       #  (2) Sum of all applicable flags. Flags relevant to Bowtie are:
@@ -1405,7 +1833,7 @@
       else{
 	die "Chromosome number extraction failed for $mapped_chromosome\n";
       }
-
+
       ### We will use the optional field to determine the best alignment. Later on we extract the number of mismatches and/or indels from the CIGAR string
       my ($alignment_score,$second_best,$MD_tag);
       my @fields = split (/\t/,$fhs[$index]->{last_line});
@@ -1422,12 +1850,12 @@
 	}
       }

-      # warn "First  best alignment_score is: '$alignment_score'\n";
-      # warn "MD tag is: '$MD_tag'\n";
+      #      warn "First  best alignment_score is: '$alignment_score'\n";
+      #     warn "MD tag is: '$MD_tag'\n";
       die "Failed to extract alignment score ($alignment_score) and MD tag ($MD_tag)!\n" unless (defined $alignment_score and defined $MD_tag);

       if (defined $second_best){
-	# warn "second best alignment_score is: '$second_best'\n";
+	#	warn "second best alignment_score is: '$second_best'\n\n";

 	# If the first alignment score is the same as the alignment score of the second best hit we are going to boot this sequence altogether
 	if ($alignment_score == $second_best){
@@ -1796,8 +2224,8 @@
     $quality_value_2 = 'I'x(length$sequence_2);
   }

-  #  print "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
-
+  #  warn "$identifier\n$fhs[0]->{last_seq_id}\n$fhs[1]->{last_seq_id}\n$fhs[2]->{last_seq_id}\n$fhs[3]->{last_seq_id}\n\n";
+  #  sleep (1);
   my %mismatches = ();
   ### reading from the bowtie output files to see if this sequence pair aligned to a bisulfite converted genome

@@ -2628,12 +3056,12 @@

   ### check to see if the genomic sequences we extracted has the same length as the observed sequences +2, and only then we perform the methylation call
   if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_1}) != length($sequence_1)+2){
-    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_1}\n";
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position_1}\n";
     $counting{genomic_sequence_could_not_be_extracted_count}++;
     return 0;
   }
   if (length($methylation_call_params->{$identifier}->{unmodified_genomic_sequence_2}) != length($sequence_2)+2){
-    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{start_seq_2}\n";
+    warn "Chromosomal sequence could not be extracted for\t$identifier\t$methylation_call_params->{$identifier}->{chromosome}\t$methylation_call_params->{$identifier}->{position_2}\n";
     $counting{genomic_sequence_could_not_be_extracted_count}++;
     return 0;
   }
@@ -2927,7 +3355,8 @@
   my $cigar_2 = $methylation_call_params->{$sequence_identifier}->{CIGAR_2};
   my $flag_1 =  $methylation_call_params->{$sequence_identifier}->{flag_1};
   my $flag_2 =  $methylation_call_params->{$sequence_identifier}->{flag_2};
-#  print "$cigar_1\t$cigar_2\t$flag_1\t$flag_2\n";
+  # print "$cigar_1\t$cigar_2\t$flag_1\t$flag_2\n";
+  # sleep(10);
   ### We are now extracting the corresponding genomic sequence, +2 extra bases at the end (or start) so that we can also make a CpG methylation call and
   ### in addition make differential calls for Cs in CHG or CHH context if the C happens to be at the last (or first)  position of the actually observed sequence

@@ -2961,7 +3390,7 @@

   my $indels_1 = 0; # addiong these to the hemming distance value (needed for the NM field in the final SAM output
   my $indels_2 = 0;
-
+
   ### Extracting read 1 genomic sequence ###

   # extracting 2 additional bp at the 5' end (read 1)
@@ -3010,6 +3439,7 @@
       $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
       return;
     }
+
     $non_bisulfite_sequence_1 .= substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$pos_1,2);
   }

@@ -3059,6 +3489,10 @@
   if ( ($methylation_call_params->{$sequence_identifier}->{index} == 0) or ($methylation_call_params->{$sequence_identifier}->{index} == 2) ){
     ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
     unless (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) >= $pos_2+2){# exiting with en empty genomic sequence otherwise
+      # need to set read 1 as well now to prevent warning
+      #  warn "'$non_bisulfite_sequence_1'\n'$non_bisulfite_sequence_2'\n\n";
+      #  sleep(5);
+      $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_1} = $non_bisulfite_sequence_1;
       $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence_2} = $non_bisulfite_sequence_2;
       return;
     }
@@ -3399,6 +3833,112 @@
   $methylation_call_params->{$sequence_identifier}->{end_position} = $methylation_call_params->{$sequence_identifier}->{position}+length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence});
 }

+sub extract_corresponding_genomic_sequence_single_end_pbat {
+  my ($sequence_identifier,$methylation_call_params) = @_;
+  ### PBAT variant of the single-end genomic sequence extraction. For the read $sequence_identifier it stores alignment_strand, read_conversion,
+  ### genome_conversion, unmodified_genomic_sequence and end_position in $methylation_call_params; nothing meaningful is returned to the caller
+
+  ### the alignment_strand information is needed to determine which strand of the genomic sequence we are comparing the read against,
+  ### the read_conversion information is needed to know whether we are looking for C->T or G->A substitutions
+  my $alignment_strand;
+  my $read_conversion_info;
+  my $genome_conversion;
+  ### Also extracting the corresponding genomic sequence, +2 extra bases at the end so that we can also make a CpG methylation call and
+  ### in addition make differential calls for Cs non-CpG context, which will now be divided into CHG and CHH methylation,
+  ### if the C happens to be at the last position of the actually observed sequence
+  my $non_bisulfite_sequence;
+  ### depending on the branch, the 2 extra bases are taken either after the read (position, length+2) or before it (position-2, length+2)
+
+  my $pbat_index = $methylation_call_params->{$sequence_identifier}->{index} + 2; # PBAT: indexes 0 and 1 are not run, so the stored index is shifted by 2. NOTE(review): for a non-negative index only the == 2 and == 3 branches below can match - confirm
+
+  ### results from CT converted read vs. CT converted genome (+ orientation alignments are reported only)
+  if ($pbat_index == 0){
+    ### [Index 0, sequence originated from (converted) forward strand]
+    $counting{CT_CT_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'CT';
+
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    if (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) > $methylation_call_params->{$sequence_identifier}->{position}+length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+1){ ## CHH changed to +1
+      ### + 2 extra base at the 3' end
+      $non_bisulfite_sequence = substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$methylation_call_params->{$sequence_identifier}->{position},length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+2); ## CHH changed to +2
+    }
+    else{
+      $non_bisulfite_sequence = ''; # read + 2 bases would run past the end of the chromosome: leave the genomic sequence empty
+    }
+  }
+
+  ### results from CT converted reads vs. GA converted genome (- orientation alignments are reported only)
+  elsif ($pbat_index == 1){
+    ### [Index 1, sequence originated from (converted) reverse strand]
+    $counting{CT_GA_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'CT';
+    $genome_conversion = 'GA';
+
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    if ($methylation_call_params->{$sequence_identifier}->{position}-2 >= 0){ ## CHH changed to -2 # 02 02 2012 Changed this to >= from >
+      ### Extracting 2 extra 5' bases on forward strand which will become 2 extra 3' bases after reverse complementation
+      $non_bisulfite_sequence = substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$methylation_call_params->{$sequence_identifier}->{position}-2,length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+2); ## CHH changed to -2/+2
+      ## reverse complement!
+      $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+    }
+    else{
+      $non_bisulfite_sequence = ''; # fewer than 2 bases available before the read start: leave the genomic sequence empty
+    }
+  }
+
+  ### results from GA converted reads vs. CT converted genome (- orientation alignments are reported only)
+  elsif ($pbat_index == 2){
+    ### [Index 2, sequence originated from complementary to (converted) forward strand]
+    $counting{GA_CT_count}++;
+    $alignment_strand = '-';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'CT';
+
+    ### +2 extra bases on the forward strand 3', which will become 2 extra 5' bases after reverse complementation
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    if (length($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}}) > $methylation_call_params->{$sequence_identifier}->{position}+length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+1){ ## changed to +1 on 02 02 2012
+      $non_bisulfite_sequence = substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$methylation_call_params->{$sequence_identifier}->{position},length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+2); ## CHH changed to +2
+      ## reverse complement!
+      $non_bisulfite_sequence = reverse_complement($non_bisulfite_sequence);
+    }
+    else{
+      $non_bisulfite_sequence = ''; # read + 2 bases would run past the end of the chromosome: leave the genomic sequence empty
+    }
+  }
+
+  ### results from GA converted reads vs. GA converted genome (+ orientation alignments are reported only)
+  elsif ($pbat_index == 3){
+    ### [Index 3, sequence originated from complementary to (converted) reverse strand]
+    $counting{GA_GA_count}++;
+    $alignment_strand = '+';
+    $read_conversion_info = 'GA';
+    $genome_conversion = 'GA';
+
+    ## checking if the substring will be valid or if we can't extract the sequence because we are right at the edge of a chromosome
+    if ($methylation_call_params->{$sequence_identifier}->{position}-2 >= 0){ ## CHH changed to -2 # 02 02 2012 Changed this to >= from >
+      ### +2 extra base at the 5' end as we are nominally checking the converted reverse strand
+      $non_bisulfite_sequence = substr ($chromosomes{$methylation_call_params->{$sequence_identifier}->{chromosome}},$methylation_call_params->{$sequence_identifier}->{position}-2,length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence})+2); ## CHH changed to -2/+2
+    }
+    else{
+      $non_bisulfite_sequence = ''; # fewer than 2 bases available before the read start: leave the genomic sequence empty
+    }
+  }
+  else{ # any $pbat_index outside 0-3 is treated as fatal
+    die "Too many bowtie result filehandles\n";
+  }
+
+  $methylation_call_params->{$sequence_identifier}->{alignment_strand} = $alignment_strand;
+  $methylation_call_params->{$sequence_identifier}->{read_conversion} = $read_conversion_info;
+  $methylation_call_params->{$sequence_identifier}->{genome_conversion} = $genome_conversion;
+  $methylation_call_params->{$sequence_identifier}->{unmodified_genomic_sequence} = $non_bisulfite_sequence;
+
+  ### at this point we can also determine the end position of a read (position + length of the bowtie_sequence)
+  $methylation_call_params->{$sequence_identifier}->{end_position} = $methylation_call_params->{$sequence_identifier}->{position}+length($methylation_call_params->{$sequence_identifier}->{bowtie_sequence});
+}
+

 sub extract_corresponding_genomic_sequence_single_end_bowtie2{
   my ($sequence_identifier,$methylation_call_params) = @_;
@@ -3553,6 +4093,8 @@
   ### h for not methylated C in CHH context (was converted)     ###
   ### Z for methylated C in CpG context (was protected)         ###
   ### z for not methylated C in CpG context (was converted)     ###
+  ### U for methylated C in unknown context (was protected)     ###
+  ### u for not methylated C in unknown context (was converted) ###
   #################################################################

   my @match =();
@@ -3560,9 +4102,11 @@
   my $methyl_CHH_count = 0;
   my $methyl_CHG_count = 0;
   my $methyl_CpG_count = 0;
+  my $methyl_C_unknown_count = 0;
   my $unmethylated_CHH_count = 0;
   my $unmethylated_CHG_count = 0;
   my $unmethylated_CpG_count = 0;
+  my $unmethylated_C_unknown_count = 0;

   if ($read_conversion eq 'CT'){
     for my $index (0..$#seq) {
@@ -3576,7 +4120,10 @@
 	    ++$methyl_CpG_count;
 	    push @match,'Z'; # protected C, methylated, in CpG context
 	  }
-
+	  elsif ($downstream_base eq 'N'){ # if the downstream base was an N we cannot really be sure about the sequence context (as it might have been a CG)
+	    ++$methyl_C_unknown_count;
+	    push @match,'U'; # protected C, methylated, in Unknown context
+	  }
 	  else {
 	    ### C in not in CpG-context, determining the second downstream base context
 	    my $second_downstream_base = $genomic[$index+2];
@@ -3585,6 +4132,10 @@
 	      ++$methyl_CHG_count;
 	      push @match,'X'; # protected C, methylated, in CHG context
 	    }
+	    elsif ($second_downstream_base eq 'N'){
+	      ++$methyl_C_unknown_count; # if the second downstream base was an N we cannot really be sure about the sequence context (as it might have been a CHH or CHG)
+	      push @match,'U'; # protected C, methylated, in Unknown context
+	    }
 	    else{
 	      ++$methyl_CHH_count;
 	      push @match,'H'; # protected C, methylated, in CHH context
@@ -3606,7 +4157,10 @@
 	    ++$unmethylated_CpG_count;
 	    push @match,'z'; # converted C, not methylated, in CpG context
 	  }
-
+	  elsif ($downstream_base eq 'N'){ # if the downstream base was an N we cannot really be sure about the sequence context (as it might have been a CG)
+	    ++$unmethylated_C_unknown_count;
+	    push @match,'u'; # converted C, not methylated, in Unknown context
+	  }
 	  else{
 	    ### C in not in CpG-context, determining the second downstream base context
 	    my $second_downstream_base = $genomic[$index+2];
@@ -3615,6 +4169,10 @@
 	      ++$unmethylated_CHG_count;
 	      push @match,'x'; # converted C, not methylated, in CHG context
 	    }
+	    elsif ($second_downstream_base eq 'N'){
+	      ++$unmethylated_C_unknown_count; # if the second downstream base was an N we cannot really be sure about the sequence context (as it might have been a CHH or CHG)
+	      push @match,'u'; # converted C, not methylated, in Unknown context
+	    }
 	    else{
 	      ++$unmethylated_CHH_count;
 	      push @match,'h'; # converted C, not methylated, in CHH context
@@ -3647,7 +4205,10 @@
 	    ++$methyl_CpG_count;
 	    push @match,'Z'; # protected C on opposing strand, methylated, in CpG context
 	  }
-
+	  elsif ($upstream_base eq 'N'){ # if the upstream base was an N we cannot really be sure about the sequence context (as it might have been a CG)
+	    ++$methyl_C_unknown_count;
+	    push @match,'U'; # protected C on opposing strand, methylated, in Unknown context
+	  }
 	  else{
 	    ### C in not in CpG-context, determining the second upstream base context
 	    my $second_upstream_base = $genomic[$index];
@@ -3656,6 +4217,10 @@
 	      ++$methyl_CHG_count;
 	      push @match,'X'; # protected C on opposing strand, methylated, in CHG context
 	    }
+	    elsif ($second_upstream_base eq 'N'){
+	      ++$methyl_C_unknown_count; # if the second upstream base was an N we cannot really be sure about the sequence context (as it might have been a CHH or CHG)
+	      push @match,'U'; # protected C, methylated, in Unknown context
+	    }
 	    else{
 	      ++$methyl_CHH_count;
 	      push @match,'H'; # protected C on opposing strand, methylated, in CHH context
@@ -3679,7 +4244,10 @@
 	    ++$unmethylated_CpG_count;
 	    push @match,'z'; # converted C on opposing strand, not methylated, in CpG context
 	  }
-
+	  elsif ($upstream_base eq 'N'){ # if the upstream base was an N we cannot really be sure about the sequence context (as it might have been a CG)
+	    ++$unmethylated_C_unknown_count;
+	    push @match,'u'; # converted C on opposing strand, not methylated, in Unknown context
+	  }
 	  else{
 	    ### C in not in CpG-context, determining the second upstream base context
 	    my $second_upstream_base = $genomic[$index];
@@ -3688,6 +4256,10 @@
 	      ++$unmethylated_CHG_count;
 	      push @match,'x'; # converted C on opposing strand, not methylated, in CHG context
 	    }
+	    elsif ($second_upstream_base eq 'N'){
+	      ++$unmethylated_C_unknown_count; # if the second upstream base was an N we cannot really be sure about the sequence context (as it might have been a CHH or CHG)
+	      push @match,'u'; # converted C on opposing strand, not methylated, in Unknown context
+	    }
 	    else{
 	      ++$unmethylated_CHH_count;
 	      push @match,'h'; # converted C on opposing strand, not methylated, in CHH context
@@ -3713,9 +4285,11 @@
   $counting{total_meCHH_count} += $methyl_CHH_count;
   $counting{total_meCHG_count} += $methyl_CHG_count;
   $counting{total_meCpG_count} += $methyl_CpG_count;
+  $counting{total_meC_unknown_count} += $methyl_C_unknown_count;
   $counting{total_unmethylated_CHH_count} += $unmethylated_CHH_count;
   $counting{total_unmethylated_CHG_count} += $unmethylated_CHG_count;
   $counting{total_unmethylated_CpG_count} += $unmethylated_CpG_count;
+  $counting{total_unmethylated_C_unknown_count} += $unmethylated_C_unknown_count;

   # print "\n$sequence_actually_observed\n$genomic_sequence\n",@match,"\n$read_conversion\n\n";
   return $methylation_call;
@@ -3840,17 +4414,44 @@
   }

   my $C_to_T_infile = my $G_to_A_infile = $filename;
-  $C_to_T_infile =~ s/$/_C_to_T.fa/;
-  $G_to_A_infile =~ s/$/_G_to_A.fa/;
-  print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
-  open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+
+  if ($gzip){
+    $C_to_T_infile =~ s/$/_C_to_T.fa.gz/;
+    $G_to_A_infile =~ s/$/_G_to_A.fa.gz/;
+  }
+  else{
+    $C_to_T_infile =~ s/$/_C_to_T.fa/;
+    $G_to_A_infile =~ s/$/_G_to_A.fa/;
+  }
+
+  if ($prefix){
+    #  warn "Prefixing $prefix:\nold: $C_to_T_infile\nold: $G_to_A_infile\n\n";
+    $C_to_T_infile = "$prefix.$C_to_T_infile";
+    $G_to_A_infile = "$prefix.$G_to_A_infile";
+    #  warn "Prefixing $prefix:\nnew: $C_to_T_infile\nnew: $G_to_A_infile\n\n";
+  }
+
+  warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+
+  if ($gzip){
+    open (CTOT,"| gzip -c - > ${temp_dir}${C_to_T_infile}") or die "Can't write to file: $!\n";
+  }
+  else{
+    open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+  }

   unless ($directional){
-    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
-    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    if ($gzip){
+      open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
   }

   my $count = 0;
+
   while (1){
     my $header = <IN>;
     my $sequence= <IN>;
@@ -3887,11 +4488,14 @@
       print GTOA "$header$sequence_G_to_A";
     }
   }
+  close CTOT or die "Failed to close filehandle $!\n";
+
   if ($directional){
-    print "\nCreated C -> T converted versions of the FastA file $filename ($count sequences in total)\n\n";
+    warn "\nCreated C -> T converted versions of the FastA file $filename ($count sequences in total)\n\n";
   }
   else{
-    print "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+    close GTOA or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
   }
   return ($C_to_T_infile,$G_to_A_infile);
 }
@@ -3899,6 +4503,11 @@
 sub biTransformFastAFiles_paired_end {
   my ($file,$read_number) = @_;

+  if ($gzip){
+    warn "GZIP compression of temporary files is not supported for paired-end FastA data. Continuing to write uncompressed files\n";
+    sleep (2);
+  }
+
   my ($dir,$filename);
   if ($file =~ /\//){
     ($dir,$filename) = $file =~ m/(.*\/)(.*)$/;
@@ -3925,16 +4534,24 @@
   }

   my $C_to_T_infile = my $G_to_A_infile = $filename;
+
   $C_to_T_infile =~ s/$/_C_to_T.fa/;
   $G_to_A_infile =~ s/$/_G_to_A.fa/;

+  if ($prefix){
+    #  warn "Prefixing $prefix:\nold: $C_to_T_infile\nold: $G_to_A_infile\n\n";
+    $C_to_T_infile = "$prefix.$C_to_T_infile";
+    $G_to_A_infile = "$prefix.$G_to_A_infile";
+    #  warn "Prefixing $prefix:\nnew: $C_to_T_infile\nnew: $G_to_A_infile\n\n";
+  }
+
   if ($directional){
     if ($read_number == 1){
-      print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
       open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
     }
     elsif ($read_number == 2){
-      print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
       open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
     }
     else{
@@ -3942,8 +4559,8 @@
     }
   }
   else{ # all four strand output
-    print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
-    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
     open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
     open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
   }
@@ -3974,7 +4591,7 @@
     }

     ## small check if the sequence seems to be in FastA format
-    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>.*/);
+    die "Input file doesn't seem to be in FastA format at sequence $count: $!\n" unless ($header =~ /^>/);

     if ($read_number == 1){
       if ($bowtie2){
@@ -4017,14 +4634,14 @@

   if ($directional){
     if ($read_number == 1){
-      print "\nCreated C -> T converted version of the FastA file $filename ($count sequences in total)\n\n";
+      warn "\nCreated C -> T converted version of the FastA file $filename ($count sequences in total)\n\n";
     }
     else{
-      print "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
+      warn "\nCreated G -> A converted version of the FastA file $filename ($count sequences in total)\n\n";
     }
   }
   else{
-    print "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastA file $filename ($count sequences in total)\n\n";
   }

   if ($directional){
@@ -4070,14 +4687,64 @@

   my $C_to_T_infile = my $G_to_A_infile = $filename;

-  $C_to_T_infile =~ s/$/_C_to_T.fastq/;
-  print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
-  open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
-
-  unless ($directional){
-    $G_to_A_infile =~ s/$/_G_to_A.fastq/;
-    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
-    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+  if ($prefix){
+    # warn "Prefixing $prefix:\nold: $C_to_T_infile\nold: $G_to_A_infile\n\n";
+    $C_to_T_infile = "$prefix.$C_to_T_infile";
+    $G_to_A_infile = "$prefix.$G_to_A_infile";
+    # warn "Prefixing $prefix:\nnew: $C_to_T_infile\nnew: $G_to_A_infile\n\n";
+  }
+
+  if ($pbat){ # PBAT-Seq
+    if ($gzip){
+      $G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+    }
+    else{
+      $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+    }
+
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+
+    if ($gzip){
+      open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
+  }
+  else{ # directional or non-directional
+    if ($gzip){
+      $C_to_T_infile =~ s/$/_C_to_T.fastq.gz/;
+    }
+    else{
+      $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+    }
+
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+
+    if ($gzip){
+      open (CTOT,"| gzip -c - > ${temp_dir}${C_to_T_infile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n"; # uncompressed option
+    }
+
+    unless ($directional){
+      if ($gzip){
+	$G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+      }
+      else{
+	$G_to_A_infile =~ s/$/_G_to_A.fastq/;
+      }
+
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+
+      if ($gzip){
+	open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!\n";
+      }
+      else{
+	open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+      }
+    }
   }

   my $count = 0;
@@ -4107,26 +4774,43 @@
     }

     ## small check if the sequence file appears to be a FastQ file
-    if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
-      die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
-    }
-
-    my $sequence_C_to_T = $sequence;
-    $sequence_C_to_T =~ tr/C/T/;
-    print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
-
-    unless ($directional){
+    if ($count == 1){
+      if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+	die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
+    }
+
+    if ($pbat){
       my $sequence_G_to_A = $sequence;
       $sequence_G_to_A =~ tr/G/A/;
       print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
     }
+    else{ # directional or non-directional
+      my $sequence_C_to_T = $sequence;
+      $sequence_C_to_T =~ tr/C/T/;
+      print CTOT join ('',$identifier,$sequence_C_to_T,$identifier2,$quality_score);
+
+      unless ($directional){
+	my $sequence_G_to_A = $sequence;
+	$sequence_G_to_A =~ tr/G/A/;
+	print GTOA join ('',$identifier,$sequence_G_to_A,$identifier2,$quality_score);
+      }
+    }
   }

   if ($directional){
-    print "\nCreated C -> T converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+    close CTOT or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
+  }
+  elsif($pbat){
+    warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
+    close GTOA or die "Failed to close filehandle $!\n";
+    return ($G_to_A_infile);
   }
   else{
-    print "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+    close CTOT or die "Failed to close filehandle $!\n";
+    close GTOA or die "Failed to close filehandle $!\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
   }

   return ($C_to_T_infile,$G_to_A_infile);
@@ -4161,31 +4845,60 @@
   }

   my $C_to_T_infile = my $G_to_A_infile = $filename;
-  $C_to_T_infile =~ s/$/_C_to_T.fastq/;
-  $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+
+  if ($gzip){
+    $C_to_T_infile =~ s/$/_C_to_T.fastq.gz/;
+    $G_to_A_infile =~ s/$/_G_to_A.fastq.gz/;
+  }
+  else{
+    $C_to_T_infile =~ s/$/_C_to_T.fastq/;
+    $G_to_A_infile =~ s/$/_G_to_A.fastq/;
+  }
+
+  if ($prefix){
+    #  warn "Prefixing $prefix:\nold: $C_to_T_infile\nold: $G_to_A_infile\n\n";
+    $C_to_T_infile = "$prefix.$C_to_T_infile";
+    $G_to_A_infile = "$prefix.$G_to_A_infile";
+    #  warn "Prefixing $prefix:\nnew: $C_to_T_infile\nnew: $G_to_A_infile\n\n";
+  }

   if ($directional){
     if ($read_number == 1){
-      print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
-      open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+      warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+      if ($gzip){
+	open (CTOT,"| gzip -c - > ${temp_dir}${C_to_T_infile}") or die "Can't write to file: $!\n";
+      }
+      else{
+	open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+      }
     }
     elsif ($read_number == 2){
-      print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
-      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+      warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+      if ($gzip){
+	open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!\n";
+      }
+      else{
+	open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+      }
     }
     else{
       die "Read number needs to be 1 or 2, but was $read_number!\n\n";
     }
   }
   else{
-    print "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
-    print "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
-    open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
-    open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    warn "Writing a C -> T converted version of the input file $filename to $temp_dir$C_to_T_infile\n";
+    warn "Writing a G -> A converted version of the input file $filename to $temp_dir$G_to_A_infile\n";
+    if ($gzip){
+      open (CTOT,"| gzip -c - > ${temp_dir}${C_to_T_infile}") or die "Can't write to file: $!\n";
+      open (GTOA,"| gzip -c - > ${temp_dir}${G_to_A_infile}") or die "Can't write to file: $!\n";
+    }
+    else{
+      open (CTOT,'>',"$temp_dir$C_to_T_infile") or die "Couldn't write to file $!\n";
+      open (GTOA,'>',"$temp_dir$G_to_A_infile") or die "Couldn't write to file $!\n";
+    }
   }

   my $count = 0;
-
   while (1){
     my $identifier = <IN>;
     my $sequence = <IN>;
@@ -4206,8 +4919,10 @@
     $sequence= uc$sequence; # make input file case insensitive

     ## small check if the sequence file appears to be a FastQ file
-    if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
-      die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+    if ($count == 1){
+      if ($identifier !~ /^\@/ or $identifier2 !~ /^\+/){
+	die "Input file doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
     }
     my $sequence_C_to_T = my $sequence_G_to_A = $sequence;

@@ -4222,7 +4937,7 @@
     elsif ($read_number == 2){
       if ($bowtie2){
 	$identifier =~ s/$/\/2\/2/;
-       }
+      }
       else{
 	$identifier =~ s/$/\/2/;
       }
@@ -4250,28 +4965,174 @@

   if ($directional){
     if ($read_number == 1){
-      print "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
+      warn "\nCreated C -> T converted version of the FastQ file $filename ($count sequences in total)\n\n";
     }
     else{
-      print "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
+      warn "\nCreated G -> A converted version of the FastQ file $filename ($count sequences in total)\n\n";
     }
   }
   else{
-    print "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
+    warn "\nCreated C -> T as well as G -> A converted versions of the FastQ file $filename ($count sequences in total)\n\n";
   }
   if ($directional){
     if ($read_number == 1){
+      close CTOT or die "Failed to close filehandle $!\n";
       return ($C_to_T_infile);
     }
     else{
+      close GTOA or die "Failed to close filehandle $!\n";
       return ($G_to_A_infile);
     }
   }
   else{
+    close CTOT or die "Failed to close filehandle $!\n";
+    close GTOA or die "Failed to close filehandle $!\n";
     return ($C_to_T_infile,$G_to_A_infile);
   }
 }

+
+### SPECIAL BOWTIE 1 PAIRED-END FORMAT FOR GZIPPED OUTPUT FILES
+
+sub biTransformFastQFiles_paired_end_bowtie1_gzip {
+  ### Takes the two FastQ files of a paired-end run and writes their bisulfite-converted reads, gzipped, into $temp_dir as one
+  ### tab-separated line per read pair. Returns the name of the C->T/G->A file, followed by the name of the G->A/C->T file
+  ### unless $directional is set. Reads $skip, $upto, $prefix, $temp_dir and $directional from the enclosing scope.
+  my ($file_1,$file_2) = @_;
+
+  ### the output files are named after read file 1 with any leading directory removed
+  my $filename = $file_1;
+  if ($file_1 =~ /\//){
+    ($filename) = $file_1 =~ m/.*\/(.*)$/;
+  }
+
+  ### infile 1 (decompressed on the fly if gzipped); list-form/3-arg open keeps the file name away from the shell
+  if ($file_1 =~ /\.gz$/){
+    open (IN_1,'-|','zcat',$file_1) or die "Couldn't read from file $file_1: $!\n";
+  }
+  else{
+    open (IN_1,'<',$file_1) or die "Couldn't read from file $file_1: $!\n";
+  }
+  ### infile 2, handled the same way
+  if ($file_2 =~ /\.gz$/){
+    open (IN_2,'-|','zcat',$file_2) or die "Couldn't read from file $file_2: $!\n";
+  }
+  else{
+    open (IN_2,'<',$file_2) or die "Couldn't read from file $file_2: $!\n";
+  }
+
+  if ($skip){
+    warn "Skipping the first $skip reads from $file_1 and $file_2\n";
+    sleep (1);
+  }
+  if ($upto){
+    warn "Processing reads up to sequence no. $upto from $file_1 and $file_2\n";
+    sleep (1);
+  }
+
+  my $CT_plus_GA_infile = my $GA_plus_CT_infile = $filename;
+
+  if ($prefix){
+    $CT_plus_GA_infile = "$prefix.$CT_plus_GA_infile";
+    $GA_plus_CT_infile = "$prefix.$GA_plus_CT_infile";
+  }
+
+  $CT_plus_GA_infile =~ s/$/.CT_plus_GA.fastq.gz/;
+  $GA_plus_CT_infile =~ s/$/.GA_plus_CT.fastq.gz/;
+
+  warn "Writing a C -> T converted version of $file_1 and a G -> A converted version of $file_2 to $temp_dir$CT_plus_GA_infile\n";
+  open (CTPLUSGA,"| gzip -c - > ${temp_dir}${CT_plus_GA_infile}") or die "Can't write to file: $!\n";
+
+  ### the reciprocal conversion (G -> A for read 1, C -> T for read 2) is only written for non-directional runs
+  unless ($directional){
+    warn "Writing a G -> A converted version of $file_1 and a C -> T converted version of $file_2 to $temp_dir$GA_plus_CT_infile\n";
+    open (GAPLUSCT,"| gzip -c - > ${temp_dir}${GA_plus_CT_infile}") or die "Can't write to file: $!\n";
+  }
+
+  ### for Bowtie 1 we need to write a single gzipped file with 1 line per pair of sequences in the following format:
+  ### <seq-ID>     <sequence #1 mate>     <quality #1 mate>     <sequence #2 mate>     <quality #2 mate>
+
+  my $count = 0;
+  while (1){
+    my $identifier_1 = <IN_1>;
+    my $sequence_1 = <IN_1>;
+    my $identifier2_1 = <IN_1>;
+    my $quality_score_1 = <IN_1>;
+
+    my $identifier_2 = <IN_2>;
+    my $sequence_2 = <IN_2>;
+    my $identifier2_2 = <IN_2>;
+    my $quality_score_2 = <IN_2>;
+
+    last unless ($identifier_1 and $sequence_1 and $identifier2_1 and $quality_score_1 and $identifier_2 and $sequence_2 and $identifier2_2 and $quality_score_2);
+
+    ++$count;
+
+    ## small check if the sequence file appears to be a FastQ file (first record only)
+    if ($count == 1){
+      if ($identifier_1 !~ /^\@/ or $identifier2_1 !~ /^\+/){
+        die "Input file 1 doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
+      if ($identifier_2 !~ /^\@/ or $identifier2_2 !~ /^\+/){
+        die "Input file 2 doesn't seem to be in FastQ format at sequence $count: $!\n";
+      }
+    }
+
+    $identifier_1 = fix_IDs($identifier_1); # this is to avoid problems with truncated read ID when they contain white spaces
+    chomp $identifier_1;
+    chomp $sequence_1;
+    chomp $sequence_2;
+    chomp $quality_score_1;
+    chomp $quality_score_2;
+
+    $identifier_1 =~ s/^\@//;
+    $identifier_1 =~ s/$/\/1/; #adding an extra /1 to the end which is being removed by Bowtie otherwise (which leads to no sequences alignments whatsoever)
+
+    if ($skip){
+      next unless ($count > $skip);
+    }
+    if ($upto){
+      last if ($count > $upto);
+    }
+
+    $sequence_1 = uc$sequence_1; # make input file 1 case insensitive
+    $sequence_2 = uc$sequence_2; # make input file 2 case insensitive
+
+    my $sequence_1_C_to_T = $sequence_1;
+    my $sequence_2_G_to_A = $sequence_2;
+    $sequence_1_C_to_T =~ tr/C/T/;
+    $sequence_2_G_to_A =~ tr/G/A/;
+
+    print CTPLUSGA "$identifier_1\t$sequence_1_C_to_T\t$quality_score_1\t$sequence_2_G_to_A\t$quality_score_2\n";
+
+    unless ($directional){
+      my $sequence_1_G_to_A = $sequence_1;
+      my $sequence_2_C_to_T = $sequence_2;
+      $sequence_1_G_to_A =~ tr/G/A/;
+      $sequence_2_C_to_T =~ tr/C/T/;
+      print GAPLUSCT "$identifier_1\t$sequence_1_G_to_A\t$quality_score_1\t$sequence_2_C_to_T\t$quality_score_2\n";
+    }
+  }
+
+  ### release the readers; the loop may have stopped early ($upto), which makes a zcat pipe close unsuccessfully, so no die here
+  close IN_1;
+  close IN_2;
+
+  close CTPLUSGA or die "Couldn't close filehandle: $!\n";
+  warn "\nCreated C -> T converted version of FastQ file '$file_1' and G -> A converted version of FastQ file '$file_2' ($count sequences in total)\n";
+
+  if ($directional){
+    warn "\n";
+    return ($CT_plus_GA_infile);
+  }
+  else{
+    close GAPLUSCT or die "Couldn't close filehandle: $!\n";
+    warn "Created G -> A converted version of FastQ file '$file_1' and C -> T converted version of FastQ file '$file_2' ($count sequences in total)\n\n";
+    return ($CT_plus_GA_infile,$GA_plus_CT_infile);
+  }
+}
+
+
 sub fix_IDs{
   my $id = shift;
   $id =~ s/[ \t]+/_/g; # replace spaces or tabs with underscores
@@ -4457,10 +5318,10 @@
   my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;

   if ($directional){
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
   }
   else{
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
   }

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
@@ -4523,7 +5384,7 @@
     }
     # otherwise we just initialise last_seq_id and last_lines as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line_1} = undef;
       $fh->{last_line_2} = undef;
@@ -4536,10 +5397,10 @@
 sub paired_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
   my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
   if ($directional){
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastA)\n";
   }
   else{
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastA)\n";
   }

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
@@ -4613,7 +5474,7 @@
     }
     # otherwise we just initialise last_seq_id and last_lines as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line_1} = undef;
       $fh->{last_line_2} = undef;
@@ -4625,16 +5486,20 @@

 sub paired_end_align_fragments_to_bisulfite_genome_fastQ {
   my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
+
   if ($directional){
-    print "Input files are $C_to_T_infile_1 $G_to_A_infile_2 (FastQ)\n";
+    warn "Input file is $C_to_T_infile_1 (FastQ)\n";
+  }
+  elsif($pbat){
+    warn "Input file is $G_to_A_infile_1 (FastQ; PBAT-Seq)\n";
   }
   else{
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 (FastQ)\n";
   }

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
   ## data structure above
-  if ($directional){
+  if ($directional or $pbat){
     warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
   }
   else{
@@ -4643,12 +5508,12 @@

   foreach my $fh (@fhs) {

-    if ($directional){
+    if ($directional or $pbat){
       unless ($fh->{inputfile_1}){
 	$fh->{last_seq_id} = undef;
 	$fh->{last_line_1} = undef;
 	$fh->{last_line_2} = undef;
-	next;
+	next; # skipping unwanted filehandles
       }
     }

@@ -4660,8 +5525,15 @@
       $bt_options .= ' --nofw';
     }

-    warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile_1} and $temp_dir$fh->{inputfile_2}, with the options: $bt_options))\n";
-    open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+    if ($gzip){
+      warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from ${temp_dir}$fh->{inputfile_1}, with the options: $bt_options)\n";
+      open ($fh->{fh},"zcat ${temp_dir}$fh->{inputfile_1} | $path_to_bowtie $bt_options $fh->{bisulfiteIndex} --12 - |") or die "Can't open pipe to bowtie: $!";
+    }
+    else{
+      warn "Now starting a Bowtie paired-end alignment for $fh->{name} (reading in sequences from ${temp_dir}$fh->{inputfile_1} and ${temp_dir}$fh->{inputfile_2}, with the options: $bt_options))\n";
+      sleep(5);
+      open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} -1 $temp_dir$fh->{inputfile_1} -2 $temp_dir$fh->{inputfile_2} |") or die "Can't open pipe to bowtie: $!";
+    }

     my $line_1 = $fh->{fh}->getline();
     my $line_2 = $fh->{fh}->getline();
@@ -4693,7 +5565,7 @@

     # otherwise we just initialise last_seq_id and last_lines as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line_1} = undef;
       $fh->{last_line_2} = undef;
@@ -4706,10 +5578,10 @@
 sub paired_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
   my ($C_to_T_infile_1,$G_to_A_infile_1,$C_to_T_infile_2,$G_to_A_infile_2) = @_;
   if ($directional){
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_2 (FastQ)\n";
   }
   else{
-    print "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
+    warn "Input files are $C_to_T_infile_1 and $G_to_A_infile_1 and $C_to_T_infile_2 and $G_to_A_infile_2 (FastQ)\n";
   }

   ## Now starting up 4 instances of Bowtie 2 feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in the
@@ -4784,7 +5656,7 @@

     # otherwise we just initialise last_seq_id and last_lines as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_lines\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_lines\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line_1} = undef;
       $fh->{last_line_2} = undef;
@@ -4798,10 +5670,10 @@
 sub single_end_align_fragments_to_bisulfite_genome_fastA {
   my ($C_to_T_infile,$G_to_A_infile) = @_;
   if ($directional){
-    print "Input file is $C_to_T_infile (FastA)\n";
+    warn "Input file is $C_to_T_infile (FastA)\n";
   }
   else{
-    print "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+    warn "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
   }

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
@@ -4824,7 +5696,12 @@
     }

     warn "Now starting the Bowtie aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt_options)\n";
-    open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+    if ($gzip){
+      open ($fh->{fh},"zcat $temp_dir$fh->{inputfile} | $path_to_bowtie $bt_options $fh->{bisulfiteIndex} - |") or die "Can't open pipe to bowtie: $!";
+    }
+    else{
+      open ($fh->{fh},"$path_to_bowtie $bt_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!"; # command for uncompressed data
+    }

     # if Bowtie produces an alignment we store the first line of the output
     $_ = $fh->{fh}->getline();
@@ -4837,7 +5714,7 @@
     }
     # otherwise we just initialise last_seq_id and last_line as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line} = undef;
     }
@@ -4848,10 +5725,10 @@
 sub single_end_align_fragments_to_bisulfite_genome_fastA_bowtie2 {
   my ($C_to_T_infile,$G_to_A_infile) = @_;
   if ($directional){
-    print "Input file is $C_to_T_infile (FastA)\n";
+    warn "Input file is $C_to_T_infile (FastA)\n";
   }
   else{
-    print "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
+    warn "Input files are $C_to_T_infile and $G_to_A_infile (FastA)\n";
   }

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
@@ -4897,7 +5774,7 @@
     }
     # otherwise we just initialise last_seq_id and last_line as undefinded. This should only happen at the end of a file for Bowtie 2 output
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line} = undef;
     }
@@ -4909,15 +5786,19 @@
 sub single_end_align_fragments_to_bisulfite_genome_fastQ {
   my ($C_to_T_infile,$G_to_A_infile) = @_;
   if ($directional){
-    print "Input file is $C_to_T_infile (FastQ)\n";
+    warn "Input file is $C_to_T_infile (FastQ)\n";
+  }
+  elsif($pbat){
+    warn "Input file is $G_to_A_infile (FastQ)\n";
   }
   else{
-    print "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n";
-  }
+    warn "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n";
+  }
+

   ## Now starting up to 4 instances of Bowtie feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
   ## the data structure above
-  if ($directional){
+  if ($directional or $pbat){
     warn "Now running 2 instances of Bowtie against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
   }
   else{
@@ -4934,7 +5815,14 @@
     }

     warn "Now starting the Bowtie aligner for $fh->{name} (reading in sequences from $temp_dir$fh->{inputfile} with options: $bt_options)\n";
-    open ($fh->{fh},"$path_to_bowtie $bowtie_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!";
+    sleep (5);
+
+    if ($gzip){
+      open ($fh->{fh},"zcat $temp_dir$fh->{inputfile} | $path_to_bowtie $bowtie_options $fh->{bisulfiteIndex} - |") or die "Can't open pipe to bowtie: $!";
+    }
+    else{
+      open ($fh->{fh},"$path_to_bowtie $bowtie_options $fh->{bisulfiteIndex} $temp_dir$fh->{inputfile} |") or die "Can't open pipe to bowtie: $!"; # command for uncompressed data
+    }

     # if Bowtie produces an alignment we store the first line of the output
     $_ = $fh->{fh}->getline();
@@ -4947,7 +5835,7 @@
     }
     # otherwise we just initialise last_seq_id and last_line as undefined
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line} = undef;
     }
@@ -4956,12 +5844,13 @@

 ### Bowtie 2 | SINGLE-END | FASTQ
 sub single_end_align_fragments_to_bisulfite_genome_fastQ_bowtie2 {
+
   my ($C_to_T_infile,$G_to_A_infile) = @_;
   if ($directional){
-    print "Input file is $C_to_T_infile (FastQ)\n\n";
+    warn "Input file is $C_to_T_infile (FastQ)\n\n";
   }
   else{
-    print "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n";
+    warn "Input files are $C_to_T_infile and $G_to_A_infile (FastQ)\n\n";
   }

   ## Now starting up to 4 instances of Bowtie 2 feeding in the converted sequence files and reading in the first line of the bowtie output, and storing it in
@@ -4972,8 +5861,7 @@
   else{
     warn "Now running 4 individual instances of Bowtie 2 against the bisulfite genome of $genome_folder with the specified options: $bowtie_options\n\n";
   }
-
-  foreach my $fh (@fhs) {
+   foreach my $fh (@fhs) {
     my $bt2_options = $bowtie_options;
     if ($fh->{name} eq 'CTreadCTgenome' or $fh->{name} eq 'GAreadGAgenome'){
       $bt2_options .= ' --norc'; ### ensuring the alignments are only reported in a sensible manner
@@ -4988,6 +5876,8 @@
     ### Bowtie 2 outputs out SAM format, so we need to skip everything until the first sequence
     while (1){
       $_ = $fh->{fh}->getline();
+      # warn "$_\n";
+      # sleep(1);
       if ($_) {
 	last unless ($_ =~ /^\@/); # SAM headers start with @
       }
@@ -5003,10 +5893,11 @@
       $fh->{last_seq_id} = $id;
       $fh->{last_line} = $_;
       warn "Found first alignment:\t$fh->{last_line}\n";
+      # warn "storing $id and\n$_\n";
     }
     # otherwise we just initialise last_seq_id and last_line as undefined. This should only happen at the end of a file for Bowtie 2 output
     else {
-      print "Found no alignment, assigning undef to last_seq_id and last_line\n";
+      warn "Found no alignment, assigning undef to last_seq_id and last_line\n";
       $fh->{last_seq_id} = undef;
       $fh->{last_line} = undef;
     }
@@ -5021,9 +5912,11 @@
 	     total_meCHH_count => 0,
 	     total_meCHG_count => 0,
 	     total_meCpG_count => 0,
+	     total_meC_unknown_count => 0,
 	     total_unmethylated_CHH_count => 0,
 	     total_unmethylated_CHG_count => 0,
 	     total_unmethylated_CpG_count => 0,
+	     total_unmethylated_C_unknown_count => 0,
 	     sequences_count => 0,
 	     no_single_alignment_found => 0,
 	     unsuitable_sequence_count => 0,
@@ -5087,6 +5980,52 @@
 	   );
     }
   }
+  elsif($pbat){
+    if ($filename =~ ','){ # paired-end files
+      @fhs=(
+	    { name => 'CTreadCTgenome',
+	      strand_identity => 'con ori forward',
+	      bisulfiteIndex => $CT_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	    { name => 'CTreadGAgenome',
+	      strand_identity => 'con ori reverse',
+	      bisulfiteIndex => $GA_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	    { name => 'GAreadCTgenome',
+	      strand_identity => 'compl ori con forward',
+	      bisulfiteIndex => $CT_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	    { name => 'GAreadGAgenome',
+	    strand_identity => 'compl ori con reverse',
+	      bisulfiteIndex => $GA_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	   );
+    }
+    else{ # single-end files
+      @fhs=(
+	    { name => 'GAreadCTgenome',
+	      strand_identity => 'compl ori con forward',
+	      bisulfiteIndex => $CT_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	    { name => 'GAreadGAgenome',
+	      strand_identity => 'compl ori con reverse',
+	      bisulfiteIndex => $GA_index_basename,
+	      seen => 0,
+	      wrong_strand => 0,
+	    },
+	   );
+    }
+  }
   else{
     @fhs=(
 	  { name => 'CTreadCTgenome',
@@ -5156,6 +6095,13 @@
   my $temp_dir;
   my $rdg;
   my $rfg;
+  my $non_bs_mm;
+  my $samtools_path;
+  my $bam;
+  my $gzip;
+  my $pbat;
+  my $prefix;
+  my $old_flag;

   my $command_line = GetOptions ('help|man' => \$help,
 				 '1=s' => \$mates1,
@@ -5192,6 +6138,13 @@
 				 'temp_dir=s' => \$temp_dir,
 				 'rdg=s' => \$rdg,
 				 'rfg=s' => \$rfg,
+				 'non_bs_mm' => \$non_bs_mm,
+				 'samtools_path=s' => \$samtools_path,
+				 'bam' => \$bam,
+				 'gzip' => \$gzip,
+				 'pbat' => \$pbat,
+				 'prefix=s' => \$prefix,
+				 'old_flag' => \$old_flag,
 				);


@@ -5210,7 +6163,8 @@

           Bismark - Bisulfite Mapper and Methylation Caller.

-   Bismark Version: $bismark_version Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
+                       Bismark Version: $bismark_version
+        Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
               www.bioinformatics.babraham.ac.uk/projects/


@@ -5258,6 +6212,57 @@
     }
   }

+  ### OUTPUT REQUESTED AS BAM FILE
+  if ($bam){
+    if ($vanilla){
+      die "Specifying BAM output is not compatible with \"--vanilla\" format. Please respecify\n\n";
+    }
+
+    ### PATH TO SAMTOOLS
+    if (defined $samtools_path){
+      # if Samtools was specified as full command
+      if ($samtools_path =~ /samtools$/){
+	if (-e $samtools_path){
+	  # Samtools executable found
+	}
+	else{
+	  die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+	}
+      }
+      else{
+	unless ($samtools_path =~ /\/$/){
+	  $samtools_path =~ s/$/\//;
+	}
+	$samtools_path .= 'samtools';
+   	if (-e $samtools_path){
+	  # Samtools executable found
+	}
+	else{
+	  die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+	}
+      }
+
+      warn "Alignments will be written out in BAM format. Samtools path provided as: '$samtools_path'\n";
+      $bam = 1;
+    }
+    # Check whether Samtools is in the PATH if no path was supplied by the user
+    else{
+      if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if samtools is in the PATH
+	$samtools_path = `which samtools`;
+	chomp $samtools_path;
+	warn "Alignments will be written out in BAM format. Samtools found here: '$samtools_path'\n";
+	$bam = 1;
+      }
+    }
+
+    unless (defined $samtools_path){
+      $bam = 2;
+      warn "Did not find Samtools on the system. Alignments will be compressed with GZIP instead (.sam.gz)\n";
+    }
+    sleep (1);
+  }
+
+
   ####################################
   ### PROCESSING ARGUMENTS

@@ -5283,7 +6288,7 @@
     $genome_folder = $absolute_genome_folder;
   }
   else{
-    die "Failed to move to $genome_folder: $!\nUSAGE: Bismark.pl [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>} [<hits>]    (--help for more details)\n";
+    die "Failed to move to $genome_folder: $!\nUSAGE: bismark [options] <genome_folder> {-1 <mates1> -2 <mates2> | <singles>} [<hits>]    (--help for more details)\n";
   }

   my $CT_dir = "${genome_folder}Bisulfite_Genome/CT_conversion/";
@@ -5295,7 +6300,7 @@
     my @CT_bowtie_index = ('BS_CT.1.bt2','BS_CT.2.bt2','BS_CT.3.bt2','BS_CT.4.bt2','BS_CT.rev.1.bt2','BS_CT.rev.2.bt2');
     foreach my $file(@CT_bowtie_index){
       unless (-f $file){
-	die "The Bowtie 2 index of the C->T converted genome seems to be faulty ($file). Please run the bismark_genome_preparation before running Bismark.\n";
+	die "The Bowtie 2 index of the C->T converted genome seems to be faulty ($file doesn't exist). Please run the bismark_genome_preparation before running Bismark\n";
       }
     }
     ### checking the integrity of $GA_dir
@@ -5303,7 +6308,7 @@
     my @GA_bowtie_index = ('BS_GA.1.bt2','BS_GA.2.bt2','BS_GA.3.bt2','BS_GA.4.bt2','BS_GA.rev.1.bt2','BS_GA.rev.2.bt2');
     foreach my $file(@GA_bowtie_index){
       unless (-f $file){
-	die "The Bowtie 2 index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+	die "The Bowtie 2 index of the G->A converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation before running Bismark\n";
       }
     }
   }
@@ -5314,7 +6319,7 @@
     my @CT_bowtie_index = ('BS_CT.1.ebwt','BS_CT.2.ebwt','BS_CT.3.ebwt','BS_CT.4.ebwt','BS_CT.rev.1.ebwt','BS_CT.rev.2.ebwt');
     foreach my $file(@CT_bowtie_index){
       unless (-f $file){
-	die "The Bowtie index of the C->T converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+	die "The Bowtie index of the C->T converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation before running Bismark.\n";
       }
     }
     ### checking the integrity of $GA_dir
@@ -5322,7 +6327,7 @@
     my @GA_bowtie_index = ('BS_GA.1.ebwt','BS_GA.2.ebwt','BS_GA.3.ebwt','BS_GA.4.ebwt','BS_GA.rev.1.ebwt','BS_GA.rev.2.ebwt');
     foreach my $file(@GA_bowtie_index){
       unless (-f $file){
-	die "The Bowtie index of the G->A converted genome seems to be faulty ($file). Please run bismark_genome_preparation before running Bismark.\n";
+	die "The Bowtie index of the G->A converted genome seems to be faulty ($file doesn't exist). Please run bismark_genome_preparation before running Bismark.\n";
       }
     }
   }
@@ -5493,23 +6498,40 @@
   }

   ### BOWTIE 2 READ GAP OPTIONS
+  my ($insertion_open,$insertion_extend,$deletion_open,$deletion_extend);
+
   if ($rdg){
     die "The option '--rdg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
-    unless ($rdg =~ /^.+,.+$/){
+    if ($rdg =~ /^(\d+),(\d+)$/){
+      $deletion_open = $1;
+      $deletion_extend = $2;
+    }
+    else{
       die "The option '--rdg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
     }
     push @bowtie_options,"--rdg $rdg";
   }
+  else{
+    $deletion_open = 5;
+    $deletion_extend = 3;
+  }

   ### BOWTIE 2 REFERENCE GAP OPTIONS
   if ($rfg){
     die "The option '--rfg <int1>,<int2>' is only available when using Bowtie 2\n\n" unless ($bowtie2);
-    unless ($rfg =~ /^.+,.+$/){
+    if ($rfg =~ /^(\d+),(\d+)$/){
+      $insertion_open = $1;
+      $insertion_extend = $2;
+    }
+    else{
       die "The option '--rfg <int1>,<int2>' needs to be in the format <integer,integer> . Please consult \"setting up functions\" in the Bowtie 2 manual for further information\n\n";
     }
     push @bowtie_options,"--rfg $rfg";
   }
-
+  else{
+    $insertion_open = 5;
+    $insertion_extend = 3;
+  }


   ### BOWTIE 2 PARALLELIZATION OPTIONS
@@ -5586,6 +6608,11 @@
       push @bowtie_options,'--no-mixed';     ## By default Bowtie 2 is not looking for single-end alignments if it can't find concordant or discordant alignments
       push @bowtie_options,'--no-discordant';## By default Bowtie 2 is not looking for discordant alignments if it can't find concordant ones
     }
+
+    if ($old_flag){
+      warn "\nUsing FLAG values for paired-end SAM output used up to Bismark v0.8.2. In addition, paired-end sequences will have /1 and /2 appended to their read IDs\n\n" unless($vanilla);
+      sleep(3);
+    }
   }
   elsif ($mates2){
     die "Paired-end mapping requires the format: -1 <mates1> -2 <mates2>, please respecify!\n";
@@ -5646,14 +6673,24 @@
   ### STRAND-SPECIFIC LIBRARIES
   my $directional;
   if ($non_directional){
-    print "Library was specified to be not strand-specific (non-directional), therefore alignments to all four possible bisulfite strands (OT, CTOT, OB and CTOB) will be reported.\n";
+    die "A library can only be specified to be either non-directional or a PBAT-Seq library. Please respecify!\n\n" if ($pbat);
+    warn "Library was specified to be not strand-specific (non-directional), therefore alignments to all four possible bisulfite strands (OT, CTOT, OB and CTOB) will be reported\n";
+    sleep (3);
+    $directional = 0;
+  }
+  elsif($pbat){
+    die "The option --pbat is currently not compatible with --gzip. Please run alignments with uncompressed temporary files, i.e. lose the option --gzip\n" if ($gzip);
+    die "The option --pbat is currently not working for Bowtie 2. Please run alignments in default (i.e. Bowtie 1) mode!\n" if ($bowtie2);
+    die "The option --pbat is currently only working with FastQ files. Please respecify (i.e. lose the option -f)!\n" if ($fasta);
+
+    warn "Library was specified as PBAT-Seq (Post-Bisulfite Adapter Tagging), only performing alignments to the complementary strands (CTOT and CTOB)\n";
     sleep (3);
     $directional = 0;
   }
   else{
-    print "Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!).\n";
+    warn "Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!)\n";
     sleep (3);
-    $directional = 1; # Changed this to being the default behaviour
+    $directional = 1; # default behaviour
   }

   ### UNMAPPED SEQUENCE OUTPUT
@@ -5722,8 +6759,25 @@
     $temp_dir = '';
   }

-
-  return ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_format,$bowtie_options,$directional,$unmapped,$multi_map,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$qupto,$temp_dir);
+  ### OPTIONAL NON-BS MISMATCH OUTPUT AS EXTRA COLUMN IN SAM FILE
+  if ($non_bs_mm){
+    if ($vanilla){
+      die "Option '--non_bs_mm' may only be specified for output in SAM format. Please respecify!\n";
+    }
+  }
+
+  ### PREFIX FOR OUTPUT FILES
+  if ($prefix){
+    # removing trailing dots
+
+    $prefix =~ s/\.+$//;
+
+    warn "Using the following prefix for output files: $prefix\n\n";
+    sleep(1);
+  }
+
+
+  return ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_format,$bowtie_options,$directional,$unmapped,$multi_map,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$qupto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag);
 }


@@ -5750,7 +6804,14 @@
   my $methcall          = $methylation_call_params->{$id}->{methylation_call};
   my $read_conversion   = $methylation_call_params->{$id}->{read_conversion};
   my $genome_conversion = $methylation_call_params->{$id}->{genome_conversion};
-  my $number_of_mismatches = $methylation_call_params->{$id}->{number_of_mismatches};
+  my $number_of_mismatches;
+  if ($bowtie2){
+    $number_of_mismatches= $methylation_call_params->{$id}->{alignment_score};
+  }
+  else{
+    $number_of_mismatches= $methylation_call_params->{$id}->{number_of_mismatches};
+  }
+
   ### This is a description of the bitwise FLAG field which needs to be set for the SAM file taken from: "The SAM Format Specification (v1.4-r985), September 7, 2011"
   ## FLAG: bitwise FLAG. Each bit is explained in the following table:
   ## Bit    Description                                                Comment                                Value
@@ -5870,11 +6931,77 @@

   #####

+  # Optionally calculating number of mismatches for Bowtie 2 alignments
+
+  if ($non_bs_mm) {
+    if ($bowtie2) {
+
+      $number_of_mismatches =~ s/-//; # removing the minus sign
+
+	### if Bowtie 2 was used we need to analyse the CIGAR string whether the read contained any indels to determine the number of mismatches
+	if ($cigar =~ /(D|I)/) {
+	  # warn "$cigar\n";
+
+	  # parsing CIGAR string
+	  my @len = split (/\D+/,$cigar); # storing the length per operation
+	  my @ops = split (/\d+/,$cigar); # storing the operation
+	  shift @ops;		# remove the empty first element
+	  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+	  foreach (0..$#len) {
+	    if ($ops[$_] eq 'M') {
+	      # warn "skipping\n";
+	      next;		# irrelevant
+	    }
+	    elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
+	      $number_of_mismatches -= $insertion_open;
+	      $number_of_mismatches -= $len[$_] * $insertion_extend;
+	      # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	    }
+	    elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
+	      $number_of_mismatches -= $deletion_open;
+	      $number_of_mismatches -= $len[$_] * $deletion_extend;
+	      # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	    }
+	    elsif ($cigar =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
+	      die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+	    }
+	    else {
+	      die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar\n";
+	    }
+	  }
+	  # warn "Alignment score $number_of_mismatches\n";
+	  # print "Mismatches $number_of_mismatches\n\n";
+	}
+      ### Now we have InDel corrected alignment scores
+
+      ### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, but normal mismatches receive -6. This might still break if the
+      ### sequence contained more than 5 Ns, but this should occur close to never
+
+      my $seq_N_count = $number_of_mismatches % 6; # modulo 6 will return the integer rest after the division
+      # warn "N count: $seq_N_count\n";
+      $number_of_mismatches = int ($number_of_mismatches / 6) + $seq_N_count;
+      # warn "MM    $number_of_mismatches\n";
+    }
+  }
+
+  ####
+
+  my $XA_tag = "XA:Z:$number_of_mismatches";
+
+  #####
+
   # SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
-  print OUT join("\t",($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag)),"\n";
+  ### optionally print number of non-bisulfite mismatches
+  if ($non_bs_mm){
+    print OUT join("\t",($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag,$XA_tag)),"\n";
+  }
+  else{ # default
+    # SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
+    print OUT join("\t",($id,$flag,$chr,$start,$mapq,$cigar,$rnext,$pnext,$tlen,$actual_seq,$qual,$NM_tag,$XX_tag,$XM_tag,$XR_tag,$XG_tag)),"\n";
+  }
 }

-
 sub paired_end_SAM_output{
   my ($id,$actual_seq_1,$actual_seq_2,$methylation_call_params,$qual_1,$qual_2) = @_;
   my $strand_1                = $methylation_call_params->{$id}->{alignment_read_1}; # Bowtie 1 only reports the read 1 alignment strand
@@ -5887,11 +7014,18 @@
   my $read_conversion_1       = $methylation_call_params->{$id}->{read_conversion_1};
   my $read_conversion_2       = $methylation_call_params->{$id}->{read_conversion_2};
   my $genome_conversion       = $methylation_call_params->{$id}->{genome_conversion};
-  my $number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_1}; # only needed for custom allele-specific output, not the default!
-  my $number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_2};
-
-  my $id_1 = $id.'/1';
-  my $id_2 = $id.'/2';
+
+  my $id_1;
+  my $id_2;
+
+  if ($old_flag){
+    $id_1 = $id.'/1';
+    $id_2 = $id.'/2';
+  }
+  else{
+    $id_1 = $id; # appending /1 or /2 confuses some downstream programs such as Picard
+    $id_2 = $id;
+  }

   # Allows all degenerate nucleotide sequences in reference genome
   die "Reference sequence ($ref_seq_1) contains invalid nucleotides!\n" if $ref_seq_1 =~ /[^ACTGNRYMKSWBDHV]/i;
@@ -5914,6 +7048,26 @@
   else {
     die "Unexpected combination of read 1 and genome conversion: $read_conversion_1 / $genome_conversion\n";
   }
+
+  my $number_of_mismatches_1;
+  my $number_of_mismatches_2;
+
+  if ($bowtie2){ # Bowtie 2 reports always as read 1 then read 2, so this is fine
+    $number_of_mismatches_1  = $methylation_call_params->{$id}->{alignment_score_1}; # only needed for custom allele-specific output, not the default!
+    $number_of_mismatches_2  = $methylation_call_params->{$id}->{alignment_score_2};
+  }
+  else{ # Bowtie 1 reports always the leftmost read first. That means we have to reverse the strings if the first read aligned in reverse orientation
+    if ($index == 2 or $index == 3){ # CTOT or OB
+      $number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_2}; # only needed for custom allele-specific output, not the default!
+      $number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_1};
+    }
+    else{ # if the first read aligned in forward direction it is like for Bowtie 2
+      $number_of_mismatches_1  = $methylation_call_params->{$id}->{number_of_mismatches_1}; # only needed for custom allele-specific output, not the default!
+      $number_of_mismatches_2  = $methylation_call_params->{$id}->{number_of_mismatches_2};
+    }
+  }
+
+

   ### we need to remove 2 bp of the genomic sequence as we were extracting read + 2bp long fragments to make a methylation call at the
   ### first or last position.
@@ -5991,24 +7145,50 @@
   # strands OT and CTOT will be treated as aligning to the top strand (both sequences are scored as aligning to the top strand)
   # strands OB and CTOB will be treated as aligning to the bottom strand (both sequences are scored as reverse complemented sequences)

-  my $flag_1;                                                          # FLAG variable used for SAM format
+  my $flag_1;                                                            # FLAG variable used for SAM format
   my $flag_2;

+  ### The new default FLAG values have been suggested by Peter Hickey, Australia (PH)
+
   if ($index == 0){       # OT
-    $flag_1 = 67;                                                      # Read 1 is on the + strand  (1+2+64) (Read 2 is technically reverse-complemented, but we do not score it)
-    $flag_2 = 131;                                                     # Read 2 is on - strand but informative for the OT        (1+2+128)
+    unless ($old_flag){
+      $flag_1 = 99;                                                      # PH: Read 1 is on the + strand and Read 2 is reversed  (1+2+32+64)
+      $flag_2 = 147;                                                     # PH: Read 2 is on - strand but informative for the OT  (1+2+16+128)
+    }
+    else{
+      $flag_1 = 67;                                                      # Read 1 is on the + strand  (1+2+64) (Read 2 is technically reverse-complemented, but we do not score it)
+      $flag_2 = 131;                                                     # Read 2 is on - strand but informative for the OT        (1+2+128)
+    }
   }
   elsif ($index == 1){    # CTOB
-    $flag_1 = 115;                                                     # Read 1 is on the + strand, we score for OB  (1+2+16+32+64)
-    $flag_2 = 179;                                                     # Read 2 is on the - strand  (1+2+16+32+128)
+    unless($old_flag){
+      $flag_1 = 83;                                                      # PH: Read 1 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+16+64)
+      $flag_2 = 163;                                                     # PH: read 2 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+32+128)
+    }
+    else{
+      $flag_1 = 115;                                                     # Read 1 is on the + strand, we score for OB  (1+2+16+32+64)
+      $flag_2 = 179;                                                     # Read 2 is on the - strand  (1+2+16+32+128)
+    }
   }
   elsif ($index == 2){    # CTOT
-    $flag_1 = 67;                                                      # Read 1 is on the - strand (CTOT) strand, but we score it for OT (1+2+64)
-    $flag_2 = 131;                                                     # Read 2 is on the + strand, score it for OT (1+2+128)
+    unless ($old_flag){
+      $flag_1 = 99;                                                      # PH: Read 1 is on the + strand and Read 2 is reversed  (1+2+32+64)
+      $flag_2 = 147;                                                     # PH: Read 2 is on - strand but informative for the OT  (1+2+16+128)
+    }
+    else{
+      $flag_1 = 67;                                                      # Read 1 is on the - strand (CTOT) strand, but we score it for OT (1+2+64)
+      $flag_2 = 131;                                                     # Read 2 is on the + strand, score it for OT (1+2+128)
+    }
   }
   elsif ($index == 3){    # OB
-    $flag_1 = 115;                                                     # Read 1 is on the - strand, we score for OB  (1+2+16+32+64)
-    $flag_2 = 179;                                                     # Read 2 is on the + strand  (1+2+16+32+128)
+    unless ($old_flag){
+      $flag_1 = 83;                                                      # PH: Read 1 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+16+64)
+      $flag_2 = 163;                                                     # PH: read 2 is on the - strand, mapped in proper pair and Read 1 is reversed  (1+2+32+128)
+    }
+    else{
+      $flag_1 = 115;                                                     # Read 1 is on the - strand, we score for OB  (1+2+16+32+64)
+      $flag_2 = 179;                                                     # Read 2 is on the + strand  (1+2+16+32+128)
+    }
   }

   #####
@@ -6078,11 +7258,12 @@
 	# or
 	#
 	# ------------------------->     read 1
-	# <-----------                   read 2   read 2 contained within read 1
-
-	# start and end of read 2  are fully contained within read 1
-	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
-	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
+	# <------------------------      read 2   read 2 contained within read 1
+
+	# start and end of read 2  are fully contained within read 1, using the length of read 1 for the TLEN variable
+	$tlen_1 = $end_read_1 - $start_read_1 + 1;          # Set to length of read 1   Leftmost read has a + sign,
+	$tlen_2 = ($end_read_1 - $start_read_1 + 1) * -1;   # Set to length of read 1   Rightmost read has a - sign. well this is debatable. Changed this
+                                                            ### as a request by frozenlyse on SeqAnswers on 24 July 2013
       }

     }
@@ -6114,12 +7295,13 @@
 	# or
 	#
 	# ------------------------->     read 2
-	# <-----------                   read 1   read 1 contained within read 2
+	#  <------------------------      read 1   read 1 contained within read 2

-	# start and end of read 1  are fully contained within read 2
-	$tlen_1 = 0;                                                       # Set as 0 when the information is unavailable
-	$tlen_2 = 0;                                                       # Set as 0 when the information is unavailable
-      }
+	# start and end of read 1  are fully contained within read 2, using the length of read 2 for the TLEN variable
+	$tlen_1 = ($end_read_2 - $start_read_2 + 1) * -1;          # Set to length of read 2   Shorter read receives a - sign,
+	$tlen_2 = $end_read_2 - $start_read_2 + 1;                 # Set to length of read 2   Longer read receives a +. Well this is debatable. Changed this
+	                                                           ### as a request by frozenlyse on SeqAnswers on 24 July 2013
+     }
     }
   }

@@ -6208,9 +7390,121 @@

   #####

+  # Optionally calculating number of mismatches for Bowtie 2 alignments
+
+  if ($non_bs_mm) {
+    if ($bowtie2) {
+
+      $number_of_mismatches_1 =~ s/-//; # removing the minus sign
+      $number_of_mismatches_2 =~ s/-//;
+
+      ### if Bowtie 2 was used we need to analyse the CIGAR strings whether the reads contained any indels to determine the number of mismatches
+
+      ### CIGAR 1
+      if ($cigar_1 =~ /(D|I)/) {
+	# warn "$cigar_1\n";
+
+	# parsing CIGAR string
+	my @len = split (/\D+/,$cigar_1); # storing the length per operation
+	my @ops = split (/\d+/,$cigar_1); # storing the operation
+	shift @ops;		# remove the empty first element
+	die "CIGAR string '$cigar_1' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+	foreach (0..$#len) {
+	  if ($ops[$_] eq 'M') {
+	    # warn "skipping\n";
+	    next;		# irrelevant
+	  }
+	  elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
+	    $number_of_mismatches_1 -= $insertion_open;
+	    $number_of_mismatches_1 -= $len[$_] * $insertion_extend;
+	    # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	  }
+	  elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
+	    $number_of_mismatches_1 -= $deletion_open;
+	    $number_of_mismatches_1 -= $len[$_] * $deletion_extend;
+	    # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	  }
+	  elsif ($cigar_1 =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
+	    die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar_1\n";
+	  }
+	  else {
+	    die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar_1\n";
+	  }
+	}
+
+	# warn "Alignment score $number_of_mismatches_1\n";
+	# print "Mismatches $number_of_mismatches_1\n\n";
+      }
+
+      ### CIGAR 2
+      if ($cigar_2 =~ /(D|I)/) {
+	# warn "$cigar_2\n";
+
+	# parsing CIGAR string
+	my @len = split (/\D+/,$cigar_2); # storing the length per operation
+	my @ops = split (/\d+/,$cigar_2); # storing the operation
+	shift @ops;		# remove the empty first element
+	die "CIGAR string '$cigar_2' contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+	foreach (0..$#len) {
+	  if ($ops[$_] eq 'M') {
+	    # warn "skipping\n";
+	    next; #irrelevant
+	  }
+	  elsif ($ops[$_] eq 'I') {	# insertion in the read sequence
+	    $number_of_mismatches_2 -= $insertion_open;
+	    $number_of_mismatches_2 -= $len[$_] * $insertion_extend;
+	    # warn "Insertion: Subtracting $ops[$_], length $len[$_], open: $insertion_open, extend: $insertion_extend\n";
+	  }
+	  elsif ($ops[$_] eq 'D') {	# deletion in the read sequence
+	    $number_of_mismatches_2 -= $deletion_open;
+	    $number_of_mismatches_2 -= $len[$_] * $deletion_extend;
+	    # warn "Deletion: Subtracting $ops[$_], length $len[$_], open: $deletion_open, extend: $deletion_extend\n";
+	  }
+	  elsif ($cigar_2 =~ tr/[NSHPX=]//) {	# if these (for standard mapping) illegal characters exist we die
+	    die "The CIGAR string contained illegal CIGAR operations in addition to 'M', 'I' and 'D': $cigar_2\n";
+	  }
+	  else {
+	    die "The CIGAR string contained undefined CIGAR operations in addition to 'M', 'I' and 'D': $cigar_2\n";
+	  }
+	}
+      }
+
+      ### Now we have InDel corrected Alignment scores
+
+      ### if the actual sequence contained Ns we need to adjust the number of mismatches. Ns receive a penalty of -1, but normal mismatches receive -6. This might still break if the
+      ### sequence contained more than 5 Ns, but this should occur close to never
+
+      my $seq_1_N_count = $number_of_mismatches_1 % 6; # modulo 6 will return the integer rest after the division
+      my $seq_2_N_count = $number_of_mismatches_2 % 6;
+      #   warn "N count 1: $seq_1_N_count\n";
+      #   warn "N count 2: $seq_2_N_count\n";
+
+      $number_of_mismatches_1 = int ($number_of_mismatches_1 / 6) + $seq_1_N_count;
+      $number_of_mismatches_2 = int ($number_of_mismatches_2 / 6) + $seq_2_N_count;
+
+      # warn "MM1    $number_of_mismatches_1 \n";
+      # warn "MM2    $number_of_mismatches_2 \n";
+    }
+  }
+
+  ####
+
+  my $XA_tag = "XA:Z:$number_of_mismatches_1";
+  my $XB_tag = "XB:Z:$number_of_mismatches_2";
+
+
   # SAM format: QNAME, FLAG, RNAME, 1-based POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL, optional fields
-  print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
-  print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
+  ### optionally print number of non-bisulfite mismatches
+  if ($non_bs_mm){
+    print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag,$XA_tag)), "\n";
+    print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag,$XB_tag)), "\n";
+  }
+  else{ # default
+    print OUT join("\t", ($id_1, $flag_1, $chr, $start_read_1, $mapq, $cigar_1, $rnext, $pnext_1, $tlen_1, $actual_seq_1, $qual_1, $NM_tag_1, $XX_tag_1, $XM_tag_1,$XR_tag_1,$XG_tag)), "\n";
+    print OUT join("\t", ($id_2, $flag_2, $chr, $start_read_2, $mapq, $cigar_2, $rnext, $pnext_2, $tlen_2, $actual_seq_2, $qual_2, $NM_tag_2, $XX_tag_2, $XM_tag_2,$XR_tag_2,$XG_tag)), "\n";
+  }
 }

 sub revcomp{
@@ -6327,7 +7621,8 @@

 -f/--fasta               The query input files (specified as <mate1>,<mate2> or <singles> are FASTA
                          files (usually havin extension .fa, .mfa, .fna or similar). All quality values
-                         are assumed to be 40 on the Phred scale.
+                         are assumed to be 40 on the Phred scale. FASTA files are expected to contain both
+                         the read name and the sequence on a single line (and not spread over several lines).

 -s/--skip <int>          Skip (i.e. do not align) the first <int> reads or read pairs from the input.

@@ -6415,6 +7710,14 @@
                          or bottom (OB) strands in parallel and report these alignments. This is the recommended option
                          for sprand-specific libraries).

+--pbat                   This option may be used for PBAT-Seq libraries (Post-Bisulfite Adapter Tagging; Kobayashi et al.,
+                         PLoS Genetics, 2012). This is essentially the exact opposite of alignments in 'directional' mode,
+                         as it will only launch two alignment threads to the CTOT and CTOB strands instead of the normal OT
+                         and OB ones. Use this option only if you are certain that your libraries were constructed following
+                         a PBAT protocol (if you don't know what PBAT-Seq is you should not specify this option). The option
+                         --pbat works only for single-end and paired-end FastQ files for use with Bowtie1 (uncompressed
+                         temporary files only).
+
 --sam-no-hd              Suppress SAM header lines (starting with @). This might be useful when very large input files are
                          split up into several smaller files to run concurrently and the output files are to be merged.

@@ -6445,6 +7748,46 @@
                          the specified folder does not exist, Bismark will attempt to create it first. The path to the
                          temporary folder can be either relative or absolute.

+--non_bs_mm              Optionally outputs an extra column specifying the number of non-bisulfite mismatches a read had during the
+                         alignment step. This option is only available for SAM format. In Bowtie 2 context, this value is
+                         just the number of actual non-bisulfite mismatches and ignores potential insertions or deletions.
+                         The format for single-end reads and read 1 of paired-end reads is 'XA:Z:number of mismatches'
+                         and 'XB:Z:number of mismatches' for read 2 of paired-end reads.
+
+--gzip                   Temporary bisulfite conversion files will be written out in a GZIP compressed form to save disk
+                         space. This option is available for most alignment modes but is not available for paired-end FastA
+                         files. This option might be somewhat slower than writing out uncompressed files, but this awaits
+                         further testing.
+
+--bam                    The output will be written out in BAM format instead of the default SAM format. Bismark will
+                         attempt to use the path to Samtools that was specified with '--samtools_path', or, if it hasn't
+                         been specified, attempt to find Samtools in the PATH. If no installation of Samtools can be found,
+                         the SAM output will be compressed with GZIP instead (yielding a .sam.gz output file).
+
+--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
+                         explicitly if Samtools is in the PATH already.
+
+--prefix <prefix>        Prefixes <prefix> to the output filenames. Trailing dots will be replaced by a single one. For
+                         example, '--prefix test' with 'file.fq' would result in the output file 'test.file.fq_bismark.sam' etc.
+
+--old_flag               Only in paired-end SAM mode, uses the FLAG values used by Bismark v0.8.2 and before. In addition,
+                         this option appends /1 and /2 to the read IDs for reads 1 and 2 relative to the input file. Since
+                         both the appended read IDs and custom FLAG values may cause problems with some downstream tools
+                         such as Picard, new defaults were implemented as of version 0.8.3.
+
+
+                                             default                         old_flag
+                                       ===================              ===================
+                                       Read 1       Read 2              Read 1       Read 2
+
+                              OT:         99          147                  67          131
+
+                              OB:         83          163                 115          179
+
+                              CTOT:       99          147                  67          131
+
+                              CTOB:       83          163                 115          179
+


 Other:
@@ -6605,11 +7948,12 @@
 (14) XM-tag (methylation call string)
 (15) XR-tag (read conversion state for the alignment)
 (16) XG-tag (genome conversion state for the alignment)
+(17) XA/XB-tag (non-bisulfite mismatches) (optional!)

 Each read of paired-end alignments is written out in a separate line in the above format.


-This script was last edited on 21 Aug 2012.
+Last edited on 07 October 2013.

 HOW_TO
 }
--- a/bismark_bowtie2_wrapper.xml	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_bowtie2_wrapper.xml	Mon Apr 14 16:42:38 2014 -0400
@@ -1,18 +1,18 @@
-<tool id="bismark_bowtie2" name="Bismark" version="0.7.7.2">
-    <!-- Wrapper compatible with Bismark version 0.7.7 -->
+<tool id="bismark_bowtie2" name="Bismark" version="0.10.1">
+    <!-- Wrapper compatible with Bismark version 0.10 -->
     <description>bisulfite mapper (bowtie2)</description>
     <!--<version_command>bismark version</version_command>-->
     <requirements>
         <requirement type="set_environment">SCRIPT_PATH</requirement>
-        <requirement type="package" version="0.12.8">bowtie</requirement>
-        <requirement type="package" version="2.0.0-beta7">bowtie2</requirement>
+        <requirement type="package" version="0.1.19">samtools</requirement>
+        <requirement type="package" version="2.1.0">bowtie2</requirement>
     </requirements>
     <parallelism method="basic"></parallelism>
     <command interpreter="python">
         bismark_wrapper.py

         ## Change this to accommodate the number of threads you have available.
-        --num-threads 4
+        --num-threads "\${GALAXY_SLOTS:-24}"

         --bismark_path \$SCRIPT_PATH

@@ -47,16 +47,23 @@
                 --fasta
             #end if
         #else:
-            --mate-paired
-            --mate1 $singlePaired.input_mate1
-            --mate2 $singlePaired.input_mate2
+            --mate-paired
+            #set $mate1 = list()
+            #set $mate2 = list()
+            #for $mate_pair in $singlePaired.mate_list
+                $mate1.append( str($mate_pair.input_mate1) )
+                $mate2.append( str($mate_pair.input_mate2) )
+            #end for

-            #if $singlePaired.input_mate1.ext == "fastqillumina":
+            --mate1 #echo ','.join($mate1)
+            --mate2 #echo ','.join($mate2)
+
+            #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina":
                 --phred64-quals
                 --fastq
-            #elif $singlePaired.input_mate1.ext == "fastqsanger":
+            #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger":
                 --fastq
-            #elif $singlePaired.input_mate1.ext == "fasta":
+            #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta":
                 --fasta
             #end if

@@ -64,6 +71,9 @@
             -X $singlePaired.maxInsert
         #end if

+        #if $sort_bam:
+            --sort-bam
+        #end if

         ## for now hardcode the value for the required memory per thread in --best mode
         --chunkmbs 512
@@ -96,10 +106,13 @@
             ## if set, disable the original behaviour
             $params.no_discordant

+            #if $params.bismark_stdout:
+                --stdout $output_stdout
+            #end if

-            ###if str($params.isReportOutput) == "yes":
-            ##    --output-report-file $report_file
-            ###end if
+            #if $params.isReportOutput:
+                --output-report-file $report_file
+            #end if

         #end if

@@ -107,7 +120,7 @@
         ## Output parameters.
         ##
         --output $output
-        $suppress_header
+        ##$suppress_header

       #if str( $singlePaired.sPaired ) == "single"
         #if $output_unmapped_reads_l
@@ -157,13 +170,16 @@
                 <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
             </when>
             <when value="paired">
-                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
-                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <repeat name="mate_list" title="Paired End Pairs" min="1">
+                    <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 1" help="FASTQ or FASTA files." />
+                    <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 2" help="FASTQ or FASTA files." />
+                </repeat>
                 <param name="minInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments" />
-                <param name="maxInsert" type="integer" value="250" label="Maximum insert size for valid paired-end alignments" />
+                <param name="maxInsert" type="integer" value="500" label="Maximum insert size for valid paired-end alignments" />
             </when>
         </conditional>

+        <param name="sort_bam" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Sort BAM file by chromosomal position (not compatible with methylation extractor)"/>

         <conditional name="params">
             <param name="settingsType" type="select" label="Bismark settings to use" help="You can use the default settings or set custom values for any of Bismark's parameters.">
@@ -188,30 +204,43 @@
                 <param name="qupto" type="integer" value="0" label="Only aligns the first N reads or read pairs from the input" help="Default is 0 and means 'no-limit'." />
                 <param name="skip_reads" type="integer" value="0" label="Skip (i.e. do not align) the first N reads or read pairs from the input" />

-                <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="False" label="Disable looking for discordant alignments if it cannot find any concordant alignments" help="" />
-                <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="False" label="Disable Bowtie 2's behaviour to try to find alignments for the individual mates" help="" />
+                <param name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" checked="false" label="Disable looking for discordant alignments if it cannot find any concordant alignments" help="" />
+                <param name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" checked="false" label="Disable Bowtie 2's behaviour to try to find alignments for the individual mates" help="" />

-                <param name="suppressed_read_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write ambiguous reads to an extra output file." help="Write all reads which produce more than one valid alignment with the same number of lowest mismatches or other reads that fail to align uniquely." />
-                <param name="unmapped_read_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file" />
+                <param name="suppressed_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write ambiguous reads to an extra output file" help="Write all reads which produce more than one valid alignment with the same number of lowest mismatches or other reads that fail to align uniquely." />
+                <param name="unmapped_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write all reads that could not be aligned to a file" />
                 <!-- output Options -->
-                <!--
-                <param name="isReportOutput" type="select" label="Offer all report files concatenated in one file.">
-                    <option value="yes">yes</option>
-                    <option value="no">no</option>
-                </param>
-                -->
+                <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" />
+                <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" />
+
                 <!--end output options -->
             </when>  <!-- full -->
       </conditional>  <!-- params -->
-      <param name="suppress_header" type="boolean" truevalue="--suppress-header" falsevalue="" checked="False" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default" />
+      <!--
+      <param name="suppress_header" type="boolean" truevalue="..suppress-header" falsevalue="" checked="false" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default." />
+        -->
     </inputs>
+
+
     <outputs>
-        <!-- that does not work
         <data format="txt" name="report_file" label="${tool.name} on ${on_string}: Report">
-            <filter>str($params.isReportOutput) == "yes"</filter>
+            <filter>
+            ((
+                params['settingsType'] == "custom" and
+                params['isReportOutput'] is True
+            ))
+            </filter>
         </data>
-        -->
-        <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
+        <data format="txt" name="output_stdout" label="${tool.name} on ${on_string}: Summary">
+            <filter>
+            ((
+                params['settingsType'] == "custom" and
+                params['bismark_stdout'] is True
+            ))
+            </filter>
+        </data>
+
+        <data format="bam" name="output" label="${tool.name} on ${on_string}: mapped reads">
           <actions>
             <conditional name="refGenomeSource.genomeSource">
               <when value="indexed">
@@ -247,7 +276,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -267,7 +296,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -291,12 +320,13 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
       </actions>
     </data>
+
     <data format="fastq" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)">
       <filter>singlePaired['sPaired'] == "paired"</filter>
       <filter>params['settingsType'] == "custom"</filter>
@@ -310,14 +340,12 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
       </actions>
     </data>
-
-
     </outputs>

     <tests>
@@ -332,7 +360,7 @@
 version (C->T conversion) or into a bisulfite treated reverse strand (G->A conversion of the forward strand).
 Each of these reads are then aligned to bisulfite treated forward strand index of a reference genome
 (C->T converted) and a bisulfite treated reverse strand index of the genome (G->A conversion of the
-forward strand, by doing this alignments will produce the same positions). These 4 instances of Bowtie (1 or 2)
+forward strand, by doing this alignments will produce the same positions). These instances of Bowtie 2
 are run in parallel. The sequence file(s) are then read in again sequence by sequence to pull out the original
 sequence from the genome and determine if there were any protected C's present or not.

@@ -353,6 +381,11 @@

  .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/

+
+.. class:: warningmark
+
+Make sure all your input reads are in the correct and same format. If that's not the case, please adjust/convert the filetype with Galaxy's built-in converters.
+
 ------

 **Input formats**
@@ -415,12 +448,7 @@

 **Bismark parameter list**

-This is an exhaustive list of Bismark options:
-
-------
-
-**OPTIONS**
-
+This is an exhaustive list of Bismark options.

 Input::
--- a/bismark_bowtie_wrapper.xml	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_bowtie_wrapper.xml	Mon Apr 14 16:42:38 2014 -0400
@@ -1,18 +1,15 @@
-<tool id="bismark_bowtie" name="Bismark" version="0.7.7.3">
-    <!-- Wrapper compatible with Bismark version 0.7.7 -->
+<tool id="bismark_bowtie" name="Bismark" version="0.10.0">
+    <!-- Wrapper compatible with Bismark version 0.10 -->
     <description>bisulfite mapper (bowtie)</description>
     <!--<version_command>bismark version</version_command>-->
     <requirements>
         <requirement type="set_environment">SCRIPT_PATH</requirement>
+        <requirement type="package" version="0.1.19">samtools</requirement>
         <requirement type="package" version="0.12.8">bowtie</requirement>
-        <requirement type="package" version="2.0.0-beta7">bowtie2</requirement>
     </requirements>
     <parallelism method="basic"></parallelism>
     <command interpreter="python">
         bismark_wrapper.py
-
-        ## Change this to accommodate the number of threads you have available.
-        --num-threads 4

         --bismark_path \$SCRIPT_PATH

@@ -45,16 +42,23 @@
                 --fasta
             #end if
         #else:
-            --mate-paired
-            --mate1 $singlePaired.input_mate1
-            --mate2 $singlePaired.input_mate2
+            --mate-paired
+            #set $mate1 = list()
+            #set $mate2 = list()
+            #for $mate_pair in $singlePaired.mate_list
+                $mate1.append( str($mate_pair.input_mate1) )
+                $mate2.append( str($mate_pair.input_mate2) )
+            #end for

-            #if $singlePaired.input_mate1.ext == "fastqillumina":
+            --mate1 #echo ','.join($mate1)
+            --mate2 #echo ','.join($mate2)
+
+            #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina":
                 --phred64-quals
                 --fastq
-            #elif $singlePaired.input_mate1.ext == "fastqsanger":
+            #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger":
                 --fastq
-            #elif $singlePaired.input_mate1.ext == "fasta":
+            #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta":
                 --fasta
             #end if

@@ -85,9 +89,13 @@
                 --skip-reads $params.skip_reads
             #end if

-            ###if str($params.isReportOutput) == "yes":
-            ##    --output-report-file $report_file
-            ###end if
+            #if $params.bismark_stdout:
+                --stdout $output_stdout
+            #end if
+
+            #if $params.isReportOutput:
+                --output-report-file $report_file
+            #end if

         #end if

@@ -95,7 +103,7 @@
         ## Output parameters.
         ##
         --output $output
-        $suppress_header
+        ##$suppress_header

       #if str( $singlePaired.sPaired ) == "single"
         #if $output_unmapped_reads_l
@@ -123,7 +131,7 @@
             <option value="history">Use one from the history</option>
           </param>
           <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin.">
               <options from_data_table="bowtie_indexes">
                 <filter type="sort_by" column="2"/>
                 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
@@ -145,10 +153,12 @@
                 <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
             </when>
             <when value="paired">
-                <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
-                <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." />
+                <repeat name="mate_list" title="Paired End Pairs" min="1">
+                    <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 1" help="FASTQ or FASTA files." />
+                    <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 2" help="FASTQ or FASTA files." />
+                </repeat>
                 <param name="minInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments" />
-                <param name="maxInsert" type="integer" value="250" label="Maximum insert size for valid paired-end alignments" />
+                <param name="maxInsert" type="integer" value="500" label="Maximum insert size for valid paired-end alignments" />
             </when>
         </conditional>

@@ -162,41 +172,51 @@
             <!-- Full/advanced params. -->
             <when value="custom">
                 <!-- -n -->
-                <param name="seed_mismatches" type="select" label="The maximum number of mismatches permitted in the 'seed'.">
+                <param name="seed_mismatches" type="select" label="The maximum number of mismatches permitted in the 'seed'">
                     <option value="0">0</option>
                     <option value="1">1</option>
                     <option value="2" selected="true">2</option>
                     <option value="3">3</option>
                 </param>
                 <!-- -l -->
-                <param name="seed_len" type="integer" value="28" label="The 'seed length'; The number of bases of the high quality end of the read to which the maximum number of mismatches applies." />
+                <param name="seed_len" type="integer" value="28" label="The 'seed length'; The number of bases of the high quality end of the read to which the maximum number of mismatches applies" />
                 <!--
                 <param name="maqerr" type="integer" value="70" label="Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, not just in the 'seed'." />
                 -->
                 <param name="qupto" type="integer" value="0" label="Only aligns the first N reads or read pairs from the input" help="Default is 0 and means 'no-limit'." />
                 <param name="skip_reads" type="integer" value="0" label="Skip (i.e. do not align) the first N reads or read pairs from the input" />

-                <param name="suppressed_read_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write ambiguous reads to an extra output file." help="Write all reads which produce more than one valid alignment with the same number of lowest mismatches or other reads that fail to align uniquely." />
-                <param name="unmapped_read_file" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file" />
+                <param name="suppressed_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write ambiguous reads to an extra output file" help="Write all reads which produce more than one valid alignment with the same number of lowest mismatches or other reads that fail to align uniquely." />
+                <param name="unmapped_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write all reads that could not be aligned to a file" />
                 <!-- output Options -->
-                <!--
-                <param name="isReportOutput" type="select" label="Offer all report files concatenated in one file.">
-                    <option value="yes">yes</option>
-                    <option value="no">no</option>
-                </param>
-                -->
+                <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" />
+                <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" />
                 <!--end output options -->
             </when>  <!-- full -->
       </conditional>  <!-- params -->
-      <param name="suppress_header" type="boolean" truevalue="--suppress-header" falsevalue="" checked="False" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default" />
+      <!--
+      <param name="suppress_header" type="boolean" truevalue="..suppress-header" falsevalue="" checked="false" label="Suppress the header in the output SAM file" help="Bowtie produces SAM with several lines of header information by default." />
+     -->
     </inputs>
     <outputs>
-        <!-- that does not work
         <data format="txt" name="report_file" label="${tool.name} on ${on_string}: Report">
-            <filter>str($params.isReportOutput) == "yes"</filter>
+            <filter>
+            ((
+                params['settingsType'] == "custom" and
+                params['isReportOutput'] is True
+            ))
+            </filter>
         </data>
-        -->
-        <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
+        <data format="txt" name="output_stdout" label="${tool.name} on ${on_string}: Summary">
+            <filter>
+            ((
+                params['settingsType'] == "custom" and
+                params['bismark_stdout'] is True
+            ))
+            </filter>
+        </data>
+
+        <data format="bam" name="output" label="${tool.name} on ${on_string}: mapped reads">
           <actions>
             <conditional name="refGenomeSource.genomeSource">
               <when value="indexed">
@@ -232,7 +252,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -252,7 +272,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -276,7 +296,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -295,7 +315,7 @@
           </when>
           <when value="paired">
             <action type="format">
-              <option type="from_param" name="singlePaired.input_mate1" param_attribute="ext" />
+              <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
             </action>
           </when>
         </conditional>
@@ -338,6 +358,11 @@

  .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/

+
+.. class:: warningmark
+
+Make sure all your input reads are in the correct and same format. If that's not the case, please adjust/convert the filetype with Galaxy's built-in converters.
+
 ------

 **Input formats**
@@ -400,12 +425,7 @@

 **Bismark parameter list**

-This is an exhaustive list of Bismark options:
-
-------
-
-**OPTIONS**
-
+This is an exhaustive list of Bismark options.

 Input::

@@ -526,76 +546,5 @@
                          the specified folder does not exist, Bismark will attempt to create it first. The path to the
                          temporary folder can be either relative or absolute.

-------
-
-Bowtie 2 alignment options::
-
-  -N INT                 Sets the number of mismatches to allowed in a seed alignment during multiseed alignment.
-                         Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower)
-                         but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for
-                         Bowtie 1 see -n).
-
-  -L INT                   Sets the length of the seed substrings to align during multiseed alignment. Smaller values
-                         make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is
-                         used by default, which sets -L to 20. This option is only available for Bowtie 2 (for
-                         Bowtie 1 see -l).
-
-  --ignore-quals         When calculating a mismatch penalty, always consider the quality value at the mismatched
-                         position to be the highest possible, regardless of the actual value. I.e. input is treated
-                         as though all quality values are high. This is also the default behavior when the input
-                         doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default.
-
-
-Bowtie 2 paired-end options::
-
-  --no-mixed             This option disables Bowtie 2's behavior to try to find alignments for the individual mates if
-                         it cannot find a concordant or discordant alignment for a pair. This option is invariable and
-                         and on by default.
-
-  --no-discordant        Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.
-                         A discordant alignment is an alignment where both mates align uniquely, but that does not
-                         satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior
-                         and it is on by default.
-
-
-Bowtie 2 effort options::
-
-  -D INT                 Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using
-                         the alignments found so far. A seed extension "fails" if it does not yield a new best or a
-                         new second-best alignment. Default: 15.
-
-  -R INT                 INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.
-                         When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of
-                         mismatches allowed) at different offsets and searches for more alignments. A read is considered
-                         to have repetitive seeds if the total number of seed hits divided by the number of seeds
-                         that aligned at least once is greater than 300. Default: 2.
-
-
-Bowtie 2 Scoring options::
-
-  --score_min "func"     Sets a function governing the minimum alignment score needed for an alignment to be considered
-                         "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying
-                         L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.
-                         See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is
-                         L,0,-0.2.
-
-
-Bowtie 2 Reporting options::
-
- --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is
-                         deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the
-                         default mode, but adjusting the -M setting is deprecated.  Use the -D and -R options to adjust the
-                         effort expended to find valid alignments.
-
-                         For reference, this used to be the old (now deprecated) description of -M:
-                         Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it
-                         can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever
-                         happens first. Only the best alignment is reported. Information from the other alignments is used to
-                         estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes
-                         Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that
-                         aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not
-                         guarantee that the alignment reported is the best possible in terms of alignment score. -M is
-                         always used and its default value is set to 10.
-
   </help>
 </tool>
--- a/bismark_genome_preparation	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_genome_preparation	Mon Apr 14 16:42:38 2014 -0400
@@ -6,7 +6,7 @@
 $|++;


-## This program is Copyright (C) 2010-12, Felix Krueger (felix.krueger@bbsrc.ac.uk)
+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)

 ## This program is free software: you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
@@ -33,7 +33,7 @@
 my $single_fasta;
 my $bowtie2;

-my $bismark_version = 'v0.7.7';
+my $bismark_version = 'v0.10.0';

 GetOptions ('verbose' => \$verbose,
 	    'help' => \$help,
@@ -44,10 +44,6 @@
 	    'bowtie2' => \$bowtie2,
 	   );

-my $genome_folder = shift @ARGV; # mandatory
-my $CT_dir;
-my $GA_dir;
-
 if ($help or $man){
   print_helpfile();
   exit;
@@ -59,13 +55,38 @@
           Bismark - Bisulfite Mapper and Methylation Caller.

           Bismark Genome Preparation Version: $bismark_version
-        Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
+        Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
               www.bioinformatics.babraham.ac.uk/projects/

 VERSION
     exit;
 }

+my $genome_folder = shift @ARGV; # mandatory
+
+# Ensuring a genome folder has been specified
+if ($genome_folder){
+  unless ($genome_folder =~ /\/$/){
+    $genome_folder =~ s/$/\//;
+  }
+  $verbose and print "Path to genome folder specified as: $genome_folder\n";
+  chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!";
+
+  # making the genome folder path absolute so it won't break if the path was specified relative
+  $genome_folder = getcwd;
+  unless ($genome_folder =~ /\/$/){
+    $genome_folder =~ s/$/\//;
+  }
+}
+else{
+  die "Please specify a genome folder to be used for bisulfite conversion\n\n";
+}
+
+
+my $CT_dir;
+my $GA_dir;
+
+
 if ($single_fasta){
   print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n";
   $multi_fasta = 0;
@@ -309,41 +330,6 @@

   $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n";

-  # Ensuring a genome folder has been specified
-  if ($genome_folder){
-    unless ($genome_folder =~ /\/$/){
-      $genome_folder =~ s/$/\//;
-    }
-    $verbose and print "Path to genome folder specified: $genome_folder\n";
-    chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!";
-
-    # making the genome folder path abolsolute so it won't break if the path was specified relative
-    $genome_folder = getcwd;
-    unless ($genome_folder =~ /\/$/){
-      $genome_folder =~ s/$/\//;
-    }
-  }
-
-  else{
-    $verbose and print "Genome folder was not provided as argument ";
-    while (1){
-      print "Please specify a genome folder to be bisulfite converted:\n";
-      $genome_folder = <STDIN>;
-      chomp $genome_folder;
-
-      # adding a trailing slash unless already present
-      unless ($genome_folder =~ /\/$/){
-	$genome_folder =~ s/$/\//;
-      }
-      if (chdir $genome_folder){
-	last;
-      }
-      else{
-	warn "Could't move to directory $genome_folder! $!";
-      }
-    }
-  }
-
   if ($path_to_bowtie){
     unless ($path_to_bowtie =~ /\/$/){
       $path_to_bowtie =~ s/$/\//;
@@ -376,7 +362,7 @@
     die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n";
   }

-  warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n";
+  warn "Bisulfite Genome Indexer version $bismark_version (last modified 19 Sept 2013)\n\n";
   sleep (3);

   # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists
@@ -386,27 +372,10 @@
     $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n";
   }
   else{
-    while (1){
-      print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t";
-      my $proceed = <STDIN>;
-      chomp $proceed;
-      if ($proceed =~ /^y/i ){
-	last;
-      }
-      elsif ($proceed =~ /^n/i){
-	die "Terminated by user\n\n";
-      }
-    }
+    print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\n\n";
+    sleep(5);
   }

-  ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2  indexes already
-  # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted)
-  # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1});
-  #  unless (-d $bisulfite_dir){ #  had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina
-  #    mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n";
-  #  }
-  # }
-
   chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n";
   $CT_dir = "${bisulfite_dir}CT_conversion/";
   $GA_dir = "${bisulfite_dir}GA_conversion/";
@@ -440,15 +409,14 @@
 bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs
 converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference
 genome folder. Once the bisulfite conversion has been completed the program will fork and launch
-two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware
+two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware
 that the indexing process can take up to several hours; this will mainly depend on genome size
 and system resources.


-
 The following is a brief description of command line options and arguments to control the
-Bismark Genome Preparation script:
+Bismark Genome Preparation:


 USAGE: bismark_genome_preparation [options] <arguments>
@@ -462,8 +430,9 @@

 --verbose                Print verbose output for more details or debugging.

---path_to_bowtie         The full path to the Bowtie 1 or Bowtie 2 installation on your system.If
-                         the path </../../> is not provided as an option you will be prompted for it.
+--path_to_bowtie </../>  The full path to the Bowtie 1 or Bowtie 2 installation on your system
+                         (depending on which aligner/indexer you intend to use). Unless this path
+                         is specified it is assumed that Bowtie is in the PATH.

 --bowtie2                This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1).

@@ -481,12 +450,10 @@
 ARGUMENTS:

 <path_to_genome_folder>  The path to the folder containing the genome to be bisulfite converted.
-                         At the current time Bismark Genome Preparation expects one or more fastA
-                         files in the folder (with the file extension: .fa or .fasta). If the path
-                         is not provided as an argument you will be prompted for it.
+                         The Bismark Genome Preparation expects one or more fastA files in the folder
+                         (with the file extension: .fa or .fasta). Specifying this path is mandatory.


-
-This script was last modified on 18 Nov 2011.
+This script was last modified on 19 Sept 2013.
 HOW_TO
 }
--- a/bismark_methylation_extractor	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_methylation_extractor	Mon Apr 14 16:42:38 2014 -0400
@@ -5,6 +5,24 @@
 use Getopt::Long;
 use Cwd;
 use Carp;
+use FindBin qw($Bin);
+use lib "$Bin/../lib";
+
+
+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)
+
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <http://www.gnu.org/licenses/>.

 my @filenames; # input files
 my %counting;
@@ -12,8 +30,8 @@

 my %fhs;

-my $version = 'v0.7.7';
-my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome) = process_commandline();
+my $version = 'v0.10.1';
+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem) = process_commandline();


 ### only needed for bedGraph output
@@ -24,6 +42,9 @@
 ### only needed for genome-wide cytosine methylation report
 my %chromosomes;

+my %mbias_1;
+my %mbias_2;
+
 ##############################################################################################
 ### Summarising Run Parameters
 ##############################################################################################
@@ -50,9 +71,20 @@
   }
 }

-if ($ignore){
-  warn "First $ignore bases will be disregarded when processing the methylation call string\n";
+if ($single){
+  if ($ignore){
+    warn "First $ignore bp will be disregarded when processing the methylation call string\n";
+  }
 }
+else{ ## paired-end
+  if ($ignore){
+    warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\n";
+  }
+  if ($ignore_r2){
+    warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\n";
+  }
+}
+

 if ($full){
   warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n";
@@ -78,7 +110,7 @@
   warn '='x63,"\n";

   if ($counts){
-    warn "Generating additional output in bedGraph format including methylating counts (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>)\n";
+    warn "Generating additional output in bedGraph and coverage format\nbedGraph format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\ncoverage format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\n\n";
   }
   else{
     warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\n";
@@ -98,12 +130,24 @@
     warn "White spaces in read ID names will be removed prior to sorting\n";
   }

+  if ($ample_mem){
+    warn "Sorting chromosomal postions for the bedGraph step using arrays instead of using UNIX sort\n";
+  }
+  elsif (defined $sort_size){
+    warn "The bedGraph UNIX sort command will use the following memory setting:\t'$sort_size'. Temporary directory used for sorting is the output directory\n";
+  }
+  else{
+    warn "Setting a default memory usage for the bedGraph UNIX sort command to 2GB\n";
+  }
+
+
+
   sleep (1);

   if ($cytosine_report){
     warn "\n\nSummarising genome-wide cytosine methylation report parameters:\n";
     warn '='x63,"\n";
-    warn "Generating comprehensive genome-wide cytosine report (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage> )\n";
+    warn "Generating comprehensive genome-wide cytosine report\n(output format: <Chromosome> <Position> <Strand> <count methylated> <count non-methylated>  <C-context>  <trinucleotide context> )\n";


     if ($CX_context){
@@ -160,41 +204,460 @@
 	      total_unmethylated_CpG_count => 0,
 	      sequences_count => 0,
 	     );
+
   @sorting_files = ();
   @bedfiles = ();

+  %mbias_1 = ();
+  %mbias_2 = ();
+
+  ### performing a quick check to see if a paired-end SAM file has been sorted by positions which does interfere with the logic used by the extractor
+  unless ($vanilla){
+    if ($paired){
+      test_positional_sorting($filename);
+    }
+  }
+
   process_Bismark_results_file($filename);

+  ### Closing all filehandles so that the Bismark methylation extractor output doesn't get truncated due to buffering issues
+  foreach my $fh (keys %fhs) {
+    if ($fh =~ /^[1230]$/) {
+      foreach my $context (keys %{$fhs{$fh}}) {
+	close $fhs{$fh}->{$context} or die $!;
+      }
+    }
+    else{
+      close $fhs{$fh} or die $!;
+    }
+  }
+
+  ### printing out all M-Bias data
+  produce_mbias_plots ($filename);
+
+  delete_unused_files();
+
   if ($bedGraph){
-    my $out = $filename;
+
+    my $out = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
+    $out =~ s/gz$//;
     $out =~ s/sam$//;
+    $out =~ s/bam$//;
     $out =~ s/txt$//;
     $out =~ s/$/bedGraph/;

     my $bedGraph_output = $out;
-    open (OUT,'>',$output_dir.$out) or die $!;
-    # warn "Writing bedGraph to file: $out\n";
-
-    process_bedGraph_output();
-    close OUT or die $!;
+    my @args;
+
+    if ($remove){
+      push @args, '--remove';
+    }
+    if ($CX_context){
+      push @args, '--CX_context';
+    }
+    if ($no_header){
+      push @args, '--no_header';
+    }
+    if ($gazillion){
+      push @args, '--gazillion';
+    }
+    if ($ample_mem){
+      push @args, '--ample_memory';
+    }
+
+
+    #   if ($counts){
+    #      push @args, "--counts";
+    #   }
+
+    push @args, "--buffer_size $sort_size";
+    push @args, "--cutoff $coverage_threshold";
+    push @args, "--output $bedGraph_output";
+    push @args, "--dir '$output_dir'";
+
+    ### adding all files to be sorted to @args
+    foreach my $f (@sorting_files){
+      push @args, $f;
+    }
+
+    #  print join "\t",@args,"\n";
+
+    system ("$Bin/bismark2bedGraph @args");
+
+    warn "Finished BedGraph conversion ...\n\n";
+    sleep(3);
+
+    # open (OUT,'>',$output_dir.$bedGraph_output) or die "Problems with the bedGraph output filename detected: file path: '$output_dir'\tfile name: '$bedGraph_output' $!";
+    # warn "Writing bedGraph to file: $bedGraph_output\n";
+    # process_bedGraph_output();
+    # close OUT or die $!;

     ### genome-wide cytosine methylation report requires bedGraph processing anyway
     if ($cytosine_report){
+
+      @args = (); # resetting @args
       my $cytosine_out = $out;
       $cytosine_out =~ s/bedGraph$//;

-      read_genome_into_memory();
-      generate_genome_wide_cytosine_report($bedGraph_output,$cytosine_out);
+      if ($CX_context){
+	$cytosine_out =~ s/$/CX_report.txt/;
+      }
+      else{
+	$cytosine_out =~ s/$/CpG_report.txt/;
+      }
+
+      push @args, "--output $cytosine_out";
+      push @args, "--dir '$output_dir'";
+      push @args, "--genome '$genome_folder'";
+      push @args, "--parent_dir '$parent_dir'";
+
+      if ($zero){
+	push @args, "--zero";
+      }
+      if ($CX_context){
+	push @args, '--CX_context';
+      }
+      if ($split_by_chromosome){
+	push @args, '--split_by_chromosome';
+      }
+
+      my $coverage_output = $bedGraph_output;
+      $coverage_output =~ s/bedGraph$/bismark.cov/;
+
+      push @args, $output_dir . $coverage_output; # this will be the infile
+
+      system ("$Bin/coverage2cytosine @args");
+      # generate_genome_wide_cytosine_report($bedGraph_output,$cytosine_out);
+      warn "\n\nFinished generating genome-wide cytosine report\n\n";
     }
   }
 }

+sub delete_unused_files{
+  # Removes every file listed in @sorting_files that holds no data, i.e. no line other than lines
+  # starting with 'Bismark'. Such files are deleted from disk and dropped from @sorting_files.
+  warn "Deleting unused files ...\n\n"; sleep(1);
+
+  my $index = 0;
+
+  while ($index <= $#sorting_files){
+    # 3-argument/list-form open: the file name is used verbatim and is never parsed by a shell
+    if ($sorting_files[$index] =~ /gz$/){
+      open (USED,'-|','zcat',$sorting_files[$index]) or die "Failed to read from methylation extractor output file $sorting_files[$index]: $!\n";
+    }
+    else{
+      open (USED,'<',$sorting_files[$index]) or die "Failed to read from methylation extractor output file $sorting_files[$index]: $!\n";
+    }
+
+    my $used = 0;
+    while (<USED>){
+      next if (/^Bismark/);
+      if ($_){
+	$used = 1;
+	last;
+      }
+    }
+    close USED; # result deliberately unchecked: close() reports failure for a zcat pipe we stopped reading early
+
+    if ($used){
+      warn "$sorting_files[$index] contains data ->\tkept\n";
+      ++$index;
+    }
+    else{
+      my $delete = unlink $sorting_files[$index];
+      if ($delete){
+	warn "$sorting_files[$index] was empty ->\tdeleted\n";
+      }
+      else{
+	warn "$sorting_files[$index] was empty, however deletion was unsuccessful: $!\n";
+      }
+
+      ### we also need to remove the element from @sorting_files (so $index must not be incremented here)
+      splice @sorting_files, $index, 1;
+    }
+  }
+  warn "\n\n";
+}
+
+sub produce_mbias_plots{
+  # Writes the per-position methylation calls collected in %mbias_1 (and, for paired-end data, %mbias_2)
+  # as a table to '<name>M-bias.txt' in $output_dir. If GD::Graph::lines and GD::Graph::colour can be
+  # loaded, the same numbers are also plotted to '<name>M-bias_R1.png' (and '<name>M-bias_R2.png').
+  # Argument: the input file name; it is only used to derive <name> for the output files.
+  my $filename = shift;
+
+  my $mbias = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
+  $mbias =~ s/gz$//;
+  $mbias =~ s/sam$//;
+  $mbias =~ s/bam$//;
+  $mbias =~ s/txt$//;
+  my $mbias_graph_1 = my $mbias_graph_2 = $mbias;
+  $mbias_graph_1 = $output_dir . $mbias_graph_1 . 'M-bias_R1.png';
+  $mbias_graph_2 = $output_dir . $mbias_graph_2 . 'M-bias_R2.png';
+
+  $mbias =~ s/$/M-bias.txt/;
+
+  open (MBIAS,'>',"$output_dir$mbias") or die "Failed to open file for the M-bias data\n\n";
+
+  # determining maximum read length
+  my $max_length_1 = 0;
+  my $max_length_2 = 0;
+
+  foreach my $context (keys %mbias_1){
+    foreach my $pos (sort {$a<=>$b} keys %{$mbias_1{$context}}){
+      $max_length_1 = $pos unless ($max_length_1 >= $pos);
+    }
+  }
+  if ($paired){
+    foreach my $context (keys %mbias_2){
+      foreach my $pos (sort {$a<=>$b} keys %{$mbias_2{$context}}){
+	$max_length_2 = $pos unless ($max_length_2 >= $pos);
+      }
+    }
+  }
+
+  if ($single){
+    warn "Determining maximum read length for M-Bias plot\n";
+    warn "Maximum read length of Read 1: $max_length_1\n\n";
+  }
+  else{
+    warn "Determining maximum read lengths for M-Bias plots\n";
+    warn "Maximum read length of Read 1: $max_length_1\n";
+    warn "Maximum read length of Read 2: $max_length_2\n\n";
+  }
+
+  # plot data: [0] = positions, [1]-[3] = % methylation (CpG, CHG, CHH), [4]-[6] = number of calls (CpG, CHG, CHH)
+  my @mbias_read1;
+  my @mbias_read2;
+
+  #Check whether the module GD::Graph:lines is installed
+  my $gd_graph_installed = 0;
+  eval{
+    require GD::Graph::lines;
+    GD::Graph::lines->import();
+  };
+
+  unless($@) { # syntax or routine error variable, set if something goes wrong in the last eval{ require ...}
+    $gd_graph_installed = 1;
+
+    #Check whether the module GD::Graph::colour is installed
+    eval{
+      require GD::Graph::colour;
+      GD::Graph::colour->import(qw(:colours :lists :files :convert));
+    };
+
+    if ($@) {
+      warn "Perl module GD::Graph::colour not found, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
+      sleep(2);
+      $gd_graph_installed = 0;
+    }
+
+  }
+  else{
+    warn "Perl module GD::Graph::lines is not installed, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
+    sleep(2);
+  }
+
+  my $graph_title;
+  my $graph1;
+  my $graph2;
+
+  if ( $gd_graph_installed){
+    $graph1 = GD::Graph::lines->new(800,600);
+    if ($paired){
+      $graph2 = GD::Graph::lines->new(800,600);
+    }
+  }
+
+  foreach my $context (qw(CpG CHG CHH)){
+    @{$mbias_read1[0]} = (); # the positions are identical for every context, so they are rebuilt each time
+
+    if ($paired){
+      print MBIAS "$context context (R1)\n================\n";
+      $graph_title = 'M-bias (Read 1)';
+    }
+    else{
+      print MBIAS "$context context\n===========\n";
+      $graph_title = 'M-bias';
+    }
+    print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";
+
+    foreach my $pos (1..$max_length_1){
+
+      unless (defined $mbias_1{$context}->{$pos}->{meth}){
+	$mbias_1{$context}->{$pos}->{meth} = 0;
+      }
+      unless (defined $mbias_1{$context}->{$pos}->{un}){
+	$mbias_1{$context}->{$pos}->{un} = 0;
+      }
+
+      my $percent = ''; # stays empty if there were no calls at this position
+      if (($mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) > 0){
+	$percent = sprintf("%.2f",$mbias_1{$context}->{$pos}->{meth} * 100/ ( $mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) );
+      }
+      my $coverage = $mbias_1{$context}->{$pos}->{un} + $mbias_1{$context}->{$pos}->{meth};
+
+      print MBIAS "$pos\t$mbias_1{$context}->{$pos}->{meth}\t$mbias_1{$context}->{$pos}->{un}\t$percent\t$coverage\n";
+      push @{$mbias_read1[0]},$pos;
+
+      if ($context eq 'CpG'){
+	push @{$mbias_read1[1]},$percent;
+	push @{$mbias_read1[4]},$coverage;
+      }
+      elsif ($context eq 'CHG'){
+	push @{$mbias_read1[2]},$percent;
+	push @{$mbias_read1[5]},$coverage;
+      }
+      elsif ($context eq 'CHH'){
+	push @{$mbias_read1[3]},$percent;
+	push @{$mbias_read1[6]},$coverage;
+      }
+    }
+    print MBIAS "\n";
+  }
+
+  if ( $gd_graph_installed){
+    # strong colours are used for the three % methylation lines, pale ones for the three call count lines
+    add_colour(nice_blue => [31,120,180]);
+    add_colour(nice_orange => [255,127,0]);
+    add_colour(nice_green => [51,160,44]);
+    add_colour(pale_blue => [153,206,227]);
+    add_colour(pale_orange => [253,204,138]);
+    add_colour(pale_green => [191,230,207]);
+
+    $graph1->set(
+		 x_label              => 'position (bp)',
+		 y1_label              => '% methylation',
+		 y2_label              => '# methylation calls',
+		 title                => $graph_title,
+		 line_width           => 2,
+		 x_max_value          => $max_length_1,
+		 x_min_value          => 0,
+		 y_tick_number        => 10,
+		 y_label_skip         => 2,
+		 y1_max_value          => 100,
+		 y1_min_value          => 0,
+		 y2_min_value          => 0,
+		 x_label_skip         => 5,
+		 x_label_position     => 0.5,
+		 x_tick_offset        => -1,
+		 bgclr                => 'white',
+		 transparent          => 0,
+		 two_axes             => 1,
+		 use_axis             => [1,1,1,2,2,2],
+		 legend_placement     => 'RC',
+		 legend_spacing       => 6,
+		 legend_marker_width  => 24,
+		 legend_marker_height => 18,
+		 dclrs              => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
+		) or die $graph1->error;
+
+    $graph1->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');
+
+    my $gd1 = $graph1->plot(\@mbias_read1) or die $graph1->error;
+
+    open (MBIAS_G1,'>',$mbias_graph_1) or die "Failed to write to file for M-bias plot 1: $!\n\n";
+    binmode MBIAS_G1;
+    print MBIAS_G1 $gd1->png;
+    close MBIAS_G1 or die "Failed to close file for M-bias plot 1: $!\n\n";
+  }
+
+  if ($paired){
+    # same table (and plot) again for Read 2
+    foreach my $context (qw(CpG CHG CHH)){
+      @{$mbias_read2[0]} = ();
+
+      print MBIAS "$context context (R2)\n================\n";
+      print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";
+      foreach my $pos (1..$max_length_2){
+
+	unless (defined $mbias_2{$context}->{$pos}->{meth}){
+	  $mbias_2{$context}->{$pos}->{meth} = 0;
+	}
+	unless (defined $mbias_2{$context}->{$pos}->{un}){
+	  $mbias_2{$context}->{$pos}->{un} = 0;
+	}
+
+	my $percent = '';
+	if (($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) > 0){
+	  $percent = sprintf("%.2f",$mbias_2{$context}->{$pos}->{meth} * 100/ ($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) );
+	}
+	my $coverage = $mbias_2{$context}->{$pos}->{un} + $mbias_2{$context}->{$pos}->{meth};
+
+	print MBIAS "$pos\t$mbias_2{$context}->{$pos}->{meth}\t$mbias_2{$context}->{$pos}->{un}\t$percent\t$coverage\n";
+
+	push @{$mbias_read2[0]},$pos;
+
+	if ($context eq 'CpG'){
+	  push @{$mbias_read2[1]},$percent;
+	  push @{$mbias_read2[4]},$coverage;
+	}
+	elsif ($context eq 'CHG'){
+	  push @{$mbias_read2[2]},$percent;
+	  push @{$mbias_read2[5]},$coverage;
+	}
+	elsif ($context eq 'CHH'){
+	  push @{$mbias_read2[3]},$percent;
+	  push @{$mbias_read2[6]},$coverage;
+	}
+      }
+      print MBIAS "\n";
+    }
+
+    if ( $gd_graph_installed){
+
+      add_colour(nice_blue => [31,120,180]);
+      add_colour(nice_orange => [255,127,0]);
+      add_colour(nice_green => [51,160,44]);
+      add_colour(pale_blue => [153,206,227]);
+      add_colour(pale_orange => [253,204,138]);
+      add_colour(pale_green => [191,230,207]);
+
+      $graph2->set(
+		   line_width           => 2,
+		   x_max_value          => $max_length_2, # use the Read 2 length for the Read 2 plot
+		   x_min_value          => 0,
+		   y_tick_number        => 10,
+		   y_label_skip         => 2,
+		   y1_max_value          => 100,
+		   y1_min_value          => 0,
+		   y2_min_value          => 0,
+		   x_label_skip         => 5,
+		   x_label_position     => 0.5,
+		   x_tick_offset        => -1,
+		   bgclr                => 'white',
+		   transparent          => 0,
+		   two_axes             => 1,
+		   use_axis             => [1,1,1,2,2,2],
+		   legend_placement     => 'RC',
+		   legend_spacing       => 6,
+		   legend_marker_width  => 24,
+		   legend_marker_height => 18,
+		   dclrs                => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
+		   x_label              => 'position (bp)',
+		   y1_label             => '% methylation',
+		   y2_label             => '# calls',
+		   title                => 'M-bias (Read 2)',
+		  ) or die $graph2->error;
+
+      $graph2->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');
+      my $gd2 = $graph2->plot(\@mbias_read2) or die $graph2->error;
+
+      open (MBIAS_G2,'>',$mbias_graph_2) or die "Failed to write to file for M-bias plot 2: $!\n\n";
+      binmode MBIAS_G2;
+      print MBIAS_G2 $gd2->png;
+      close MBIAS_G2 or die "Failed to close file for M-bias plot 2: $!\n\n";
+    }
+  }
+  close MBIAS or die "Failed to close file for the M-bias data: $!\n\n";
+}

 sub process_commandline{
   my $help;
   my $single_end;
   my $paired_end;
   my $ignore;
+  my $ignore_r2;
   my $genomic_fasta;
   my $full;
   my $report;
@@ -214,31 +677,43 @@
   my $CpG_only;
   my $CX_context;
   my $split_by_chromosome;
-
-
-  my $command_line = GetOptions ('help|man' => \$help,
-				 'p|paired-end' => \$paired_end,
-				 's|single-end' => \$single_end,
-				 'fasta' => \$genomic_fasta,
-				 'ignore=i' => \$ignore,
-				 'comprehensive' => \$full,
-				 'report' => \$report,
-				 'version' => \$extractor_version,
-				 'no_overlap' => \$no_overlap,
-				 'merge_non_CpG' => \$merge_non_CpG,
-				 'vanilla' => \$vanilla,
-				 'o|output=s' => \$output_dir,
-				 'no_header' => \$no_header,
-				 'bedGraph' => \$bedGraph,
-				 "cutoff=i" => \$coverage_threshold,
-				 "remove_spaces" => \$remove,
-				 "counts" => \$counts,
-				 "cytosine_report" => \$cytosine_report,
-				 'g|genome_folder=s' => \$genome_folder,
-				 "zero_based" => \$zero,
-				 "CX|CX_context" => \$CX_context,
-				 "split_by_chromosome" => \$split_by_chromosome,
-				);
+  my $sort_size;
+  my $samtools_path;
+  my $gzip;
+  my $mbias_only;
+  my $gazillion;
+  my $ample_mem;
+
+  my $command_line = GetOptions ('help|man'             => \$help,
+				 'p|paired-end'         => \$paired_end,
+				 's|single-end'         => \$single_end,
+				 'fasta'                => \$genomic_fasta,
+				 'ignore=i'             => \$ignore,
+				 'ignore_r2=i'          => \$ignore_r2,
+				 'comprehensive'        => \$full,
+				 'report'               => \$report,
+				 'version'              => \$extractor_version,
+				 'no_overlap'           => \$no_overlap,
+				 'merge_non_CpG'        => \$merge_non_CpG,
+				 'vanilla'              => \$vanilla,
+				 'o|output=s'           => \$output_dir,
+				 'no_header'            => \$no_header,
+				 'bedGraph'             => \$bedGraph,
+				 "cutoff=i"             => \$coverage_threshold,
+				 "remove_spaces"        => \$remove,
+				 "counts"               => \$counts,
+				 "cytosine_report"      => \$cytosine_report,
+				 'g|genome_folder=s'    => \$genome_folder,
+				 "zero_based"           => \$zero,
+				 "CX|CX_context"        => \$CX_context,
+				 "split_by_chromosome"  => \$split_by_chromosome,
+				 "buffer_size=s"        => \$sort_size,
+				 'samtools_path=s'      => \$samtools_path,
+				 "gzip"                 => \$gzip,
+				 "mbias_only"           => \$mbias_only,
+				 "gazillion|scaffolds"  => \$gazillion,
+				 "ample_memory"         => \$ample_mem,
+	);

   ### EXIT ON ERROR if there were errors with any of the supplied options
   unless ($command_line){
@@ -257,7 +732,8 @@

                            Bismark Methylation Extractor

-   Bismark Extractor Version: $version Copyright 2010-12 Felix Krueger, Babraham Bioinformatics
+                      Bismark Extractor Version: $version
+              Copyright 2010-13 Felix Krueger, Babraham Bioinformatics
                 www.bioinformatics.babraham.ac.uk/projects/bismark/


@@ -274,10 +750,23 @@

   warn "\n *** Bismark methylation extractor version $version ***\n\n";

-  ### IGNORING <INT> bases at the start of the read when processing the methylation call string
-  unless ($ignore){
-    $ignore = 0;
+  ### M-BIAS ONLY
+  if ($mbias_only){
+    if ($bedGraph){
+      die "Option '--mbias_only' skips all sorts of methylation extraction, including the bedGraph generation. Please respecify!\n";
+    }
+    if ($cytosine_report){
+      die "Option '--mbias_only' skips all sorts of methylation extraction, including the genome-wide cytosine methylation report generation. Please respecify!\n";
+    }
+    if ($merge_non_CpG){
+      warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--merge' won't have any effect\n";
+    }
+    if ($full){
+      warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--comprehensive' won't have any effect\n";
+    }
+    sleep(3);
   }
+
   ### PRINT A REPORT
   unless ($report){
     $report = 0;
@@ -310,9 +799,65 @@
     $single_end = 0;   ### PAIRED-END ALIGNMENTS
   }
   else{
-    die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
+
+    ### we will try to determine whether the input file was a single-end or paired-end sequencing run from the SAM header
+
+    if ($vanilla){
+      die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format with '-s' or '-p'\n\n";
+    }
+    else{ # SAM/BAM format
+
+      my $file = $filenames[0];
+      warn "Trying to determine the type of mapping from the SAM header line of file $file\n"; sleep(1);
+
+      ### if the user did not specify whether the alignment file was single-end or paired-end we are trying to get this information from the @PG header line in the SAM/BAM file
+      if ($file =~ /\.gz$/){
+	open (DETERMINE,"zcat $file |") or die "Unable to read from gzipped file $file: $!\n";
+      }
+      elsif ($file =~ /\.bam$/ ||  `file -b $file` =~ /^gzip/){
+	open (DETERMINE,"samtools view -h $file |") or die "Unable to read from BAM file $file: $!\n";
+      }
+      else{
+	open (DETERMINE,$file) or die "Unable to read from $file: $!\n";
+      }
+
+      while (<DETERMINE>){
+	last unless (/^\@/);
+	if ($_ =~ /^\@PG/){
+	  # warn "found the \@PG line:\n";
+	  # warn "$_";
+
+	  if ($_ =~ /-1/ and $_ =~ /-2/){
+	    warn "Treating file(s) as paired-end data (as extracted from \@PG line)\n\n"; sleep(1);
+	    $paired_end = 1;
+	    $single_end = 0;
+	  }
+	  else{
+	    warn "Treating file(s) as single-end data (as extracted from \@PG line)\n\n"; sleep(1);
+	    $paired_end = 0;
+	    $single_end = 1;
+	  }
+	}
+      }
+
+      close DETERMINE or warn $!;
+
+    }
   }

+  ### IGNORING <INT> bases at the start of the read when processing the methylation call string
+  unless ($ignore){
+    $ignore = 0;
+  }
+
+  if (defined $ignore_r2){
+    die "You can only specify --ignore_r2 for paired-end result files\n" unless ($paired_end);
+  }
+  else{
+    $ignore_r2 = 0;
+  }
+
+
   ### NO OVERLAP
   if ($no_overlap){
     die "The option '--no_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
@@ -336,14 +881,26 @@
     $remove = 0;
   }

-  ### COVERAGE THRESHOLD FOR gedGraph OUTPUT
-  unless (defined $coverage_threshold){
+  ### COVERAGE THRESHOLD FOR bedGraph OUTPUT
+  if (defined $coverage_threshold){
     unless ($coverage_threshold > 0){
       die "Please select a coverage greater than 0 (positive integers only)\n";
     }
+  }
+  else{
     $coverage_threshold = 1;
   }

+  ### SORT buffer size
+  if (defined $sort_size){
+    unless ($sort_size =~ /^\d+\%$/ or $sort_size =~ /^\d+(K|M|G|T)$/){
+      die "Please select a buffer size as percentage (e.g. --buffer_size 20%) or a number to be multiplied with K, M, G, T etc. (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line\n";
+    }
+  }
+  else{
+    $sort_size = '2G';
+  }
+
   if ($zero){
     die "Option '--zero' is only available if  '--cytosine_report' is specified as well. Please respecify\n" unless ($cytosine_report);
   }
@@ -351,6 +908,13 @@
   if ($CX_context){
     die "Option '--CX_context' is only available if  '--cytosine_report' or '--bedGraph' is specified as well. Please respecify\n" unless ($cytosine_report or $bedGraph);
   }
+  else{
+    $CX_context = 0;
+  }
+
+  unless ($counts){
+    $counts = 1; # counts will always be set
+  }

   if ($cytosine_report){

@@ -369,13 +933,122 @@
       $bedGraph = 1;
     }
     unless ($counts){
-      warn "Setting the option '--counts' since this is required for the genome-wide cytosine report\n";
+      # warn "Setting the option '--counts' since this is required for the genome-wide cytosine report\n";
       $counts = 1;
     }
     warn "\n";
   }

-  return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome);
+  ### PATH TO SAMTOOLS
+  if (defined $samtools_path){
+    # if Samtools was specified as full command
+    if ($samtools_path =~ /samtools$/){
+      if (-e $samtools_path){
+	# Samtools executable found
+      }
+      else{
+	die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+      }
+    }
+    else{
+      unless ($samtools_path =~ /\/$/){
+	$samtools_path =~ s/$/\//;
+      }
+      $samtools_path .= 'samtools';
+      if (-e $samtools_path){
+	# Samtools executable found
+      }
+      else{
+	die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
+      }
+    }
+  }
+  # Check whether Samtools is in the PATH if no path was supplied by the user
+  else{
+    if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if Samtools is in the PATH
+      $samtools_path = `which samtools`;
+      chomp $samtools_path;
+    }
+  }
+
+  unless (defined $samtools_path){
+    $samtools_path = '';
+  }
+
+
+  if ($gazillion){
+    if ($ample_mem){
+      die "You can't currently select '--ample_mem' together with '--gazillion'. Make your pick!\n\n";
+    }
+  }
+
+  return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem);
+}
+
+
+### Tests whether a paired-end Bismark SAM/BAM result file has been sorted by chromosomal position.
+###
+### Argument: $filename - alignment file; plain SAM, gzipped SAM (.gz) or BAM (read via the global $samtools_path)
+### Returns:  nothing meaningful; progress messages go to STDOUT/STDERR
+### Dies if:  - the input cannot be opened (or it is a BAM file and no Samtools installation is known)
+###           - the SAM header declares a coordinate sort order
+###           - within the first 100000 alignment lines a Read 1 is not directly followed by a Read 2 with the same ID
+sub test_positional_sorting{
+
+  my $filename = shift;
+
+  print "\nNow testing Bismark result file $filename for positional sorting (which would be bad...)\t";
+  sleep(1);
+
+  ### Working out how the file needs to be read. External programs are started with the list form of open
+  ### so that file names containing spaces or shell meta-characters are handed over unchanged
+  my $is_gzipped = ($filename =~ /\.gz$/) ? 1 : 0;
+  my $is_bam = 0;
+
+  unless ($is_gzipped){
+    if ($filename =~ /bam$/){
+      $is_bam = 1;
+    }
+    else{
+      ### files without a telling extension are treated as BAM if 'file -b' reports gzip compressed data
+      if (open (my $probe,'-|','file','-b',$filename)){
+        my $file_type = <$probe>;
+        close $probe;
+        $is_bam = 1 if (defined $file_type and $file_type =~ /^gzip/);
+      }
+    }
+  }
+
+  if ($is_gzipped) {
+    open (TEST,'-|','zcat',$filename) or die "Can't open gzipped file $filename: $!\n";
+  }
+  elsif ($is_bam) {
+    if ($samtools_path){
+      open (TEST,'-|',$samtools_path,'view','-h',$filename) or die "Can't open BAM file $filename: $!\n";
+    }
+    else{
+      die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
+    }
+  }
+  else {
+    open (TEST,'<',$filename) or die "Can't open file $filename: $!\n";
+  }
+
+  my $count = 0;
+
+  while (<TEST>) {
+    if (/^\@/) {
+      ### The SAM format records the sort order in the SO tag of the @HD header line (e.g. SO:coordinate).
+      ### The test for a line starting with @SO is kept from earlier versions
+      if (/^\@SO/ or /^\@HD.*\tSO:coordinate/) {
+        die "SAM/BAM header line '$_' indicates that the Bismark aligment file has been sorted by chromosomal positions which is is incompatible with correct methylation extraction. Please use an unsorted file instead\n\n";
+      }
+      next;
+    }
+    $count++;
+
+    last if ($count > 100000); # else we test the first 100000 sequences if they start with the same read ID
+
+    my ($id_1) = (split (/\t/));
+
+    ### reading the next line which should be read 2
+    $_ = <TEST>;
+    last unless (defined $_); # the file ended after a Read 1 line, nothing left to compare
+    my ($id_2) = (split (/\t/));
+    last unless ($id_2);
+    ++$count;
+
+    if ($id_1 eq $id_2){
+      ### ids are the same
+      next;
+    }
+    else{ ### in previous versions of Bismark we appended /1 and /2 to the read IDs for easier eyeballing which read is which. These tags need to be removed first
+      my $id_1_trunc = $id_1;
+      $id_1_trunc =~ s/\/1$//;
+      my $id_2_trunc = $id_2;
+      $id_2_trunc =~ s/\/2$//;
+
+      unless ($id_1_trunc eq $id_2_trunc){
+        die "The IDs of Read 1 ($id_1) and Read 2 ($id_2) are not the same. This might be a result of sorting the paired-end SAM/BAM files by chromosomal position which is not compatible with correct methylation extraction. Please use an unsorted file instead\n\n";
+      }
+    }
+  }
+
+  ### The return value of close is deliberately not checked: we may stop reading long before the end of the file, in which
+  ### case the zcat/samtools process can be terminated by a broken pipe and close would then report a failure
+  close TEST;
+
+  ### If it hasn't died so far then it seems the file is in the correct Bismark format (read 1 and read 2 of a pair directly following each other)
+  warn "...passed!\n";
+  sleep(1);
+
 }


@@ -386,7 +1059,16 @@

   if ($filename =~ /\.gz$/) {
     open (IN,"zcat $filename |") or die "Can't open gzipped file $filename: $!\n";
-  } else {
+  }
+  elsif ($filename =~ /bam$/ || `file -b $filename` =~ /^gzip/) {
+    if ($samtools_path){
+      open (IN,"$samtools_path view -h $filename |") or die "Can't open BAM file $filename: $!\n";
+    }
+    else{
+      die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
+    }
+  }
+  else {
     open (IN,$filename) or die "Can't open file $filename: $!\n";
   }

@@ -414,7 +1096,8 @@
   ### OPENING OUTPUT-FILEHANDLES
   if ($report) {
     my $report_filename = $output_filename;
-    $report_filename =~ s/[\.sam|\.txt]$//;
+    $report_filename =~ s/\.sam$//;
+    $report_filename =~ s/\.txt$//;
     $report_filename =~ s/$/_splitting_report.txt/;
     $report_filename = $output_dir . $report_filename;
     open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
@@ -438,9 +1121,18 @@
 	print REPORT "Bismark result file: single-end (SAM format)\n"; # default
       }
     }
-
-    if ($ignore) {
-      print REPORT "Ignoring first $ignore bases\n";
+    if ($single){
+      if ($ignore) {
+	print REPORT "Ignoring first $ignore bp\n";
+      }
+    }
+    else{ # paired-end
+      if ($ignore) {
+	print REPORT "Ignoring first $ignore bp of Read 1\n";
+      }
+      if ($ignore_r2){
+	print REPORT "Ignoring first $ignore_r2 bp of Read 2\n";
+      }
     }

     if ($full) {
@@ -462,6 +1154,8 @@
     print REPORT "\n";
   }

+#####   open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";
+
   ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
   ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
   if ($full and $merge_non_CpG) {
@@ -469,27 +1163,46 @@
     ### C in CpG context
     $cpg_output =~ s/^/CpG_context_/;
     $cpg_output =~ s/sam$/txt/;
+    $cpg_output =~ s/bam$/txt/;
     $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
     $cpg_output = $output_dir . $cpg_output;
+
+    if ($gzip){
+      $cpg_output .= '.gz';
+      open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
     push @sorting_files,$cpg_output;
-    open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
-    print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";

     unless ($no_header) {
-      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### C in any other context than CpG
     $other_c_output =~ s/^/Non_CpG_context_/;
     $other_c_output =~ s/sam$/txt/;
+    $other_c_output =~ s/bam$/txt/;
     $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
     $other_c_output = $output_dir . $other_c_output;
+
+    if ($gzip){
+      $other_c_output .= '.gz';
+      open ($fhs{other_context},"| gzip -c - > $other_c_output") or die "Failed to write to $other_c_output $! \n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in any other context to $other_c_output\n" unless($mbias_only);
     push @sorting_files,$other_c_output;
-    open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n";
-    print "Writing result file containing methylation information for C in any other context to $other_c_output\n";
+

     unless ($no_header) {
-      print {$fhs{other_context}} "Bismark methylation extractor version $version\n";
+      print {$fhs{other_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }
   }

@@ -501,50 +1214,86 @@
     ### For cytosines in CpG context
     $cpg_ot =~ s/^/CpG_OT_/;
     $cpg_ot =~ s/sam$/txt/;
+    $cpg_ot =~ s/bam$/txt/;
     $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
     $cpg_ot = $output_dir . $cpg_ot;
+
+    if ($gzip){
+      $cpg_ot .= '.gz';
+      open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
     push @sorting_files,$cpg_ot;
-    open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";

     unless($no_header){
-      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ctot =~ s/^/CpG_CTOT_/;
     $cpg_ctot =~ s/sam$/txt/;
+    $cpg_ctot =~ s/bam$/txt/;
     $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
     $cpg_ctot = $output_dir . $cpg_ctot;
+
+    if ($gzip){
+      $cpg_ctot .= '.gz';
+      open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
     push @sorting_files,$cpg_ctot;
-    open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";

     unless($no_header){
-      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ctob =~ s/^/CpG_CTOB_/;
     $cpg_ctob =~ s/sam$/txt/;
+    $cpg_ctob =~ s/bam$/txt/;
     $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
     $cpg_ctob = $output_dir . $cpg_ctob;
+
+    if ($gzip){
+      $cpg_ctob .= '.gz';
+      open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
     push @sorting_files,$cpg_ctob;
-    open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";

     unless($no_header){
-      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ob =~ s/^/CpG_OB_/;
     $cpg_ob =~ s/sam$/txt/;
+    $cpg_ob =~ s/bam$/txt/;
     $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
     $cpg_ob = $output_dir . $cpg_ob;
+
+    if ($gzip){
+      $cpg_ob .= '.gz';
+      open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
     push @sorting_files,$cpg_ob;
-    open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";

     unless($no_header){
-      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### For cytosines in Non-CpG (CC, CT or CA) context
@@ -552,50 +1301,86 @@

     $other_c_ot =~ s/^/Non_CpG_OT_/;
     $other_c_ot =~ s/sam$/txt/;
+    $other_c_ot =~ s/bam$/txt/;
     $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
     $other_c_ot = $output_dir . $other_c_ot;
+
+    if ($gzip){
+      $other_c_ot .= '.gz';
+      open ($fhs{0}->{other_c},"| gzip -c - > $other_c_ot") or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n" unless($mbias_only);
     push @sorting_files,$other_c_ot;
-    open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n";
-    print "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n";

     unless($no_header){
-      print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n";
+      print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
     $other_c_ctot =~ s/sam$/txt/;
+    $other_c_ctot =~ s/bam$/txt/;
     $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
     $other_c_ctot = $output_dir . $other_c_ctot;
+
+    if ($gzip){
+      $other_c_ctot .= '.gz';
+      open ($fhs{1}->{other_c},"| gzip -c - > $other_c_ctot") or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n" unless($mbias_only);
     push @sorting_files,$other_c_ctot;
-    open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n";
-    print "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n";

     unless($no_header){
-      print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n";
+      print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
     $other_c_ctob =~ s/sam$/txt/;
+    $other_c_ctob =~ s/bam$/txt/;
     $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
     $other_c_ctob = $output_dir . $other_c_ctob;
+
+    if ($gzip){
+      $other_c_ctob .= '.gz';
+      open ($fhs{2}->{other_c},"| gzip -c - > $other_c_ctob") or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n" unless($mbias_only);
     push @sorting_files,$other_c_ctob;
-    open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n";
-    print "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n";

     unless($no_header){
-      print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n";
+      print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $other_c_ob =~ s/^/Non_CpG_OB_/;
     $other_c_ob =~ s/sam$/txt/;
+    $other_c_ob =~ s/bam$/txt/; # BAM input: swap the 'bam' extension too, as for all other output files (was a duplicated 'sam' substitution)
     $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
     $other_c_ob = $output_dir . $other_c_ob;
+
+    if ($gzip){
+      $other_c_ob .= '.gz';
+      open ($fhs{3}->{other_c},"| gzip -c - > $other_c_ob") or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n" unless($mbias_only);
     push @sorting_files,$other_c_ob;
-    open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n";
-    print "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n";

     unless($no_header){
-      print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n";
+      print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }
   }
   ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
@@ -606,40 +1391,67 @@
     ### C in CpG context
     $cpg_output =~ s/^/CpG_context_/;
     $cpg_output =~ s/sam$/txt/;
+    $cpg_output =~ s/bam$/txt/;
     $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
     $cpg_output = $output_dir . $cpg_output;
+
+    if ($gzip){
+      $cpg_output .= '.gz';
+      open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
     push @sorting_files,$cpg_output;
-    open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n";
-    print "Writing result file containing methylation information for C in CpG context to $cpg_output\n";

     unless($no_header){
-      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n";
+      print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### C in CHG context
     $chg_output =~ s/^/CHG_context_/;
     $chg_output =~ s/sam$/txt/;
+    $chg_output =~ s/bam$/txt/;
     $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
     $chg_output = $output_dir . $chg_output;
+
+    if ($gzip){
+      $chg_output .= '.gz';
+      open ($fhs{CHG_context},"| gzip -c - > $chg_output") or die "Failed to write to $chg_output $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context to $chg_output\n" unless($mbias_only);
     push @sorting_files,$chg_output;
-    open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n";
-    print "Writing result file containing methylation information for C in CHG context to $chg_output\n";

     unless($no_header){
-      print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n";
+      print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### C in CHH context
     $chh_output =~ s/^/CHH_context_/;
     $chh_output =~ s/sam$/txt/;
+    $chh_output =~ s/bam$/txt/;
     $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
     $chh_output = $output_dir . $chh_output;
+
+    if ($gzip){
+      $chh_output .= '.gz';
+      open ($fhs{CHH_context},"| gzip -c - > $chh_output") or die "Failed to write to $chh_output $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context to $chh_output\n" unless($mbias_only);
     push @sorting_files, $chh_output;
-    open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n";
-    print "Writing result file containing methylation information for C in CHH context to $chh_output\n";

     unless($no_header){
-      print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n";
+      print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }
   }
   ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
@@ -649,50 +1461,86 @@
     ### For cytosines in CpG context
     $cpg_ot =~ s/^/CpG_OT_/;
     $cpg_ot =~ s/sam$/txt/;
+    $cpg_ot =~ s/bam$/txt/;
     $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
     $cpg_ot = $output_dir . $cpg_ot;
+
+    if ($gzip){
+      $cpg_ot .= '.gz';
+      open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
     push @sorting_files,$cpg_ot;
-    open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n";

     unless($no_header){
-      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ctot =~ s/^/CpG_CTOT_/;
     $cpg_ctot =~ s/sam$/txt/;
+    $cpg_ctot =~ s/bam$/txt/;
     $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
     $cpg_ctot = $output_dir . $cpg_ctot;
+
+    if ($gzip){
+      $cpg_ctot .= '.gz';
+      open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
     push @sorting_files,$cpg_ctot;
-    open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n";

     unless($no_header){
-      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ctob =~ s/^/CpG_CTOB_/;
     $cpg_ctob =~ s/sam$/txt/;
+    $cpg_ctob =~ s/bam$/txt/;
     $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
     $cpg_ctob = $output_dir . $cpg_ctob;
+
+    if ($gzip){
+      $cpg_ctob .= '.gz';
+      open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
     push @sorting_files,$cpg_ctob;
-    open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n";

     unless($no_header){
-      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n";
+      print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $cpg_ob =~ s/^/CpG_OB_/;
     $cpg_ob =~ s/sam$/txt/;
+    $cpg_ob =~ s/bam$/txt/;
     $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
     $cpg_ob = $output_dir . $cpg_ob;
+
+    if ($gzip){
+      $cpg_ob .= '.gz';
+      open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
     push @sorting_files,$cpg_ob;
-    open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n";
-    print "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n";

     unless($no_header){
-      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n";
+      print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### For cytosines in CHG context
@@ -700,50 +1548,86 @@

     $chg_ot =~ s/^/CHG_OT_/;
     $chg_ot =~ s/sam$/txt/;
+    $chg_ot =~ s/bam$/txt/;
     $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
     $chg_ot = $output_dir . $chg_ot;
+
+    if ($gzip){
+      $chg_ot .= '.gz';
+      open ($fhs{0}->{CHG},"| gzip -c - > $chg_ot") or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n" unless($mbias_only);
     push @sorting_files,$chg_ot;
-    open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n";
-    print "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n";

     unless($no_header){
-      print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chg_ctot =~ s/^/CHG_CTOT_/;
     $chg_ctot =~ s/sam$/txt/;
+    $chg_ctot =~ s/bam$/txt/;
     $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
     $chg_ctot = $output_dir . $chg_ctot;
+
+    if ($gzip){
+      $chg_ctot .= '.gz';
+      open ($fhs{1}->{CHG},"| gzip -c - > $chg_ctot") or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n" unless($mbias_only);
     push @sorting_files,$chg_ctot;
-    open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n";
-    print "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n";

     unless($no_header){
-      print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chg_ctob =~ s/^/CHG_CTOB_/;
     $chg_ctob =~ s/sam$/txt/;
+    $chg_ctob =~ s/bam$/txt/;
     $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
     $chg_ctob = $output_dir . $chg_ctob;
+
+    if ($gzip){
+      $chg_ctob .= '.gz';
+      open ($fhs{2}->{CHG},"| gzip -c - > $chg_ctob") or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n" unless($mbias_only);
     push @sorting_files,$chg_ctob;
-    open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n";
-    print "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n";

     unless($no_header){
-      print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chg_ob =~ s/^/CHG_OB_/;
     $chg_ob =~ s/sam$/txt/;
+    $chg_ob =~ s/bam$/txt/;
     $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
     $chg_ob = $output_dir . $chg_ob;
+
+    if ($gzip){
+      $chg_ob .= '.gz';
+      open ($fhs{3}->{CHG},"| gzip -c - > $chg_ob") or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n" unless($mbias_only);
     push @sorting_files,$chg_ob;
-    open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n";
-    print "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n";

     unless($no_header){
-      print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n";
+      print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     ### For cytosines in CHH context
@@ -751,50 +1635,86 @@

     $chh_ot =~ s/^/CHH_OT_/;
     $chh_ot =~ s/sam$/txt/;
+    $chh_ot =~ s/bam$/txt/;
     $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
     $chh_ot = $output_dir . $chh_ot;
+
+    if ($gzip){
+      $chh_ot .= '.gz';
+      open ($fhs{0}->{CHH},"| gzip -c - > $chh_ot") or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n" unless($mbias_only);
     push @sorting_files,$chh_ot;
-    open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n";
-    print "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n";

     unless($no_header){
-      print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n";
+      print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chh_ctot =~ s/^/CHH_CTOT_/;
     $chh_ctot =~ s/sam$/txt/;
+    $chh_ctot =~ s/bam$/txt/;
     $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
     $chh_ctot = $output_dir . $chh_ctot;
+
+    if ($gzip){
+      $chh_ctot .= '.gz';
+      open ($fhs{1}->{CHH},"| gzip -c - > $chh_ctot") or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n" unless($mbias_only);
     push @sorting_files,$chh_ctot;
-    open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n";
-    print "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n";

     unless($no_header){
-      print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n";
+      print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chh_ctob =~ s/^/CHH_CTOB_/;
     $chh_ctob =~ s/sam$/txt/;
+    $chh_ctob =~ s/bam$/txt/;
     $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
     $chh_ctob = $output_dir . $chh_ctob;
+
+    if ($gzip){
+      $chh_ctob .= '.gz';
+      open ($fhs{2}->{CHH},"| gzip -c - > $chh_ctob") or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n" unless($mbias_only);
     push @sorting_files,$chh_ctob;
-    open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n";
-    print "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n";

     unless($no_header){
-      print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n";
+      print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }

     $chh_ob =~ s/^/CHH_OB_/;
     $chh_ob =~ s/sam$/txt/;
+    $chh_ob =~ s/bam$/txt/;
     $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
     $chh_ob = $output_dir . $chh_ob;
+
+    if ($gzip){
+      $chh_ob .= '.gz';
+      open ($fhs{3}->{CHH},"| gzip -c - > $chh_ob") or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
+    }
+    else{
+      open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
+    }
+
+    warn "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n" unless($mbias_only);
     push @sorting_files,$chh_ob;
-    open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n";
-    print "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n";

     unless($no_header){
-      print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n";
+      print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
     }
   }

@@ -928,6 +1848,7 @@
 	    # print "\n\n$meth_call\n";
 	    $meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);
 	    # print "$meth_call\n";
+
 	    ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly

 	    my @len = split (/\D+/,$cigar); # storing the length per operation
@@ -1045,14 +1966,20 @@

 	if ($meth_call_1 and $meth_call_2) {
 	  ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
+
 	  if ($ignore) {
 	    $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
-	    $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
-
+
 	    ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore' was specified
 	    $start_read_1 += $ignore;
-	    $end_read_2   -= $ignore;
 	  }
+	  if ($ignore_r2) {
+	    $meth_call_2 = substr($meth_call_2,$ignore_r2,length($meth_call_2)-$ignore_r2);
+
+	    ### we also need to adjust the start and end positions of the alignments accordingly if '--ignore_r2' was specified
+	    $end_read_2   -= $ignore_r2;
+	  }
+
 	  my $end_read_1;
 	  my $start_read_2;

@@ -1060,30 +1987,32 @@

 	    $end_read_1 = $start_read_1+length($meth_call_1)-1;
 	    $start_read_2 = $end_read_2-length($meth_call_2)+1;
-
+
 	    ## we first pass the first read which is in + orientation on the forward strand
-	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id,'+',$index,0,0,undef,1); # the last two values are CIGAR string and read identity

 	    # we next pass the second read which is in - orientation on the reverse strand
 	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
-	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1);
-	  } else {
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$end_read_2,$id,'-',$index,$no_overlap,$end_read_1,undef,2);
+	  }
+	  else {

 	    $end_read_1 = $start_read_1+length($meth_call_2)-1;	# read 1 is the second reported read!
 	    $start_read_2 = $end_read_2-length($meth_call_1)+1;	# read 2 is the first reported read!

 	    ## we first pass the first read which is in - orientation on the reverse strand
-	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$end_read_2,$id,'-',$index,0,0,undef,1);

 	    # we next pass the second read which is in + orientation on the forward strand
 	    ### if --no_overlap was specified we also pass the end of read 2. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
-	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_1,$id,'+',$index,$no_overlap,$start_read_2,undef,2);
 	  }

 	  $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
 	}
       }
-    } else {	      # Bismark paired-end SAM output format (default)
+    }
+    else {	      # Bismark paired-end SAM output format (default)
       while (<IN>) {
 	### SAM format can either start with header lines (starting with @) or start with alignments directly
 	if (/^\@/) {	     # skipping header lines (starting with @)
@@ -1186,7 +2115,8 @@
 	  my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
 	  my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
 	  shift @ops_1;		# remove the empty first element
-	  die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_1 == scalar @ops_1);
+
+	  die "CIGAR string contained a non-matching number of lengths and operations: $cigar_1\n".join(" ",@len_1)."\n".join(" ",@ops_1)."\n" unless (scalar @len_1 == scalar @ops_1);

 	  my @comp_cigar_1; # building an array with all CIGAR operations
 	  foreach my $index (0..$#len_1) {
@@ -1213,20 +2143,19 @@
 	  # print "original CIGAR read 2: $cigar_2\n";
 	  # print "original CIGAR read 2: @comp_cigar_2\n";

+
+
 	  if ($ignore) {
-	    ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
+	    ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>' for read 1
 	    ### the methylation calls have already been reversed where necessary
 	    $meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
-	    $meth_call_2 = substr($meth_call_2,$ignore,length($meth_call_2)-$ignore);
-
-	    ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly

 	    if ($strand eq '+') {

 	      ### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
 	      my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1, i.e. Deletions and insertions
 	      my $I_count_1 = 0;
-
+
 	      for (1..$ignore) {
 		my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
 		# print "$_ deleted $op\n";
@@ -1243,17 +2172,10 @@

 	      $start_read_1 += $ignore + $D_count_1 - $I_count_1;
 	      # print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tI_count 1: $I_count_1\n";
-
-	      ### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
-
-	      for (1..$ignore) {
-		my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
-		while ($op eq 'D') { # repeating this for deletions (D)
-		  $op = pop @comp_cigar_2;
-		}
-	      }
+
 	      # the start position of reads mapping to the reverse strand is being adjusted further below
-	    } elsif ($strand eq '-') {
+	    }
+	    elsif ($strand eq '-') {

 	      ### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
 	      for (1..$ignore) {
@@ -1264,11 +2186,35 @@
 	      }
 	      # the start position of reads mapping to the reverse strand is being adjusted further below

+	    }
+	  }
+
+	  if ($ignore_r2) {
+	    ### Clipping off the first <int> number of bases from the methylation call string as specified with '--ignore_r2 <int>' for read 2
+	    ### the methylation calls have already been reversed where necessary
+	    $meth_call_2 = substr($meth_call_2,$ignore_r2,length($meth_call_2)-$ignore_r2);
+
+	    ### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
+
+	    if ($strand eq '+') {
+
+	      ### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back
+
+	      for (1..$ignore_r2) {
+		my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore_r2 operations, here the last value of the array
+		while ($op eq 'D') { # repeating this for deletions (D)
+		  $op = pop @comp_cigar_2;
+		}
+	      }
+	      # the start position of reads mapping to the reverse strand is being adjusted further below
+	    }
+	    elsif ($strand eq '-') {
+
 	      ### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
 	      my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
-	      my $I_count_2 = 0;
-
-	      for (1..$ignore) {
+		      my $I_count_2 = 0;
+
+	      for (1..$ignore_r2) {
 		my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
 		# print "$_ deleted $op\n";

@@ -1282,11 +2228,12 @@
 		}
 	      }

-	      $start_read_2 += $ignore + $D_count_2 - $I_count_2;
-	      # print "start read 2 $start_read_2\t ignore: $ignore\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
+	      $start_read_2 += $ignore_r2 + $D_count_2 - $I_count_2;
+	      # print "start read 2 $start_read_2\t ignore R2: $ignore_r2\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
+	    }
+	  }

-	    }
-
+	  if ($ignore){
 	    ### reconstituting shortened CIGAR string 1
 	    my $new_cigar_1;
 	    my $count_1 = 0;
@@ -1309,6 +2256,9 @@
 	    $new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
 	    $cigar_1 = $new_cigar_1;
 	    # print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
+	  }
+
+	  if ($ignore_r2){

 	    ### reconstituting shortened CIGAR string 2
 	    my $new_cigar_2;
@@ -1323,7 +2273,8 @@
 	      }
 	      if ($last_op_2 eq $op) {
 		++$count_2;
-	      } else {
+	      }
+	      else {
 		$new_cigar_2 .= "$count_2$last_op_2";
 		$last_op_2 = $op;
 		$count_2 = 1;
@@ -1331,10 +2282,11 @@
 	    }
 	    $new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
 	    $cigar_2 = $new_cigar_2;
-	    # print "ignore adjusted CIGAR 2 scalar: $cigar_2\n";
-
+	    # print "ignore_r2 adjusted CIGAR 2 scalar: $cigar_2\n";
 	  }

+	  ### Adjusting CIGAR string and starting position of reads in reverse orientation which we will pass to the extraction subroutine later on
+
 	  if ($strand eq '+') {
 	    ### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
 	    @comp_cigar_2  = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
@@ -1352,7 +2304,8 @@

 	    $end_read_1 = $start_read_1 + $MD_count_1 - 1;
 	    $start_read_2 += $MD_count_2 - 1; ## Passing on the start position on the reverse strand
-	  } else {
+	  }
+	  else {
 	    ### adjusting the start position for all reads mapping to the reverse strand, in this case read 1

 	    @comp_cigar_1  = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
@@ -1365,23 +2318,22 @@

 	    $end_read_1 = $start_read_1;
 	    $start_read_1 +=  $MD_count_1 - 1; ### Passing on the start position on the reverse strand
-
 	  }

 	  if ($strand eq '+') {
-	    ## we first pass the first read which is in + orientation on the forward strand
-	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1);
+	    ## we first pass the first read which is in + orientation on the forward strand; the last value is the read identity
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1,1);

 	    # we next pass the second read which is in - orientation on the reverse strand
 	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
-	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2,2);
 	  } else {
 	    ## we first pass the first read which is in - orientation on the reverse strand
-	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1,1);

 	    # we next pass the second read which is in + orientation on the forward strand
 	    ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
-	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2);
+	    print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2,2);
 	  }

 	  $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
@@ -1392,9 +2344,10 @@
     die "Single-end or paired-end reads not specified properly\n";
   }

-  print "\n\nProcessed $line_count lines from $filename in total\n";
-  print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+  warn "\n\nProcessed $line_count lines from $filename in total\n";
+  warn "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
   if ($report) {
+    print REPORT "\n\nProcessed $line_count lines from $filename in total\n";
     print REPORT "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
   }
   print_splitting_report ();
@@ -1535,11 +2488,13 @@


-
-
 sub print_individual_C_methylation_states_paired_end_files{

-  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar) = @_;
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar,$read_identity) = @_;
+
+  ### we will use the read identity for the M-bias plot to discriminate read 1 and read 2
+  die "Read identity was neither 1 nor 2: $read_identity\n\n" unless ($read_identity == 1 or $read_identity == 2);
+
   my @methylation_calls = split(//,$meth_call);

   #################################################################
@@ -1550,6 +2505,8 @@
   ### h for not methylated C in CHH context (was converted)     ###
   ### Z for methylated C in CpG context (was protected)         ###
   ### z for not methylated C in CpG context (was converted)     ###
+  ### U for methylated C in Unknown context (was protected)     ###
+  ### u for not methylated C in Unknown context (was converted) ###
   #################################################################

   my $methyl_CHG_count = 0;
@@ -1559,16 +2516,21 @@
   my $unmethylated_CHH_count = 0;
   my $unmethylated_CpG_count = 0;

-  my @len;
-  my @ops;
   my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
   my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
   my @comp_cigar;

-  if ($cigar){ # parsing CIGAR string
+  ### Checking whether the CIGAR string is a linear genomic match or whether it requires indel processing
+  if ($cigar =~ /^\d+M$/){
+    # linear match: nothing to parse here, @comp_cigar stays empty; this check speeds up the extraction process by up to 60%!!!
+  }
+  else{ # parsing CIGAR string
+    my @len;
+    my @ops;
     @len = split (/\D+/,$cigar); # storing the length per operation
     @ops = split (/\d+/,$cigar); # storing the operation
     shift @ops; # remove the empty first element
+
     die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

     foreach my $index (0..$#len){
@@ -1586,11 +2548,12 @@
     # sleep (1);
   }

-
   if ($strand eq '-') {

     ### the  CIGAR string needs to be reversed, the methylation call has already been reversed above
-    @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+    if (@comp_cigar){
+      @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+    }
     #  print "reverse CIGAR string: @comp_cigar\n";

     ### the start position of paired-end files has already been corrected, see above
@@ -1599,15 +2562,14 @@
   ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified

   if ($merge_non_CpG) {
-
-    if ($no_overlap) {
+    if ($no_overlap) { # this has to be read 2...

       ### single-file CpG and non-CpG context output
       if ($full) {
 	if ($strand eq '+') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	      $cigar_offset += $cigar_mod;
@@ -1621,32 +2583,75 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.'){}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
-	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
 	    }
 	  }
-	} elsif ($strand eq '-') {
+	}
+	elsif ($strand eq '-') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      $cigar_offset += $cigar_mod;
@@ -1657,29 +2662,71 @@
 	    if ($start-$index+$pos_offset <= $end_read_1) {
 	      return;
 	    }
-
+
 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
-	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
 	    }
 	  }
 	} else {
@@ -1692,7 +2739,7 @@
 	if ($strand eq '+') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	      $cigar_offset += $cigar_mod;
@@ -1706,24 +2753,66 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
 	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	    }
@@ -1731,7 +2820,7 @@
 	} elsif ($strand eq '-') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      $cigar_offset += $cigar_mod;
@@ -1745,24 +2834,66 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
 	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	    }
@@ -1781,7 +2912,7 @@
 	if ($strand eq '+') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	      $cigar_offset += $cigar_mod;
@@ -1790,32 +2921,74 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
-	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
 	    }
 	  }
 	} elsif ($strand eq '-') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      $cigar_offset += $cigar_mod;
@@ -1824,26 +2997,68 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	       if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
-	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
 	    }
 	  }
 	} else {
@@ -1857,7 +3072,7 @@
 	if ($strand eq '+') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	      $cigar_offset += $cigar_mod;
@@ -1866,24 +3081,66 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	       if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
 	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	    }
@@ -1891,7 +3148,7 @@
 	} elsif ($strand eq '-') {
 	  for my $index (0..$#methylation_calls) {

-	    if ($cigar){ # only needed for SAM files
+	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	      $cigar_offset += $cigar_mod;
@@ -1900,24 +3157,66 @@

 	    if ($methylation_calls[$index] eq 'X') {
 	      $counting{total_meCHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'x') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'x') {
 	      $counting{total_unmethylated_CHG_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'Z') {
 	      $counting{total_meCpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'z') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'z') {
 	      $counting{total_unmethylated_CpG_count}++;
-	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'H') {
+	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CpG}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CpG}->{$index+1}->{un}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'H') {
 	      $counting{total_meCHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	    } elsif ($methylation_calls[$index] eq 'h') {
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{meth}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{meth}++;
+	      }
+	    }
+	    elsif ($methylation_calls[$index] eq 'h') {
 	      $counting{total_unmethylated_CHH_count}++;
-	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	      if ($read_identity == 1){
+		$mbias_1{CHH}->{$index+1}->{un}++;
+	      }
+	      else{
+		$mbias_2{CHH}->{$index+1}->{un}++;
+	      }
 	    }
 	    elsif ($methylation_calls[$index] eq '.') {}
+	    elsif (lc$methylation_calls[$index] eq 'u'){}
 	    else{
 	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	    }
@@ -1939,7 +3238,7 @@
       if ($strand eq '+') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	    $cigar_offset += $cigar_mod;
@@ -1953,24 +3252,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -1978,7 +3319,7 @@
       } elsif ($strand eq '-') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    $cigar_offset += $cigar_mod;
@@ -1992,24 +3333,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2024,7 +3407,7 @@
       if ($strand eq '+') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	    $cigar_offset += $cigar_mod;
@@ -2038,24 +3421,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2063,7 +3488,7 @@
       } elsif ($strand eq '-') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    $cigar_offset += $cigar_mod;
@@ -2077,24 +3502,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2112,7 +3579,7 @@
       if ($strand eq '+') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	    $cigar_offset += $cigar_mod;
@@ -2121,24 +3588,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2146,7 +3655,7 @@
       } elsif ($strand eq '-') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    $cigar_offset += $cigar_mod;
@@ -2155,24 +3664,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2187,7 +3738,7 @@
       if ($strand eq '+') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	    $cigar_offset += $cigar_mod;
@@ -2196,24 +3747,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2221,7 +3814,7 @@
       } elsif ($strand eq '-') {
 	for my $index (0..$#methylation_calls) {

-	  if ($cigar){ # only needed for SAM files
+	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
 	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	    $cigar_offset += $cigar_mod;
@@ -2230,24 +3823,66 @@

 	  if ($methylation_calls[$index] eq 'X') {
 	    $counting{total_meCHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'x') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'x') {
 	    $counting{total_unmethylated_CHG_count}++;
-	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'Z') {
+	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'Z') {
 	    $counting{total_meCpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'z') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'z') {
 	    $counting{total_unmethylated_CpG_count}++;
-	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'H') {
+	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CpG}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CpG}->{$index+1}->{un}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'H') {
 	    $counting{total_meCHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	  } elsif ($methylation_calls[$index] eq 'h') {
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{meth}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{meth}++;
+	    }
+	  }
+	  elsif ($methylation_calls[$index] eq 'h') {
 	    $counting{total_unmethylated_CHH_count}++;
-	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	    if ($read_identity == 1){
+	      $mbias_1{CHH}->{$index+1}->{un}++;
+	    }
+	    else{
+	      $mbias_2{CHH}->{$index+1}->{un}++;
+	    }
 	  }
 	  elsif ($methylation_calls[$index] eq '.') {}
+	  elsif (lc$methylation_calls[$index] eq 'u'){}
 	  else{
 	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	  }
@@ -2373,23 +4008,34 @@
   my $unmethylated_CHH_count = 0;
   my $unmethylated_CpG_count = 0;

-  my @len;
-  my @ops;
   my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
   my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels

   my @comp_cigar;

   if ($cigar){ # parsing CIGAR string
-    @len = split (/\D+/,$cigar); # storing the length per operation
-    @ops = split (/\d+/,$cigar); # storing the operation
-    shift @ops; # remove the empty first element
-    die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
-
-    foreach my $index (0..$#len){
-      foreach (1..$len[$index]){
-	# print  "$ops[$index]";
-	push @comp_cigar, $ops[$index];
+
+    ### Checking whether the CIGAR string is a linear genomic match or whether it requires indel processing
+    if ($cigar =~ /^\d+M$/){
+      #  warn "See!? I told you so! $cigar\n";
+      # sleep(1);
+    }
+    else{
+
+      my @len;
+      my @ops;
+
+      @len = split (/\D+/,$cigar); # storing the length per operation
+      @ops = split (/\d+/,$cigar); # storing the operation
+      shift @ops; # remove the empty first element
+      # die "CIGAR string contained a non-matching number of lengths and operations: id: $id\nmeth call: $meth_call\nCIGAR: $cigar\n".join(" ",@len)."\n".join(" ",@ops)."\n" unless (scalar @len == scalar @ops);
+      die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
+
+      foreach my $index (0..$#len){
+	foreach (1..$len[$index]){
+	  # print  "$ops[$index]";
+	  push @comp_cigar, $ops[$index];
+	}
       }
     }
     # warn "\nDetected CIGAR string: $cigar\n";
@@ -2404,17 +4050,24 @@
   ### adjusting the start position for all reads mapping to the reverse strand
   if ($strand eq '-') {

-    @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
-    # print @comp_cigar,"\n";
+    if (@comp_cigar){ # only needed for SAM reads with InDels
+      @comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
+      # print @comp_cigar,"\n";
+    }

     unless ($ignore){  ### if --ignore was specified the start position has already been corrected

       if ($cigar){ ### SAM format
-	my $MD_count = 0;
-	foreach (@comp_cigar){
-	  ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	if ($cigar =~ /^(\d+)M$/){ # linear match
+	  $start += $1 - 1;
 	}
-	$start += $MD_count - 1;
+	else{ # InDel read
+	  my $MD_count = 0;
+	  foreach (@comp_cigar){
+	    ++$MD_count if ($_ eq 'M' or $_ eq 'D'); # Matching bases or deletions affect the genomic position of the 3' ends of reads, insertions don't
+	  }
+	  $start += $MD_count - 1;
+	}
       }
       else{ ### vanilla format
 	$start += length($meth_call)-1;
@@ -2428,42 +4081,48 @@
   if ($full and $merge_non_CpG) {
     if ($strand eq '+') {
       for my $index (0..$#methylation_calls) {
-
-	if ($cigar){ # only needed for SAM files
+
+	if ($cigar and @comp_cigar){ # only needed for SAM alignments with InDels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
 	}
-
+
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation
 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
-	elsif ($methylation_calls[$index] eq '.') {
-	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2475,7 +4134,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM entries with InDels
 	  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
@@ -2484,30 +4143,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
-	elsif ($methylation_calls[$index] eq '.'){
-	}
+	elsif ($methylation_calls[$index] eq '.'){}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2525,7 +4190,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
@@ -2533,30 +4198,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
-	elsif ($methylation_calls[$index] eq '.') {
-	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2568,7 +4239,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-    	if ($cigar){ # only needed for SAM files
+    	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
@@ -2576,30 +4247,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
 	}
 	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
-	elsif ($methylation_calls[$index] eq '.') {
-	}
+	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2618,34 +4295,46 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
 	}
-
+
 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'x') {
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'Z') {
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'z') {
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'H') {
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'h') {
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
-	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
 	}
       }
     }
@@ -2655,7 +4344,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
@@ -2663,24 +4352,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'x') {
+	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'Z') {
+	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'z') {
+	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'H') {
+	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'h') {
+	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2698,7 +4399,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
@@ -2706,24 +4407,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'x') {
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'Z') {
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'z') {
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'H') {
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'h') {
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2735,7 +4448,7 @@
 	### methylated Cs (any context) will receive a forward (+) orientation
 	### not methylated Cs (any context) will receive a reverse (-) orientation

-	if ($cigar){ # only needed for SAM files
+	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
 	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);
 	  $cigar_offset += $cigar_mod;
 	  $pos_offset += $pos_mod;
@@ -2743,24 +4456,36 @@

 	if ($methylation_calls[$index] eq 'X') {
 	  $counting{total_meCHG_count}++;
-	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'x') {
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
 	  $counting{total_unmethylated_CHG_count}++;
-	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'Z') {
+	  print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
 	  $counting{total_meCpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'z') {
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
 	  $counting{total_unmethylated_CpG_count}++;
-	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'H') {
+	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CpG}->{$index+1}->{un}++;
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
 	  $counting{total_meCHH_count}++;
-	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
-	} elsif ($methylation_calls[$index] eq 'h') {
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{meth}++;
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
 	  $counting{total_unmethylated_CHH_count}++;
-	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n";
+	  print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index]),"\n" unless($mbias_only);
+	  $mbias_1{CHH}->{$index+1}->{un}++;
 	}
 	elsif ($methylation_calls[$index] eq '.') {}
+	elsif (lc$methylation_calls[$index] eq 'u'){}
 	else{
 	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
 	}
@@ -2774,569 +4499,6 @@


-#######################################################################################################################################
-### bismark2bedGaph section - START
-#######################################################################################################################################
-
-sub process_bedGraph_output{
-  warn  "="x64,"\n";
-  warn "Methylation information will now be written into a bedGraph file\n";
-  warn  "="x64,"\n\n";
-  sleep (2);
-
-  ### Closing all filehandles so that the Bismark methtylation extractor output doesn't get truncated due to buffering issues
-  foreach my $fh (keys %fhs) {
-    if ($fh =~ /^[1230]$/) {
-      foreach my $context (keys %{$fhs{$fh}}) {
-	close $fhs{$fh}->{$context} or die $!;
-      }
-    } else {
-      close $fhs{$fh} or die $!;
-    }
-  }
-
-  ### deciding which files to use for bedGraph conversion
-  foreach my $filename (@sorting_files){
-    # warn "$filename\n";
-    if ($filename =~ /\//){ # if files are in a different output folder we extract the filename again
-      $filename =~ s/.*\///; # replacing everything up to the last slash in the filename
-      # warn "$filename\n";
-    }
-
-    if ($CX_context){
-      push @bedfiles,$filename;
-    }
-    else{ ## CpG context only (default)
-      if ($filename =~ /^CpG_/){
-	push @bedfiles,$filename;
-      }
-      else{
-	# skipping CHH or CHG files
-      }
-    }
-  }
-
-  warn "Using the following files as Input:\n";
-  print join ("\t",@bedfiles),"\n\n";
-  sleep (2);
-
-  my %temp_fhs;
-  my @temp_files; # writing all context files (default CpG only) to these files prior to sorting
-
-  ### changing to the output directory
-  unless ($output_dir eq ''){ # default
-    chdir $output_dir or die "Failed to change directory to $output_dir\n";
-    warn "Changed directory to $output_dir\n";
-  }
-
-  foreach my $infile (@bedfiles) {
-
-    if ($remove) {
-      warn "Now replacing whitespaces in the sequence ID field of the Bismark methylation extractor output $infile prior to bedGraph conversion\n\n";
-
-      open (READ,$infile) or die $!;
-
-      my $removed_spaces_outfile = $infile;
-      $removed_spaces_outfile =~ s/$/.spaces_removed.txt/;
-
-      open (REM,'>',$output_dir.$removed_spaces_outfile) or die "Couldn't write to file $removed_spaces_outfile: $!\n";
-
-      unless ($no_header){
-	$_ = <READ>;		### Bismark version header
-	print REM $_;		### Bismark version header
-      }
-
-      while (<READ>) {
-	chomp;
-	my ($id,$strand,$chr,$pos,$context) = (split (/\t/));
-	$id =~ s/\s+/_/g;
-	print REM join ("\t",$id,$strand,$chr,$pos,$context),"\n";
-      }
-
-      close READ or die $!;
-      close REM or die $!;
-
-      ### changing the infile name to the new file without spaces
-      $infile = $removed_spaces_outfile;
-    }
-
-    warn "Now writing methylation information for file $infile to individual files for each chromosome\n";
-    open (IN,$infile) or die $!;
-
-    ## always ignoring the version header
-    unless ($no_header){
-      $_ = <IN>;		### Bismark version header
-    }
-
-    while (<IN>) {
-      chomp;
-      my ($chr) = (split (/\t/))[2];
-
-      unless (exists $temp_fhs{$chr}) {
-	open ($temp_fhs{$chr},'>','chr'.$chr.'.meth_extractor.temp') or die "Failed to open filehandle: $!";
-      }
-      print {$temp_fhs{$chr}} "$_\n";
-    }
-
-    warn "Finished writing out individual chromosome files for $infile\n";
-  }
-  warn "\n";
-
-  @temp_files = <*.meth_extractor.temp>;
-
-  warn "Collecting temporary chromosome file information...\n";
-  sleep (1);
-  warn "processing the following input file(s):\n";
-  warn join ("\n",@temp_files),"\n\n";
-  sleep (1);
-
-  foreach my $in (@temp_files) {
-    warn "Sorting input file $in by positions\n";
-    open my $ifh, "sort -k3,3 -k4,4n $in |" or die "Input file could not be sorted. $!";
-    # print "Chromosome\tStart Position\tEnd Position\tMethylation Percentage\n";
-
-    ############################################# m.a.bentley - moved the variables out of the while loop to hold the current line data {
-
-    my $name;
-    my $meth_state;
-    my $chr = "";
-    my $pos = 0;
-    my $meth_state2;
-
-    my $last_pos;
-    my $last_chr;
-
-    #############################################  }
-
-    while (my $line = <$ifh>) {
-      next if $line =~ /^Bismark/;
-      chomp $line;
-
-      ########################################### m.a.bentley - (1) set the last_chr and last_pos variables early in the while loop, before the line split (2) removed unnecessary setting of same variables in if statement {
-
-      $last_chr = $chr;
-      $last_pos = $pos;
-      ($name, $meth_state, $chr, $pos, $meth_state2) = split "\t", $line;
-
-      if (($last_pos ne $pos) || ($last_chr ne $chr)) {
-	generate_output($last_chr,$last_pos) if $methylcalls[2] > 0;
-	@methylcalls = qw (0 0 0);
-      }
-
-      #############################################  }
-
-      my $validated = validate_methylation_call($meth_state, $meth_state2);
-      unless($validated){
-	warn "Methylation state of sequence ($name) in file ($in) on line $. is inconsistent (meth_state is $meth_state, meth_state2 = $meth_state2)\n";
-	next;
-      }
-      if ($meth_state eq "+") {
-	$methylcalls[0]++;
-	$methylcalls[2]++;
-      } else {
-	$methylcalls[1]++;
-	$methylcalls[2]++;
-      }
-    }
-
-    ############################################# m.a.bentley - set the last_chr and last_pos variables for the last line in the file (outside the while loop's scope using the method i've implemented) {
-
-    $last_chr = $chr;
-    $last_pos = $pos;
-    if ($methylcalls[2] > 0) {
-      generate_output($last_chr,$last_pos) if $methylcalls[2] > 0;
-    }
-    #############################################  }
-
-    close $ifh or die $!;
-
-    @methylcalls = qw (0 0 0); # resetting @methylcalls
-
-    ### deleting temporary files
-    my $delete = unlink $in;
-    if ($delete) {
-      warn "Successfully deleted the temporary input file $in\n\n";
-    }
-    else {
-      warn "The temporary inputfile $in could not be deleted $!\n\n";
-    }
-  }
-}
-
-sub generate_output{
-  my $methcount = $methylcalls[0];
-  my $nonmethcount = $methylcalls[1];
-  my $totalcount = $methylcalls[2];
-  my $last_chr = shift;
-  my $last_pos = shift;
-  croak "Should not be generating output if there's no reads to this region" unless $totalcount > 0;
-  croak "Total counts ($totalcount) is not the sum of the methylated ($methcount) and unmethylated ($nonmethcount) counts" if $totalcount != ($methcount + $nonmethcount);
-
-  ############################################# m.a.bentley - declare a new variable 'bed_pos' to distinguish from bismark positions (-1) - previous scripts modified the last_pos variable earlier in the script leading to problems in meth % calculation {
-
-  my $bed_pos = $last_pos -1; ### Bismark coordinates are 1 based whereas bedGraph coordinates are 0 based.
-  my $meth_percentage;
-  ($totalcount >= $coverage_threshold) ? ($meth_percentage = ($methcount/$totalcount) * 100) : ($meth_percentage = undef);
-  # $meth_percentage =~ s/(\.\d\d).+$/$1/ unless $meth_percentage =~ /^Below/;
-  if (defined $meth_percentage){
-    if ($counts){
-      print OUT "$last_chr\t$bed_pos\t$bed_pos\t$meth_percentage\t$methcount\t$nonmethcount\n";
-    }
-    else{
-      print OUT "$last_chr\t$bed_pos\t$bed_pos\t$meth_percentage\n";
-    }
-  }
-  #############################################  }
-}
-
-sub validate_methylation_call{
-  my $meth_state = shift;
-  croak "Missing (+/-) methylation call" unless defined $meth_state;
-  my $meth_state2 = shift;
-  croak "Missing alphabetical methylation call" unless defined $meth_state2;
-  my $is_consistent;
-  ($meth_state2 =~ /^z/i) ? ($is_consistent = check_CpG_methylation_call($meth_state, $meth_state2))
-                          : ($is_consistent = check_nonCpG_methylation_call($meth_state,$meth_state2));
-  return 1 if $is_consistent;
-  return 0;
-}
-
-sub check_CpG_methylation_call{
-  my $meth1 = shift;
-  my $meth2 = shift;
-  return 1 if($meth1 eq "+" && $meth2 eq "Z");
-  return 1 if($meth1 eq "-" && $meth2 eq "z");
-  return 0;
-}
-
-sub check_nonCpG_methylation_call{
-  my $meth1 = shift;
-  my $meth2 = shift;
-  return 1 if($meth1 eq "+" && $meth2 eq "C");
-  return 1 if($meth1 eq "+" && $meth2 eq "X");
-  return 1 if($meth1 eq "+" && $meth2 eq "H");
-  return 1 if($meth1 eq "-" && $meth2 eq "c");
-  return 1 if($meth1 eq "-" && $meth2 eq "x");
-  return 1 if($meth1 eq "-" && $meth2 eq "h");
-  return 0;
-}
-
-#######################################################################################################################################
-### bismark2bedGaph section - END
-#######################################################################################################################################
-
-
-
-
-
-
-#######################################################################################################################################
-### genome-wide cytosine methylation report - START
-#######################################################################################################################################
-
-sub generate_genome_wide_cytosine_report {
-
-  warn  "="x78,"\n";
-  warn "Methylation information will now be written into a genome-wide cytosine report\n";
-  warn  "="x78,"\n\n";
-  sleep (2);
-
-  ### changing to the output directory again
-  unless ($output_dir eq ''){ # default
-    chdir $output_dir or die "Failed to change directory to $output_dir\n";
-    # warn "Changed directory to $output_dir\n";
-  }
-
-  my $in = shift;
-  open (IN,$in) or die $!;
-
-  my $cytosine_out = shift;
-
-  if ($CX_context){
-    $cytosine_out =~ s/$/genome-wide_CX_report.txt/;
-  }
-  else{
-    $cytosine_out =~ s/$/genome_wide_CpG_report.txt/;
-  }
-
-  ### note: we are still in the folder: $output_dir, so we do not have to include this into the open commands
-  unless ($split_by_chromosome){ ### writing all output to a single file (default)
-    open (CYT,'>',$cytosine_out) or die $!;
-    warn "Writing genome-wide cytosine report to: $cytosine_out\n";
-    sleep (3);
-  }
-
-  my $last_chr;
-  my %chr; # storing reads for one chromosome at a time
-
-  my $count = 0;
-  while (<IN>){
-    chomp;
-    ++$count;
-    my ($chr,$start,$end,undef,$meth,$nonmeth) = (split /\t/);
-
-    # defining the first chromosome
-    unless (defined $last_chr){
-      $last_chr = $chr;
-      # warn "Storing all covered cytosine positions for chromosome: $chr\n";
-    }
-
-    if ($chr eq $last_chr){
-      $chr{$chr}->{$start}->{meth} = $meth;
-      $chr{$chr}->{$start}->{nonmeth} = $nonmeth;
-    }
-    else{
-      warn "Writing cytosine reports for chromosome $last_chr (stored ",scalar keys %{$chr{$last_chr}}," different covered positions)\n";
-
-      if ($split_by_chromosome){ ## writing output to 1 file per chromosome
-	my $chromosome_out = $cytosine_out;
-	$chromosome_out =~ s/txt$/chr${last_chr}.txt/;
-      open (CYT,'>',$chromosome_out) or die $!;
-    }
-
-      while ( $chromosomes{$last_chr} =~ /([CG])/g){
-
-	my $tri_nt = '';
-	my $context = '';
-	my $pos = pos$chromosomes{$last_chr};
-
-	my $strand;
-	my $meth = 0;
-	my $nonmeth = 0;
-
-	if ($1 eq 'C'){    # C on forward strand
-	  $tri_nt = substr ($chromosomes{$last_chr},($pos-1),3);   # positions are 0-based!
-	  $strand = '+';
-	}
-	elsif ($1 eq 'G'){ # C on reverse strand
-	  $tri_nt = substr ($chromosomes{$last_chr},($pos-3),3);   # positions are 0-based!
-	  $tri_nt = reverse $tri_nt;
-	  $tri_nt =~ tr/ACTG/TGAC/;
-	  $strand = '-';
-	}
-	next if (length$tri_nt < 3); # trinucleotide sequence could not be extracted
-
-	if (exists $chr{$last_chr}->{($pos-1)}){ # stored positions are 0-based!
-	  $meth =  $chr{$last_chr}->{$pos-1}->{meth};
-	  $nonmeth = $chr{$last_chr}->{$pos-1}->{nonmeth};
-	}
-
-	### determining cytosine context
-	if ($tri_nt =~ /^CG/){
-	  $context = 'CG';
-	}
-	elsif ($tri_nt =~ /^C.{1}G$/){
-	  $context = 'CHG';
-	}
-	elsif ($tri_nt =~ /^C.{2}$/){
-	  $context = 'CHH';
-	}
-	else{ # if the context can't be determined the positions will not be printed (it will equally not have been reported by Bismark)
-	  warn "The sequence context could not be determined (found: '$tri_nt'). Skipping.\n";
-	  next;
-	}
-
-	if ($CpG_only){
-	  if ($tri_nt =~ /^CG/){ # CpG context is the default
-	    if ($zero){ # zero based coordinates
-	      $pos -= 1;
-	      print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	    }
-	    else{ # default
-	      print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	    }
-	  }
-	}
-	else{ ## all cytosines, specified with --CX
-	  if ($zero){ # zero based coordinates
-	    $pos -= 1;
-	    print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	  }
-	  else{ # default
-	    print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	  }
-	}
-      }
-
-      %chr = (); # resetting the hash
-
-      # new first entry
-      $last_chr = $chr;
-      $chr{$chr}->{$start}->{meth} = $meth;
-      $chr{$chr}->{$start}->{nonmeth} = $nonmeth;
-    }
-  }
-
-  # Last found chromosome
-warn "Writing cytosine reports for chromosome $last_chr (stored ",scalar keys %{$chr{$last_chr}}," different covered positions)\n";
-
-if ($split_by_chromosome){ ## writing output to 1 file per chromosome
-  my $chromosome_out = $cytosine_out;
-  $chromosome_out =~ s/txt$/chr${last_chr}.txt/;
-  open (CYT,'>',$chromosome_out) or die $!;
-}
-
-  while ( $chromosomes{$last_chr} =~ /([CG])/g){
-
-    my $tri_nt;
-    my $context;
-    my $pos = pos$chromosomes{$last_chr};
-
-    my $strand;
-    my $meth = 0;
-    my $nonmeth = 0;
-
-    if ($1 eq 'C'){    # C on forward strand
-      $tri_nt = substr ($chromosomes{$last_chr},($pos-1),3);   # positions are 0-based!
-      $strand = '+';
-    }
-    elsif ($1 eq 'G'){ # C on reverse strand
-      $tri_nt = substr ($chromosomes{$last_chr},($pos-3),3);   # positions are 0-based!
-      $tri_nt = reverse $tri_nt;
-      $tri_nt =~ tr/ACTG/TGAC/;
-      $strand = '-';
-    }
-
-    if (exists $chr{$last_chr}->{($pos-1)}){ # stored positions are 0-based!
-      $meth =  $chr{$last_chr}->{$pos-1}->{meth};
-      $nonmeth = $chr{$last_chr}->{$pos-1}->{nonmeth};
-    }
-
-    next if (length$tri_nt < 3); # trinucleotide sequence could not be extracted
-
-    ### determining cytosine context
-    if ($tri_nt =~ /^CG/){
-      $context = 'CG';
-    }
-    elsif ($tri_nt =~ /^C.{1}G$/){
-      $context = 'CHG';
-    }
-    elsif ($tri_nt =~ /^C.{2}$/){
-      $context = 'CHH';
-    }
-    else{ # if the context can't be determined the positions will not be printed (it will equally not have been reported by Bismark)
-      warn "The cytosine context could not be determined (found: '$tri_nt'). Skipping.\n";
-      next;
-    }
-
-    if ($CpG_only){
-      if ($tri_nt =~ /^CG/){ # CpG context is the default
-	if ($zero){ # zero-based coordinates
-	  $pos -= 1;
-	  print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	}
-	else{ # default
-	  print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-	}
-      }
-    }
-    else{ ## all cytosines, specified with --CX
-      if ($zero){ # zero based coordinates
-	$pos -= 1;
-	print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-      }
-      else{ # default
-	print CYT join ("\t",$last_chr,$pos,$strand,$meth,$nonmeth,$context,$tri_nt),"\n";
-      }
-    }
-  }
-  close CYT or die $!;
-}
-
-
-sub read_genome_into_memory{
-
-  ## reading in and storing the specified genome in the %chromosomes hash
-  chdir ($genome_folder) or die "Can't move to $genome_folder: $!";
-  warn "Now reading in and storing sequence information of the genome specified in: $genome_folder\n\n";
-
-  my @chromosome_filenames =  <*.fa>;
-
-  ### if there aren't any genomic files with the extension .fa we will look for files with the extension .fasta
-  unless (@chromosome_filenames){
-    @chromosome_filenames =  <*.fasta>;
-  }
-  unless (@chromosome_filenames){
-    die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions)\n";
-  }
-
-  foreach my $chromosome_filename (@chromosome_filenames){
-
-    # skipping the tophat entire mouse genome fasta file
-    next if ($chromosome_filename eq 'Mus_musculus.NCBIM37.fa');
-
-    open (CHR_IN,$chromosome_filename) or die "Failed to read from sequence file $chromosome_filename $!\n";
-    ### first line needs to be a fastA header
-    my $first_line = <CHR_IN>;
-    chomp $first_line;
-    $first_line =~ s/\r//; # removing /r carriage returns
-
-    ### Extracting chromosome name from the FastA header
-    my $chromosome_name = extract_chromosome_name($first_line);
-
-    my $sequence;
-    while (<CHR_IN>){
-      chomp;
-      $_ =~ s/\r//; # removing /r carriage returns
-
-      if ($_ =~ /^>/){
-	### storing the previous chromosome in the %chromosomes hash, only relevant for Multi-Fasta-Files (MFA)
-	if (exists $chromosomes{$chromosome_name}){
-	  warn "chr $chromosome_name (",length $sequence ," bp)\n";
-	  die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name!\n";
-	}
-	else {
-	  if (length($sequence) == 0){
-	    warn "Chromosome $chromosome_name in the multi-fasta file $chromosome_filename did not contain any sequence information!\n";
-	  }
-	  warn "chr $chromosome_name (",length $sequence ," bp)\n";
-	  $chromosomes{$chromosome_name} = $sequence;
-	}
-	### resetting the sequence variable
-	$sequence = '';
-	### setting new chromosome name
-	$chromosome_name = extract_chromosome_name($_);
-      }
-      else{
-	$sequence .= uc$_;
-      }
-    }
-
-    if (exists $chromosomes{$chromosome_name}){
-      warn "chr $chromosome_name (",length $sequence ," bp)\t";
-      die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\n";
-    }
-    else{
-      if (length($sequence) == 0){
-	warn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\n";
-      }
-      warn "chr $chromosome_name (",length $sequence ," bp)\n";
-      $chromosomes{$chromosome_name} = $sequence;
-    }
-  }
-  warn "\n";
-  chdir $parent_dir or die "Failed to move to directory $parent_dir\n";
-}
-
-sub extract_chromosome_name {
-  ## Bowtie extracts the first string after the inition > in the FASTA file, so we are doing this as well
-  my $fasta_header = shift;
-  if ($fasta_header =~ s/^>//){
-    my ($chromosome_name) = split (/\s+/,$fasta_header);
-    return $chromosome_name;
-  }
-  else{
-    die "The specified chromosome ($fasta_header) file doesn't seem to be in FASTA format as required!\n";
-  }
-}
-
-#######################################################################################################################################
-### genome-wide cytosine methylation report - END
-#######################################################################################################################################
-
-
-
-
 sub print_helpfile{

  print << 'HOW_TO';
@@ -3350,15 +4512,17 @@
 for individual cytosines. This information is found in the methylation call field
 which can contain the following characters:

-       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-       ~~~   X   for methylated C in CHG context (was protected)     ~~~
-       ~~~   x   for not methylated C CHG (was converted)            ~~~
-       ~~~   H   for methylated C in CHH context (was protected)     ~~~
-       ~~~   h   for not methylated C in CHH context (was converted) ~~~
-       ~~~   Z   for methylated C in CpG context (was protected)     ~~~
-       ~~~   z   for not methylated C in CpG context (was converted) ~~~
-       ~~~   .   for any bases not involving cytosines               ~~~
-       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       ~~~   X   for methylated C in CHG context                      ~~~
+       ~~~   x   for not methylated C in CHG context                  ~~~
+       ~~~   H   for methylated C in CHH context                      ~~~
+       ~~~   h   for not methylated C in CHH context                  ~~~
+       ~~~   Z   for methylated C in CpG context                      ~~~
+       ~~~   z   for not methylated C in CpG context                  ~~~
+       ~~~   U   for methylated C in Unknown context (CN or CHN)      ~~~
+       ~~~   u   for not methylated C in Unknown context (CN or CHN)  ~~~
+       ~~~   .   for any bases not involving cytosines                ~~~
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
 context (this distinction is actually already made in Bismark itself). As the methylation
@@ -3392,6 +4556,7 @@


 ARGUMENTS:
+==========

 <filenames>              A space-separated list of Bismark result files in SAM format from
                          which methylation information is extracted for every cytosine in
@@ -3421,12 +4586,21 @@
                          proportion of the data. This option is highly recommended for paired-end
                          data.

---ignore <int>           Ignore the first <int> bp at the 5' end of each read when processing the
+--ignore <int>           Ignore the first <int> bp from the 5' end of Read 1 when processing the
                          methylation call string. This can remove e.g. a restriction enzyme site
-                         at the start of each read.
-
---comprehensive          Specifying this option will merge all four possible strand-specific
-                         methylation info into context-dependent output files. The default
+                         at the start of each read or any other source of bias (e.g. PBAT-Seq data).
+
+--ignore_r2 <int>        Ignore the first <int> bp from the 5' end of Read 2 of paired-end sequencing
+                         results only. Since the first couple of bases in Read 2 of BS-Seq experiments
+                         show a severe bias towards non-methylation as a result of end-repairing
+                         sonicated fragments with unmethylated cytosines (see M-bias plot), it is
+                         recommended that the first couple of bp of Read 2 are removed before
+                         starting downstream analysis. Please see the section on M-bias plots in the
+                         Bismark User Guide for more details.
+
+--comprehensive          Specifying this option will merge all four possible strand-specific
+                         methylation info into context-dependent output files. The default
+
                          contexts are:
                           - CpG context
                           - CHG context
@@ -3446,22 +4620,36 @@
 -o/--output DIR          Allows specification of a different output directory (absolute or relative
                          path). If not specified explicitely, the output will be written to the current directory.

+--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
+                         explicitly if Samtools is in the PATH already.
+
+--gzip                   The methylation extractor files (CpG_OT_..., CpG_OB_... etc) will be written out in
+                         a GZIP compressed form to save disk space. This option does not work on bedGraph and
+                         genome-wide cytosine reports as they are 'tiny' anyway.
+
 --version                Displays version information.

 -h/--help                Displays this help file and exits.

+--mbias_only             The methylation extractor will read the entire file but only output the M-bias table and plots as
+                         well as a report (optional) and then quit. Default: OFF.
+


 bedGraph specific options:
+==========================

 --bedGraph               After finishing the methylation extraction, the methylation output is written into a
                          sorted bedGraph file that reports the position of a given cytosine and its methylation
-                         state (in %, seem details below). The methylation extractor output is temporarily split up into
+                         state (in %, see details below). The methylation extractor output is temporarily split up into
                          temporary files, one per chromosome (written into the current directory or folder
                          specified with -o/--output); these temp files are then used for sorting and deleted
                          afterwards. By default, only cytosines in CpG context will be sorted. The option
-                         '--CX_context' may be used to report all cyosines irrespective of sequence context
-                         (this will take MUCH longer!).
+                         '--CX_context' may be used to report all cytosines irrespective of sequence context
+                         (this will take MUCH longer!). The default folder for temporary files during the sorting
+                         process is the output directory. The bedGraph conversion step is performed by the external
+                         module 'bismark2bedGraph'; this script needs to reside in the same folder as the
+                         bismark_methylation_extractor itself.


 --cutoff [threshold]     The minimum number of times a methylation state has to be seen for that nucleotide
@@ -3470,28 +4658,53 @@
 --remove_spaces          Replaces whitespaces in the sequence ID field with underscores to allow sorting.


---counts                 Adds two additional columns to the output file to enable further calculations:
-                             col 5: number of methylated calls
-                             col 6: number of unmethylated calls
-                         This option is required if '--cytosine_report' is specified (and will be set automatically if
-                         necessary).
-
 --CX/--CX_context        The sorted bedGraph output file contains information on every single cytosine that was covered
                          in the experiment irrespective of its sequence context. This applies to both forward and
                          reverse strands. Please be aware that this option may generate large temporary and output files
                          and may take a long time to sort (up to many hours). Default: OFF.
                          (i.e. Default = CpG context only).

+--buffer_size <string>   This allows you to specify the main memory sort buffer when sorting the methylation information.
+                         Either specify a percentage of physical memory by appending % (e.g. --buffer_size 50%) or
+                         a multiple of 1024 bytes, e.g. 'K' multiplies by 1024, 'M' by 1048576 and so on for 'T' etc.
+                         (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line.
+                         Defaults to 2G.
+
+--scaffolds/--gazillion  Users working with unfinished genomes sporting tens or even hundreds of thousands of
+                         scaffolds/contigs/chromosomes frequently encountered errors with pre-sorting reads to
+                         individual chromosome files. These errors were caused by the operating system's limit
+                         of the number of filehandles that can be written to at any one time (typically 1024; to
+                         find out this limit on Linux, type: ulimit -a).
+                         To bypass the limitation of open filehandles, the option --scaffolds does not pre-sort
+                         methylation calls into individual chromosome files. Instead, all input files are
+                         temporarily merged into a single file (unless there is only a single file), and this
+                         file will then be sorted by both chromosome AND position using the Unix sort command.
+                         Please be aware that this option might take a looooong time to complete, depending on
+                         the size of the input files, and the memory you allocate to this process (see --buffer_size).
+                         Nevertheless, it seems to be working.
+
+--ample_memory           Using this option will not sort chromosomal positions using the UNIX 'sort' command, but will
+                         instead use two arrays to sort methylated and unmethylated calls. This may result in a faster
+                         sorting process of very large files, but this comes at the cost of a larger memory footprint
+                         (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB
+                         of RAM). Due to overheads in creating and looping through these arrays it seems that it will
+                         actually be *slower* for small files (few million alignments), and we are currently testing at
+                         which point it is advisable to use this option. Note that --ample_memory is not compatible
+                         with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).
+


 Genome-wide cytosine methylation report specific options:
+=========================================================

 --cytosine_report        After the conversion to bedGraph has completed, the option '--cytosine_report' produces a
                          genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based
                          chromosome coordinates (zero-based cords are optional) and reports CpG context only (all
                          cytosine context is optional). The output considers all Cs on both forward and reverse strands and
                          reports their position, strand, trinucleotide content and methylation state (counts are 0 if not
-                         covered).
+                         covered). The cytosine report conversion step is performed by the external module
+                         'bedGraph2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor
+                         itself.

 --CX/--CX_context        The output file contains information on every single cytosine in the genome irrespective of
                          its context. This applies to both forward and reverse strands. Please be aware that this will
@@ -3519,19 +4732,29 @@


-The bedGraph output (optional) looks like this (tab-delimited):
-===============================================================
+The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):
+=========================================================================================================
+
+track type=bedGraph (header line)
+
 <chromosome>  <start position>  <end position>  <methylation percentage>


+The coverage output looks like this (tab-delimited, 1-based genomic coords):
+============================================================================
+
+<chromosome>  <start position>  <end position>  <methylation percentage>  <count methylated>  <count non-methylated>
+
+
+
 The genome-wide cytosine methylation output file is tab-delimited in the following format:
 ==========================================================================================
 <chromosome>  <position>  <strand>  <count methylated>  <count non-methylated>  <C-context>  <trinucleotide context>


-This script was last modified on 02 Oct 2012.
+This script was last modified on 25 November 2013.

 HOW_TO
 }
--- a/bismark_methylation_extractor.py	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_methylation_extractor.py	Mon Apr 14 16:42:38 2014 -0400
@@ -27,10 +27,10 @@
     # input options
     parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' )

-    parser.add_argument( '--infile', help='Input file in SAM format.' )
+    parser.add_argument( '--infile', help='Input file in SAM or BAM format.' )
     parser.add_argument( '--single-end', dest='single_end', action="store_true" )
     parser.add_argument( '--paired-end', dest='paired_end', action="store_true" )
-
+
     parser.add_argument( '--report-file', dest='report_file' )
     parser.add_argument( '--comprehensive', action="store_true" )
     parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" )
@@ -93,9 +93,15 @@
     if args.report_file:
         additional_opts += ' --report '

-
-    # Final command:
-    cmd = cmd % (output_dir, additional_opts, args.infile)
+    #detect BAM file, use samtools view if it is a bam file
+    f = open (args.infile, 'rb')
+    sig = f.read(4)
+    f.close()
+    if sig == '\x1f\x8b\x08\x04' :
+	cmd = cmd % (output_dir, additional_opts, '-')
+	cmd = 'samtools view %s | %s' % (args.infile, cmd )
+    else :
+        cmd = cmd % (output_dir, additional_opts, args.infile)

     # Run
     try:
--- a/bismark_methylation_extractor.xml	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_methylation_extractor.xml	Mon Apr 14 16:42:38 2014 -0400
@@ -1,11 +1,11 @@
-<tool id="bismark_methylation_extractor" name="Bismark" version="0.7.7.2">
-    <!-- Wrapper compatible with Bismark version 0.7.7 -->
-    <description>methylation extractor</description>
+<tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.10.1">
+    <!-- Wrapper compatible with Bismark version 0.10 -->
+    <description>Reports on methylation status of reads mapped by Bismark</description>
     <!--<version_command>bismark_methylation_extractor version</version_command>-->
     <requirements>
         <requirement type="set_environment">SCRIPT_PATH</requirement>
         <requirement type="package" version="0.12.8">bowtie</requirement>
-        <requirement type="package" version="2.0.0-beta7">bowtie2</requirement>
+        <requirement type="package" version="2.1.0">bowtie2</requirement>
     </requirements>
     <parallelism method="basic"></parallelism>
     <command interpreter="python">
@@ -19,7 +19,7 @@
             --single-end
         #else:
             --paired-end
-            $no_overlap
+            $singlePaired.no_overlap
         #end if

         #if str($ignore_bps) != "0":
@@ -81,7 +81,7 @@
     </command>
     <inputs>
         <!-- Input Parameters -->
-        <param name="input" type="data" format="sam" label="SAM file from Bismark bisulfid mapper" />
+        <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" />
         <conditional name="singlePaired">
             <param name="sPaired" type="select" label="Is this library mate-paired?">
               <option value="single">Single-end</option>
@@ -92,7 +92,6 @@
                 <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />
             </when>
         </conditional>
-
        <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" />
        <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info
 into context-dependent output files" help="" />
--- a/bismark_wrapper.py	Sun Feb 24 14:49:36 2013 -0500
+++ b/bismark_wrapper.py	Mon Apr 14 16:42:38 2014 -0400
@@ -1,6 +1,13 @@
 #!/usr/bin/env python

-import argparse, os, shutil, subprocess, sys, tempfile, fileinput
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+import shlex
+import tempfile
+import fileinput
 import fileinput
 from glob import glob

@@ -9,6 +16,8 @@
     sys.exit()

 def __main__():
+
+    print 'tempfile_location',tempfile.gettempdir()
     #Parse Command Line
     parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.')
     parser.add_argument( '-p', '--num-threads', dest='num_threads',
@@ -22,6 +31,8 @@
     parser.add_argument( '--own-file', dest='own_file', help='' )
     parser.add_argument( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
     parser.add_argument( '-O', '--output', dest='output' )
+
+
     parser.add_argument( '--output-report-file', dest='output_report_file' )
     parser.add_argument( '--suppress-header', dest='suppress_header', action="store_true" )

@@ -32,6 +43,7 @@
         help='The forward reads file in Sanger FASTQ or FASTA format.' )
     parser.add_argument( '-2', '--mate2', dest='mate2',
         help='The reverse reads file in Sanger FASTQ or FASTA format.' )
+    parser.add_argument( '--sort-bam', dest='sort_bam', action="store_true" )

     parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads',
         help='Additional output file with unmapped reads (single-end).' )
@@ -39,14 +51,16 @@
         help='File name for unmapped reads (left, paired-end).' )
     parser.add_argument( '--output-unmapped-reads-r', dest='output_unmapped_reads_r',
         help='File name for unmapped reads (right, paired-end).' )
-
-
+
+
     parser.add_argument( '--output-suppressed-reads', dest='output_suppressed_reads',
         help='Additional output file with suppressed reads (single-end).' )
     parser.add_argument( '--output-suppressed-reads-l', dest='output_suppressed_reads_l',
         help='File name for suppressed reads (left, paired-end).' )
     parser.add_argument( '--output-suppressed-reads-r', dest='output_suppressed_reads_r',
         help='File name for suppressed reads (right, paired-end).' )
+    parser.add_argument( '--stdout', dest='output_stdout',
+        help='File name for the standard output of bismark.' )


     parser.add_argument( '--single-paired', dest='single_paired',
@@ -93,7 +107,7 @@
     parser.add_argument( '--chunkmbs', type=int, default=512 )

     args = parser.parse_args()
-
+
     # Create bismark index if necessary.
     index_dir = ""
     if args.own_file:
@@ -119,8 +133,12 @@
         else:
             cmd_index = 'bismark_genome_preparation %s ' % ( tmp_index_dir )
         if args.bismark_path:
-            # add the path to the bismark perl scripts, that is needed for galaxy
-            cmd_index = os.path.join(args.bismark_path, cmd_index)
+            if os.path.exists(args.bismark_path):
+                # add the path to the bismark perl scripts, that is needed for galaxy
+                cmd_index = os.path.join(args.bismark_path, cmd_index)
+            else:
+                # assume the same directory as that script
+                cmd_index = 'perl %s' % os.path.join(os.path.realpath(os.path.dirname(__file__)), cmd_index)
         try:
             tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
             tmp_stderr = open( tmp, 'wb' )
@@ -147,15 +165,33 @@
             stop_err( 'Error indexing reference sequence\n' + str( e ) )
         index_dir = tmp_index_dir
     else:
-        index_dir = args.index_path
+        # bowtie path is the path to the index directory and the first path of the index file name
+        index_dir = os.path.dirname( args.index_path )

     # Build bismark command
+
+    """
+    Bismark requires a large amount of temporary disc space. If that is not available, for example on a cluster you can hardcode the
+    TMP to some larger space. It's not recommended but it works.
+    """
+    #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' )
     tmp_bismark_dir = tempfile.mkdtemp()
     output_dir = os.path.join( tmp_bismark_dir, 'results')
-    cmd = 'bismark %(args)s --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s'
+    cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s --gzip -o %(output_dir)s --quiet %(genome_folder)s %(reads)s'
+
+    if args.fasta:
+        # the query input files (specified as mate1,mate2 or singles) are FastA
+        cmd = '%s %s' % (cmd, '--fasta')
+    elif args.fastq:
+        cmd = '%s %s' % (cmd, '--fastq')
+
     if args.bismark_path:
         # add the path to the bismark perl scripts, that is needed for galaxy
-        cmd = os.path.join(args.bismark_path, cmd)
+        if os.path.exists(args.bismark_path):
+            cmd = os.path.join(args.bismark_path, cmd)
+        else:
+            # assume the same directory as that script
+            cmd = 'perl %s' % os.path.join(os.path.realpath(os.path.dirname(__file__)), cmd)

     arguments = {
         'genome_folder': index_dir,
@@ -178,7 +214,7 @@

     if not args.bowtie2:
         # use bowtie specific options
-        #additional_opts += ' --best ' # bug in bismark, --best is not available only --non-best, best is default
+        #additional_opts += ' --best ' # bug in bismark, --best is not available as option. Only --non-best, best-mode is activated by default
         if args.seed_mismatches:
             # --seedmms
             additional_opts += ' -n %s ' % args.seed_mismatches
@@ -188,7 +224,7 @@

     # alignment options
     if args.bowtie2:
-        additional_opts += ' -p %s --bowtie2 ' % args.num_threads
+        additional_opts += ' -p %s --bowtie2 ' % (int(args.num_threads/2)) #divides by 2 here since bismark will spawn 2 (original top and original bottom) jobs with -p threads each
         if args.seed_mismatches:
             additional_opts += ' -N %s ' % args.seed_mismatches
         if args.seed_len:
@@ -220,9 +256,11 @@

     arguments.update( {'args': additional_opts, 'reads': reads} )

-    # Final command:
+    # Final bismark command:
     cmd = cmd % arguments
-
+    print 'bismark_cmd:', cmd
+    #sys.stderr.write( cmd )
+    #sys.exit(1)
     # Run
     try:
         tmp_out = tempfile.NamedTemporaryFile().name
@@ -231,36 +269,37 @@
         tmp_stderr = open( tmp_err, 'wb' )
         proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
         returncode = proc.wait()
-        tmp_stderr.close()
-        # get stderr, allowing for case where it's very large
-        tmp_stderr = open( tmp_err, 'rb' )
-        stderr = ''
-        buffsize = 1048576
-        try:
-            while True:
-                stderr += tmp_stderr.read( buffsize )
-                if not stderr or len( stderr ) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
+
+        if returncode != 0:
+            tmp_stdout.close()
+            tmp_stderr.close()
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp_err, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+
+            raise Exception, stderr
         tmp_stdout.close()
         tmp_stderr.close()
-        if returncode != 0:
-            raise Exception, stderr
-
+
         # TODO: look for errors in program output.
     except Exception, e:
-        stop_err( 'Error in bismark:\n' + str( e ) )
-
+        stop_err( 'Error in bismark:\n' + str( e ) )

     # collect and copy output files
-    """
     if args.output_report_file:
         output_report_file = open(args.output_report_file, 'w+')
-        for line in fileinput.input(glob( os.path.join( output_dir, '*.txt') )):
+        for line in fileinput.input(glob( os.path.join( output_dir, '*report.txt') )):
             output_report_file.write(line)
         output_report_file.close()
-    """
+

     if args.output_suppressed_reads:
         shutil.move( glob(os.path.join( output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads )
@@ -276,7 +315,51 @@
     if args.output_unmapped_reads_r:
         shutil.move( glob(os.path.join( output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r )

-    shutil.move( glob( os.path.join( output_dir, '*.sam'))[0] , args.output)
+    try:
+        """
+            merge all bam files
+        """
+        #tmp_out = tempfile.NamedTemporaryFile( dir=output_dir ).name
+        tmp_stdout = open( tmp_out, 'wab' )
+        tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name
+        tmp_stderr = open( tmp_err, 'wb' )
+
+        tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name
+
+        bam_files = glob( os.path.join( output_dir, '*.bam') )
+        if len( bam_files ) > 1:
+            cmd = 'samtools merge -@ %s -f %s %s ' % ( args.num_threads, tmp_res, ' '.join( bam_files ) )
+
+            proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE )
+
+            returncode = proc.wait()
+            tmp_stdout.close()
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, open( tmp_stderr.name ).read()
+        else:
+	    tmp_res = bam_files[0]
+
+        bam_path = "%s" % tmp_res
+
+        if os.path.exists( bam_path ):
+	    if args.sort_bam:
+                cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output)
+	    else:
+                shutil.copy( bam_path, args.output )
+        else:
+            stop_err( 'BAM file no found:\n' + str( bam_path ) )
+
+
+
+    # TODO: look for errors in program output.
+    except Exception, e:
+        stop_err( 'Error in merging bam files:\n' + str( e ) )
+
+
+    if args.output_stdout:
+        # copy the temporary saved stdout from bismark
+        shutil.move( tmp_out, args.output_stdout )

     # Clean up temp dirs
     if args.own_file:
@@ -284,5 +367,7 @@
             shutil.rmtree( tmp_index_dir )
     if os.path.exists( tmp_bismark_dir ):
         shutil.rmtree( tmp_bismark_dir )
+    if os.path.exists( output_dir ):
+        shutil.rmtree( output_dir )

 if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Mon Apr 14 16:42:38 2014 -0400
@@ -0,0 +1,53 @@
+===============
+Bismark Wrapper
+===============
+
+Bismark_ uses Bowtie or Bowtie2 to map bisulfite-converted sequence reads to a reference genome and determine cytosine methylation states.
+
+Publication: http://www.ncbi.nlm.nih.gov/pubmed/21493656
+
+User Guide: http://www.bioinformatics.babraham.ac.uk/projects/bismark/Bismark_User_Guide_v0.7.12.pdf
+
+.. _bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/
+
+Preparation
+===========
+
+Create your reference index with *bismark_genome_preparation* in your normal Galaxy Bowtie2/Bowtie index directory. It will create a Bisulfite_Genome folder directly in your Bowtie2/Bowtie index directory.
+If you follow that approach you do not need to specify or modify an extra .loc file.
+The wrapper will extract the path to the Bisulfite_Genome folder from ./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc.
+
+=======
+History
+=======
+
+- v0.7: Initial public release
+- v0.7.8: update and add Tool Shed Integration
+- v0.7.11.1: change default output to BAM; from now on samtools is required
+- v0.7.11.2: added multi-threading to samtools (samtools >= 0.1.19 is required)
+- v0.7.12: upgrade to bismark 0.7.12 and fix a major slowdown
+- v0.7.12.1: define a dependency on samtools 0.1.19
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
--- a/tool_dependencies.xml	Sun Feb 24 14:49:36 2013 -0500
+++ b/tool_dependencies.xml	Mon Apr 14 16:42:38 2014 -0400
@@ -1,12 +1,15 @@
 <?xml version="1.0"?>
 <tool_dependency>
+    <package name="samtools" version="0.1.19">
+        <repository changeset_revision="36aa94676939" name="package_samtools_0_1_19" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
     <set_environment version="1.0">
-        <environment_variable name="SCRIPT_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+        <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
     </set_environment>
-    <package name="bowtie2" version="2.0.0-beta7">
+    <package name="bowtie2" version="2.1.0">
         <install version="1.0">
             <actions>
-                <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.0.0-beta7/bowtie2-2.0.0-beta7-source.zip</action>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.1.0/bowtie2-2.1.0-source.zip</action>
                 <action type="shell_command">make</action>
                 <action type="move_file">
                     <source>bowtie2</source>
@@ -22,7 +25,7 @@
                 </action>
                 <action type="shell_command">chmod +x $INSTALL_DIR/bin/bowtie2</action>
                 <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
                 </action>
             </actions>
         </install>
@@ -48,7 +51,7 @@
                     <destination>$INSTALL_DIR/bin</destination>
                 </action>
                 <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
                 </action>
             </actions>
         </install>
@@ -56,6 +59,3 @@
         </readme>
     </package>
 </tool_dependency>
-
-
-