changeset 17:59a61c1d5aee draft default tip

Uploaded
author eugen
date Thu, 16 Aug 2012 08:11:06 -0400
parents bfda928ca5d8
children
files methylation_extractor
diffstat 1 files changed, 850 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/methylation_extractor	Thu Aug 16 08:11:06 2012 -0400
@@ -0,0 +1,850 @@
+#!/usr/bin/perl
+use Getopt::Long;
+use Cwd;
+
+my @filenames;
+my %counting;
+
+my %fhs;
+my %bases;
+my ($single,$paired,$output_file) = process_commandline();
+my $parent_dir = getcwd;
+
+process_Bismark_results_file();
+
+sub process_commandline{
+  my $help;
+  my $single_end;
+  my $paired_end;
+  my $ignore;
+  my $genomic_fasta;
+  my $full;
+  my $report;
+  my $extractor_version;
+  my $no_overlap;
+  my $merge_non_CpG;
+  my $vanilla;
+  my $output_file;
+
+  my $command_line = GetOptions ('p|paired-end' => \$paired_end,
+				 's|single-end' => \$single_end,
+				 'o|output=s' => \$output_file
+				);
+
+  ### EXIT ON ERROR if there were errors with any of the supplied options
+  unless ($command_line){
+    die "Please respecify command line options\n";
+  }
+
+ 
+  ### no files provided
+  unless (@ARGV){
+    die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
+  }
+  @filenames = @ARGV;
+
+  if ($single_end){
+    $paired_end = 0;   ### SINGLE END ALIGNMENTS
+  }
+  elsif ($paired_end){
+    $single_end = 0;   ### PAIRED-END ALIGNMENTS
+  }
+  else{
+    die "Please specify whether the supplied file(s) are in Bismark single-end or paired-end format\n\n";
+  }
+
+  return ($single_end,$paired_end, $output_file);
+}
+
+sub process_Bismark_results_file{
+  chdir $parent_dir or die "Unable to move to initial working directory $!\n";
+  foreach my $filename (@filenames){
+    %fhs = ();
+    %counting =(
+		total_meCHG_count => 0,
+		total_meCHH_count => 0,
+		total_meCpG_count => 0,
+		total_unmethylated_CHG_count => 0,
+		total_unmethylated_CHH_count => 0,
+		total_unmethylated_CpG_count => 0,
+		sequences_count => 0,
+	       );
+    warn "\nNow reading in Bismark result file $filename\n\n";
+    open (IN,$filename) or die "Can't open file $filename: $!\n";
+
+    if($output_file){
+      $full_output=$output_file;
+    }
+    else{
+      #print "lbubb";
+      my($dir, $output_filename, $extension) = $filename =~ m/(.*\/)(.*)(\..*)$/;  
+      my $cpg_output = my $other_c_output = my $full_output = $output_filename;
+      $full_output =~ s/^/Full_/;
+      $full_output = $dir."/".$full_output.".bed";
+    }
+    
+    ### OPENING OUTPUT-FILEHANDLES
+    open ($fhs{full_output},'>',$full_output) or die "Failed to write to $full_output $!\n";
+  
+    ### Write header
+    printf {$fhs{full_output}} ("chr\tpos\tstrand\tcontext\tcoverage\tmethylated\tpercentage methylated\n");
+ 
+    my $methylation_call_strings_processed = 0;
+    my $line_count = 0;
+
+
+    ### PROCESSING SINGLE-END RESULT FILES
+    if ($single){
+   
+		while (<IN>){
+	 		 ### SAM format can either start with header lines (starting with @) or start with alignments directly
+		  if (/^\@/){ # skipping header lines (starting with @)
+		    warn "skipping SAM header line:\t$_";
+		    next;
+		  }
+
+		  ++$line_count;
+		  warn "Processed lines: $line_count\n" if ($line_count%500000==0);
+	
+		  my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+	  
+		  $chrom = "chrC" if ("\U$chrom" =~ /CHLOR.*/);
+		  $chrom = "chrM" if ("\U$chrom" =~ /MITOCH.*/);
+
+		  if ("\U$chrom" !~ /CHR.*/){
+		    $chrom = "chr".$chrom;
+		  }
+
+		  if (!defined $last_chrom){
+		    print "Now parsing data from $chrom\n";
+		  }		
+
+		  if (defined($last_chrom) and $last_chrom ne $chrom){
+		    print "Parsed: $last_chrom\n";
+		    write_hash($last_chrom);
+		    print "Now parsing data from $chrom\n";
+		  }
+
+		  $meth_call =~ s/^XM:Z://;
+		  $read_conversion =~ s/^XR:Z://;
+		  $genome_conversion =~ s/^XG:Z://;	
+		  my $strand;
+		  chomp $genome_conversion;
+
+	
+	  	my $index;
+		  if ($meth_call){
+		    if ($read_conversion eq 'CT' and $genome_conversion eq 'CT'){ ## original top strand
+		      $index = 0;
+		      $strand = '+';
+		    }	
+		    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT'){ ## complementary to original top strand
+		      $index = 1;
+		      $strand = '-';
+		    }
+		    elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA'){ ## complementary to original bottom strand
+		      $index = 2;
+		      $strand = '+';
+		    }
+		    elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA'){ ## original bottom strand
+		      $index = 3;
+		      $strand = '-';
+		    }
+		    else {
+		      die "Unexpected combination of read and genome conversion: $read_conversion / $genome_conversion\n";
+		    }
+	
+		    ### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
+		    if ($strand eq '-'){
+		      $meth_call = reverse $meth_call;
+		    }
+		  }
+
+
+		  ### printing out the methylation state of every C in the read
+		  hash_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index);
+		
+		  ++$methylation_call_strings_processed; # 1 per single-end result
+	
+		  $last_chrom = $chrom;
+    }
+      write_hash($last_chrom);
+      print "\n\nDone!";
+  }
+
+    ### PROCESSING PAIRED-END RESULT FILES
+    elsif ($paired){
+	while (<IN>){
+	  ### SAM format can either start with header lines (starting with @) or start with alignments directly
+	  if (/^\@/){ # skipping header lines (starting with @)
+	    warn "skipping SAM header line:\t$_";
+	    next;
+	  }
+	
+	  ++$line_count;
+	  # example paired-end reads in SAM format (2 consecutive lines)
+	  # 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
+	  # 1_R1/2	131	5	103172417	255	40M	=	103172224	-233	TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:6	XX:Z:T5T1T9T9T7T3	XM:Z:h.....h.h.........h.........h.......h...	XR:Z:GA	XG:Z:CT
+	
+	
+	  my ($id_1,$chrom,$start_read_1,$meth_call_1,$first_read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
+	  $_ = <IN>; # reading in the paired read
+	  my ($id_2,$start_read_2,$meth_call_2,$second_read_conversion) = (split("\t"))[0,3,13,14];
+		
+
+	  $chrom = "chrC" if ("\U$chrom" =~ /CHLOR.*/);
+	  $chrom = "chrM" if ("\U$chrom" =~ /MITOCH.*/);
+	  if ("\U$chrom" !~ /CHR.*/){
+	    $chrom = "chr".$chrom;
+	  }
+	  if (!defined $last_chrom){
+	    print "Now parsing data from $chrom\n";
+	  }		
+	  if (defined($last_chrom) and $last_chrom ne $chrom){
+	    print "Parsed: $last_chrom\n";
+	    write_hash($last_chrom);
+	    print "Now parsing data from $chrom\n";
+	  }
+	  $meth_call_1 =~ s/^XM:Z://;
+	  $meth_call_2 =~ s/^XM:Z://;
+	  $first_read_conversion =~ s/^XR:Z://;
+	  $second_read_conversion =~ s/^XR:Z://;
+	  $genome_conversion =~ s/^XG:Z://;	
+	  chomp $genome_conversion;	
+	 
+		
+	  my $index;
+	  my $strand;
+
+	  if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT'){
+	    $index = 0; ## this is OT
+	    $strand = '+';
+	  }	
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT'){
+	    $index = 1; ## this is CTOT
+	    $strand = '-';
+	  }
+	  elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA'){
+	    $index = 2; ## this is CTOB
+	    $strand = '+';
+	  }
+	  elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA'){
+	    $index = 3; ## this is OB
+	    $strand = '-';
+	  }
+	  else {
+	    die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
+	  }
+
+	  ### reversing the methylation call of the read that was reverse-complemented
+	  if ($strand eq '+'){
+	   # $meth_call_2 = reverse $meth_call_2;
+	  }
+	  else{
+	    #$meth_call_1 = reverse $meth_call_1;
+	  }
+
+	  if ($meth_call_1 and $meth_call_2){
+	    ### Clipping off the first <int> number of bases from the methylation call strings as specified with '--ignore <int>'
+	    my $end_read_1;
+	
+	    if ($strand eq '+'){
+	      $end_read_1 = $start_read_1+length($meth_call_1)-1;
+	      #$start_read_2 += length($meth_call_2)-1; ## $meth_call_2 is already shortened! Passing on the start position on the reverse strand
+	    }
+	    else{
+				$end_read_1 = $start_read_1+length($meth_call_1)-1;
+	      #$end_read_1 = $start_read_1;	
+	      #$start_read_1 += length($meth_call_1)-1; ## $meth_call_1 is already shortened! Passing on the start position on the reverse strand
+	    }
+
+	    if ($strand eq '+'){
+	      ## we first pass the first read which is in + orientation on the forward strand
+	      hash_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0);
+	
+	      # we next pass the second read which is in - orientation on the reverse strand
+	      ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
+	      hash_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$end_read_1);
+	    }
+	
+	    else{
+	      ## we first pass the first read which is in - orientation on the reverse strand
+	      hash_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0);
+	
+	      # we next pass the second read which is in + orientation on the forward strand
+	      ### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
+	      hash_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$end_read_1);
+	    }
+	
+	    $methylation_call_strings_processed += 2; # paired-end = 2 methylation calls
+	  }	
+	  $last_chrom = $chrom;
+      }
+      write_hash($last_chrom);
+      print "\n\nDone!";
+    }
+    else{
+      die "Single-end or paired-end reads not specified properly\n";
+    }
+
+    print "\n\nProcessed $line_count lines from $filename in total\n";
+    print "Total number of methylation call strings processed: $methylation_call_strings_processed\n\n";
+  }
+}
+
+
+sub hash_individual_C_methylation_states_paired_end_files{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$end_read_1) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  #my $methyl_CHG_count = 0;
+  #my $methyl_CHH_count = 0;
+  #my $methyl_CpG_count = 0;
+  #my $unmethylated_CHG_count = 0;
+  #my $unmethylated_CHH_count = 0;
+  #my $unmethylated_CpG_count = 0;
+
+  ### THIS IS AN OPTIONAL 2-CONTEXT (CpG and $methylation_calls[$indexnon-CpG) SECTION IF --merge_non_CpG was specified
+
+if ($strand eq '+') {		      
+	  for my $index (0..$#methylation_calls) {
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    #if ($start+$index >= $end_read_1) {
+	    #  return;
+	    #}
+
+	  if ($methylation_calls[$index] eq 'X') {
+	      #$counting{total_meCHG_count}++;
+		if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      #$counting{total_unmethylated_CHG_count}++;
+		if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      #$counting{total_meCpG_count}++;
+		if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      #$counting{total_unmethylated_CpG_count}++;
+	      	if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      #$counting{total_meCHH_count}++;
+	      	if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      #$counting{total_unmethylated_CHH_count}++;
+	      	if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    }
+	    elsif ($methylation_calls[$index] eq '.'){}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    #if ($start-$index <= $end_read_1) {
+	    #  return;
+	    #}
+=head1
+			if ($start-$index == 100){
+				print "\nStrand: -   ID: $id\n";
+			}
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      #$counting{total_meCHG_count}++;
+	  if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      #$counting{total_unmethylated_CHG_count}++;
+	      	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      #$counting{total_meCpG_count}++;
+	  if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      #$counting{total_unmethylated_CpG_count}++;
+	     	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	     # $counting{total_meCHH_count}++;
+	      	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      #$counting{total_unmethylated_CHH_count}++;
+	      	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+=cut
+	  if ($methylation_calls[$index] eq 'X') {
+	      #$counting{total_meCHG_count}++;
+	  if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      #$counting{total_unmethylated_CHG_count}++;
+	      	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      #$counting{total_meCpG_count}++;
+	  if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      #$counting{total_unmethylated_CpG_count}++;
+	     	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	     # $counting{total_meCHH_count}++;
+	      	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      #$counting{total_unmethylated_CHH_count}++;
+	      	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "This cannot happen $!\n";
+	}
+
+=head1
+	if ($strand eq '+') {		      
+	  for my $index (0..$#methylation_calls) {
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    #if ($start+$index >= $end_read_1) {
+	    #  return;
+	    #}
+	    if ($methylation_calls[$index] eq 'X') {
+	      #$counting{total_meCHG_count}++;
+		if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      #$counting{total_unmethylated_CHG_count}++;
+		if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      #$counting{total_meCpG_count}++;
+		if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      #$counting{total_unmethylated_CpG_count}++;
+	      	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	      #$counting{total_meCHH_count}++;
+	      	if (defined $bases{$start+$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      #$counting{total_unmethylated_CHH_count}++;
+	      	if (defined $bases{$start+$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start+$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    }
+	    elsif ($methylation_calls[$index] eq '.'){}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} elsif ($strand eq '-') {
+	  for my $index (0..$#methylation_calls) {
+	
+	    ### Returning as soon as the methylation calls start overlapping
+	    #if ($start-$index <= $end_read_1) {
+	    #  return;
+	    #}
+
+	    if ($methylation_calls[$index] eq 'X') {
+	      #$counting{total_meCHG_count}++;
+	      	if (defined $bases{$start-$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'x') {
+	      #$counting{total_unmethylated_CHG_count}++;
+	      	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'Z') {
+	      #$counting{total_meCpG_count}++;
+	      	if (defined $bases{$start-$index}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'z') {
+	      #$counting{total_unmethylated_CpG_count}++;
+	     	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'H') {
+	     # $counting{total_meCHH_count}++;
+	      	if (defined $bases{$start-$=cutindex}{"+"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"+"}{$methylation_calls[$index]}=1;
+		}
+	    } elsif ($methylation_calls[$index] eq 'h') {
+	      #$counting{total_unmethylated_CHH_count}++;
+	      	if (defined $bases{$start-$index}{"-"}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}++;
+		}else{
+		  $bases{$start-$index}{"-"}{$methylation_calls[$index]}=1;
+		}
+	    }
+	    elsif ($methylation_calls[$index] eq '.') {}
+	    else{
+	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	    }
+	  }
+	} else {
+	  die "This cannot happen $!\n";
+	}
+=cut
+} 
+
+
+
+sub hash_individual_C_methylation_states_single_end{
+
+  my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index) = @_;
+  my @methylation_calls = split(//,$meth_call);
+
+  #################################################################
+  ### . for bases not involving cytosines                       ###
+  ### X for methylated C in CHG context (was protected)         ###
+  ### x for not methylated C in CHG context (was converted)     ###
+  ### H for methylated C in CHH context (was protected)         ###
+  ### h for not methylated C in CHH context (was converted)     ###
+  ### Z for methylated C in CpG context (was protected)         ###
+  ### z for not methylated C in CpG context (was converted)     ###
+  #################################################################
+
+  #my $methyl_CHG_count = 0;
+  #my $methyl_CHH_count = 0;
+  #my $methyl_CpG_count = 0;
+  #my $unmethylated_CHG_count = 0;
+  #my $unmethylated_CHH_count = 0;
+  #my $unmethylated_CpG_count = 0;
+
+### single-file CpG and other-context output
+    if ($strand eq '+') {
+      for my $index (0..$#methylation_calls) {
+
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+	if ($methylation_calls[$index] eq 'X') {
+	  #$counting{total_meCHG_count}++;
+	  if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  #$counting{total_unmethylated_CHG_count}++;
+	  if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  #$counting{total_meCpG_count}++;
+	  if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  #$counting{total_unmethylated_CpG_count}++;
+	 if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  #$counting{total_meCHH_count}++;
+	  if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  #$counting{total_unmethylated_CHH_count}++;
+	  if (defined $bases{$start+$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start+$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq '.') {
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    elsif ($strand eq '-') {
+
+      unless ($ignore){  ### if --ignore was specified the start position has already been corrected
+	$start += length($meth_call)-1;
+      }
+
+      for my $index (0..$#methylation_calls) {
+	### methylated Cs (any context) will receive a forward (+) orientation
+	### not methylated Cs (any context) will receive a reverse (-) orientation
+	if ($methylation_calls[$index] eq 'X') {
+	  #$counting{total_meCHG_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'x') {
+	  #$counting{total_unmethylated_CHG_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'Z') {
+	  #$counting{total_meCpG_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'z') {
+	  #$counting{total_unmethylated_CpG_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'H') {
+	  #$counting{total_meCHH_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq 'h') {
+	  #$counting{total_unmethylated_CHH_count}++;
+	  if (defined $bases{$start-$index}{$strand}{$methylation_calls[$index]}){
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}++;
+	  }else{
+		  $bases{$start-$index}{$strand}{$methylation_calls[$index]}=1;
+	  }
+	}
+	elsif ($methylation_calls[$index] eq '.'){
+	}
+	else{
+	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
+	}
+      }
+    }
+    else {
+      die "This cannot happen (or at least it shouldn't....)\n";
+    }
+}
+
+sub write_hash(){
+	my ($chrom) = @_;
+	
+	for my $k1 (sort {$a <=> $b} (keys %bases)){
+		my $coverage = 0;
+		my $methylated = 0;
+		my $context;
+		for my $k2 (keys %{$bases{$k1}}){
+		   
+		      for my $k3 (keys %{$bases{$k1}{$k2}}){
+			      if ($k3 eq "Z" or $k3 eq "z"){
+				      $context = "CPG";
+			      }elsif ($k3 eq "H" or $k3 eq "h"){
+				      $context = "CHH";
+			      }elsif ($k3 eq "X" or $k3 eq "x"){
+				      $context = "CHG";
+			      }
+
+			      #test, if $k3 is upper-case (==methylated base)			
+			      if ($k3 eq "\U$k3"){
+				      $methylated+=$bases{$k1}{$k2}{$k3};
+			      }
+			      $coverage+=$bases{$k1}{$k2}{$k3};
+
+		      }
+		      my $percentage = ($methylated / $coverage)*100;
+
+		      #####################################################################################
+		      ### BED file format tabs:								###
+		      ### 1. Chromosome									###
+		      ### 2. Start position								###
+		      ### 3. Strand									###
+		      ### 4. Context (CpG,CHH,CHG)							###
+		      ### 5. Coverage									###
+		      ### 6. Methylated Cs								###
+		      ### 7. Percentage methylated Cs							###
+		      #####################################################################################
+
+		      printf {$fhs{full_output}} ("%s\t%u\t%s\t%s\t%u\t%u\t%.2f\n", $chrom, $k1, $k2, $context, $coverage, $methylated, $percentage );	
+		}
+	}
+	%bases = ();	
+}
+
+
+sub write_hash_back(){
+	my ($chrom) = @_;
+	
+	for my $k1 (sort (keys %bases)){
+		my $coverage = 0;
+		my $methylated = 0;
+		my $context;
+		my $k2 = (keys %{$bases{$k1}})[0];
+		for my $k3 (keys %{$bases{$k1}{$k2}}){
+			if ($k3 eq "Z" or $k3 eq "z"){
+				$context = "CPG";
+			}elsif ($k3 eq "H" or $k3 eq "h"){
+				$context = "CHH";
+			}elsif ($k3 eq "X" or $k3 eq "x"){
+				$context = "CHG";
+			}
+
+			#test, if $k3 is upper-case (==methylated base)			
+			if ($k3 eq "\U$k3"){
+				$methylated+=$bases{$k1}{$k2}{$k3};
+			}
+			$coverage+=$bases{$k1}{$k2}{$k3};
+
+		}
+		my $percentage = ($methylated / $coverage)*100;
+
+		###################################################################################
+		### BED file format tabs:							###
+		### 1. Chromosome								###
+		### 2. Start position								###
+		### 3. Strand									###
+		### 4. Context (CpG,CHH,CHG)							###
+		### 5. Coverage									###
+		### 6. Methylated Cs								###
+		### 7. Percentage methylated Cs							###
+		###################################################################################
+
+		printf {$fhs{full_output}} ("%s\t%u\t%s\t%s\t%u\t%u\t%.2f\n", $chrom, $k1, $k2, $context, $coverage, $methylated, $percentage );	
+
+	}
+	%bases = ();	
+}
+	
+
+