changeset 22:fff7de4f9fed draft

Uploaded
author melpetera
date Thu, 09 May 2019 08:17:53 -0400
parents 243347918a63
children eab2c4b805a2
files ACF/Analytic_correlation_filtration.pl ACF/IonFiltration.pm ACF/analytic_correlation_filtration.xml ACF/lib/IonFiltration.pm
diffstat 4 files changed, 672 insertions(+), 677 deletions(-) [+]
line wrap: on
line diff
--- a/ACF/Analytic_correlation_filtration.pl	Thu Apr 11 08:42:38 2019 -0400
+++ b/ACF/Analytic_correlation_filtration.pl	Thu May 09 08:17:53 2019 -0400
@@ -8,8 +8,8 @@
 
 #Personnal packages
 use FindBin ; ## Allows you to locate the directory of original perl script
-use lib $FindBin::Bin;
-use lib "$FindBin::Bin/../lib";
+#use lib $FindBin::Bin;
+use lib "$FindBin::Bin/lib";
 use IonFiltration;
 
 my ($file, $mass_file, $opt, $dataMatrix, $combined_DMVM, $repres_opt, $rt_threshold, $mass_threshold, $output_sif, $output_tabular, $correl_threshold, $intensity_threshold, $intensity_pourc); #Options to complete
--- a/ACF/IonFiltration.pm	Thu Apr 11 08:42:38 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,668 +0,0 @@
-#!usr/bin/perl
-package IonFiltration;
-
-### Perl modules
-use strict;
-use warnings;
-
-
-
-
-
-
-########################################################################
-### Création of a hash containing all adduits and fragments possible ###
-########################################################################
-
-
-sub MassCollecting{
-	
-	my $mass_file = $_[0];
-	my %hmass;
-
-	open (F1, $mass_file);
-	
-	while(my $line = <F1>){
-		chomp $line;
-		my @tline = split(/[\t;]/, $line);
-		if(defined($hmass{$tline[2]})){
-			print "Il existe déjà une même différence de masse : $tline[2] !\n";
-		}
-		$hmass{$tline[1]}{$tline[2]}=$tline[0];
-	}
-	
-	close F1;
-#	my $count=0;
-#	foreach my $k (keys %hmass){
-#		$count++;
-#	}
-	
-	return %hmass;
-	
-	
-}
-
-
-
-
-
-
-
-########################################################
-### Creation of a sif table + correlation filtration ###
-########################################################
-
-
-sub sifTableCreation{
-	
-	my $file = $_[0];
-	my $output_sif = $_[1];
-	my $opt = $_[2];
-	my $rt_threshold = $_[3];
-	my $mass_threshold = $_[4];
-	my $correl_threshold = $_[5];
-	my $dataMatrix = $_[6];
-	my $output_tabular = $_[7];
-	my $combined_DMVM = $_[8];
-	my $repres_opt = $_[9];
-	my $intensity_threshold = $_[10];
-	my $intensity_pourc = $_[11];
-	my $refhmass = $_[12];
-	
-	
-	
-	
-	my %hheader_file;
-	my %hduplicate;
-	
-	my %hcorrelgroup;
-	my $groupct=1;
-
-	
-	my $linenb3=0;
-	my %hheader_line;
-	my %hrtmz;
-	
-	open (F5, $combined_DMVM);
-	while(my $line = <F5>){
-		chomp $line;
-		my @tline = split(/\t/, $line);
-		
-		if($linenb3 == 0){
-			for(my $i=0; $i<scalar(@tline);$i++){
-				my $a = $tline[$i];
-				$hheader_line{$a}=$i;
-			}
-		}
-		else{
-			if(defined($hheader_line{mzmed})){
-				my $b = $tline[$hheader_line{mzmed}];
-				$hrtmz{$tline[0]}{mz}=$b;
-			}
-			else{
-				my $b = $tline[$hheader_line{mz}];
-				$hrtmz{$tline[0]}{mz}=$b;
-			}
-			if(defined($hheader_line{rtmed})){
-				my $d = $tline[$hheader_line{rtmed}];
-				$hrtmz{$tline[0]}{rt}=$d;
-			}
-			else{
-				my $d = $tline[$hheader_line{rt}];
-				$hrtmz{$tline[0]}{rt}=$d;
-			}
-		}
-		
-		$linenb3 ++;
-	}
-	close F5;
-	
-	
-	my $linenb=0;
-	
-	open (F1, $file) or die "Impossible to open $file\n";
-	open(F2, ">$output_sif") or die "Impossible to open $output_sif\n";
-	
-	while(my $line = <F1>){
-		chomp $line;
-		my @tline = split(/\t/, $line);
-		
-		
-		###############################
-		### Création of a sif table ###
-		###############################
-		
-		if($linenb == 0){
-			for(my $i=0; $i<scalar(@tline);$i++){
-				my $a = $tline[$i];
-				$hheader_file{$i}=$a;
-			}
-		}
-		else{
-			for(my $i=1; $i<scalar(@tline);$i++){
-				my $a=$tline[0];
-				my $b=$hheader_file{$i};
-				my $coef=$tline[$i];
-				
-				
-				if($a eq $b){
-	#				print "This is a correlation between A ($a) and A ($b) !\n"
-				}
-				else{
-					
-					
-					#########################
-					### Remove duplicates ###
-					#########################
-					
-					my $y = $a."/".$b;
-					my $z = $b."/".$a;
-					
-					if((!(defined($hduplicate{$y}))) && (!(defined($hduplicate{$z})))){
-						
-						$hduplicate{$y}=1;
-						
-						if($coef > $correl_threshold){
-						
-							print F2 "$a\t$coef\t$b\n";
-							
-							my $count=0;
-							
-							######################################################
-							### Analytic correlation filtrering follow options ###
-							######################################################
-							
-							
-							
-								my $amass=$hrtmz{$a}{mz};
-								my $atemp=$hrtmz{$a}{rt};
-									my $bmass= $hrtmz{$b}{mz};
-									my $btemp=$hrtmz{$b}{rt};
-									my $diff = $amass-$bmass;
-									$diff = abs($diff);
-								
-									### Option 1: Don't take into acount mass information ###
-					
-									if($opt == 1){
-										my $btplus = $btemp + $rt_threshold;
-										my $btmoins = $btemp - $rt_threshold;
-										if(($btmoins <= $atemp) && ($atemp <= $btplus)){
-											foreach my $k (keys %hcorrelgroup){
-												
-												if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
-													$hcorrelgroup{$k}{$a}=1;
-													$hcorrelgroup{$k}{$b}=1;
-													$count++;
-													last;
-												}
-											}
-											if($count == 0){
-												my $groupnb="group".$groupct;
-												$hcorrelgroup{$groupnb}{$a}=1;
-												$hcorrelgroup{$groupnb}{$b}=1;
-												$groupct ++;
-											}
-											
-										}
-									}
-									
-								
-									### Option 2: Check that all mass differences are include in a specific list taking into account RT information ###
-									
-									elsif($opt == 2){
-										
-										my $print = 0;
-										foreach my $s (keys %$refhmass){
-											foreach my $r (keys %{%$refhmass{$s}}){
-												my $rm = $r - $mass_threshold;
-												my $rp = $r + $mass_threshold;
-												if(($diff <= $rp) && ($diff >= $rm)){
-													if($print == 0){
-														my $btplus = $btemp + $rt_threshold;
-														my $btmoins = $btemp - $rt_threshold;
-														
-														if(($btmoins <= $atemp) && ($atemp <= $btplus)){
-#														foreach my $s (keys %{%$refhmass{$r}}){
-						#									print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n";
-															foreach my $k (keys %hcorrelgroup){
-																if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
-																	$hcorrelgroup{$k}{$a}=1;
-																	$hcorrelgroup{$k}{$b}=1;
-#																	$hcorrelgroup{$k}{frag}.="#".$s;
-																	$count++;
-																	last;
-																}
-															}
-															if($count == 0){
-																my $groupnb="group".$groupct;
-																$hcorrelgroup{$groupnb}{$a}=1;
-																$hcorrelgroup{$groupnb}{$b}=1;
-#																$hcorrelgroup{$groupnb}{frag}.="#".$s;
-																$groupct ++;
-																
-															}
-															$print = 1;
-														}
-														
-													}
-												}
-											}
-										}
-									}
-									
-								
-									### Option 3: Check that all mass differences are include in a specific list, ignoring RT information ###
-					
-									elsif($opt == 3){
-										
-										my $print = 0;
-										foreach my $s (keys %$refhmass){
-											foreach my $r (keys %{%$refhmass{$s}}){
-												my $rm = $r - $mass_threshold;
-												my $rp = $r + $mass_threshold;
-												if(($diff <= $rp) && ($diff >= $rm)){
-													if($print == 0){
-														
-#													foreach my $s (keys %{%$refhmass{$r}}){
-						#								print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n";
-														foreach my $k (keys %hcorrelgroup){
-															if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
-																$hcorrelgroup{$k}{$a}=1;
-																$hcorrelgroup{$k}{$b}=1;
-#																$hcorrelgroup{$k}{frag}.="#".$s;
-																$count++;
-																last;
-															}
-														}
-														if($count == 0){
-															my $groupnb="group".$groupct;
-															$hcorrelgroup{$groupnb}{$a}=1;
-															$hcorrelgroup{$groupnb}{$b}=1;
-#															$hcorrelgroup{$groupnb}{frag}.="#".$s;
-															$groupct ++;
-														}
-														$print = 1;
-													}
-												}
-											}
-										}
-									}
-								}
-
-
-					}
-				}
-			}
-		}
-		$linenb ++;
-	}
-	close F1;
-	close F2;
-	
-
-	
-	
-	#############################################
-	### Join groups that have been subdivided ###
-	#############################################
-	
-	my @tdelete;
-	
-	foreach my $k (keys %hcorrelgroup){
-		foreach my $i (keys %{$hcorrelgroup{$k}}){
-			foreach my $v (keys %hcorrelgroup){
-				my $count = 0;
-				if ($v ne $k){
-					foreach my $w (keys %{$hcorrelgroup{$v}}){
-#						if($w ne "frag"){
-							if($w eq $i){
-								$count = 1;
-								push(@tdelete, $v);
-							}	
-#						}
-					}
-				}
-				if($count == 1){
-					foreach my $w (keys %{$hcorrelgroup{$v}}){
-						$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
-					}
-					delete($hcorrelgroup{$v});
-				}
-			}
-		}
-	}
-	
-	foreach my $t (@tdelete){
-		delete($hcorrelgroup{$t});
-	}
-	
-	
-	### Do it twice to see if it fix the problem of unmerge groups
-	
-	foreach my $k (keys %hcorrelgroup){
-		foreach my $i (keys %{$hcorrelgroup{$k}}){
-			foreach my $v (keys %hcorrelgroup){
-				my $count = 0;
-				if ($v ne $k){
-					foreach my $w (keys %{$hcorrelgroup{$v}}){
-#						if($w ne "frag"){
-							if($w eq $i){
-								$count = 1;
-								push(@tdelete, $v);
-							}	
-#						}
-					}
-				}
-				if($count == 1){
-					foreach my $w (keys %{$hcorrelgroup{$v}}){
-						$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
-					}
-					delete($hcorrelgroup{$v});
-				}
-			}
-		}
-	}
-	
-	foreach my $t (@tdelete){
-		delete($hcorrelgroup{$t});
-	}
-	
-	
-	
-	##########################################
-	### Addition of annotation information ###
-	##########################################
-	
-	foreach my $k (keys %hcorrelgroup){
-		foreach my $i (keys %{$hcorrelgroup{$k}}){
-			foreach my $j (keys %{$hcorrelgroup{$k}}){
-				my $count = 0;
-				if ($i ne $j){
-					
-					my $a = $hrtmz{$i}{mz};
-					my $b = $hrtmz{$j}{mz};
-					
-					my $diff = $a - $b;
-					my $sign;
-					if($diff>0){
-						$sign="+";
-					}
-					if($diff<0){
-						$sign="-";
-					}
-					$diff = abs($diff);
-					
-					foreach my $z (keys %$refhmass){
-						
-						foreach my $y (keys %{%$refhmass{$z}}){
-							my $ym = $y - $mass_threshold;
-							my $yp = $y + $mass_threshold;
-							
-							
-							if(($diff <= $yp) && ($diff >= $ym)){
-								my $diff_list = $diff - $y;
-								$diff_list = abs($diff_list);
-								$diff_list = sprintf ("%0.6f", $diff_list);
-								
-								if($hcorrelgroup{$k}{$i} eq 1){
-									my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|";
-									$hcorrelgroup{$k}{$i}=$val;
-									$count ++;
-								}
-								else{
-									if($count == 0){
-										my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|";
-										$hcorrelgroup{$k}{$i}.=$val;
-										$count ++;
-									}
-									else{
-										my $val = $sign."(".$z.")(".$diff_list.")|";
-										$hcorrelgroup{$k}{$i}.=$val;
-										$count ++;
-									}
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-	}
-	
-	
-	
-	####################################################
-	### Choose the representative ion for each group ###
-	####################################################
-	
-	my %hgrouprepres;
-		
-	open(F3, $dataMatrix);
-	
-	while (my $line = <F3>){
-		chomp $line;
-		
-		my @tline = split (/\t/, $line);
-		
-		
-		
-			foreach my $k (keys %hcorrelgroup){
-				foreach my $i (keys %{$hcorrelgroup{$k}}){
-					
-					if($tline[0] eq $i){
-						$hgrouprepres{$k}{$i}{mass}=$hrtmz{$tline[0]}{mz};
-					
-						my $intensity;
-						my $nbsubjects=0;
-						for(my $y=1;$y<scalar(@tline);$y++){
-							$intensity += $tline[$y];
-							$nbsubjects ++;
-						}
-						my $meanintensity = $intensity/$nbsubjects;
-						$hgrouprepres{$k}{$i}{intensity}=$meanintensity;
-						$hgrouprepres{$k}{$i}{squaredmassint}=($hgrouprepres{$k}{$i}{mass}**2)/($hgrouprepres{$k}{$i}{intensity});
-					}
-				}
-			}
-	}
-	close F3;
-	
-	foreach my $z (keys %hgrouprepres){
-		my $max_intensity =  0;
-		my $max_int_ion = "";
-		my $max_mass = 0;
-		my $max_mass_ion = "";
-		my $max_squared = 0;
-		my $max_squared_ion = "";
-		foreach my $w (keys %{$hgrouprepres{$z}}){
-			if($hgrouprepres{$z}{$w}{intensity} > $max_intensity){
-				$max_intensity = $hgrouprepres{$z}{$w}{intensity};
-				$max_int_ion = $w;
-				
-			}
-			if($hgrouprepres{$z}{$w}{mass} > $max_mass){
-				$max_mass = $hgrouprepres{$z}{$w}{mass};
-				$max_mass_ion = $w;
-			}
-			if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){
-				$max_squared = $hgrouprepres{$z}{$w}{squaredmassint};
-				$max_squared_ion = $w;
-			}
-		}
-		
-		my $max_int_max_mass_ion="";
-		
-		if($repres_opt eq "max_intensity_max_mass"){
-			my %hfirst;
-			my $first=0;
-			foreach my $w (reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}}){
-				$first ++;
-				if ($first <= 3){
-					$hfirst{$w} = $hgrouprepres{$z}{$w}{intensity};
-				}
-			}
-			
-			my $first_2 = 0;
-			my $intens_max = 0;
-			my $mass_max = 0;
-			
-			foreach my $y (reverse sort {$hfirst{$a} <=> $hfirst{$b}} keys %hfirst){
-				
-				
-				
-				$first_2 ++;
-				if($first_2 == 1){
-					$intens_max = $hfirst{$y};
-					if($intensity_threshold > $intens_max){
-						$intensity_threshold = 0;
-					}
-					$max_int_max_mass_ion = $y;
-					$mass_max = $hgrouprepres{$z}{$y}{mass};
-				}
-				if($hgrouprepres{$z}{$y}{mass} > $mass_max){
-					if($hfirst{$y}>$intensity_threshold){
-						my $a = $intens_max * $intensity_pourc;
-						if($hfirst{$y} > $a){
-							$max_int_max_mass_ion = $y;
-							$mass_max = $hgrouprepres{$z}{$y}{mass};
-						}
-					}
-				}
-				
-			}
-		}
-		
-		
-		$hgrouprepres{$z}{max_int}=$max_int_ion;
-		$hgrouprepres{$z}{max_mass}=$max_mass_ion;
-		$hgrouprepres{$z}{max_squared}=$max_squared_ion;
-		$hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion;
-		
-	}
-	
-	
-	######################
-	### Print result ! ###
-	######################
-	
-	open(F4, ">$output_tabular");
-	open(F5, $combined_DMVM);
-	
-	my $line_nb = 0;
-	my %hheader;
-	while (my $line = <F5>){
-		chomp $line;
-		
-		
-		my @tline = split (/\t/, $line);
-		
-		if($line_nb == 0){
-			print F4 "$line\tACF_groups";
-			if($opt == 1){
-				if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tACF_filter\n"}
-				if($repres_opt eq "mass"){print F4 "\tmass_repres\tACF_filter\n"}
-				if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tACF_filter\n"}
-				if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tACF_filter\n"}
-
-			}
-			else{
-				if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
-				if($repres_opt eq "mass"){print F4 "\tmass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
-				if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
-				if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
-			}
-			
-			
-			### Creation of a header hash
-			for(my $i=0; $i<scalar(@tline);$i++){
-				my $a = $tline[$i];
-				$hheader{$a}=$i;
-			}
-		}
-		
-		else{
-			my $find = 0;
-			foreach my $v (keys %hcorrelgroup){
-					if(defined($hgrouprepres{$v}{$tline[0]})){
-						print F4 "$line\t$v";
-						
-						
-						if($repres_opt eq "intensity"){print F4 "\t$hgrouprepres{$v}{max_int}\t"}
-						if($repres_opt eq "mass"){print F4 "\t$hgrouprepres{$v}{max_mass}\t"}
-						if($repres_opt eq "mixt"){print F4 "\t$hgrouprepres{$v}{max_squared}\t"}
-						if($repres_opt eq "max_intensity_max_mass"){print F4 "\t$hgrouprepres{$v}{max_int_max_mass}\t"}
-						
-
-						if($opt != 1){
-							if(defined($hcorrelgroup{$v}{$tline[0]})){
-								print F4 "$hcorrelgroup{$v}{$tline[0]}\t";
-							}
-							else{
-								print F4 "\t";
-							}
-						}
-						
-						if($repres_opt eq "intensity"){
-							if($tline[0] eq $hgrouprepres{$v}{max_int}){
-								print F4 "conserved\n";
-							}
-							else{
-								print F4 "deleted\n";
-							}
-							$find = 1;
-						}
-						if($repres_opt eq "mass"){
-							if($tline[0] eq $hgrouprepres{$v}{max_mass}){
-								print F4 "conserved\n";
-							}
-							else{
-								print F4 "deleted\n";
-							}
-							$find = 1;
-						}
-						if($repres_opt eq "mixt"){
-							if($tline[0] eq $hgrouprepres{$v}{max_squared}){
-								print F4 "conserved\n";
-							}
-							else{
-								print F4 "deleted\n";
-							}
-							$find = 1;
-						}
-						if($repres_opt eq "max_intensity_max_mass"){
-							if($tline[0] eq $hgrouprepres{$v}{max_int_max_mass}){
-								print F4 "conserved\n";
-							}
-							else{
-								print F4 "deleted\n";
-							}
-							$find = 1;
-						}
-					}
-			}
-			if($find == 0){
-				$groupct ++;
-				my $group = "group".$groupct;
-				if($opt != 1){
-					print F4 "$line\t$group\t-\t-\t-\n";
-				}
-				else{
-					print F4 "$line\t$group\t-\t-\n";
-				}	
-			}
-			
-			
-		}
-		
-		$line_nb ++;
-	}
-	
-	
-	return ($output_sif, $output_tabular);
-}
-
-
-
-
-
-1;
\ No newline at end of file
--- a/ACF/analytic_correlation_filtration.xml	Thu Apr 11 08:42:38 2019 -0400
+++ b/ACF/analytic_correlation_filtration.xml	Thu May 09 08:17:53 2019 -0400
@@ -3,10 +3,6 @@
 		: Detect analytic correlation among data and remove them.
 	</description>
 	
-	<requirements>
-		<requirement type="package">perl-statistics</requirement>
-		<requirement type="package" version="1.19">perl-soap-lite</requirement>
-	</requirements>
 	
 	 <command><![CDATA[
 		
@@ -166,8 +162,7 @@
 .. class:: warningmark
 
 For more information about input files, refer to the corresponding "W4M HowTo" page:
-W4M_table_format_for_Galaxy_
-.. _W4M_table_format_for_Galaxy: http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf
+http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf
 
 
 Mass differences list
@@ -175,7 +170,7 @@
 	* Example:
 
 .. image:: Adduct_fragment_list.JPG
-	:width: 800
+	:width: 350
 
 ---------------------------------------------------
 	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/lib/IonFiltration.pm	Thu May 09 08:17:53 2019 -0400
@@ -0,0 +1,668 @@
+#!usr/bin/perl
+package IonFiltration;
+
+### Perl modules
+use strict;
+use warnings;
+
+
+
+
+
+
+########################################################################
+### Création of a hash containing all adduits and fragments possible ###
+########################################################################
+
+
+sub MassCollecting{
+	
+	my $mass_file = $_[0];
+	my %hmass;
+
+	open (F1, $mass_file);
+	
+	while(my $line = <F1>){
+		chomp $line;
+		my @tline = split(/[\t;]/, $line);
+		if(defined($hmass{$tline[2]})){
+			print "Il existe déjà une même différence de masse : $tline[2] !\n";
+		}
+		$hmass{$tline[1]}{$tline[2]}=$tline[0];
+	}
+	
+	close F1;
+#	my $count=0;
+#	foreach my $k (keys %hmass){
+#		$count++;
+#	}
+	
+	return %hmass;
+	
+	
+}
+
+
+
+
+
+
+
+########################################################
+### Creation of a sif table + correlation filtration ###
+########################################################
+
+
+sub sifTableCreation{
+	
+	my $file = $_[0];
+	my $output_sif = $_[1];
+	my $opt = $_[2];
+	my $rt_threshold = $_[3];
+	my $mass_threshold = $_[4];
+	my $correl_threshold = $_[5];
+	my $dataMatrix = $_[6];
+	my $output_tabular = $_[7];
+	my $combined_DMVM = $_[8];
+	my $repres_opt = $_[9];
+	my $intensity_threshold = $_[10];
+	my $intensity_pourc = $_[11];
+	my $refhmass = $_[12];
+	
+	
+	
+	
+	my %hheader_file;
+	my %hduplicate;
+	
+	my %hcorrelgroup;
+	my $groupct=1;
+
+	
+	my $linenb3=0;
+	my %hheader_line;
+	my %hrtmz;
+	
+	open (F5, $combined_DMVM);
+	while(my $line = <F5>){
+		chomp $line;
+		my @tline = split(/\t/, $line);
+		
+		if($linenb3 == 0){
+			for(my $i=0; $i<scalar(@tline);$i++){
+				my $a = $tline[$i];
+				$hheader_line{$a}=$i;
+			}
+		}
+		else{
+			if(defined($hheader_line{mzmed})){
+				my $b = $tline[$hheader_line{mzmed}];
+				$hrtmz{$tline[0]}{mz}=$b;
+			}
+			else{
+				my $b = $tline[$hheader_line{mz}];
+				$hrtmz{$tline[0]}{mz}=$b;
+			}
+			if(defined($hheader_line{rtmed})){
+				my $d = $tline[$hheader_line{rtmed}];
+				$hrtmz{$tline[0]}{rt}=$d;
+			}
+			else{
+				my $d = $tline[$hheader_line{rt}];
+				$hrtmz{$tline[0]}{rt}=$d;
+			}
+		}
+		
+		$linenb3 ++;
+	}
+	close F5;
+	
+	
+	my $linenb=0;
+	
+	open (F1, $file) or die "Impossible to open $file\n";
+	open(F2, ">$output_sif") or die "Impossible to open $output_sif\n";
+	
+	while(my $line = <F1>){
+		chomp $line;
+		my @tline = split(/\t/, $line);
+		
+		
+		###############################
+		### Création of a sif table ###
+		###############################
+		
+		if($linenb == 0){
+			for(my $i=0; $i<scalar(@tline);$i++){
+				my $a = $tline[$i];
+				$hheader_file{$i}=$a;
+			}
+		}
+		else{
+			for(my $i=1; $i<scalar(@tline);$i++){
+				my $a=$tline[0];
+				my $b=$hheader_file{$i};
+				my $coef=$tline[$i];
+				
+				
+				if($a eq $b){
+	#				print "This is a correlation between A ($a) and A ($b) !\n"
+				}
+				else{
+					
+					
+					#########################
+					### Remove duplicates ###
+					#########################
+					
+					my $y = $a."/".$b;
+					my $z = $b."/".$a;
+					
+					if((!(defined($hduplicate{$y}))) && (!(defined($hduplicate{$z})))){
+						
+						$hduplicate{$y}=1;
+						
+						if($coef > $correl_threshold){
+						
+							print F2 "$a\t$coef\t$b\n";
+							
+							my $count=0;
+							
+							######################################################
+							### Analytic correlation filtrering follow options ###
+							######################################################
+							
+							
+							
+								my $amass=$hrtmz{$a}{mz};
+								my $atemp=$hrtmz{$a}{rt};
+									my $bmass= $hrtmz{$b}{mz};
+									my $btemp=$hrtmz{$b}{rt};
+									my $diff = $amass-$bmass;
+									$diff = abs($diff);
+								
+									### Option 1: Don't take into acount mass information ###
+					
+									if($opt == 1){
+										my $btplus = $btemp + $rt_threshold;
+										my $btmoins = $btemp - $rt_threshold;
+										if(($btmoins <= $atemp) && ($atemp <= $btplus)){
+											foreach my $k (keys %hcorrelgroup){
+												
+												if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
+													$hcorrelgroup{$k}{$a}=1;
+													$hcorrelgroup{$k}{$b}=1;
+													$count++;
+													last;
+												}
+											}
+											if($count == 0){
+												my $groupnb="group".$groupct;
+												$hcorrelgroup{$groupnb}{$a}=1;
+												$hcorrelgroup{$groupnb}{$b}=1;
+												$groupct ++;
+											}
+											
+										}
+									}
+									
+								
+									### Option 2: Check that all mass differences are include in a specific list taking into account RT information ###
+									
+									elsif($opt == 2){
+										
+										my $print = 0;
+										foreach my $s (keys %{$refhmass}){
+											foreach my $r (keys %{$refhmass->{$s}}){
+												my $rm = $r - $mass_threshold;
+												my $rp = $r + $mass_threshold;
+												if(($diff <= $rp) && ($diff >= $rm)){
+													if($print == 0){
+														my $btplus = $btemp + $rt_threshold;
+														my $btmoins = $btemp - $rt_threshold;
+														
+														if(($btmoins <= $atemp) && ($atemp <= $btplus)){
+#														foreach my $s (keys %{%$refhmass{$r}}){
+						#									print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n";
+															foreach my $k (keys %hcorrelgroup){
+																if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
+																	$hcorrelgroup{$k}{$a}=1;
+																	$hcorrelgroup{$k}{$b}=1;
+#																	$hcorrelgroup{$k}{frag}.="#".$s;
+																	$count++;
+																	last;
+																}
+															}
+															if($count == 0){
+																my $groupnb="group".$groupct;
+																$hcorrelgroup{$groupnb}{$a}=1;
+																$hcorrelgroup{$groupnb}{$b}=1;
+#																$hcorrelgroup{$groupnb}{frag}.="#".$s;
+																$groupct ++;
+																
+															}
+															$print = 1;
+														}
+														
+													}
+												}
+											}
+										}
+									}
+									
+								
+									### Option 3: Check that all mass differences are include in a specific list, ignoring RT information ###
+					
+									elsif($opt == 3){
+										
+										my $print = 0;
+										foreach my $s (keys %{$refhmass}){
+											foreach my $r (keys %{$refhmass->{$s}}){
+												my $rm = $r - $mass_threshold;
+												my $rp = $r + $mass_threshold;
+												if(($diff <= $rp) && ($diff >= $rm)){
+													if($print == 0){
+														
+#													foreach my $s (keys %{%$refhmass{$r}}){
+						#								print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n";
+														foreach my $k (keys %hcorrelgroup){
+															if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){
+																$hcorrelgroup{$k}{$a}=1;
+																$hcorrelgroup{$k}{$b}=1;
+#																$hcorrelgroup{$k}{frag}.="#".$s;
+																$count++;
+																last;
+															}
+														}
+														if($count == 0){
+															my $groupnb="group".$groupct;
+															$hcorrelgroup{$groupnb}{$a}=1;
+															$hcorrelgroup{$groupnb}{$b}=1;
+#															$hcorrelgroup{$groupnb}{frag}.="#".$s;
+															$groupct ++;
+														}
+														$print = 1;
+													}
+												}
+											}
+										}
+									}
+								}
+
+
+					}
+				}
+			}
+		}
+		$linenb ++;
+	}
+	close F1;
+	close F2;
+	
+
+	
+	
+	#############################################
+	### Join groups that have been subdivided ###
+	#############################################
+	
+	my @tdelete;
+	
+	foreach my $k (keys %hcorrelgroup){
+		foreach my $i (keys %{$hcorrelgroup{$k}}){
+			foreach my $v (keys %hcorrelgroup){
+				my $count = 0;
+				if ($v ne $k){
+					foreach my $w (keys %{$hcorrelgroup{$v}}){
+#						if($w ne "frag"){
+							if($w eq $i){
+								$count = 1;
+								push(@tdelete, $v);
+							}	
+#						}
+					}
+				}
+				if($count == 1){
+					foreach my $w (keys %{$hcorrelgroup{$v}}){
+						$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
+					}
+					delete($hcorrelgroup{$v});
+				}
+			}
+		}
+	}
+	
+	foreach my $t (@tdelete){
+		delete($hcorrelgroup{$t});
+	}
+	
+	
+	### Do it twice to see if it fix the problem of unmerge groups
+	
+	foreach my $k (keys %hcorrelgroup){
+		foreach my $i (keys %{$hcorrelgroup{$k}}){
+			foreach my $v (keys %hcorrelgroup){
+				my $count = 0;
+				if ($v ne $k){
+					foreach my $w (keys %{$hcorrelgroup{$v}}){
+#						if($w ne "frag"){
+							if($w eq $i){
+								$count = 1;
+								push(@tdelete, $v);
+							}	
+#						}
+					}
+				}
+				if($count == 1){
+					foreach my $w (keys %{$hcorrelgroup{$v}}){
+						$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
+					}
+					delete($hcorrelgroup{$v});
+				}
+			}
+		}
+	}
+	
+	foreach my $t (@tdelete){
+		delete($hcorrelgroup{$t});
+	}
+	
+	
+	
+	##########################################
+	### Addition of annotation information ###
+	##########################################
+	
+	foreach my $k (keys %hcorrelgroup){
+		foreach my $i (keys %{$hcorrelgroup{$k}}){
+			foreach my $j (keys %{$hcorrelgroup{$k}}){
+				my $count = 0;
+				if ($i ne $j){
+					
+					my $a = $hrtmz{$i}{mz};
+					my $b = $hrtmz{$j}{mz};
+					
+					my $diff = $a - $b;
+					my $sign;
+					if($diff>0){
+						$sign="+";
+					}
+					if($diff<0){
+						$sign="-";
+					}
+					$diff = abs($diff);
+					
+					foreach my $z (keys %{$refhmass}){
+						
+						foreach my $y (keys %{$refhmass->{$z}}){
+							my $ym = $y - $mass_threshold;
+							my $yp = $y + $mass_threshold;
+							
+							
+							if(($diff <= $yp) && ($diff >= $ym)){
+								my $diff_list = $diff - $y;
+								$diff_list = abs($diff_list);
+								$diff_list = sprintf ("%0.6f", $diff_list);
+								
+								if($hcorrelgroup{$k}{$i} eq 1){
+									my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|";
+									$hcorrelgroup{$k}{$i}=$val;
+									$count ++;
+								}
+								else{
+									if($count == 0){
+										my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|";
+										$hcorrelgroup{$k}{$i}.=$val;
+										$count ++;
+									}
+									else{
+										my $val = $sign."(".$z.")(".$diff_list.")|";
+										$hcorrelgroup{$k}{$i}.=$val;
+										$count ++;
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	
+	
+	
+	####################################################
+	### Choose the representative ion for each group ###
+	####################################################
+	
+	my %hgrouprepres;
+		
+	open(F3, $dataMatrix);
+	
+	while (my $line = <F3>){
+		chomp $line;
+		
+		my @tline = split (/\t/, $line);
+		
+		
+		
+			foreach my $k (keys %hcorrelgroup){
+				foreach my $i (keys %{$hcorrelgroup{$k}}){
+					
+					if($tline[0] eq $i){
+						$hgrouprepres{$k}{$i}{mass}=$hrtmz{$tline[0]}{mz};
+					
+						my $intensity;
+						my $nbsubjects=0;
+						for(my $y=1;$y<scalar(@tline);$y++){
+							$intensity += $tline[$y];
+							$nbsubjects ++;
+						}
+						my $meanintensity = $intensity/$nbsubjects;
+						$hgrouprepres{$k}{$i}{intensity}=$meanintensity;
+						$hgrouprepres{$k}{$i}{squaredmassint}=($hgrouprepres{$k}{$i}{mass}**2)/($hgrouprepres{$k}{$i}{intensity});
+					}
+				}
+			}
+	}
+	close F3;
+	
+	foreach my $z (keys %hgrouprepres){
+		my $max_intensity =  0;
+		my $max_int_ion = "";
+		my $max_mass = 0;
+		my $max_mass_ion = "";
+		my $max_squared = 0;
+		my $max_squared_ion = "";
+		foreach my $w (keys %{$hgrouprepres{$z}}){
+			if($hgrouprepres{$z}{$w}{intensity} > $max_intensity){
+				$max_intensity = $hgrouprepres{$z}{$w}{intensity};
+				$max_int_ion = $w;
+				
+			}
+			if($hgrouprepres{$z}{$w}{mass} > $max_mass){
+				$max_mass = $hgrouprepres{$z}{$w}{mass};
+				$max_mass_ion = $w;
+			}
+			if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){
+				$max_squared = $hgrouprepres{$z}{$w}{squaredmassint};
+				$max_squared_ion = $w;
+			}
+		}
+		
+		my $max_int_max_mass_ion="";
+		
+		if($repres_opt eq "max_intensity_max_mass"){
+			my %hfirst;
+			my $first=0;
+			foreach my $w (reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}}){
+				$first ++;
+				if ($first <= 3){
+					$hfirst{$w} = $hgrouprepres{$z}{$w}{intensity};
+				}
+			}
+			
+			my $first_2 = 0;
+			my $intens_max = 0;
+			my $mass_max = 0;
+			
+			foreach my $y (reverse sort {$hfirst{$a} <=> $hfirst{$b}} keys %hfirst){
+				
+				
+				
+				$first_2 ++;
+				if($first_2 == 1){
+					$intens_max = $hfirst{$y};
+					if($intensity_threshold > $intens_max){
+						$intensity_threshold = 0;
+					}
+					$max_int_max_mass_ion = $y;
+					$mass_max = $hgrouprepres{$z}{$y}{mass};
+				}
+				if($hgrouprepres{$z}{$y}{mass} > $mass_max){
+					if($hfirst{$y}>$intensity_threshold){
+						my $a = $intens_max * $intensity_pourc;
+						if($hfirst{$y} > $a){
+							$max_int_max_mass_ion = $y;
+							$mass_max = $hgrouprepres{$z}{$y}{mass};
+						}
+					}
+				}
+				
+			}
+		}
+		
+		
+		$hgrouprepres{$z}{max_int}=$max_int_ion;
+		$hgrouprepres{$z}{max_mass}=$max_mass_ion;
+		$hgrouprepres{$z}{max_squared}=$max_squared_ion;
+		$hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion;
+		
+	}
+	
+	
+	######################
+	### Print result ! ###
+	######################
+	
+	open(F4, ">$output_tabular");
+	open(F5, $combined_DMVM);
+	
+	my $line_nb = 0;
+	my %hheader;
+	while (my $line = <F5>){
+		chomp $line;
+		
+		
+		my @tline = split (/\t/, $line);
+		
+		if($line_nb == 0){
+			print F4 "$line\tACF_groups";
+			if($opt == 1){
+				if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tACF_filter\n"}
+				if($repres_opt eq "mass"){print F4 "\tmass_repres\tACF_filter\n"}
+				if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tACF_filter\n"}
+				if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tACF_filter\n"}
+
+			}
+			else{
+				if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
+				if($repres_opt eq "mass"){print F4 "\tmass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
+				if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
+				if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"}
+			}
+			
+			
+			### Creation of a header hash
+			for(my $i=0; $i<scalar(@tline);$i++){
+				my $a = $tline[$i];
+				$hheader{$a}=$i;
+			}
+		}
+		
+		else{
+			my $find = 0;
+			foreach my $v (keys %hcorrelgroup){
+					if(defined($hgrouprepres{$v}{$tline[0]})){
+						print F4 "$line\t$v";
+						
+						
+						if($repres_opt eq "intensity"){print F4 "\t$hgrouprepres{$v}{max_int}\t"}
+						if($repres_opt eq "mass"){print F4 "\t$hgrouprepres{$v}{max_mass}\t"}
+						if($repres_opt eq "mixt"){print F4 "\t$hgrouprepres{$v}{max_squared}\t"}
+						if($repres_opt eq "max_intensity_max_mass"){print F4 "\t$hgrouprepres{$v}{max_int_max_mass}\t"}
+						
+
+						if($opt != 1){
+							if(defined($hcorrelgroup{$v}{$tline[0]})){
+								print F4 "$hcorrelgroup{$v}{$tline[0]}\t";
+							}
+							else{
+								print F4 "\t";
+							}
+						}
+						
+						if($repres_opt eq "intensity"){
+							if($tline[0] eq $hgrouprepres{$v}{max_int}){
+								print F4 "conserved\n";
+							}
+							else{
+								print F4 "deleted\n";
+							}
+							$find = 1;
+						}
+						if($repres_opt eq "mass"){
+							if($tline[0] eq $hgrouprepres{$v}{max_mass}){
+								print F4 "conserved\n";
+							}
+							else{
+								print F4 "deleted\n";
+							}
+							$find = 1;
+						}
+						if($repres_opt eq "mixt"){
+							if($tline[0] eq $hgrouprepres{$v}{max_squared}){
+								print F4 "conserved\n";
+							}
+							else{
+								print F4 "deleted\n";
+							}
+							$find = 1;
+						}
+						if($repres_opt eq "max_intensity_max_mass"){
+							if($tline[0] eq $hgrouprepres{$v}{max_int_max_mass}){
+								print F4 "conserved\n";
+							}
+							else{
+								print F4 "deleted\n";
+							}
+							$find = 1;
+						}
+					}
+			}
+			if($find == 0){
+				$groupct ++;
+				my $group = "group".$groupct;
+				if($opt != 1){
+					print F4 "$line\t$group\t-\t-\t-\n";
+				}
+				else{
+					print F4 "$line\t$group\t-\t-\n";
+				}	
+			}
+			
+			
+		}
+		
+		$line_nb ++;
+	}
+	
+	
+	return ($output_sif, $output_tabular);
+}
+
+
+
+
+
+1;
\ No newline at end of file