changeset 0:cfe4b819911b draft

Uploaded
author melpetera
date Thu, 10 Oct 2019 12:20:11 -0400
parents
children 86ee1a3d5723
files ACF/Analytic_correlation_filtration.pl ACF/README.md ACF/analytic_correlation_filtration.xml ACF/data/default_list.csv ACF/lib/IonFiltration.pm ACF/static/images/Adduct_fragment_list.JPG ACF/static/images/Correlation_matrix.JPG
diffstat 7 files changed, 1301 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/Analytic_correlation_filtration.pl	Thu Oct 10 12:20:11 2019 -0400
@@ -0,0 +1,644 @@
+#!/usr/bin/perl
+
+### Perl modules
+use warnings;
+use strict;
+use Getopt::Long qw(GetOptions); #Creation of script options
+use Pod::Usage qw(pod2usage); #Creation of script options
+
+#Personnal packages
+use FindBin ; ## Allows you to locate the directory of original perl script
+#use lib $FindBin::Bin;
+use lib "$FindBin::Bin/lib";
+use IonFiltration;
+
+my ($file, $mass_file, $opt, $dataMatrix, $combined_DMVM, $repres_opt, $rt_threshold, $mass_threshold, $output_sif, $output_tabular, $correl_threshold, $intensity_threshold, $intensity_pourc); #Options to complete
+
+########################
+### Options and help ###
+########################
+# Read the command line, then stop with a message and a failing exit status when a value is missing or invalid.
+GetOptions("f=s"=>\$file, "m=s"=>\$mass_file, "o=s"=>\$opt, "d=s"=>\$dataMatrix, "v=s"=>\$combined_DMVM, "r=s"=>\$repres_opt, "rt=f"=>\$rt_threshold, "mass=f"=>\$mass_threshold, "output_sif=s"=>\$output_sif, "output_tabular=s"=>\$output_tabular, "correl=s"=>\$correl_threshold, "IT=f"=>\$intensity_threshold, "IP=f"=>\$intensity_pourc) or pod2usage(2);
+
+### Check required parameters :
+pod2usage({-message=>q{Mandatory argument '-f' is missing}, -exitval=>1, -verbose=>0}) unless $file;
+# '-m', '-mass' and '-rt' are only required by some grouping methods : they are checked once '-o' has been validated.
+pod2usage({-message=>q{Mandatory argument '-o' is missing. It corresponds to the grouping method for analytical correlation groups formation.
+It should be a number (1 ; 2 or 3) :
+	1 : Don't take into account mass information (only RT) ;
+	2 : Check that all mass differences are included in a specific list, taking into account RT information
+	3 : Check that all mass differences are included in a specific list, ignoring RT information
+To use the tool without taking into account mass and RT information, use option 1 and define the RT threshold to 999999999.}, -exitval=>1, -verbose=>0}) unless $opt;
+pod2usage({-message=>q{Mandatory argument '-r' is missing. It corresponds to the method used to choose the representative ion of each analytical correlation group.
+It should be one of the 4 options below :
+	"mass" : choose the ion with the highest mass as the representative
+	"intensity" : choose the ion with the highest intensity as the representative
+	"mixt" : choose the ion with the highest (mass^2 * intensity) as the representative
+	"max_intensity_max_mass" : choose the ion with the highest mass among the 3 most intense ions of the group}, -exitval=>1, -verbose=>0}) unless $repres_opt;
+pod2usage({-message=>q{Mandatory argument '-d' is missing}, -exitval=>1, -verbose=>0}) unless $dataMatrix;
+pod2usage({-message=>q{Mandatory argument '-v' is missing}, -exitval=>1, -verbose=>0}) unless $combined_DMVM;
+# A correlation threshold of 0 is a value like any other, so definedness is tested rather than truth.
+pod2usage({-message=>q{Mandatory argument '-correl' is missing}, -exitval=>1, -verbose=>0}) unless defined $correl_threshold;
+pod2usage({-message=>q{Mandatory argument '-output_tabular' is missing}, -exitval=>1, -verbose=>0}) unless $output_tabular;
+pod2usage({-message=>q{Mandatory argument '-output_sif' is missing}, -exitval=>1, -verbose=>0}) unless $output_sif;
+
+# Every later step branches on the grouping method : an unknown value has to stop the run with a failing status.
+if($opt !~ /\A[123]\z/){
+	print STDERR "you must indicate \"1\", \"2\" or \"3\" for the --o option\n";
+	exit 1;
+}
+# Methods 2 and 3 compare mass differences with a list ; methods 1 and 2 compare retention times.
+pod2usage({-message=>q{Mandatory argument '-m' is missing (required when '-o' is 2 or 3)}, -exitval=>1, -verbose=>0}) unless ($opt == 1) || $mass_file;
+pod2usage({-message=>q{Mandatory argument '-mass' is missing (required when '-o' is 2 or 3)}, -exitval=>1, -verbose=>0}) unless ($opt == 1) || defined $mass_threshold;
+pod2usage({-message=>q{Mandatory argument '-rt' is missing (required when '-o' is 1 or 2)}, -exitval=>1, -verbose=>0}) unless ($opt == 3) || defined $rt_threshold;
+if(($repres_opt ne "mass") && ($repres_opt ne "intensity") && ($repres_opt ne "mixt") && ($repres_opt ne "max_intensity_max_mass")){
+	print STDERR "you must indicate \"mass\", \"intensity\", \"mixt\" or \"max_intensity_max_mass\" for the --r option\n";
+	exit 1;
+}
+
+
+
+#########################################################################
+#### Creation of a hash containing all possible adducts and fragments ###
+#########################################################################
+# %hmass is only filled for the grouping methods 2 and 3 ; with the method 1 it stays empty.
+my %hmass;
+if($opt != 1){
+	%hmass = IonFiltration::MassCollecting($mass_file);
+	
+}
+# The later steps read the list through this reference, as label => { mass difference => ... }.
+my $refhmass = \%hmass;
+
+print "Création of a hash containing all adduits and fragments possible\n";
+
+	
+########################################################
+### Creation of a sif table + correlation filtration ###
+########################################################
+# The first returned value replaces $output_sif ; the other ones fill %hrtmz, read later as $hrtmz{ion}{mz} and $hrtmz{ion}{rt}.
+my %hrtmz;
+($output_sif, %hrtmz) = IonFiltration::sifTableCreation($file, $output_sif, $opt, $rt_threshold, $mass_threshold, $correl_threshold, $dataMatrix, $output_tabular, $combined_DMVM, $repres_opt, $intensity_threshold, $intensity_pourc, \%hmass);
+print "Creation of a sif table + correlation filtration done\n";
+
+
+######################################################
+### Analytic correlation filtering follow options  ###
+######################################################
+#
+# Each line of the sif file describes one pair of correlated ions : the id of
+# the first ion is read in column 1 and the id of the second ion in column 3.
+# A pair is retained when it fulfils the grouping method given with '-o' :
+#	1 : the retention times of the two ions differ by at most the RT threshold ;
+#	2 : same RT condition, and the m/z difference of the two ions matches an
+#	    entry of the mass difference list (+/- the mass threshold) ;
+#	3 : the m/z difference matches an entry of the list, whatever the RT.
+# The two ions of a retained pair are stored in the same group of %hcorrelgroup.
+# Two groups may end up sharing an ion here ; the next step joins them.
+#
+
+# Analytical correlation groups : group name => { ion id => 1 }
+my %hcorrelgroup;
+
+# Number used to name the next new group. The output step keeps counting
+# from it to name the ions that do not belong to any group.
+my $groupct=1;
+
+
+# Store a retained pair of ions.
+# Both ions join the first group found that already contains one of them ;
+# when there is none, a new group named "group<N>" is created for them and
+# the counter is incremented.
+#	$groups  : reference to the hash of groups (modified in place)
+#	$counter : reference to the number of the next new group (modified in place)
+#	$ion_a   : id of the first ion of the pair
+#	$ion_b   : id of the second ion of the pair
+sub _add_pair_to_groups {
+	my ($groups, $counter, $ion_a, $ion_b) = @_;
+
+	foreach my $name (keys %{$groups}){
+		if((defined($groups->{$name}{$ion_a})) || (defined($groups->{$name}{$ion_b}))){
+			$groups->{$name}{$ion_a}=1;
+			$groups->{$name}{$ion_b}=1;
+			return;
+		}
+	}
+
+	my $new_name = "group".${$counter};
+	$groups->{$new_name}{$ion_a}=1;
+	$groups->{$new_name}{$ion_b}=1;
+	${$counter}++;
+	return;
+}
+
+
+# Tell whether a mass difference matches the reference list.
+#	$mass_list : reference to the list, read as label => { mass difference => ... }
+#	$diff      : absolute m/z difference between the two ions
+#	$tolerance : accepted deviation around each listed mass difference
+# Returns 1 when $diff lies within +/- $tolerance of at least one listed
+# mass difference (bounds included), 0 otherwise.
+sub _mass_diff_is_listed {
+	my ($mass_list, $diff, $tolerance) = @_;
+
+	foreach my $label (keys %{$mass_list}){
+		foreach my $listed (keys %{$mass_list->{$label}}){
+			my $lower = $listed - $tolerance;
+			my $upper = $listed + $tolerance;
+			if(($diff <= $upper) && ($diff >= $lower)){
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+
+# Tell whether two retention times are close enough.
+#	$rt_a, $rt_b : retention times of the two ions
+#	$threshold   : maximum accepted retention time difference
+# Returns 1 when $rt_a lies within +/- $threshold of $rt_b (bounds included),
+# 0 otherwise.
+sub _rt_is_close {
+	my ($rt_a, $rt_b, $threshold) = @_;
+
+	my $lower = $rt_b - $threshold;
+	my $upper = $rt_b + $threshold;
+	if(($lower <= $rt_a) && ($rt_a <= $upper)){
+		return 1;
+	}
+	return 0;
+}
+
+
+open (my $sif_fh, '<', $output_sif) or die "Impossible to open $output_sif\n";
+
+while(my $line = <$sif_fh>){
+	chomp $line;
+	my @tline = split(/\t/, $line);
+	my $ion_a = $tline[0];
+	my $ion_b = $tline[2];
+
+	# A line without its two ion ids (an empty line for instance) does not
+	# describe a pair : grouping it would create a group of nameless ions.
+	next unless (defined($ion_a) && defined($ion_b));
+
+	my $rt_a = $hrtmz{$ion_a}{rt};
+	my $rt_b = $hrtmz{$ion_b}{rt};
+	my $keep = 0;
+
+	### Option 1: Don't take into account mass information ###
+
+	if($opt == 1){
+		$keep = _rt_is_close($rt_a, $rt_b, $rt_threshold);
+	}
+
+	### Options 2 and 3: the mass difference has to be included in the list ###
+
+	elsif(($opt == 2) || ($opt == 3)){
+		my $diff = abs($hrtmz{$ion_a}{mz} - $hrtmz{$ion_b}{mz});
+		$keep = _mass_diff_is_listed($refhmass, $diff, $mass_threshold);
+
+		### Option 2 only: take into account RT information as well ###
+		if($keep && ($opt == 2)){
+			$keep = _rt_is_close($rt_a, $rt_b, $rt_threshold);
+		}
+	}
+
+	if($keep){
+		_add_pair_to_groups(\%hcorrelgroup, \$groupct, $ion_a, $ion_b);
+	}
+}
+close $sif_fh;
+
+
+print "Analytic correlation filtrering follow options done\n";
+	
+	
+#############################################
+### Join groups that have been subdivided ###
+#############################################
+#
+# The previous step may have stored the same ion in several groups, for instance :
+#	group1 = {A, B} ; group2 = {C, D} ; group3 = {B, C}
+# Groups that share an ion, directly or through a chain of other groups, have
+# to become one single group (here {A, B, C, D}).
+#
+# The groups are processed one after the other. %owner_of records, for every
+# ion of the groups already processed, the name of the group that holds it.
+# For each group $k :
+#	- every processed group that shares an ion with $k is absorbed by $k
+#	  (its ions are copied into $k) and deleted ;
+#	- the ions of $k are then recorded in %owner_of under the name $k.
+# The processed groups therefore never share an ion with each other, and
+# one pass over the groups is enough, however long the chain of groups to
+# join is.
+# The group that survives is the one processed last among the groups joined
+# together ; the names of the other ones disappear.
+#
+# Every ion is stored with the value 1 by the previous step, so copying the
+# value found in the absorbed group does not change the value already in $k.
+#
+
+
+# ion id => name of the processed group that holds this ion
+my %owner_of;
+
+# The list of group names is built once, before the loop starts. Only groups
+# that were already processed get deleted, so every name met by the loop
+# still designates an existing group.
+foreach my $k (keys %hcorrelgroup){
+
+	# Names of the processed groups that share at least one ion with $k
+	my %to_absorb;
+	foreach my $i (keys %{$hcorrelgroup{$k}}){
+		if(defined($owner_of{$i})){
+			$to_absorb{$owner_of{$i}}=1;
+		}
+	}
+
+	# Copy their ions into $k, then remove them
+	foreach my $v (keys %to_absorb){
+		foreach my $w (keys %{$hcorrelgroup{$v}}){
+			$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
+		}
+		delete($hcorrelgroup{$v});
+	}
+
+	# $k now holds its own ions and those of the absorbed groups : they all
+	# belong to $k from now on, which also replaces the names of the deleted
+	# groups in %owner_of
+	foreach my $i (keys %{$hcorrelgroup{$k}}){
+		$owner_of{$i}=$k;
+	}
+}
+
+
+
+
+print "Join groups that have been subdivided done\n";
+	
+#######################################################
+### Addition of annotation information among groups ###
+#######################################################
+#
+# Inside each group, every ion is compared with every other ion of the group.
+# When the absolute m/z difference of the two ions lies within +/- the mass
+# threshold of a mass difference of the list, an annotation is added to the
+# value stored for the first ion in %hcorrelgroup :
+#	@<id of the other ion>|<sign>(<label>)(<deviation>)|
+# <sign> is "+" when the first ion has the higher m/z and "-" when it has the
+# lower one, <label> is the key of the matching list entry and <deviation> is
+# the absolute gap between the observed and the listed mass difference,
+# printed with 6 decimals. When several list entries match the same pair of
+# ions, the "@<id>|" part is only written in front of the first one.
+# The value of an ion that gets no annotation remains 1.
+#
+
+
+foreach my $group (keys %hcorrelgroup){
+	my $members = $hcorrelgroup{$group};
+
+	foreach my $ion (keys %{$members}){
+		foreach my $other (keys %{$members}){
+			next if ($ion eq $other);
+
+			my $delta = $hrtmz{$ion}{mz} - $hrtmz{$other}{mz};
+			my $sign = ($delta > 0) ? "+" : ($delta < 0) ? "-" : undef;
+			$delta = abs($delta);
+
+			# Number of list entries already matched for this pair of ions
+			my $matched = 0;
+
+			foreach my $label (keys %{$refhmass}){
+				foreach my $listed (keys %{$refhmass->{$label}}){
+					my $lower = $listed - $mass_threshold;
+					my $upper = $listed + $mass_threshold;
+					next unless (($delta <= $upper) && ($delta >= $lower));
+
+					my $deviation = sprintf ("%0.6f", abs($delta - $listed));
+					my $val = $sign."(".$label.")(".$deviation.")|";
+					if($matched == 0){
+						$val = "@".$other."|".$val;
+					}
+
+					# The first annotation of an ion replaces its initial
+					# value 1, the following ones are appended
+					if($members->{$ion} eq 1){
+						$members->{$ion} = $val;
+					}
+					else{
+						$members->{$ion} .= $val;
+					}
+					$matched ++;
+				}
+			}
+		}
+	}
+}
+
+
+
+print "Addition of annotation information among groups done\n";
+	
+	
+####################################################
+### Choose the representative ion for each group ###
+####################################################
+# %hgrouprepres receives, for each group, the ions of the group found in the dataMatrix with :
+#	{mass} : m/z of the ion ; {intensity} : mean of the values of its dataMatrix line ;
+#	{squaredmassint} : mass^2 * intensity
+# then the id of the ion selected by each method, under the keys max_int, max_mass, max_squared
+# and max_int_max_mass (the last one stays empty unless it is the method given with '-r').
+
+my %hgrouprepres;
+
+# ion id => names of the groups holding this ion (avoids scanning every group for every dataMatrix line)
+my %groups_of_ion;
+foreach my $k (keys %hcorrelgroup){
+	foreach my $i (keys %{$hcorrelgroup{$k}}){
+		push(@{$groups_of_ion{$i}}, $k);
+	}
+}
+
+open(my $matrix_fh, '<', $dataMatrix) or die "Impossible to open $dataMatrix\n";
+
+while (my $line = <$matrix_fh>){
+	chomp $line;
+	my @tline = split (/\t/, $line);
+	my $ion = $tline[0];
+
+	# Only the ions that belong to a group are needed
+	next unless (defined($ion) && exists($groups_of_ion{$ion}));
+
+	# A line without any value has no mean intensity (and would divide by zero)
+	my $nbsubjects = scalar(@tline) - 1;
+	next if ($nbsubjects < 1);
+
+	# NOTE(review): a non numeric value such as NA counts as 0 in the sum - confirm this is intended
+	my $intensity = 0;
+	for(my $y=1;$y<scalar(@tline);$y++){
+		$intensity += $tline[$y];
+	}
+	my $meanintensity = $intensity/$nbsubjects;
+	my $mass = $hrtmz{$ion}{mz};
+
+	foreach my $k (@{$groups_of_ion{$ion}}){
+		$hgrouprepres{$k}{$ion}{mass}=$mass;
+		$hgrouprepres{$k}{$ion}{intensity}=$meanintensity;
+		# Criterion of the "mixt" method : mass^2 * intensity
+		$hgrouprepres{$k}{$ion}{squaredmassint}=($mass**2)*$meanintensity;
+	}
+}
+close $matrix_fh;
+
+foreach my $z (keys %hgrouprepres){
+	my $ions = $hgrouprepres{$z};
+	my $max_intensity =  0;
+	my $max_int_ion = "";
+	my $max_mass = 0;
+	my $max_mass_ion = "";
+	my $max_squared = 0;
+	my $max_squared_ion = "";
+	foreach my $w (keys %{$ions}){
+		if($ions->{$w}{intensity} > $max_intensity){
+			$max_intensity = $ions->{$w}{intensity};
+			$max_int_ion = $w;
+		}
+		if($ions->{$w}{mass} > $max_mass){
+			$max_mass = $ions->{$w}{mass};
+			$max_mass_ion = $w;
+		}
+		if($ions->{$w}{squaredmassint} > $max_squared){
+			$max_squared = $ions->{$w}{squaredmassint};
+			$max_squared_ion = $w;
+		}
+	}
+
+	my $max_int_max_mass_ion="";
+
+	if($repres_opt eq "max_intensity_max_mass"){
+		# The most intense ion is the default choice. Among the 3 most intense ions, a heavier ion replaces
+		# it when its intensity exceeds both the intensity threshold and (highest intensity * percentage).
+		my @ranked = sort {$ions->{$b}{intensity} <=> $ions->{$a}{intensity}} keys %{$ions};
+		my $last = ($#ranked > 2) ? 2 : $#ranked;
+		my $intens_max = $ions->{$ranked[0]}{intensity};
+		my $mass_max = $ions->{$ranked[0]}{mass};
+		$max_int_max_mass_ion = $ranked[0];
+		# Thresholds of this group only : $intensity_threshold itself is left untouched for the other groups.
+		# The intensity threshold is ignored when even the most intense ion of the group is below it.
+		my $threshold = defined($intensity_threshold) ? $intensity_threshold : 0;
+		$threshold = 0 if ($threshold > $intens_max);
+		my $floor = $intens_max * (defined($intensity_pourc) ? $intensity_pourc : 0);
+		foreach my $y (@ranked[1..$last]){
+			next unless ($ions->{$y}{mass} > $mass_max);
+			next unless (($ions->{$y}{intensity} > $threshold) && ($ions->{$y}{intensity} > $floor));
+			$max_int_max_mass_ion = $y;
+			$mass_max = $ions->{$y}{mass};
+		}
+	}
+	$hgrouprepres{$z}{max_int}=$max_int_ion;
+	$hgrouprepres{$z}{max_mass}=$max_mass_ion;
+	$hgrouprepres{$z}{max_squared}=$max_squared_ion;
+	$hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion;
+}
+
+print "Choose the representative ion for each group done\n";
+
+#############################################################################
+### Addition of annotation information relative to the representative ion ###
+#############################################################################
+#
+# Nothing is done with the grouping method 1. With the other methods, each ion
+# of a group receives in $hreprescomparison{group}{ion}{repres_diff} :
+#	"M" when it is the representative ion of the group ;
+#	"[M <sign>(<label>)]|" for each entry of the mass difference list matching
+#	the m/z difference between the ion and the representative ion (+/- the
+#	mass threshold). <sign> is "+" when the ion has the higher m/z and "-"
+#	when it has the lower one, <label> is the key of the list entry.
+# An ion that matches no entry of the list receives nothing.
+#
+
+
+my %hreprescomparison;
+
+my $representative="";
+
+
+if($opt != 1){
+
+	# Key of %hgrouprepres holding the representative ion chosen by each method
+	my %field_of_method = (
+		"mass"                   => "max_mass",
+		"intensity"              => "max_int",
+		"mixt"                   => "max_squared",
+		"max_intensity_max_mass" => "max_int_max_mass",
+	);
+
+	foreach my $group (keys %hcorrelgroup){
+		foreach my $ion (keys %{$hcorrelgroup{$group}}){
+
+			# An unknown method leaves the previous representative ion unchanged
+			if(exists($field_of_method{$repres_opt})){
+				$representative = $hgrouprepres{$group}{$field_of_method{$repres_opt}};
+			}
+
+			if($ion eq $representative){
+				$hreprescomparison{$group}{$ion}{repres_diff}="M";
+				next;
+			}
+
+			my $delta = $hrtmz{$ion}{mz} - $hrtmz{$representative}{mz};
+			my $sign = ($delta > 0) ? "+" : ($delta < 0) ? "-" : undef;
+			$delta = abs($delta);
+
+			foreach my $label (keys %{$refhmass}){
+				foreach my $listed (keys %{$refhmass->{$label}}){
+					my $lower = $listed - $mass_threshold;
+					my $upper = $listed + $mass_threshold;
+					next unless (($delta <= $upper) && ($delta >= $lower));
+
+					my $valrep = "[M ".$sign."(".$label.")]|";
+
+					# The annotation is overwritten for an ion that still holds
+					# the initial value 1 in %hcorrelgroup, appended otherwise
+					if($hcorrelgroup{$group}{$ion} eq 1){
+						$hreprescomparison{$group}{$ion}{repres_diff}=$valrep;
+					}
+					else{
+						$hreprescomparison{$group}{$ion}{repres_diff}.=$valrep;
+					}
+				}
+			}
+		}
+	}
+}
+
+
+
+
+
+print "Addition of annotation information relative to the representative ion done\n";
+	
+##############################
+### Print in result file ! ###
+##############################
+#
+# The result file contains the lines of the variableMetadata file, each one
+# followed by additional columns (tabulation separated).
+#
+# With the grouping method 1 :
+#	ACorF_groups    : name of the group of the ion
+#	ACorF_filter    : 1 when the ion is the representative ion of its group, else 0
+#	<method>_repres : id of the representative ion of the group
+#
+# With the grouping methods 2 and 3 :
+#	ACorF_groups    : name of the group of the ion
+#	isotopes_adducts_fragments_[@id|annotation(delta_annotation)] :
+#	                  value stored for the ion in %hcorrelgroup
+#	ACorF_filter    : 1 when the ion is the representative ion of its group, else 0
+#	<method>_repres : id of the representative ion of the group
+#	annotation_relative_to_representative :
+#	                  value stored for the ion in %hreprescomparison, or "-"
+#
+# An ion that is in no group gets a new group name of its own, and "-" in
+# every other additional column.
+#
+
+
+# Name of the "<method>_repres" column for each method given with '-r'
+my %repres_column = (
+	"intensity"              => "intensity_repres",
+	"mass"                   => "mass_repres",
+	"mixt"                   => "mass2intens_repres",
+	"max_intensity_max_mass" => "max_intensity_max_mass_repres",
+);
+
+# Key of %hgrouprepres holding the representative ion chosen by each method
+my %repres_slot = (
+	"intensity"              => "max_int",
+	"mass"                   => "max_mass",
+	"mixt"                   => "max_squared",
+	"max_intensity_max_mass" => "max_int_max_mass",
+);
+
+
+# Stop right away when one of the two files cannot be opened
+open(my $out_fh, '>', $output_tabular) or die "Impossible to open $output_tabular\n";
+open(my $meta_fh, '<', $combined_DMVM) or die "Impossible to open $combined_DMVM\n";
+
+# Number of lines already read : 0 while the header line is being processed
+my $line_nb = 0;
+
+while (my $line = <$meta_fh>){
+	chomp $line;
+
+	my @tline = split (/\t/, $line);
+
+	### Header line : add the names of the additional columns
+
+	if($line_nb == 0){
+		print $out_fh "$line\tACorF_groups";
+		if($opt != 1){
+			print $out_fh "\tisotopes_adducts_fragments_[\@id|annotation(delta_annotation)]";
+		}
+		print $out_fh "\tACorF_filter\t$repres_column{$repres_opt}";
+		if($opt != 1){
+			print $out_fh "\tannotation_relative_to_representative";
+		}
+		print $out_fh "\n";
+	}
+
+	### Ion line : add the values of the additional columns
+
+	else{
+		my $ion = defined($tline[0]) ? $tline[0] : "";
+		my $find = 0;
+
+		foreach my $v (keys %hcorrelgroup){
+			next unless (defined($hgrouprepres{$v}{$ion}));
+			$find = 1;
+
+			my $repres = $hgrouprepres{$v}{$repres_slot{$repres_opt}};
+			my $filter = ($ion eq $repres) ? 1 : 0;
+
+			# Every value is written with its own leading tabulation, so that
+			# the group name and the filter value stay in separate columns
+			# whatever the grouping method is.
+			print $out_fh "$line\t$v";
+			if($opt != 1){
+				my $annotation = defined($hcorrelgroup{$v}{$ion}) ? $hcorrelgroup{$v}{$ion} : "";
+				my $relative = defined($hreprescomparison{$v}{$ion}{repres_diff}) ? $hreprescomparison{$v}{$ion}{repres_diff} : "-";
+				print $out_fh "\t$annotation\t$filter\t$repres\t$relative\n";
+			}
+			else{
+				print $out_fh "\t$filter\t$repres\n";
+			}
+		}
+
+		# The ion is in no group : it gets a group of its own
+		if($find == 0){
+			$groupct ++;
+			my $group = "group".$groupct;
+			if($opt != 1){
+				print $out_fh "$line\t$group\t-\t-\t-\t-\n";
+			}
+			else{
+				print $out_fh "$line\t$group\t-\t-\n";
+			}
+		}
+	}
+	$line_nb ++;
+}
+
+close $meta_fh;
+
+# Errors of buffered writes only show up when the file is closed
+close $out_fh or die "Impossible to write $output_tabular\n";
+
+
+print "Print in result file done\n";
+
+print "All steps done\n";
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/README.md	Thu Oct 10 12:20:11 2019 -0400
@@ -0,0 +1,45 @@
+Analytical Correlation Filtration
+=======
+
+Metadata
+-----------
+
+ * **@name**: ACorF
+ * **@version**: 2019-06-20
+ * **@authors**: <stephanie.monnerie@inra.fr>
+ * **@date creation**: 2018/11/17
+ * **@main usage**: Reduction of analytical redundancies in Metabolomics data
+
+
+Configuration
+-----------
+
+### Requirement:
+ * perl
+
+
+### Deploy:
+
+    
+### Warnings:
+
+
+Services provided
+-----------
+
+
+
+Technical description
+-----------
+
+
+Notes
+-----------
+
+
+
+
+License (optional)
+-----------
+
+This code is published under CECILL 2.1. 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/analytic_correlation_filtration.xml	Thu Oct 10 12:20:11 2019 -0400
@@ -0,0 +1,206 @@
+<tool id="Analytic_correlation_filtration" name="Analytic correlation filtration" version="2019-06-20">
+	<description>
+		: Detect analytic correlation among data and remove them.
+	</description>
+	
+	<!-- Exactly one branch of the template emits the interpreter, the script path and the options that depend on the mass / retention time choices; the options common to all branches follow. -->
+	 <command><![CDATA[
+		
+		
+		
+		
+		
+		#if str($mass_file.mass_choice)=="false":
+			#if str($rt_cond.rt_choice)=="false":
+				perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt 9999999999
+			#else:
+				perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in"  -rt "$rt_cond.rt_threshold" 
+			#end if
+		#else:
+			#if str($mass_file.liste.mass_list)=="true":
+				#if str($rt_cond.rt_choice)=="true":
+					perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in"  -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold" 
+				#end if
+				#if str($rt_cond.rt_choice)=="false":
+					perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in"  -mass "$mass_file.mass_threshold" 
+				#end if
+			#else
+					#if str($rt_cond.rt_choice)=="true":
+						perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -m $__tool_directory__/data/default_list.csv -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold" 
+					#end if
+					#if str($rt_cond.rt_choice)=="false":
+						perl $__tool_directory__/Analytic_correlation_filtration.pl -f "$file_in" -m $__tool_directory__/data/default_list.csv -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in" -mass "$mass_file.mass_threshold" 
+					#end if
+			#end if
+		#end if
+	
+		-r "$repres_opt.repres_opt_selector"
+		
+		#if str($repres_opt.repres_opt_selector)=="max_intensity_max_mass":
+			-IT $repres_opt.int_threshold
+			-IP $repres_opt.int_percentage
+		#end if
+		-correl "$correl_threshold"
+		-output_sif "$sif_out"
+		-output_tabular "$variableMetadata_out"
+		
+	]]></command>
+	
+	<inputs>
+		<param type="data" name="file_in" format="txt" help="The .txt correlation table (you can obtain it by using the Between-table Correlation tool or, for example, the cor() function in R)" label="Correlation table file" />
+		<param type="data" name="dataMatrix_in" format="tabular" help="" label="dataMatrix file" />
+		<param type="data" name="variableMetadata_in" format="tabular" help="" label="variableMetadata file" />
+		
+		<param help="Define the minimum correlation threshold accepted to determine analytic correlation" label="Correlation threshold" type="float" name="correl_threshold" value="0.90"/>
+		
+		<conditional name="mass_file">
+		  <param name="mass_choice" checked="true" falsevalue="false" help="'YES' if you want to take it into account; 'NO' if you don't want to take into account mass information" label="Do you want to take into account mass differences between 2 ions?" truevalue="true" type="boolean"/>
+				<when value="true">
+					<conditional name="liste">
+						<param name="mass_list" checked="true" falsevalue="false" help="'YES' if you have your own list to upload; 'NO' if you want to use a default list" label="Do you have your own list of mass differences or do you want to use a default list ?" truevalue="true" type="boolean"/>
+						<when value="false">
+						
+						</when>
+						<when value="true">
+							<param type="data" name="mass_file_in" format="tabular,csv" help="The file containing all your report and known mass differences (cf help for file example) " label="Mass differences table (format: tabular or csv) " />
+						</when>
+					</conditional>
+					<param help="2 ions need to have a difference mass included in the list at +/- mass difference range to be considered as analytically correlated | Value recommendation : 0.005" label="Mass difference range" type="float" name="mass_threshold" value="0.005"/>
+				</when>
+				<when value="false">
+			
+				</when>
+		</conditional>
+		
+		<conditional name="rt_cond">
+			<param checked="true" falsevalue="false" help="'YES' if want to take into account retention time information; 'NO' if you don't want to take into account retention time information" label="Do you want to take into account retention time differences between 2 ions? " name="rt_choice" truevalue="true" type="boolean"/>
+				<when value="true">
+					<param help="Choose a retention time difference threshold between 2 ions considered as analytically correlated | Value recommendation : 0.1" label="Retention time difference threshold" type="float" name="rt_threshold" value="0.1"/>
+				</when>
+				<when value="false">
+					
+				</when>
+		</conditional>
+		
+		<conditional name="repres_opt">
+			<param name="repres_opt_selector" label="Which representative ion do you want to select for each group" type="select" display="radio" help="">
+				<option value="intensity">Highest intensity</option>
+				<option value="mass">Highest mass</option>
+				<option value="mixt">Highest (mass2 x intensity) </option>
+				<option value="max_intensity_max_mass">Highest mass among the 3 most intense ions (subject to the intensity threshold and rules ==> see help) </option>
+			</param>
+			<when value="max_intensity_max_mass">
+				<param help="" label="Minimum intensity threshold for the representative ion" type="float" name="int_threshold" value="1000"/>
+				<param help="Example: ion A has the highest intensity of a group but not the highest mass, and ion B has the second highest intensity of the group and a higher mass than A. For B to be chosen as the representative ion of the group, its intensity needs to be at least 50% of the intensity of A." label="Proportion of the highest intensity of the group that the new representative ion must reach (0.5 means 50%). This option helps to avoid selecting an isotope." type="float" name="int_percentage" value="0.5"/>
+			</when>
+			<when value="intensity">
+			</when>
+			<when value="mass">
+			</when>
+			<when value="mixt">
+			</when>
+		</conditional>
+		
+	</inputs>
+	
+	<outputs>
+		<data format="sif" label="${file_in.name}_sif" name="sif_out"/>
+		<data format="tabular" label="${variableMetadata_in.name}_representative_ion" name="variableMetadata_out"/>
+	</outputs>
+	
+	<help><![CDATA[
+	
+.. class:: infomark
+
+**Authors** : **Stephanie Monnerie** (stephanie.monnerie@inra.fr) wrote this tool for analytic correlation detection.
+
+---------------------------------------------------
+
+.. class:: infomark
+
+**References** : 
+
+---------------------------------------------------
+	
+-----------
+Input files
+-----------
+
++-----------------------------------------+---------------+
+| File                                    |     Format    |
++=========================================+===============+
+| 1)  Correlation matrix                  |  txt          |
++-----------------------------------------+---------------+
+| 2)  Data matrix                         |  tabular      |
++-----------------------------------------+---------------+
+| 3)  Variable metadata                   |  tabular      |
++-----------------------------------------+---------------+
+| **Optional file**                       |   **Format**  |
++-----------------------------------------+---------------+
+| 4)  Optional : Mass differences list    |  csv/tabular  |
++-----------------------------------------+---------------+
+
+---------------------------------------------------
+
+-------------
+Files content
+-------------
+	
+Correlation matrix
+	* File organisation : one line per correlation pair, with the first ion ID, the correlation value and the second ion ID, tab separated ==> First_Ion_ID \\t Correlation_Value \\t Second_Ion_ID
+	* Example:
+	
+.. image:: Correlation_matrix.JPG
+	:width: 800
+	
+Data matrix file
+	* "variable x sample" **dataMatrix** : tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the variable metadata (see below)
+
+Variable metadata file
+	* "variable x metadata" **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
+
+.. class:: warningmark
+
+For more information about input files, refer to the corresponding "W4M HowTo" page:
+http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf
+
+
+Mass differences list
+	* A file containing list of known adducts, fragments or isotopes with the mass differences linked to them
+	* Example:
+
+.. image:: Adduct_fragment_list.JPG
+	:width: 350
+
+---------------------------------------------------
+	
+----------
+Parameters
+----------
+
+Take into account mass differences between 2 ions :
+	* You can enter a list of mass differences that are known. The file must be organized with a first column for the mass difference type (isotope, fragment, etc...), a second column with the mass difference chemical formula (H+, -2H+K, etc...) and a third column for the mass difference value
+	* If you are choosing to use a mass differences table, you have to choose a mass difference range that will be a threshold to accept or not a difference value as true (recognize a mass difference value in the file +/- this threshold).
+
+Take into account retention time :
+	* You can use retention time as a criterion to group ions. You have to choose a value that will be used as an interval : 2 ions are grouped when their retention times are equal +/- the threshold.
+
+Choose the representative ion for each group, there are 4 possibilities to determine the representative ion :
+	* The ion with the highest intensity (recommended for LC/MS)
+	* The ion with the highest mass
+	* The ion with the highest "mass2 * intensity" value 
+	* The ion with the highest mass among the 3 most intense ions of the group, unless the intensity of that ion is lower than a given percentage (for example 50%) of the highest intensity of the group (recommended for GC/MS)
+	
+
+---------------------------------------------------
+	
+--------------
+Example of use
+--------------
+
+Add examples according to the ppt presentation !
+
+
+
+	]]></help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/data/default_list.csv	Thu Oct 10 12:20:11 2019 -0400
@@ -0,0 +1,225 @@
+adduit	-2H+Na+K	59.9378259
+adduit	H	1.007825032
+adduit	-H+K	37.95588165
+adduit	-H+Na	21.98194425
+adduit	-3H+3Na	65.94583274
+adduit	-4H+4K	151.8235266
+adduit	-4H+4Na	87.92777699
+adduit	-3H+3K	113.8676449
+adduit	-2H+2K	75.9117633
+adduit	-2H+2Na	43.9638885
+adduit	2H 	2.015650064
+adduit	Cl 	34.96885268
+adduit	-2H+Ca 	37.94694092
+isotope	13C db 	0.501677419
+isotope	13C	1.003354838
+isotope	15N	0.997034893
+isotope	18O	2.00424638
+isotope	34S	1.9957959
+isotope	41K	1.99811908
+isotope	37Cl	1.99704991
+isotope	13C2	2.006709676
+isotope	13C3	3.010064513
+isotope	13C+37Cl	3.000404748
+isotope	13C+18O	3.007601218
+isotope	13C+34S	2.999150738
+isotope	44Ca	3.99289082
+adduit	CH3OH	32.02621475
+adduit	CH3CN	41.0265491
+adduit	H2O	18.01056468
+adduit	2(H2O)	36.02112937
+adduit	NaCl 	57.95862196
+adduit	HCOOH	46.0054793
+adduit	+(HCOOH)+(HCOOK)	129.9668403
+adduit	+(HCOOH)+(HCOONa)	113.9929029
+adduit	+(HCOOH)+2(HCOONa)	181.9803264
+adduit	HCOOK	83.96136095
+adduit	+(HCOOK)+(HCOONa)	151.9487845
+adduit	HCOONa	67.98742355
+adduit	2(HCOOH)	92.01095861
+adduit	+2(HCOOH)+(HCOOK)	175.9723196
+adduit	+2(HCOOH)+(HCOONa)	159.9983822
+adduit	2(HCOOK)	167.9227219
+adduit	2(HCOONa)	135.9748471
+fragment	C11H18O9	294.0950822
+fragment	C12H16O12	352.064176
+fragment	C12H20O9	308.1107322
+fragment	C2H2O	42.01056468
+fragment	C2H3.	27.0229265
+fragment	C2H3N	41.0265491
+fragment	C2H3NO3	89.01129296
+fragment	C2H3O.	43.01784112
+fragment	C2H4	28.03130013
+fragment	C2H4N.	42.03382553
+fragment	C2H4O	44.02621475
+fragment	C2H5.	29.03857656
+fragment	C2H5N	43.04219916
+fragment	C2H5NO2	75.0320284
+fragment	C2H5O.	45.03349118
+fragment	C2H5O6P	155.9823745
+fragment	C2H6	30.04695019
+fragment	C2H7N	45.05784922
+fragment	C2HNO2	71.00072827
+fragment	C3H4O3	88.01604399
+fragment	C3H5.	41.03857656
+fragment	C3H5NO2	87.0320284
+fragment	-(C3H5O2NS)-(NH3)	136.0306485
+fragment	C3H5O2NS	119.0040994
+fragment	C3H6	42.04695019
+fragment	C3H6O3	90.03169405
+fragment	C3H7.	43.05422662
+fragment	C3H7O2N	89.04767846
+fragment	C3H7O2NS	121.0197495
+fragment	C3H7O6P	169.9980246
+fragment	C4H6	54.04695019
+fragment	C4H6O2	86.03677943
+fragment	C4H6O4	118.0266087
+fragment	C4H7.	55.05422662
+fragment	C4H8O3	104.0473441
+fragment	C4H9	57.07042529
+fragment	C5H7O3N	129.0425931
+fragment	C5H8O3NS	162.0224891
+fragment	C5H8O4	132.0422587
+fragment	C6H10O4	146.0579088
+fragment	-(C6H10O5)-(H2O)	180.0633881
+fragment	C6H10O5	162.0528234
+fragment	C6H10O7	194.0426527
+fragment	C6H8O6	176.032088
+fragment	CH2O	30.01056468
+fragment	-(CH2S)-(HCOOH)	91.99320037
+fragment	-(CH2S)-(NH3)	63.01427016
+fragment	CH2S	45.98772106
+fragment	CH3.	15.0229265
+fragment	CH3COO.	59.01275574
+fragment	CH3COOH	60.02112937
+fragment	CH3N	29.0265491
+fragment	CH3O.	31.01784112
+fragment	CH3OH	32.02621475
+fragment	CH4	16.03130013
+fragment	CH4N.	30.03382553
+fragment	-(CH4S)-(HCOOH)	94.00885043
+fragment	-(CH4S)-(NH3)	65.02992022
+fragment	CH4S	48.00337113
+fragment	CH5N	31.04219916
+fragment	Cl.	34.96830408
+fragment	CO	27.99491462
+fragment	-(CO2)-(CO)	71.98474386
+fragment	CO2	43.98982924
+fragment	-(H2)-(NH3)	19.04219916
+fragment	H2	2.015650064
+fragment	-(H2O)-(CO2)	62.00039392
+fragment	-(H2O)-(HCOOH)	64.01604399
+fragment	-(H2O)-(NH3)	35.03711378
+fragment	H2O	18.01056468
+fragment	-(H2O)-2(CO2)	105.9902232
+fragment	-(H2S)-(H2O)	51.99828575
+fragment	H2S	33.98772106
+fragment	H2SO4	97.96737954
+fragment	H3PO4	97.97689521
+fragment	HCl	35.97667771
+fragment	HCN	27.01089903
+fragment	-(HCOOH)-(HCN)	73.01637834
+fragment	HCOOH	46.0054793
+fragment	HS.	32.97934743
+fragment	-(NC3H9)-(CH3COOH)	119.0946287
+fragment	-(NC3H9)-(H2O)	77.08406397
+fragment	-(NC3H9)-(HCOOH)	105.0789786
+fragment	NC3H9	59.07349929
+fragment	NaCl	57.95862196
+fragment	NH2CO.	44.01309008
+fragment	-(NH3)-(CO2)-(H2O)	79.02694302
+fragment	-(NH3)-(CO2)	61.01637834
+fragment	-(NH3)-(CONH)	60.03236275
+fragment	-(NH3)-(HCOOH)	63.0320284
+fragment	NH3	17.0265491
+fragment	NH3CO	45.02146372
+fragment	NHCO	43.00581365
+fragment	OH.	17.00219105
+fragment	PO3	78.95850549
+fragment	SO2	63.96190024
+fragment	SO3	79.95681486
+fragment	-2(H2O)-(CO2)	80.01095861
+fragment	-2(H2O)-(HCOOH)-(NH3)	99.05315777
+fragment	-2(H2O)-(HCOOH)	82.02660867
+fragment	2(H2O)	36.02112937
+fragment	2(HCOOH)	92.01095861
+fragment	-2(NH3)-(CO)-(CO2)	106.0378421
+fragment	-2(NH3)-(CO)	62.04801281
+fragment	2(NH3)	34.05309819
+fragment	3(H2O)	54.03169405
+fragment	3(NH3)	51.07964729
+fragment	4(H2O)	72.04225874
+fragment	C10H11O3N5	249.0861892
+fragment	C10H13O4N5	267.0967539
+fragment	C10H14O7N5P	347.0630844
+fragment	C10H15O5N5	285.1073186
+fragment	C2H3NO2	73.01637834
+fragment	C2H4O2	60.02112937
+fragment	C2H5NO3	91.02694302
+fragment	C2H6O2	62.03677943
+fragment	C2H6O3	78.03169405
+fragment	-(C2H6O3)-(H2O)	96.04225874
+fragment	C2H6O4	94.02660867
+fragment	C2H7NO2	77.04767846
+fragment	C3H10O5	126.0528234
+fragment	-(C3H6O3)-(CHNO)	133.0375077
+fragment	C3H6O4	106.0266087
+fragment	C3H8O3	92.04734412
+fragment	C3H8O4	108.0422587
+fragment	C4H10O5	138.0528234
+fragment	C4H5NO3	115.026943
+fragment	C4H8O4	120.0422587
+fragment	C5H10O4	134.0579088
+fragment	C5H13O4N	151.0844579
+fragment	C6H11O4N	161.0688078
+fragment	C6H11O5N	177.0637225
+fragment	C6H13O5N	179.0793725
+fragment	C5H10O5	150.0528234
+fragment	C5H10O6	166.047738
+fragment	C5H12O2	104.0837296
+fragment	-(C5H12O2)-(H2O)	122.0942943
+fragment	C5H5N5	135.0544952
+fragment	C5H5ON5	151.0494098
+fragment	C5H6O2	98.03677943
+fragment	C5H7O2N5	169.0599745
+fragment	-(C5H7O3N)-(CO2)	173.0324223
+fragment	-(C5H7O3N)-(H2O)	147.0531578
+fragment	C5H8N3	110.0718223
+fragment	C5H8O3	116.0473441
+fragment	C5H8O5N5P	249.026305
+fragment	C5H9O3	117.0551691
+fragment	C5H9O6P	196.0136746
+fragment	C5H9O7P	212.0085893
+fragment	C6H10O3	130.0629942
+fragment	-(C6H10O3)-(H2O)	148.0735589
+fragment	C6H11O4N3PS	252.0207885
+fragment	C6H11O4NPS	224.0146405
+fragment	C6H12O5	164.0684735
+fragment	C6H14O6	182.0790382
+fragment	C6H14O7	198.0739528
+fragment	C6H16O7	200.0896029
+fragment	C6H16O8	216.0845175
+fragment	C6H8N3	122.0718223
+fragment	C6H8NS	126.0377453
+fragment	C7H5ON5	175.0494098
+fragment	C7H6ON6	190.0603088
+fragment	C7H7O2N5	193.0599745
+fragment	C7H11O6N	205.0586371
+fragment	C8H14O7	222.0739528
+fragment	C8H5O3N5	219.039239
+fragment	C8H7O4N5	237.0498037
+fragment	C9H10O4N2	210.0640568
+fragment	C9H11O3N3	209.0800412
+fragment	C9H11O4N3	225.0749558
+fragment	C9H12O5N2	228.0746215
+fragment	C9H12O6N3P	289.0463717
+fragment	C9H13O4N3	227.0906059
+fragment	C9H14O7N3P	307.0569364
+fragment	C9H16O8	252.0845175
+fragment	CH2N2	42.02179806
+fragment	-(CH2O)-(H2O)	48.02112937
+fragment	CH5NO	47.03711378
+fragment	-(H3PO4)-(CHNO)	140.9827089
+fragment	-(H3PO4)-(H2O)	115.9874599
+fragment	-(H3PO4)-(NH3)	115.0034443
+fragment	HPO3	79.96633052
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/lib/IonFiltration.pm	Thu Oct 10 12:20:11 2019 -0400
@@ -0,0 +1,181 @@
+#!usr/bin/perl
+package IonFiltration;
+
+### Perl modules
+use strict;
+use warnings;
+
+
+
+
+
+
+########################################################################
+### Creation of a hash containing all possible adducts and fragments ###
+########################################################################
+
+
+sub MassCollecting{
+	# Build the table of known mass differences from a delimited text file.
+	#
+	# Parameter:
+	#   $_[0] : path of the mass differences file. Each line holds three
+	#           fields separated by a tab or a semicolon:
+	#           type, formula, mass difference value.
+	# Returns:
+	#   a hash (as a flat list) such that $hmass{formula}{mass} = type.
+	#   When the same formula/mass pair appears several times, the type of
+	#   the last occurrence is kept.
+	#
+	# A message is printed on the currently selected output handle each time
+	# a mass difference value has already been met on a previous line.
+	# Lines with fewer than three fields (blank lines included) are ignored.
+	# If no path is given, or if the file cannot be opened, an empty table is
+	# returned (with a warning on STDERR in the second case).
+
+	my ($mass_file) = @_;
+	my %hmass;
+	my %seen_mass; # mass values already read, whatever the formula
+
+	return %hmass unless defined($mass_file) && length($mass_file);
+
+	my $fh;
+	unless (open($fh, '<', $mass_file)) {
+		warn "Impossible to open $mass_file : $!\n";
+		return %hmass;
+	}
+
+	while (my $line = <$fh>) {
+		chomp $line;
+		my ($type, $formula, $mass) = split(/[\t;]/, $line);
+		next unless defined($mass) && length($mass);
+
+		# %hmass is keyed by formula first, so duplicated mass values have to
+		# be tracked in their own hash to be detected.
+		if ($seen_mass{$mass}++) {
+			print "The mass difference already exists : $mass !\n";
+		}
+		$hmass{$formula}{$mass} = $type;
+	}
+
+	close $fh;
+	return %hmass;
+
+}
+
+
+
+
+
+
+
+########################################################
+### Creation of a sif table + correlation filtration ###
+########################################################
+
+
+sub sifTableCreation{
+	# Collect the m/z and retention time of each ion, then convert a
+	# correlation matrix into a sif table restricted to the pairs whose
+	# coefficient is above a threshold.
+	#
+	# Parameters (positional; slots 2-4, 6, 7 and 9-12 are not read here):
+	#   $_[0] : path of the tab-separated correlation matrix. The first line
+	#           holds the column names, the first field of every other line
+	#           holds the row name.
+	#   $_[1] : path of the sif file to write.
+	#   $_[5] : correlation threshold; a pair is written only when its
+	#           coefficient is strictly greater than this value.
+	#   $_[8] : path of a tab-separated file with a header line. For every
+	#           other line, the first field is used as identifier, the m/z is
+	#           read from column "mzmed" (or "mz" when "mzmed" is absent) and
+	#           the retention time from "rtmed" (or "rt" when "rtmed" is absent).
+	# Returns:
+	#   ($output_sif, %hrtmz) with $hrtmz{id}{mz} and $hrtmz{id}{rt}.
+	#   A value is undef when no matching column exists in the header.
+	# Dies when one of the three files cannot be opened, or when the sif file
+	# cannot be closed properly.
+
+	my $file = $_[0];
+	my $output_sif = $_[1];
+	my $correl_threshold = $_[5];
+	my $combined_DMVM = $_[8];
+
+	###########################################
+	### Collection of m/z and rt of each ion ###
+	###########################################
+
+	my %hrtmz;
+	my $dmvm_path = defined($combined_DMVM) ? $combined_DMVM : '';
+
+	open(my $dmvm_fh, '<', $dmvm_path) or die "Impossible to open $dmvm_path\n";
+
+	my $dmvm_header = <$dmvm_fh>;
+	if (defined $dmvm_header) {
+		chomp $dmvm_header;
+		my @names = split(/\t/, $dmvm_header);
+
+		# Column name -> column index (the last one wins for a repeated name).
+		my %col_of;
+		@col_of{@names} = (0 .. $#names);
+
+		# The columns to read are the same for every line : resolve them once.
+		my $mz_col = defined($col_of{mzmed}) ? $col_of{mzmed} : $col_of{mz};
+		my $rt_col = defined($col_of{rtmed}) ? $col_of{rtmed} : $col_of{rt};
+
+		while (my $line = <$dmvm_fh>) {
+			chomp $line;
+			my @fields = split(/\t/, $line);
+			next unless @fields; # blank line
+
+			my $id = $fields[0];
+			$hrtmz{$id}{mz} = defined($mz_col) ? $fields[$mz_col] : undef;
+			$hrtmz{$id}{rt} = defined($rt_col) ? $fields[$rt_col] : undef;
+		}
+	}
+	close $dmvm_fh;
+
+	###############################
+	### Creation of a sif table ###
+	###############################
+
+	open(my $matrix_fh, '<', $file) or die "Impossible to open $file\n";
+	open(my $sif_fh, '>', $output_sif) or die "Impossible to open $output_sif\n";
+
+	my @col_names;
+	my %seen_pair;
+	my $is_header = 1;
+
+	while (my $line = <$matrix_fh>) {
+		chomp $line;
+		my @fields = split(/\t/, $line);
+
+		if ($is_header) {
+			@col_names = @fields;
+			$is_header = 0;
+			next;
+		}
+		next unless @fields; # blank line
+
+		my $row_id = $fields[0];
+
+		for my $i (1 .. $#fields) {
+			my $col_id = $col_names[$i];
+			next unless defined $col_id; # more values than column names
+			next if $row_id eq $col_id;  # correlation of an ion with itself
+
+			#########################
+			### Remove duplicates ###
+			#########################
+
+			# A pair is handled once, whatever the order of its two members.
+			# The first occurrence decides, even when it is under the threshold.
+			my ($first, $second) = ($row_id lt $col_id) ? ($row_id, $col_id) : ($col_id, $row_id);
+			next if $seen_pair{$first}{$second}++;
+
+			# Only positive correlations are kept; compare abs($coef) instead
+			# to also consider negative correlations.
+			my $coef = $fields[$i];
+			if ($coef > $correl_threshold) {
+				print {$sif_fh} "$row_id\t$coef\t$col_id\n";
+			}
+		}
+	}
+
+	close $matrix_fh;
+	close($sif_fh) or die "Impossible to close $output_sif\n";
+
+	return ($output_sif, %hrtmz);
+}
+							
+							
+							
+							
+
+1;
\ No newline at end of file
Binary file ACF/static/images/Adduct_fragment_list.JPG has changed
Binary file ACF/static/images/Correlation_matrix.JPG has changed