Mercurial > repos > melpetera > utilitaires
changeset 22:fff7de4f9fed draft
Uploaded
author | melpetera |
---|---|
date | Thu, 09 May 2019 08:17:53 -0400 |
parents | 243347918a63 |
children | eab2c4b805a2 |
files | ACF/Analytic_correlation_filtration.pl ACF/IonFiltration.pm ACF/analytic_correlation_filtration.xml ACF/lib/IonFiltration.pm |
diffstat | 4 files changed, 672 insertions(+), 677 deletions(-) [+] |
line wrap: on
line diff
--- a/ACF/Analytic_correlation_filtration.pl Thu Apr 11 08:42:38 2019 -0400 +++ b/ACF/Analytic_correlation_filtration.pl Thu May 09 08:17:53 2019 -0400 @@ -8,8 +8,8 @@ #Personnal packages use FindBin ; ## Allows you to locate the directory of original perl script -use lib $FindBin::Bin; -use lib "$FindBin::Bin/../lib"; +#use lib $FindBin::Bin; +use lib "$FindBin::Bin/lib"; use IonFiltration; my ($file, $mass_file, $opt, $dataMatrix, $combined_DMVM, $repres_opt, $rt_threshold, $mass_threshold, $output_sif, $output_tabular, $correl_threshold, $intensity_threshold, $intensity_pourc); #Options to complete
--- a/ACF/IonFiltration.pm Thu Apr 11 08:42:38 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,668 +0,0 @@ -#!usr/bin/perl -package IonFiltration; - -### Perl modules -use strict; -use warnings; - - - - - - -######################################################################## -### Création of a hash containing all adduits and fragments possible ### -######################################################################## - - -sub MassCollecting{ - - my $mass_file = $_[0]; - my %hmass; - - open (F1, $mass_file); - - while(my $line = <F1>){ - chomp $line; - my @tline = split(/[\t;]/, $line); - if(defined($hmass{$tline[2]})){ - print "Il existe déjà une même différence de masse : $tline[2] !\n"; - } - $hmass{$tline[1]}{$tline[2]}=$tline[0]; - } - - close F1; -# my $count=0; -# foreach my $k (keys %hmass){ -# $count++; -# } - - return %hmass; - - -} - - - - - - - -######################################################## -### Creation of a sif table + correlation filtration ### -######################################################## - - -sub sifTableCreation{ - - my $file = $_[0]; - my $output_sif = $_[1]; - my $opt = $_[2]; - my $rt_threshold = $_[3]; - my $mass_threshold = $_[4]; - my $correl_threshold = $_[5]; - my $dataMatrix = $_[6]; - my $output_tabular = $_[7]; - my $combined_DMVM = $_[8]; - my $repres_opt = $_[9]; - my $intensity_threshold = $_[10]; - my $intensity_pourc = $_[11]; - my $refhmass = $_[12]; - - - - - my %hheader_file; - my %hduplicate; - - my %hcorrelgroup; - my $groupct=1; - - - my $linenb3=0; - my %hheader_line; - my %hrtmz; - - open (F5, $combined_DMVM); - while(my $line = <F5>){ - chomp $line; - my @tline = split(/\t/, $line); - - if($linenb3 == 0){ - for(my $i=0; $i<scalar(@tline);$i++){ - my $a = $tline[$i]; - $hheader_line{$a}=$i; - } - } - else{ - if(defined($hheader_line{mzmed})){ - my $b = $tline[$hheader_line{mzmed}]; - $hrtmz{$tline[0]}{mz}=$b; - } - else{ - my $b = $tline[$hheader_line{mz}]; - $hrtmz{$tline[0]}{mz}=$b; - } - if(defined($hheader_line{rtmed})){ - my $d = $tline[$hheader_line{rtmed}]; - $hrtmz{$tline[0]}{rt}=$d; - } - else{ - my $d = $tline[$hheader_line{rt}]; - $hrtmz{$tline[0]}{rt}=$d; - } - } - - $linenb3 ++; - } - close F5; - - - my $linenb=0; - - open (F1, $file) or die "Impossible to open $file\n"; - open(F2, ">$output_sif") or die "Impossible to open $output_sif\n"; - - while(my $line = <F1>){ - chomp $line; - my @tline = split(/\t/, $line); - - - ############################### - ### Création of a sif table ### - ############################### - - if($linenb == 0){ - for(my $i=0; $i<scalar(@tline);$i++){ - my $a = $tline[$i]; - $hheader_file{$i}=$a; - } - } - else{ - for(my $i=1; $i<scalar(@tline);$i++){ - my $a=$tline[0]; - my $b=$hheader_file{$i}; - my $coef=$tline[$i]; - - - if($a eq $b){ - # print "This is a correlation between A ($a) and A ($b) !\n" - } - else{ - - - ######################### - ### Remove duplicates ### - ######################### - - my $y = $a."/".$b; - my $z = $b."/".$a; - - if((!(defined($hduplicate{$y}))) && (!(defined($hduplicate{$z})))){ - - $hduplicate{$y}=1; - - if($coef > $correl_threshold){ - - print F2 "$a\t$coef\t$b\n"; - - my $count=0; - - ###################################################### - ### Analytic correlation filtrering follow options ### - ###################################################### - - - - my $amass=$hrtmz{$a}{mz}; - my $atemp=$hrtmz{$a}{rt}; - my $bmass= $hrtmz{$b}{mz}; - my $btemp=$hrtmz{$b}{rt}; - my $diff = $amass-$bmass; - $diff = abs($diff); - - ### Option 1: Don't take into acount mass information ### - - if($opt == 1){ - my $btplus = $btemp + $rt_threshold; - my $btmoins = $btemp - $rt_threshold; - if(($btmoins <= $atemp) && ($atemp <= $btplus)){ - foreach my $k (keys %hcorrelgroup){ - - if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ - $hcorrelgroup{$k}{$a}=1; - $hcorrelgroup{$k}{$b}=1; - $count++; - last; - } - } - if($count == 0){ - my $groupnb="group".$groupct; - $hcorrelgroup{$groupnb}{$a}=1; - $hcorrelgroup{$groupnb}{$b}=1; - $groupct ++; - } - - } - } - - - ### Option 2: Check that all mass differences are include in a specific list taking into account RT information ### - - elsif($opt == 2){ - - my $print = 0; - foreach my $s (keys %$refhmass){ - foreach my $r (keys %{%$refhmass{$s}}){ - my $rm = $r - $mass_threshold; - my $rp = $r + $mass_threshold; - if(($diff <= $rp) && ($diff >= $rm)){ - if($print == 0){ - my $btplus = $btemp + $rt_threshold; - my $btmoins = $btemp - $rt_threshold; - - if(($btmoins <= $atemp) && ($atemp <= $btplus)){ -# foreach my $s (keys %{%$refhmass{$r}}){ - # print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n"; - foreach my $k (keys %hcorrelgroup){ - if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ - $hcorrelgroup{$k}{$a}=1; - $hcorrelgroup{$k}{$b}=1; -# $hcorrelgroup{$k}{frag}.="#".$s; - $count++; - last; - } - } - if($count == 0){ - my $groupnb="group".$groupct; - $hcorrelgroup{$groupnb}{$a}=1; - $hcorrelgroup{$groupnb}{$b}=1; -# $hcorrelgroup{$groupnb}{frag}.="#".$s; - $groupct ++; - - } - $print = 1; - } - - } - } - } - } - } - - - ### Option 3: Check that all mass differences are include in a specific list, ignoring RT information ### - - elsif($opt == 3){ - - my $print = 0; - foreach my $s (keys %$refhmass){ - foreach my $r (keys %{%$refhmass{$s}}){ - my $rm = $r - $mass_threshold; - my $rp = $r + $mass_threshold; - if(($diff <= $rp) && ($diff >= $rm)){ - if($print == 0){ - -# foreach my $s (keys %{%$refhmass{$r}}){ - # print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n"; - foreach my $k (keys %hcorrelgroup){ - if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ - $hcorrelgroup{$k}{$a}=1; - $hcorrelgroup{$k}{$b}=1; -# $hcorrelgroup{$k}{frag}.="#".$s; - $count++; - last; - } - } - if($count == 0){ - my $groupnb="group".$groupct; - $hcorrelgroup{$groupnb}{$a}=1; - $hcorrelgroup{$groupnb}{$b}=1; -# $hcorrelgroup{$groupnb}{frag}.="#".$s; - $groupct ++; - } - $print = 1; - } - } - } - } - } - } - - - } - } - } - } - $linenb ++; - } - close F1; - close F2; - - - - - ############################################# - ### Join groups that have been subdivided ### - ############################################# - - my @tdelete; - - foreach my $k (keys %hcorrelgroup){ - foreach my $i (keys %{$hcorrelgroup{$k}}){ - foreach my $v (keys %hcorrelgroup){ - my $count = 0; - if ($v ne $k){ - foreach my $w (keys %{$hcorrelgroup{$v}}){ -# if($w ne "frag"){ - if($w eq $i){ - $count = 1; - push(@tdelete, $v); - } -# } - } - } - if($count == 1){ - foreach my $w (keys %{$hcorrelgroup{$v}}){ - $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; - } - delete($hcorrelgroup{$v}); - } - } - } - } - - foreach my $t (@tdelete){ - delete($hcorrelgroup{$t}); - } - - - ### Do it twice to see if it fix the problem of unmerge groups - - foreach my $k (keys %hcorrelgroup){ - foreach my $i (keys %{$hcorrelgroup{$k}}){ - foreach my $v (keys %hcorrelgroup){ - my $count = 0; - if ($v ne $k){ - foreach my $w (keys %{$hcorrelgroup{$v}}){ -# if($w ne "frag"){ - if($w eq $i){ - $count = 1; - push(@tdelete, $v); - } -# } - } - } - if($count == 1){ - foreach my $w (keys %{$hcorrelgroup{$v}}){ - $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; - } - delete($hcorrelgroup{$v}); - } - } - } - } - - foreach my $t (@tdelete){ - delete($hcorrelgroup{$t}); - } - - - - ########################################## - ### Addition of annotation information ### - ########################################## - - foreach my $k (keys %hcorrelgroup){ - foreach my $i (keys %{$hcorrelgroup{$k}}){ - foreach my $j (keys %{$hcorrelgroup{$k}}){ - my $count = 0; - if ($i ne $j){ - - my $a = $hrtmz{$i}{mz}; - my $b = $hrtmz{$j}{mz}; - - my $diff = $a - $b; - my $sign; - if($diff>0){ - $sign="+"; - } - if($diff<0){ - $sign="-"; - } - $diff = abs($diff); - - foreach my $z (keys %$refhmass){ - - foreach my $y (keys %{%$refhmass{$z}}){ - my $ym = $y - $mass_threshold; - my $yp = $y + $mass_threshold; - - - if(($diff <= $yp) && ($diff >= $ym)){ - my $diff_list = $diff - $y; - $diff_list = abs($diff_list); - $diff_list = sprintf ("%0.6f", $diff_list); - - if($hcorrelgroup{$k}{$i} eq 1){ - my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|"; - $hcorrelgroup{$k}{$i}=$val; - $count ++; - } - else{ - if($count == 0){ - my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|"; - $hcorrelgroup{$k}{$i}.=$val; - $count ++; - } - else{ - my $val = $sign."(".$z.")(".$diff_list.")|"; - $hcorrelgroup{$k}{$i}.=$val; - $count ++; - } - } - } - } - } - } - } - } - } - - - - #################################################### - ### Choose the representative ion for each group ### - #################################################### - - my %hgrouprepres; - - open(F3, $dataMatrix); - - while (my $line = <F3>){ - chomp $line; - - my @tline = split (/\t/, $line); - - - - foreach my $k (keys %hcorrelgroup){ - foreach my $i (keys %{$hcorrelgroup{$k}}){ - - if($tline[0] eq $i){ - $hgrouprepres{$k}{$i}{mass}=$hrtmz{$tline[0]}{mz}; - - my $intensity; - my $nbsubjects=0; - for(my $y=1;$y<scalar(@tline);$y++){ - $intensity += $tline[$y]; - $nbsubjects ++; - } - my $meanintensity = $intensity/$nbsubjects; - $hgrouprepres{$k}{$i}{intensity}=$meanintensity; - $hgrouprepres{$k}{$i}{squaredmassint}=($hgrouprepres{$k}{$i}{mass}**2)/($hgrouprepres{$k}{$i}{intensity}); - } - } - } - } - close F3; - - foreach my $z (keys %hgrouprepres){ - my $max_intensity = 0; - my $max_int_ion = ""; - my $max_mass = 0; - my $max_mass_ion = ""; - my $max_squared = 0; - my $max_squared_ion = ""; - foreach my $w (keys %{$hgrouprepres{$z}}){ - if($hgrouprepres{$z}{$w}{intensity} > $max_intensity){ - $max_intensity = $hgrouprepres{$z}{$w}{intensity}; - $max_int_ion = $w; - - } - if($hgrouprepres{$z}{$w}{mass} > $max_mass){ - $max_mass = $hgrouprepres{$z}{$w}{mass}; - $max_mass_ion = $w; - } - if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){ - $max_squared = $hgrouprepres{$z}{$w}{squaredmassint}; - $max_squared_ion = $w; - } - } - - my $max_int_max_mass_ion=""; - - if($repres_opt eq "max_intensity_max_mass"){ - my %hfirst; - my $first=0; - foreach my $w (reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}}){ - $first ++; - if ($first <= 3){ - $hfirst{$w} = $hgrouprepres{$z}{$w}{intensity}; - } - } - - my $first_2 = 0; - my $intens_max = 0; - my $mass_max = 0; - - foreach my $y (reverse sort {$hfirst{$a} <=> $hfirst{$b}} keys %hfirst){ - - - - $first_2 ++; - if($first_2 == 1){ - $intens_max = $hfirst{$y}; - if($intensity_threshold > $intens_max){ - $intensity_threshold = 0; - } - $max_int_max_mass_ion = $y; - $mass_max = $hgrouprepres{$z}{$y}{mass}; - } - if($hgrouprepres{$z}{$y}{mass} > $mass_max){ - if($hfirst{$y}>$intensity_threshold){ - my $a = $intens_max * $intensity_pourc; - if($hfirst{$y} > $a){ - $max_int_max_mass_ion = $y; - $mass_max = $hgrouprepres{$z}{$y}{mass}; - } - } - } - - } - } - - - $hgrouprepres{$z}{max_int}=$max_int_ion; - $hgrouprepres{$z}{max_mass}=$max_mass_ion; - $hgrouprepres{$z}{max_squared}=$max_squared_ion; - $hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion; - - } - - - ###################### - ### Print result ! ### - ###################### - - open(F4, ">$output_tabular"); - open(F5, $combined_DMVM); - - my $line_nb = 0; - my %hheader; - while (my $line = <F5>){ - chomp $line; - - - my @tline = split (/\t/, $line); - - if($line_nb == 0){ - print F4 "$line\tACF_groups"; - if($opt == 1){ - if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tACF_filter\n"} - if($repres_opt eq "mass"){print F4 "\tmass_repres\tACF_filter\n"} - if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tACF_filter\n"} - if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tACF_filter\n"} - - } - else{ - if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} - if($repres_opt eq "mass"){print F4 "\tmass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} - if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} - if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} - } - - - ### Creation of a header hash - for(my $i=0; $i<scalar(@tline);$i++){ - my $a = $tline[$i]; - $hheader{$a}=$i; - } - } - - else{ - my $find = 0; - foreach my $v (keys %hcorrelgroup){ - if(defined($hgrouprepres{$v}{$tline[0]})){ - print F4 "$line\t$v"; - - - if($repres_opt eq "intensity"){print F4 "\t$hgrouprepres{$v}{max_int}\t"} - if($repres_opt eq "mass"){print F4 "\t$hgrouprepres{$v}{max_mass}\t"} - if($repres_opt eq "mixt"){print F4 "\t$hgrouprepres{$v}{max_squared}\t"} - if($repres_opt eq "max_intensity_max_mass"){print F4 "\t$hgrouprepres{$v}{max_int_max_mass}\t"} - - - if($opt != 1){ - if(defined($hcorrelgroup{$v}{$tline[0]})){ - print F4 "$hcorrelgroup{$v}{$tline[0]}\t"; - } - else{ - print F4 "\t"; - } - } - - if($repres_opt eq "intensity"){ - if($tline[0] eq $hgrouprepres{$v}{max_int}){ - print F4 "conserved\n"; - } - else{ - print F4 "deleted\n"; - } - $find = 1; - } - if($repres_opt eq "mass"){ - if($tline[0] eq $hgrouprepres{$v}{max_mass}){ - print F4 "conserved\n"; - } - else{ - print F4 "deleted\n"; - } - $find = 1; - } - if($repres_opt eq "mixt"){ - if($tline[0] eq $hgrouprepres{$v}{max_squared}){ - print F4 "conserved\n"; - } - else{ - print F4 "deleted\n"; - } - $find = 1; - } - if($repres_opt eq "max_intensity_max_mass"){ - if($tline[0] eq $hgrouprepres{$v}{max_int_max_mass}){ - print F4 "conserved\n"; - } - else{ - print F4 "deleted\n"; - } - $find = 1; - } - } - } - if($find == 0){ - $groupct ++; - my $group = "group".$groupct; - if($opt != 1){ - print F4 "$line\t$group\t-\t-\t-\n"; - } - else{ - print F4 "$line\t$group\t-\t-\n"; - } - } - - - } - - $line_nb ++; - } - - - return ($output_sif, $output_tabular); -} - - - - - -1; \ No newline at end of file
--- a/ACF/analytic_correlation_filtration.xml Thu Apr 11 08:42:38 2019 -0400 +++ b/ACF/analytic_correlation_filtration.xml Thu May 09 08:17:53 2019 -0400 @@ -3,10 +3,6 @@ : Detect analytic correlation among data and remove them. </description> - <requirements> - <requirement type="package">perl-statistics</requirement> - <requirement type="package" version="1.19">perl-soap-lite</requirement> - </requirements> <command><![CDATA[ @@ -166,8 +162,7 @@ .. class:: warningmark For more information about input files, refer to the corresponding "W4M HowTo" page: -W4M_table_format_for_Galaxy_ -.. _W4M_table_format_for_Galaxy: http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf +http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf Mass differences list @@ -175,7 +170,7 @@ * Example: .. image:: Adduct_fragment_list.JPG - :width: 800 + :width: 350 ---------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ACF/lib/IonFiltration.pm Thu May 09 08:17:53 2019 -0400 @@ -0,0 +1,668 @@ +#!usr/bin/perl +package IonFiltration; + +### Perl modules +use strict; +use warnings; + + + + + + +######################################################################## +### Création of a hash containing all adduits and fragments possible ### +######################################################################## + + +sub MassCollecting{ + + my $mass_file = $_[0]; + my %hmass; + + open (F1, $mass_file); + + while(my $line = <F1>){ + chomp $line; + my @tline = split(/[\t;]/, $line); + if(defined($hmass{$tline[2]})){ + print "Il existe déjà une même différence de masse : $tline[2] !\n"; + } + $hmass{$tline[1]}{$tline[2]}=$tline[0]; + } + + close F1; +# my $count=0; +# foreach my $k (keys %hmass){ +# $count++; +# } + + return %hmass; + + +} + + + + + + + +######################################################## +### Creation of a sif table + correlation filtration ### +######################################################## + + +sub sifTableCreation{ + + my $file = $_[0]; + my $output_sif = $_[1]; + my $opt = $_[2]; + my $rt_threshold = $_[3]; + my $mass_threshold = $_[4]; + my $correl_threshold = $_[5]; + my $dataMatrix = $_[6]; + my $output_tabular = $_[7]; + my $combined_DMVM = $_[8]; + my $repres_opt = $_[9]; + my $intensity_threshold = $_[10]; + my $intensity_pourc = $_[11]; + my $refhmass = $_[12]; + + + + + my %hheader_file; + my %hduplicate; + + my %hcorrelgroup; + my $groupct=1; + + + my $linenb3=0; + my %hheader_line; + my %hrtmz; + + open (F5, $combined_DMVM); + while(my $line = <F5>){ + chomp $line; + my @tline = split(/\t/, $line); + + if($linenb3 == 0){ + for(my $i=0; $i<scalar(@tline);$i++){ + my $a = $tline[$i]; + $hheader_line{$a}=$i; + } + } + else{ + if(defined($hheader_line{mzmed})){ + my $b = $tline[$hheader_line{mzmed}]; + $hrtmz{$tline[0]}{mz}=$b; + } + else{ + my $b = $tline[$hheader_line{mz}]; + $hrtmz{$tline[0]}{mz}=$b; + } + if(defined($hheader_line{rtmed})){ + my $d = $tline[$hheader_line{rtmed}]; + $hrtmz{$tline[0]}{rt}=$d; + } + else{ + my $d = $tline[$hheader_line{rt}]; + $hrtmz{$tline[0]}{rt}=$d; + } + } + + $linenb3 ++; + } + close F5; + + + my $linenb=0; + + open (F1, $file) or die "Impossible to open $file\n"; + open(F2, ">$output_sif") or die "Impossible to open $output_sif\n"; + + while(my $line = <F1>){ + chomp $line; + my @tline = split(/\t/, $line); + + + ############################### + ### Création of a sif table ### + ############################### + + if($linenb == 0){ + for(my $i=0; $i<scalar(@tline);$i++){ + my $a = $tline[$i]; + $hheader_file{$i}=$a; + } + } + else{ + for(my $i=1; $i<scalar(@tline);$i++){ + my $a=$tline[0]; + my $b=$hheader_file{$i}; + my $coef=$tline[$i]; + + + if($a eq $b){ + # print "This is a correlation between A ($a) and A ($b) !\n" + } + else{ + + + ######################### + ### Remove duplicates ### + ######################### + + my $y = $a."/".$b; + my $z = $b."/".$a; + + if((!(defined($hduplicate{$y}))) && (!(defined($hduplicate{$z})))){ + + $hduplicate{$y}=1; + + if($coef > $correl_threshold){ + + print F2 "$a\t$coef\t$b\n"; + + my $count=0; + + ###################################################### + ### Analytic correlation filtrering follow options ### + ###################################################### + + + + my $amass=$hrtmz{$a}{mz}; + my $atemp=$hrtmz{$a}{rt}; + my $bmass= $hrtmz{$b}{mz}; + my $btemp=$hrtmz{$b}{rt}; + my $diff = $amass-$bmass; + $diff = abs($diff); + + ### Option 1: Don't take into acount mass information ### + + if($opt == 1){ + my $btplus = $btemp + $rt_threshold; + my $btmoins = $btemp - $rt_threshold; + if(($btmoins <= $atemp) && ($atemp <= $btplus)){ + foreach my $k (keys %hcorrelgroup){ + + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; + $groupct ++; + } + + } + } + + + ### Option 2: Check that all mass differences are include in a specific list taking into account RT information ### + + elsif($opt == 2){ + + my $print = 0; + foreach my $s (keys %{$refhmass}){ + foreach my $r (keys %{$refhmass->{$s}}){ + my $rm = $r - $mass_threshold; + my $rp = $r + $mass_threshold; + if(($diff <= $rp) && ($diff >= $rm)){ + if($print == 0){ + my $btplus = $btemp + $rt_threshold; + my $btmoins = $btemp - $rt_threshold; + + if(($btmoins <= $atemp) && ($atemp <= $btplus)){ +# foreach my $s (keys %{%$refhmass{$r}}){ + # print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n"; + foreach my $k (keys %hcorrelgroup){ + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; +# $hcorrelgroup{$k}{frag}.="#".$s; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; +# $hcorrelgroup{$groupnb}{frag}.="#".$s; + $groupct ++; + + } + $print = 1; + } + + } + } + } + } + } + + + ### Option 3: Check that all mass differences are include in a specific list, ignoring RT information ### + + elsif($opt == 3){ + + my $print = 0; + foreach my $s (keys %{$refhmass}){ + foreach my $r (keys %{$refhmass->{$s}}){ + my $rm = $r - $mass_threshold; + my $rp = $r + $mass_threshold; + if(($diff <= $rp) && ($diff >= $rm)){ + if($print == 0){ + +# foreach my $s (keys %{%$refhmass{$r}}){ + # print F2 "$line\t$hmass{$r}{$s} : $s ($r)\n"; + foreach my $k (keys %hcorrelgroup){ + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; +# $hcorrelgroup{$k}{frag}.="#".$s; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; +# $hcorrelgroup{$groupnb}{frag}.="#".$s; + $groupct ++; + } + $print = 1; + } + } + } + } + } + } + + + } + } + } + } + $linenb ++; + } + close F1; + close F2; + + + + + ############################################# + ### Join groups that have been subdivided ### + ############################################# + + my @tdelete; + + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $v (keys %hcorrelgroup){ + my $count = 0; + if ($v ne $k){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ +# if($w ne "frag"){ + if($w eq $i){ + $count = 1; + push(@tdelete, $v); + } +# } + } + } + if($count == 1){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; + } + delete($hcorrelgroup{$v}); + } + } + } + } + + foreach my $t (@tdelete){ + delete($hcorrelgroup{$t}); + } + + + ### Do it twice to see if it fix the problem of unmerge groups + + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $v (keys %hcorrelgroup){ + my $count = 0; + if ($v ne $k){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ +# if($w ne "frag"){ + if($w eq $i){ + $count = 1; + push(@tdelete, $v); + } +# } + } + } + if($count == 1){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; + } + delete($hcorrelgroup{$v}); + } + } + } + } + + foreach my $t (@tdelete){ + delete($hcorrelgroup{$t}); + } + + + + ########################################## + ### Addition of annotation information ### + ########################################## + + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $j (keys %{$hcorrelgroup{$k}}){ + my $count = 0; + if ($i ne $j){ + + my $a = $hrtmz{$i}{mz}; + my $b = $hrtmz{$j}{mz}; + + my $diff = $a - $b; + my $sign; + if($diff>0){ + $sign="+"; + } + if($diff<0){ + $sign="-"; + } + $diff = abs($diff); + + foreach my $z (keys %{$refhmass}){ + + foreach my $y (keys %{$refhmass->{$z}}){ + my $ym = $y - $mass_threshold; + my $yp = $y + $mass_threshold; + + + if(($diff <= $yp) && ($diff >= $ym)){ + my $diff_list = $diff - $y; + $diff_list = abs($diff_list); + $diff_list = sprintf ("%0.6f", $diff_list); + + if($hcorrelgroup{$k}{$i} eq 1){ + my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}=$val; + $count ++; + } + else{ + if($count == 0){ + my $val = "#".$j."|".$sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}.=$val; + $count ++; + } + else{ + my $val = $sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}.=$val; + $count ++; + } + } + } + } + } + } + } + } + } + + + + #################################################### + ### Choose the representative ion for each group ### + #################################################### + + my %hgrouprepres; + + open(F3, $dataMatrix); + + while (my $line = <F3>){ + chomp $line; + + my @tline = split (/\t/, $line); + + + + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + + if($tline[0] eq $i){ + $hgrouprepres{$k}{$i}{mass}=$hrtmz{$tline[0]}{mz}; + + my $intensity; + my $nbsubjects=0; + for(my $y=1;$y<scalar(@tline);$y++){ + $intensity += $tline[$y]; + $nbsubjects ++; + } + my $meanintensity = $intensity/$nbsubjects; + $hgrouprepres{$k}{$i}{intensity}=$meanintensity; + $hgrouprepres{$k}{$i}{squaredmassint}=($hgrouprepres{$k}{$i}{mass}**2)/($hgrouprepres{$k}{$i}{intensity}); + } + } + } + } + close F3; + + foreach my $z (keys %hgrouprepres){ + my $max_intensity = 0; + my $max_int_ion = ""; + my $max_mass = 0; + my $max_mass_ion = ""; + my $max_squared = 0; + my $max_squared_ion = ""; + foreach my $w (keys %{$hgrouprepres{$z}}){ + if($hgrouprepres{$z}{$w}{intensity} > $max_intensity){ + $max_intensity = $hgrouprepres{$z}{$w}{intensity}; + $max_int_ion = $w; + + } + if($hgrouprepres{$z}{$w}{mass} > $max_mass){ + $max_mass = $hgrouprepres{$z}{$w}{mass}; + $max_mass_ion = $w; + } + if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){ + $max_squared = $hgrouprepres{$z}{$w}{squaredmassint}; + $max_squared_ion = $w; + } + } + + my $max_int_max_mass_ion=""; + + if($repres_opt eq "max_intensity_max_mass"){ + my %hfirst; + my $first=0; + foreach my $w (reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}}){ + $first ++; + if ($first <= 3){ + $hfirst{$w} = $hgrouprepres{$z}{$w}{intensity}; + } + } + + my $first_2 = 0; + my $intens_max = 0; + my $mass_max = 0; + + foreach my $y (reverse sort {$hfirst{$a} <=> $hfirst{$b}} keys %hfirst){ + + + + $first_2 ++; + if($first_2 == 1){ + $intens_max = $hfirst{$y}; + if($intensity_threshold > $intens_max){ + $intensity_threshold = 0; + } + $max_int_max_mass_ion = $y; + $mass_max = $hgrouprepres{$z}{$y}{mass}; + } + if($hgrouprepres{$z}{$y}{mass} > $mass_max){ + if($hfirst{$y}>$intensity_threshold){ + my $a = $intens_max * $intensity_pourc; + if($hfirst{$y} > $a){ + $max_int_max_mass_ion = $y; + $mass_max = $hgrouprepres{$z}{$y}{mass}; + } + } + } + + } + } + + + $hgrouprepres{$z}{max_int}=$max_int_ion; + $hgrouprepres{$z}{max_mass}=$max_mass_ion; + $hgrouprepres{$z}{max_squared}=$max_squared_ion; + $hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion; + + } + + + ###################### + ### Print result ! ### + ###################### + + open(F4, ">$output_tabular"); + open(F5, $combined_DMVM); + + my $line_nb = 0; + my %hheader; + while (my $line = <F5>){ + chomp $line; + + + my @tline = split (/\t/, $line); + + if($line_nb == 0){ + print F4 "$line\tACF_groups"; + if($opt == 1){ + if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tACF_filter\n"} + if($repres_opt eq "mass"){print F4 "\tmass_repres\tACF_filter\n"} + if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tACF_filter\n"} + if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tACF_filter\n"} + + } + else{ + if($repres_opt eq "intensity"){print F4 "\tintensity_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} + if($repres_opt eq "mass"){print F4 "\tmass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} + if($repres_opt eq "mixt"){print F4 "\tmass2intens_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} + if($repres_opt eq "max_intensity_max_mass"){print F4 "\tmax_intensity_max_mass_repres\tisotopes_adducts_fragments_[#id|annotation(delta_annotation)]\tACF_filter\n"} + } + + + ### Creation of a header hash + for(my $i=0; $i<scalar(@tline);$i++){ + my $a = $tline[$i]; + $hheader{$a}=$i; + } + } + + else{ + my $find = 0; + foreach my $v (keys %hcorrelgroup){ + if(defined($hgrouprepres{$v}{$tline[0]})){ + print F4 "$line\t$v"; + + + if($repres_opt eq "intensity"){print F4 "\t$hgrouprepres{$v}{max_int}\t"} + if($repres_opt eq "mass"){print F4 "\t$hgrouprepres{$v}{max_mass}\t"} + if($repres_opt eq "mixt"){print F4 "\t$hgrouprepres{$v}{max_squared}\t"} + if($repres_opt eq "max_intensity_max_mass"){print F4 "\t$hgrouprepres{$v}{max_int_max_mass}\t"} + + + if($opt != 1){ + if(defined($hcorrelgroup{$v}{$tline[0]})){ + print F4 "$hcorrelgroup{$v}{$tline[0]}\t"; + } + else{ + print F4 "\t"; + } + } + + if($repres_opt eq "intensity"){ + if($tline[0] eq $hgrouprepres{$v}{max_int}){ + print F4 "conserved\n"; + } + else{ + print F4 "deleted\n"; + } + $find = 1; + } + if($repres_opt eq "mass"){ + if($tline[0] eq $hgrouprepres{$v}{max_mass}){ + print F4 "conserved\n"; + } + else{ + print F4 "deleted\n"; + } + $find = 1; + } + if($repres_opt eq "mixt"){ + if($tline[0] eq $hgrouprepres{$v}{max_squared}){ + print F4 "conserved\n"; + } + else{ + print F4 "deleted\n"; + } + $find = 1; + } + if($repres_opt eq "max_intensity_max_mass"){ + if($tline[0] eq $hgrouprepres{$v}{max_int_max_mass}){ + print F4 "conserved\n"; + } + else{ + print F4 "deleted\n"; + } + $find = 1; + } + } + } + if($find == 0){ + $groupct ++; + my $group = "group".$groupct; + if($opt != 1){ + print F4 "$line\t$group\t-\t-\t-\n"; + } + else{ + print F4 "$line\t$group\t-\t-\n"; + } + } + + + } + + $line_nb ++; + } + + + return ($output_sif, $output_tabular); +} + + + + + +1; \ No newline at end of file