# HG changeset patch # User melpetera # Date 1570724411 14400 # Node ID cfe4b819911b1962a6728de654fc7aa1947146de Uploaded diff -r 000000000000 -r cfe4b819911b ACF/Analytic_correlation_filtration.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ACF/Analytic_correlation_filtration.pl Thu Oct 10 12:20:11 2019 -0400 @@ -0,0 +1,644 @@ +#!usr/bin/perl + +### Perl modules +use warnings; +use strict; +use Getopt::Long qw(GetOptions); #Creation of script options +use Pod::Usage qw(pod2usage); #Creation of script options + +#Personnal packages +use FindBin ; ## Allows you to locate the directory of original perl script +#use lib $FindBin::Bin; +use lib "$FindBin::Bin/lib"; +use IonFiltration; + +my ($file, $mass_file, $opt, $dataMatrix, $combined_DMVM, $repres_opt, $rt_threshold, $mass_threshold, $output_sif, $output_tabular, $correl_threshold, $intensity_threshold, $intensity_pourc); #Options to complete + +######################## +### Options and help ### +######################## + +GetOptions("f=s"=>\$file, "m=s"=>\$mass_file, "o=s"=>\$opt, "d=s"=>\$dataMatrix, "v=s"=>\$combined_DMVM, "r=s"=>\$repres_opt, "rt=f"=>\$rt_threshold, "mass=f"=>\$mass_threshold, "output_sif=s"=>\$output_sif, "output_tabular=s"=>\$output_tabular, "correl=s"=>\$correl_threshold, "IT=f"=>\$intensity_threshold, "IP=f"=>\$intensity_pourc) or pod2usage(2); + +### Check required parameters : +pod2usage({-message=>q{Mandatory argument '-f' is missing}, -exitval=>1, -verbose=>0}) unless $file; +#pod2usage({-message=>q{Mandatory argument '-m' is missing}, -exitval=>1, -verbose=>0}) unless $mass_file; +pod2usage({-message=>q{Mandatory argument '-o' is missing. It correspond to the grouping method for analytical correlation groups formation. 
+#It should be a number (1 ; 2 or 3) : +# 1 : Don't take into acount mass information (only RT) ; +# 2 : Check that all mass differences are include in a specific list and taking into acount RT information +# 3 : Check that all mass differences are include in a specific list, ignoring RT information +#To use the tool without takinf into account mass and RT information, use option 1 and define the RT threshold to 999999999.}, -exitval=>1, -verbose=>0}) unless $opt; +pod2usage({-message=>q{Mandatory argument '-r' is missing. It correspond to the group representent choosing method for analytical correlation groups formation. +It should be one of the 3 options below : + "mass" : choose the ion with the highest mass as the representant + "intensity" : choose the ion with the highest intensity as the representant + "mixt" : choose the ion with the highest (mass^2 * intensity) as the representant + "max_intensity_max_mass" : choose tha ion witht he highest intenisty among the 5 most intense ions of the group}, -exitval=>1, -verbose=>0}) unless $repres_opt; +pod2usage({-message=>q{Mandatory argument '-d' is missing}, -exitval=>1, -verbose=>0}) unless $dataMatrix; +pod2usage({-message=>q{Mandatory argument '-v' is missing}, -exitval=>1, -verbose=>0}) unless $combined_DMVM; +#pod2usage({-message=>q{Mandatory argument '-rt' is missing}, -exitval=>1, -verbose=>0}) unless $rt_threshold; +#pod2usage({-message=>q{Mandatory argument '-mass' is missing}, -exitval=>1, -verbose=>0}) unless $mass_threshold; +pod2usage({-message=>q{Mandatory argument '-correl' is missing}, -exitval=>1, -verbose=>0}) unless $correl_threshold; +pod2usage({-message=>q{Mandatory argument '-output_tabular' is missing}, -exitval=>1, -verbose=>0}) unless $output_tabular; +pod2usage({-message=>q{Mandatory argument '-output_sif' is missing}, -exitval=>1, -verbose=>0}) unless $output_sif; + + +#if(($opt != 1) && ($opt != 2) && ($opt != 3)){ +# print "you must indicate \"1\", \"2\" or \"3\" for the --o otpion\n"; 
+# exit; +#} + + + +if(($repres_opt ne "mass") && ($repres_opt ne "intensity") && ($repres_opt ne "mixt") && ($repres_opt ne "max_intensity_max_mass")){ + print "you must indicate \"mass\", \"intensity\", \"mix\" or \"max_intensity_max_mass\" for the --r otpion\n"; + exit; +} + + + +######################################################################### +#### Création of a hash containing all adduits and fragments possible ### +######################################################################### + +my %hmass; +if($opt != 1){ + %hmass = IonFiltration::MassCollecting($mass_file); + +} + +my $refhmass = \%hmass; + +print "Création of a hash containing all adduits and fragments possible\n"; + + +######################################################## +### Creation of a sif table + correlation filtration ### +######################################################## + +my %hrtmz; +($output_sif, %hrtmz) = IonFiltration::sifTableCreation($file, $output_sif, $opt, $rt_threshold, $mass_threshold, $correl_threshold, $dataMatrix, $output_tabular, $combined_DMVM, $repres_opt, $intensity_threshold, $intensity_pourc, \%hmass); +print "Creation of a sif table + correlation filtration done\n"; + + +###################################################### +### Analytic correlation filtrering follow options ### +###################################################### + +my %hheader_file; +my %hduplicate; + +my %hcorrelgroup; +my $groupct=1; + +my $linenb3=0; +my %hheader_line; + + + +open (F1, $output_sif) or die "Impossible to open $output_sif\n"; + +while(my $line = ){ + my $count=0; + chomp $line; + my @tline = split(/\t/, $line); + my $a = $tline[0]; + my $b = $tline[2]; + + my $amass=$hrtmz{$a}{mz}; + my $atemp=$hrtmz{$a}{rt}; + my $bmass= $hrtmz{$b}{mz}; + my $btemp=$hrtmz{$b}{rt}; + print "YY : $a ==> $amass ; $b ==> $bmass\n"; + my $diff = $amass-$bmass; + $diff = abs($diff); + + ### Option 1: Don't take into acount mass information ### + + if($opt == 1){ + my $btplus = 
$btemp + $rt_threshold; + my $btmoins = $btemp - $rt_threshold; + if(($btmoins <= $atemp) && ($atemp <= $btplus)){ + foreach my $k (keys %hcorrelgroup){ + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; + $groupct ++; + } + } + } + + + + ### Option 2: Check that all mass differences are include in a specific list taking into account RT information ### + + elsif($opt == 2){ + + my $print = 0; + foreach my $s (keys %{$refhmass}){ + foreach my $r (keys %{$refhmass->{$s}}){ + my $rm = $r - $mass_threshold; + my $rp = $r + $mass_threshold; + if(($diff <= $rp) && ($diff >= $rm)){ + if($print == 0){ + my $btplus = $btemp + $rt_threshold; + my $btmoins = $btemp - $rt_threshold; + + if(($btmoins <= $atemp) && ($atemp <= $btplus)){ + foreach my $k (keys %hcorrelgroup){ + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; + $groupct ++; + } + $print = 1; + } + } + } + } + } + } + + + ### Option 3: Check that all mass differences are include in a specific list, ignoring RT information ### + + elsif($opt == 3){ + + my $print = 0; + foreach my $s (keys %{$refhmass}){ + foreach my $r (keys %{$refhmass->{$s}}){ + my $rm = $r - $mass_threshold; + my $rp = $r + $mass_threshold; + if(($diff <= $rp) && ($diff >= $rm)){ + if($print == 0){ + + foreach my $k (keys %hcorrelgroup){ + if((defined($hcorrelgroup{$k}{$a})) || (defined($hcorrelgroup{$k}{$b}))){ + $hcorrelgroup{$k}{$a}=1; + $hcorrelgroup{$k}{$b}=1; + $count++; + last; + } + } + if($count == 0){ + my $groupnb="group".$groupct; + $hcorrelgroup{$groupnb}{$a}=1; + $hcorrelgroup{$groupnb}{$b}=1; 
+ $groupct ++; + } + $print = 1; + } + } + } + } + } +} +close F1; + +print "Analytic correlation filtrering follow options done\n"; + + +############################################# +### Join groups that have been subdivided ### +############################################# + +my @tdelete; + +foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $v (keys %hcorrelgroup){ + my $count = 0; + if ($v ne $k){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + if($w eq $i){ + $count = 1; + push(@tdelete, $v); + } + } + } + if($count == 1){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; + } + delete($hcorrelgroup{$v}); + } + } + } +} + +foreach my $t (@tdelete){ + delete($hcorrelgroup{$t}); +} + + +### Do it twice to see if it fix the problem of unmerge groups + +foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $v (keys %hcorrelgroup){ + my $count = 0; + if ($v ne $k){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + if($w eq $i){ + $count = 1; + push(@tdelete, $v); + } + } + } + if($count == 1){ + foreach my $w (keys %{$hcorrelgroup{$v}}){ + $hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w}; + } + delete($hcorrelgroup{$v}); + } + } + } +} + +foreach my $t (@tdelete){ + delete($hcorrelgroup{$t}); +} + +print "Join groups that have been subdivided done\n"; + +####################################################### +### Addition of annotation information among groups ### +####################################################### + +foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + foreach my $j (keys %{$hcorrelgroup{$k}}){ + my $count = 0; + if ($i ne $j){ + + my $a = $hrtmz{$i}{mz}; + my $b = $hrtmz{$j}{mz}; + + my $diff = $a - $b; + my $sign; + if($diff>0){ + $sign="+"; + } + if($diff<0){ + $sign="-"; + } + $diff = abs($diff); + + foreach my $z (keys %{$refhmass}){ + + foreach my $y (keys %{$refhmass->{$z}}){ + my $ym = $y 
- $mass_threshold; + my $yp = $y + $mass_threshold; + + + if(($diff <= $yp) && ($diff >= $ym)){ + my $diff_list = $diff - $y; + $diff_list = abs($diff_list); + $diff_list = sprintf ("%0.6f", $diff_list); + + if($hcorrelgroup{$k}{$i} eq 1){ + my $val = "@".$j."|".$sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}=$val; + $count ++; + } + else{ + if($count == 0){ + my $val = "@".$j."|".$sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}.=$val; + $count ++; + } + else{ + my $val = $sign."(".$z.")(".$diff_list.")|"; + $hcorrelgroup{$k}{$i}.=$val; + $count ++; + } + } + } + } + } + } + } + } +} + + +print "Addition of annotation information among groups done\n"; + + +#################################################### +### Choose the representative ion for each group ### +#################################################### + +my %hgrouprepres; + +open(F3, $dataMatrix); + +while (my $line = ){ + chomp $line; + + my @tline = split (/\t/, $line); + + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + if($tline[0] eq $i){ + $hgrouprepres{$k}{$i}{mass}=$hrtmz{$tline[0]}{mz}; + my $intensity; + my $nbsubjects=0; + for(my $y=1;$y $max_intensity){ + $max_intensity = $hgrouprepres{$z}{$w}{intensity}; + $max_int_ion = $w; + } + if($hgrouprepres{$z}{$w}{mass} > $max_mass){ + $max_mass = $hgrouprepres{$z}{$w}{mass}; + $max_mass_ion = $w; + } + if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){ + $max_squared = $hgrouprepres{$z}{$w}{squaredmassint}; + $max_squared_ion = $w; + } + } + + my $max_int_max_mass_ion=""; + + if($repres_opt eq "max_intensity_max_mass"){ + my %hfirst; + my $first=0; + foreach my $w (reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}}){ + $first ++; + if ($first <= 3){ + $hfirst{$w} = $hgrouprepres{$z}{$w}{intensity}; + } + } + + my $first_2 = 0; + my $intens_max = 0; + my $mass_max = 0; + + foreach my $y (reverse sort {$hfirst{$a} <=> 
$hfirst{$b}} keys %hfirst){ + + $first_2 ++; + if($first_2 == 1){ + $intens_max = $hfirst{$y}; + if($intensity_threshold > $intens_max){ + $intensity_threshold = 0; + } + $max_int_max_mass_ion = $y; + $mass_max = $hgrouprepres{$z}{$y}{mass}; + } + if($hgrouprepres{$z}{$y}{mass} > $mass_max){ + if($hfirst{$y}>$intensity_threshold){ + my $a = $intens_max * $intensity_pourc; + if($hfirst{$y} > $a){ + $max_int_max_mass_ion = $y; + $mass_max = $hgrouprepres{$z}{$y}{mass}; + } + } + } + } + } + + $hgrouprepres{$z}{max_int}=$max_int_ion; + $hgrouprepres{$z}{max_mass}=$max_mass_ion; + $hgrouprepres{$z}{max_squared}=$max_squared_ion; + $hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion; + +} + + +print "Choose the representative ion for each group done\n"; + +############################################################################# +### Addition of annotation information relative to the representative ion ### +############################################################################# + +my %hreprescomparison; + +my $representative=""; + +if($opt != 1){ + foreach my $k (keys %hcorrelgroup){ + foreach my $i (keys %{$hcorrelgroup{$k}}){ + + if($repres_opt eq "mass"){$representative = $hgrouprepres{$k}{max_mass}} + if($repres_opt eq "intensity"){$representative = $hgrouprepres{$k}{max_int}} + if($repres_opt eq "mixt"){$representative = $hgrouprepres{$k}{max_squared}} + if($repres_opt eq "max_intensity_max_mass"){$representative = $hgrouprepres{$k}{max_int_max_mass}} + + + my $count = 0; + if ($i ne $representative){ + + my $a = $hrtmz{$i}{mz}; + my $b = $hrtmz{$representative}{mz}; + + my $diff = $a - $b; + my $sign; + if($diff>0){ + $sign="+"; + } + if($diff<0){ + $sign="-"; + } + $diff = abs($diff); + + foreach my $z (keys %{$refhmass}){ + + foreach my $y (keys %{$refhmass->{$z}}){ + my $ym = $y - $mass_threshold; + my $yp = $y + $mass_threshold; + + if(($diff <= $yp) && ($diff >= $ym)){ + my $diff_list = $diff - $y; + $diff_list = abs($diff_list); + $diff_list 
= sprintf ("%0.4f", $diff_list); + if($hcorrelgroup{$k}{$i} eq 1){ + my $valrep = "[M ".$sign."(".$z.")]|"; + $hreprescomparison{$k}{$i}{repres_diff}=$valrep; + $count ++; + } + else{ + if($count == 0){ + my $valrep = "[M ".$sign."(".$z.")]|"; + $hreprescomparison{$k}{$i}{repres_diff}.=$valrep; + $count ++; + } + else{ + my $valrep = "[M ".$sign."(".$z.")]|"; + $hreprescomparison{$k}{$i}{repres_diff}.=$valrep; + $count ++; + } + } + } + } + } + } + else{ + $hreprescomparison{$k}{$i}{repres_diff}="M"; + } + } + } +} + + +print "Addition of annotation information relative to the representative ion done\n"; + +############################## +### Print in result file ! ### +############################## + +open(F4, ">$output_tabular"); +open(F5, $combined_DMVM); + +my $line_nb = 0; +my %hheader; +while (my $line = ){ + chomp $line; + + + my @tline = split (/\t/, $line); + + if($line_nb == 0){ + print F4 "$line\tACorF_groups"; + if($opt == 1){ + if($repres_opt eq "intensity"){print F4 "\tACorF_filter\tintensity_repres\n"} + if($repres_opt eq "mass"){print F4 "\tACorF_filter\tmass_repres\n"} + if($repres_opt eq "mixt"){print F4 "\tACorF_filter\tmass2intens_repres\n"} + if($repres_opt eq "max_intensity_max_mass"){print F4 "\tACorF_filter\tmax_intensity_max_mass_repres\n"} + } + else{ + if($repres_opt eq "intensity"){print F4 "\tisotopes_adducts_fragments_[\@id|annotation(delta_annotation)]\tACorF_filter\tintensity_repres\tannotation_relative_to_representative\n"} + if($repres_opt eq "mass"){print F4 "\tisotopes_adducts_fragments_[\@id|annotation(delta_annotation)]\tACorF_filter\tmass_repres\tannotation_relative_to_representative\n"} + if($repres_opt eq "mixt"){print F4 "\tisotopes_adducts_fragments_[\@id|annotation(delta_annotation)]\tACorF_filter\tmass2intens_repres\tannotation_relative_to_representative\n"} + if($repres_opt eq "max_intensity_max_mass"){print F4 
"\tisotopes_adducts_fragments_[\@id|annotation(delta_annotation)]\tACorF_filter\tmax_intensity_max_mass_repres\tannotation_relative_to_representative\n"} + } + + + ### Creation of a header hash + for(my $i=0; $i + * **@date creation**: 2018/11/17 + * **@main usage**: Reduction of analytical redundancies in Metabolomics data + + +Configuration +----------- + +### Requirement: + * perl + + +### Deploy: + + +### Warnings: + + +Services provided +----------- + + + +Technical description +----------- + + +Notes +----------- + + + + +License (optional) +----------- + +This code is published under CECILL 2.1. diff -r 000000000000 -r cfe4b819911b ACF/analytic_correlation_filtration.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ACF/analytic_correlation_filtration.xml Thu Oct 10 12:20:11 2019 -0400 @@ -0,0 +1,206 @@ + + + : Detect analytic correlation among data and remove them. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fist_Ion_ID \\t Correlation_Value \\t Second_Ion_ID + * Example: + +.. image:: Correlation_matrix.JPG + :width: 800 + +Data matrix file + * "variable x sample" **dataMatrix** : tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the variable metadata (see below) + +Variable metadata file + * "variable x metadata" **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values + +.. 
class:: warningmark + +For more information about input files, refer to the corresponding "W4M HowTo" page: +http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf + + +Mass differences list + * A file containing a list of known adducts, fragments or isotopes with the mass differences linked to them + * Example: + +.. image:: Adduct_fragment_list.JPG + :width: 350 + +--------------------------------------------------- + +---------- +Parameters +---------- + +Take into account mass differences between 2 ions : + * You can enter a list of mass differences that are known. The file must be organized with a first column for the mass difference type (isotope, fragment, etc...), a second column with the mass difference chemical formula (H+, -2H+K, etc...) and a third column for the mass difference value + * If you choose to use a mass differences table, you have to choose a mass difference range that will be used as a threshold to accept or reject a difference value (a mass difference is recognized when it matches a value in the file +/- this threshold). + +Take into account retention time : + * You can use retention time as a criterion to group ions. You have to choose a value that will be used as an interval : 2 ions are grouped when their retention times are equal +/- the threshold. + +Choose the representative ion for each group, there are 4 possibilities to determine the representative ion : + * The ion with the highest intensity (recommended for LC/MS) + * The ion with the highest mass + * The ion with the highest "mass^2 * intensity" value + * The ion with the highest mass among the 3 most intense ions of the group, unless that ion has an intensity below a determined percentage of the intensity of the most intense ion (for example 50%) (recommended for GC/MS) + + +--------------------------------------------------- + +-------------- +Example of use +-------------- + +Add examples according to the ppt presentation ! 
+ + + + ]]> + \ No newline at end of file diff -r 000000000000 -r cfe4b819911b ACF/data/default_list.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ACF/data/default_list.csv Thu Oct 10 12:20:11 2019 -0400 @@ -0,0 +1,225 @@ +adduit -2H+Na+K 59.9378259 +adduit H 1.007825032 +adduit -H+K 37.95588165 +adduit -H+Na 21.98194425 +adduit -3H+3Na 65.94583274 +adduit -4H+4K 151.8235266 +adduit -4H+4Na 87.92777699 +adduit -3H+3K 113.8676449 +adduit -2H+2K 75.9117633 +adduit -2H+2Na 43.9638885 +adduit 2H 2.015650064 +adduit Cl 34.96885268 +adduit -2H+Ca 37.94694092 +isotope 13C db 0.501677419 +isotope 13C 1.003354838 +isotope 15N 0.997034893 +isotope 18O 2.00424638 +isotope 34S 1.9957959 +isotope 41K 1.99811908 +isotope 37Cl 1.99704991 +isotope 13C2 2.006709676 +isotope 13C3 3.010064513 +isotope 13C+37Cl 3.000404748 +isotope 13C+18O 3.007601218 +isotope 13C+34S 2.999150738 +isotope 44Ca 3.99289082 +adduit CH3OH 32.02621475 +adduit CH3CN 41.0265491 +adduit H2O 18.01056468 +adduit 2(H2O) 36.02112937 +adduit NaCl 57.95862196 +adduit HCOOH 46.0054793 +adduit +(HCOOH)+(HCOOK) 129.9668403 +adduit +(HCOOH)+(HCOONa) 113.9929029 +adduit +(HCOOH)+2(HCOONa) 181.9803264 +adduit HCOOK 83.96136095 +adduit +(HCOOK)+(HCOONa) 151.9487845 +adduit HCOONa 67.98742355 +adduit 2(HCOOH) 92.01095861 +adduit +2(HCOOH)+(HCOOK) 175.9723196 +adduit +2(HCOOH)+(HCOONa) 159.9983822 +adduit 2(HCOOK) 167.9227219 +adduit 2(HCOONa) 135.9748471 +fragment C11H18O9 294.0950822 +fragment C12H16O12 352.064176 +fragment C12H20O9 308.1107322 +fragment C2H2O 42.01056468 +fragment C2H3. 27.0229265 +fragment C2H3N 41.0265491 +fragment C2H3NO3 89.01129296 +fragment C2H3O. 43.01784112 +fragment C2H4 28.03130013 +fragment C2H4N. 42.03382553 +fragment C2H4O 44.02621475 +fragment C2H5. 29.03857656 +fragment C2H5N 43.04219916 +fragment C2H5NO2 75.0320284 +fragment C2H5O. 
45.03349118 +fragment C2H5O6P 155.9823745 +fragment C2H6 30.04695019 +fragment C2H7N 45.05784922 +fragment C2HNO2 71.00072827 +fragment C3H4O3 88.01604399 +fragment C3H5. 41.03857656 +fragment C3H5NO2 87.0320284 +fragment -(C3H5O2NS)-(NH3) 136.0306485 +fragment C3H5O2NS 119.0040994 +fragment C3H6 42.04695019 +fragment C3H6O3 90.03169405 +fragment C3H7. 43.05422662 +fragment C3H7O2N 89.04767846 +fragment C3H7O2NS 121.0197495 +fragment C3H7O6P 169.9980246 +fragment C4H6 54.04695019 +fragment C4H6O2 86.03677943 +fragment C4H6O4 118.0266087 +fragment C4H7. 55.05422662 +fragment C4H8O3 104.0473441 +fragment C4H9 57.07042529 +fragment C5H7O3N 129.0425931 +fragment C5H8O3NS 162.0224891 +fragment C5H8O4 132.0422587 +fragment C6H10O4 146.0579088 +fragment -(C6H10O5)-(H2O) 180.0633881 +fragment C6H10O5 162.0528234 +fragment C6H10O7 194.0426527 +fragment C6H8O6 176.032088 +fragment CH2O 30.01056468 +fragment -(CH2S)-(HCOOH) 91.99320037 +fragment -(CH2S)-(NH3) 63.01427016 +fragment CH2S 45.98772106 +fragment CH3. 15.0229265 +fragment CH3COO. 59.01275574 +fragment CH3COOH 60.02112937 +fragment CH3N 29.0265491 +fragment CH3O. 31.01784112 +fragment CH3OH 32.02621475 +fragment CH4 16.03130013 +fragment CH4N. 30.03382553 +fragment -(CH4S)-(HCOOH) 94.00885043 +fragment -(CH4S)-(NH3) 65.02992022 +fragment CH4S 48.00337113 +fragment CH5N 31.04219916 +fragment Cl. 34.96830408 +fragment CO 27.99491462 +fragment -(CO2)-(CO) 71.98474386 +fragment CO2 43.98982924 +fragment -(H2)-(NH3) 19.04219916 +fragment H2 2.015650064 +fragment -(H2O)-(CO2) 62.00039392 +fragment -(H2O)-(HCOOH) 64.01604399 +fragment -(H2O)-(NH3) 35.03711378 +fragment H2O 18.01056468 +fragment -(H2O)-2(CO2) 105.9902232 +fragment -(H2S)-(H2O) 51.99828575 +fragment H2S 33.98772106 +fragment H2SO4 97.96737954 +fragment H3PO4 97.97689521 +fragment HCl 35.97667771 +fragment HCN 27.01089903 +fragment -(HCOOH)-(HCN) 73.01637834 +fragment HCOOH 46.0054793 +fragment HS. 
32.97934743 +fragment -(NC3H9)-(CH3COOH) 119.0946287 +fragment -(NC3H9)-(H2O) 77.08406397 +fragment -(NC3H9)-(HCOOH) 105.0789786 +fragment NC3H9 59.07349929 +fragment NaCl 57.95862196 +fragment NH2CO. 44.01309008 +fragment -(NH3)-(CO2)-(H2O) 79.02694302 +fragment -(NH3)-(CO2) 61.01637834 +fragment -(NH3)-(CONH) 60.03236275 +fragment -(NH3)-(HCOOH) 63.0320284 +fragment NH3 17.0265491 +fragment NH3CO 45.02146372 +fragment NHCO 43.00581365 +fragment OH. 17.00219105 +fragment PO3 78.95850549 +fragment SO2 63.96190024 +fragment SO3 79.95681486 +fragment -2(H2O)-(CO2) 80.01095861 +fragment -2(H2O)-(HCOOH)-(NH3) 99.05315777 +fragment -2(H2O)-(HCOOH) 82.02660867 +fragment 2(H2O) 36.02112937 +fragment 2(HCOOH) 92.01095861 +fragment -2(NH3)-(CO)-(CO2) 106.0378421 +fragment -2(NH3)-(CO) 62.04801281 +fragment 2(NH3) 34.05309819 +fragment 3(H2O) 54.03169405 +fragment 3(NH3) 51.07964729 +fragment 4(H2O) 72.04225874 +fragment C10H11O3N5 249.0861892 +fragment C10H13O4N5 267.0967539 +fragment C10H14O7N5P 347.0630844 +fragment C10H15O5N5 285.1073186 +fragment C2H3NO2 73.01637834 +fragment C2H4O2 60.02112937 +fragment C2H5NO3 91.02694302 +fragment C2H6O2 62.03677943 +fragment C2H6O3 78.03169405 +fragment -(C2H6O3)-(H2O) 96.04225874 +fragment C2H6O4 94.02660867 +fragment C2H7NO2 77.04767846 +fragment C3H10O5 126.0528234 +fragment -(C3H6O3)-(CHNO) 133.0375077 +fragment C3H6O4 106.0266087 +fragment C3H8O3 92.04734412 +fragment C3H8O4 108.0422587 +fragment C4H10O5 138.0528234 +fragment C4H5NO3 115.026943 +fragment C4H8O4 120.0422587 +fragment C5H10O4 134.0579088 +fragment C5H13O4N 151.0844579 +fragment C6H11O4N 161.0688078 +fragment C6H11O5N 177.0637225 +fragment C6H13O5N 179.0793725 +fragment C5H10O5 150.0528234 +fragment C5H10O6 166.047738 +fragment C5H12O2 104.0837296 +fragment -(C5H12O2)-(H2O) 122.0942943 +fragment C5H5N5 135.0544952 +fragment C5H5ON5 151.0494098 +fragment C5H6O2 98.03677943 +fragment C5H7O2N5 169.0599745 +fragment -(C5H7O3N)-(CO2) 173.0324223 +fragment 
-(C5H7O3N)-(H2O) 147.0531578 +fragment C5H8N3 110.0718223 +fragment C5H8O3 116.0473441 +fragment C5H8O5N5P 249.026305 +fragment C5H9O3 117.0551691 +fragment C5H9O6P 196.0136746 +fragment C5H9O7P 212.0085893 +fragment C6H10O3 130.0629942 +fragment -(C6H10O3)-(H2O) 148.0735589 +fragment C6H11O4N3PS 252.0207885 +fragment C6H11O4NPS 224.0146405 +fragment C6H12O5 164.0684735 +fragment C6H14O6 182.0790382 +fragment C6H14O7 198.0739528 +fragment C6H16O7 200.0896029 +fragment C6H16O8 216.0845175 +fragment C6H8N3 122.0718223 +fragment C6H8NS 126.0377453 +fragment C7H5ON5 175.0494098 +fragment C7H6ON6 190.0603088 +fragment C7H7O2N5 193.0599745 +fragment C7H11O6N 205.0586371 +fragment C8H14O7 222.0739528 +fragment C8H5O3N5 219.039239 +fragment C8H7O4N5 237.0498037 +fragment C9H10O4N2 210.0640568 +fragment C9H11O3N3 209.0800412 +fragment C9H11O4N3 225.0749558 +fragment C9H12O5N2 228.0746215 +fragment C9H12O6N3P 289.0463717 +fragment C9H13O4N3 227.0906059 +fragment C9H14O7N3P 307.0569364 +fragment C9H16O8 252.0845175 +fragment CH2N2 42.02179806 +fragment -(CH2O)-(H2O) 48.02112937 +fragment CH5NO 47.03711378 +fragment -(H3PO4)-(CHNO) 140.9827089 +fragment -(H3PO4)-(H2O) 115.9874599 +fragment -(H3PO4)-(NH3) 115.0034443 +fragment HPO3 79.96633052 diff -r 000000000000 -r cfe4b819911b ACF/lib/IonFiltration.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ACF/lib/IonFiltration.pm Thu Oct 10 12:20:11 2019 -0400 @@ -0,0 +1,181 @@ +#!usr/bin/perl +package IonFiltration; + +### Perl modules +use strict; +use warnings; + + + + + + +######################################################################## +### Création of a hash containing all adduits and fragments possible ### +######################################################################## + + +sub MassCollecting{ + + my $mass_file = $_[0]; + my %hmass; + + open (F1, $mass_file); + + while(my $line = ){ + chomp $line; + my @tline = split(/[\t;]/, $line); + if(defined($hmass{$tline[2]})){ + print "The mass difference already 
exists : $tline[2] !\n"; + } + $hmass{$tline[1]}{$tline[2]}=$tline[0]; + } + + close F1; + return %hmass; + +} + + + + + + + +######################################################## +### Creation of a sif table + correlation filtration ### +######################################################## + + +sub sifTableCreation{ + + my $file = $_[0]; + my $output_sif = $_[1]; +# my $opt = $_[2]; +# my $rt_threshold = $_[3]; +# my $mass_threshold = $_[4]; + my $correl_threshold = $_[5]; +# my $dataMatrix = $_[6]; +# my $output_tabular = $_[7]; + my $combined_DMVM = $_[8]; +# my $repres_opt = $_[9]; +# my $intensity_threshold = $_[10]; +# my $intensity_pourc = $_[11]; +# my $refhmass = $_[12]; + + + + + my %hheader_file; + my %hduplicate; + + my %hcorrelgroup; + my $groupct=1; + + + my $linenb3=0; + my %hheader_line; + my %hrtmz; + + open (F5, $combined_DMVM); + while(my $line = ){ + chomp $line; + my @tline = split(/\t/, $line); + + if($linenb3 == 0){ + for(my $i=0; $i$output_sif") or die "Impossible to open $output_sif\n"; + + + while(my $line = ){ + chomp $line; + my @tline = split(/\t/, $line); + + ############################### + ### Création of a sif table ### + ############################### + + if($linenb == 0){ + for(my $i=0; $i $correl_threshold){ # Only when you want to consider negative correlations + if($coef > $correl_threshold){ + + print F2 "$a\t$coef\t$b\n"; + + my $count=0; + + } + } + } + } + } + $linenb ++; + } + close F1; + close F2; + return ($output_sif, %hrtmz); +} + + + + + +1; \ No newline at end of file diff -r 000000000000 -r cfe4b819911b ACF/static/images/Adduct_fragment_list.JPG Binary file ACF/static/images/Adduct_fragment_list.JPG has changed diff -r 000000000000 -r cfe4b819911b ACF/static/images/Correlation_matrix.JPG Binary file ACF/static/images/Correlation_matrix.JPG has changed