changeset 0:4660bf9c8059 draft

Uploaded
author jcb-mpl
date Tue, 27 Apr 2021 14:52:17 +0000
parents
children c1008b92a6c8
files 2018_07_check_peaks.xml src/mz_9_compares_peaks.sci src/mz_9_map_differences_export.sci test-data/Peaks_details.mat test-data/Peaks_list.tabular test-data/ref_mz_rt.tab
diffstat 6 files changed, 375 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2018_07_check_peaks.xml	Tue Apr 27 14:52:17 2021 +0000
@@ -0,0 +1,175 @@
+<tool id="2021_05_checks_ref_features" name="Features -> checks references" version="0.0.1">
+
+  <description>   </description>
+  
+  <!-- Runtime dependency: the scilab package. -->
+  <requirements>
+      <requirement type="package" >scilab</requirement>
+  </requirements>
+  
+  <!-- Any exit code of 1 or more is reported as a fatal error. -->
+  <stdio>
+      <exit_code range="1:" level="fatal" />
+  </stdio>
+  
+  <!-- Feeds the generated script to scilab-cli on stdin; uses the copy under
+       $__root_dir__/packages/scilab-6.1.0 if that directory exists, else the scilab-cli on PATH. -->
+  <command>
+      <![CDATA[ 
+      if [ -d $__root_dir__/packages/scilab-6.1.0 ]; then $__root_dir__/packages/scilab-6.1.0/bin/scilab-cli -nb -quit -f $* < ${script_file}; else scilab-cli -nb -quit -f $* < ${script_file}; fi 
+      ]]>
+  </command>
+  <!-- Scilab driver script: loads mz_9_compares_peaks.sci, reads the inputs, calls
+       mz_9_compares_peaks and writes the result as a table with div2tab. -->
+  <configfiles>
+    <configfile name="script_file">
+        <![CDATA[ exec("$__tool_directory__/src/mz_9_compares_peaks.sci",-1); ...
+        lasterror(); ...
+        ... //load "${peaks_list}"; ... // 22avril21
+        peaks_list=glx_tab2div("${peaks_list}"); ...
+        ...
+        load "${peaks_detail}"; ...
+        x836=glx_tab2div("${peaks_reference}"); ...
+        diff_mz=${diff_mz}; ...
+        k=${k}; ...
+        x_peaks_final=mz_9_compares_peaks(peaks_list,peaks_detail,x836,diff_mz,k); ...
+        ...
+        div2tab(x_peaks_final,"${x_peaks_final}"); ... 
+        if ~isempty(lasterror(%f)); ...
+            write(0,lasterror()); ...
+        end; ]]>
+    </configfile>
+  </configfiles>
+  <!-- Inputs: the feature table and detail file, the reference table, and the two matching parameters. -->
+  <inputs>
+    <param name="peaks_list"      format="tabular"     type="data"      label="Features list"   help="from function  EIC->peaks:features"    />
+    <param name="peaks_detail"    format="mat"     type="data"      label="Detailed features"     help="from function  EIC->peaks:features"  />
+    <param name="peaks_reference" format="tabular" type="data"      label="Reference features"     help="2columns: mz and RT, with column and line headers" />
+    <param name="diff_mz"         value="0.0050"   type="float"     label="Threshold for m/z values" />
+    <param name="k"               value="1"        type="integer"   label="Number of extracted features for each reference feature"  help="the closest to the reference peak" />
+   
+  </inputs>
+  
+  <!-- Output: the tabular dataset written by the driver script. -->
+  <outputs>
+    <data name="x_peaks_final"   format="tabular"  label="Identified features" />
+  </outputs>
+  
+  <!-- Functional test on the bundled test data: the output must contain both listed m/z values. -->
+  <tests>
+  
+  <test>
+       <param  name="peaks_list"          value="Peaks_list.tabular"/>
+       <param  name="peaks_detail"        value="Peaks_details.mat"/>
+       <param  name="peaks_reference"     value="ref_mz_rt.tab"/>
+       <param  name="diff_mz"             value="0.0050"/>
+       <param  name="k"                   value="1"/>
+       <output name="x_peaks_final">
+           <assert_contents> 
+       	<has_text text="136.0616"/>
+       	<has_text text="229.135"/>   
+           </assert_contents>
+       </output>
+    </test>
+    
+  </tests>
+
+<help>
+
+
+**Author**  Jean-Claude Boulet (INRA).
+
+
+---------------------------------------------------
+
+==================================
+FEATURES EXPERIMENTAL VS REFERENCE
+==================================
+
+
+-----------
+Description
+-----------
+
+This tool looks up each reference feature among the experimental features and reports the closest experimental feature(s). 
+
+-----------
+Input files
+-----------
+
+**Features list**
+
+The tabular file yielded by the function: EICs -> peaks:features
+
+
+**Detailed features**
+
+The HDF5-scilab file yielded by the function: EICs -> peaks:features
+
+
+**Reference features**
+
+A tabular file containing 2 columns: m/z values then retention times
+
+Row labels are the names of the reference features, e.g. chemical compounds
+
+
+
+----------
+Parameters
+----------
+
+**Threshold for m/z values**
+
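+For each reference feature, the experimental features are ranked by the distance between their median RT and the reference RT. Features whose m/z differs from the reference m/z by the threshold or more receive a large distance penalty, so features within the threshold are selected first in most cases. Check the reported m/z difference to confirm each match.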
+
+Default: 0.0050
+
+**Number of extracted features for each reference feature**
+
+e.g. if the number is 2, the algorithm will extract the 2 experimental features closest to each reference feature.
+
+
+------
+Output
+------
+
+
+**Identified features**
+
+A tabular file with the columns: 
+
+- name of the reference feature
+
+- number of the reference feature in the input file: Reference features
+
+- number of the experimental feature in the input files: Features List and Detailed features
+
+- number of identifications: the number of reference features this experimental feature has been assigned to (ideally 1) 
+
+- m/z value of the reference feature
+
+- m/z value of the experimental feature 
+
+- difference of m/z values
+
+- RT of the reference feature
+
+- RT of the experimental feature 
+
+- difference of RT 
+
+- signal of the experimental feature 
+
+
+</help>
+
+
+<citations>
+
+</citations>
+
+
+</tool>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/mz_9_compares_peaks.sci	Tue Apr 27 14:52:17 2021 +0000
@@ -0,0 +1,111 @@
+function x_peaks_final=mz_9_compares_peaks(peaks_list,peaks_detail,x836,diff_mz,k)
+    // Matches every reference feature to its k nearest experimental features.
+    //
+    // Inputs
+    //   peaks_list   : experimental features; its .d field is read with the
+    //                  columns TRmin / TRmax / TRmedian / mz / signal
+    //   peaks_detail : detailed features; kept in the signature for existing
+    //                  callers but not used by this function
+    //   x836         : reference features, 2 columns (mz then RT); its row
+    //                  labels (.i) name the reference features
+    //   diff_mz      : m/z tolerance (default: 0.0050)
+    //   k            : number of nearest neighbours kept for each reference
+    //                  feature (default: 1)
+    //
+    // Output
+    //   x_peaks_final : div with k rows per reference feature and the columns
+    //       no_ref, n° in peak_list, nbr of identifications, mz-ref, mz-obs,
+    //       diff_mz, RT-ref, RT-obs, diff_RT, signal
+    //
+    // Ranking distance: (TRmedian - RT_ref)^2 + p^2, where p = 0 when
+    // |mz - mz_ref| < diff_mz and p = MZ_PENALTY otherwise.
+    // NOTE(review): features outside the m/z tolerance are penalised, not
+    // excluded, so they can still be returned when nothing closer exists.
+
+    MZ_PENALTY=10;    // distance term for features outside the m/z tolerance
+
+    if argn(2)<4 then
+        diff_mz=0.0050;
+    end
+    if argn(2)<5 then
+        k=1;
+    end
+
+    x836=div(x836);
+    n=size(x836.d,1);               // number of reference features
+    npeaks=size(peaks_list.d,1);    // number of experimental features
+
+    // fail with a clear message instead of an indexing error further down
+    if n==0 | npeaks==0 then
+        error('mz_9_compares_peaks: empty reference or experimental feature list');
+    end
+    if k<1 then
+        error('mz_9_compares_peaks: k must be at least 1');
+    end
+    // more neighbours requested than features available: keep them all
+    k=min(k,npeaks);
+
+    rt_obs=peaks_list.d(:,3);       // TRmedian
+    mz_obs=peaks_list.d(:,4);
+    signal_obs=peaks_list.d(:,5);
+
+    // one block of k rows per reference feature, with the columns
+    // mz-ref, mz-obs, RT-ref, RT-obs, signal, row index in peaks_list
+    matched=[];
+    for i=1:n
+        mz_ref=x836.d(i,1);
+        rt_ref=x836.d(i,2);
+
+        penalty=MZ_PENALTY*ones(npeaks,1);
+        penalty(abs(mz_obs-mz_ref)<diff_mz)=0;
+
+        // .^ squares element by element whatever the number of features
+        dist2=(rt_obs-rt_ref).^2 + penalty.^2;
+        [nul,order]=gsort(dist2,'g','i');
+        nearest=order(1:k);
+
+        // RT-obs is TRmedian, the value the distance is computed from
+        // (selecting column 2 of peaks_list here would report TRmax)
+        block=[mz_ref*ones(k,1) mz_obs(nearest) rt_ref*ones(k,1) rt_obs(nearest) signal_obs(nearest) nearest];
+        matched=[matched; block];
+    end
+
+    // layout, absolute differences, rounding for the export
+    xout2.d=[matched(:,1:2) abs(matched(:,1)-matched(:,2)) matched(:,3:4) abs(matched(:,3)-matched(:,4)) matched(:,5:6)];
+    xout2.d(:,1:3)=0.0001*round(10000*xout2.d(:,1:3));
+    xout2.d(:,4:6)=0.01*round(100*xout2.d(:,4:6));
+    xout2.v=['mz-ref';'mz-obs';'diff_mz';'RT-ref';'RT-obs';'diff_RT';'signal';'n° in peak_list'];
+    x_peaks_final=div(xout2);
+
+    // number of rows in which each experimental feature appears
+    nrepet=ones(k*n,1);
+    for i=1:k*n
+        nrepet(i)=size(find(xout2.d(:,8)==xout2.d(i,8)),'*');
+    end
+
+    // row labels: every reference name repeated k times, in row order
+    label_temp=matrix(repmat(x836.i,[1,k])',[k*n,1]);
+    x_peaks_final.d=[x_peaks_final.d nrepet];
+    x_peaks_final.v=[x_peaks_final.v;'nbr of identifications'];
+    x_peaks_final.i=label_temp;
+    x_peaks_final=div(x_peaks_final);
+
+    // reference numbers, repeated the same way as the labels
+    no_ref=matrix(([1:n]'*ones(1,k))',[k*n,1]);
+    x_peaks_final.d=[no_ref x_peaks_final.d];
+    x_peaks_final.v=['no_ref';x_peaks_final.v];
+
+    // final column order
+    x_peaks_final=x_peaks_final(:,[1,9,10,2:8]);
+
+endfunction
+
+
+
+
+    
+    
+    
+    
+    
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/mz_9_map_differences_export.sci	Tue Apr 27 14:52:17 2021 +0000
@@ -0,0 +1,9 @@
+function mz_9_map_differences_export(x_peaks_final)
+    // Opens a new figure and calls map() on columns 3 and 6 of x_peaks_final to help
+    // spot atypical points; the figure background is then set to -2 (white).
+    // NOTE(review): in the table returned by mz_9_compares_peaks, columns 3 and 6 are
+    // 'nbr of identifications' and 'diff_mz', not the two differences: confirm.
+    figure();
+    map(x_peaks_final,3,6)
+    fig=gcf(); fig.background=-2
+endfunction  
Binary file test-data/Peaks_details.mat has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Peaks_list.tabular	Tue Apr 27 14:52:17 2021 +0000
@@ -0,0 +1,74 @@
+	TRmin	TRmax	TRmedian	m/z	signal
+eic n0 1	0.202032	0.8740362	0.5691918	136.06209	3.556E+09
+eic n0 1	3.2942345	3.9105387	3.6019219	136.06209	1.518E+08
+eic n0 1	28.889865	29.59448	29.238288	136.06209	4218687.2
+eic n0 1	21.76055	22.452547	22.09671	136.06209	1967262.4
+eic n0 2	3.2942345	3.9105387	3.6019219	229.13399	4.727E+09
+eic n0 2	0.2203602	0.8885694	0.5836671	229.13399	8.588E+08
+eic n0 2	31.57015	32.19944	31.883678	229.13399	73354605
+eic n0 3	4.0719704	4.6930267	4.3814585	275.2123	3.761E+10
+eic n0 3	7.8410016	8.4922502	8.1668772	275.2123	11200733
+eic n0 3	6.6445351	7.2859162	6.9615174	275.2123	9878986.4
+eic n0 4	10.83308	11.512692	11.177614	302.24825	2.781E+10
+eic n0 4	10.490792	10.817532	10.817532	302.24825	711138.34
+eic n0 5	20.964748	21.634493	21.303928	318.27957	8.708E+10
+eic n0 5	21.649694	21.98315	21.649694	318.27957	2.661E+09
+eic n0 5	35.625329	36.071582	35.750755	318.27957	1.877E+08
+eic n0 5	31.181441	31.496686	31.496686	318.27957	1.179E+08
+eic n0 5	22.372591	22.80939	22.46783	318.27957	2.306E+08
+eic n0 5	20.596537	20.948644	20.948644	318.27957	20296829
+eic n0 6	12.487673	13.167396	12.827867	337.21793	9.548E+09
+eic n0 7	30.078848	30.777516	30.430385	358.08479	1.037E+10
+eic n0 7	4.0866653	4.7078728	4.3962109	358.08479	1560648
+eic n0 7	24.184739	24.904519	24.556609	358.08479	3039078
+eic n0 8	3.4405547	4.0573026	3.7486354	373.15288	1.091E+10
+eic n0 8	16.932438	17.629305	17.281601	373.15288	26949890
+eic n0 8	29.399213	30.096888	29.746012	373.15288	6217413.8
+eic n0 9	3.0012317	3.6165913	3.3088748	391.14794	1.788E+09
+eic n0 9	26.877499	27.572949	27.225672	391.14794	9360463.8
+eic n0 9	31.778297	32.409135	32.094236	391.14794	2861116.6
+eic n0 9	11.528541	12.205029	11.862015	391.14794	632626.12
+eic n0 10	26.498709	27.189621	26.845006	408.1375	1.326E+10
+eic n0 10	19.247534	19.958805	19.599651	408.1375	1.234E+10
+eic n0 10	27.983646	28.679486	28.324254	408.1375	8.067E+09
+eic n0 10	31.659594	32.289745	31.973724	408.1375	97701083
+eic n0 10	25.565822	26.25218	25.909456	408.1375	1.035E+08
+eic n0 10	7.3773693	8.0254553	7.7049089	408.1375	1.176E+08
+eic n0 10	28.695694	29.327883	28.969055	408.1375	99605304
+eic n0 10	15.071408	15.768963	15.423549	408.1375	60721749
+eic n0 10	15.83192	16.51566	16.170364	408.1375	3247112.8
+eic n0 11	29.84457	30.550006	30.195642	430.17042	5.379E+10
+eic n0 11	30.581591	31.256804	30.92318	430.17042	2.108E+09
+eic n0 11	31.973724	32.602898	32.289745	430.17042	7.452E+08
+eic n0 11	28.811539	29.514597	29.151444	430.17042	3.208E+08
+eic n0 11	29.530717	29.828981	29.828981	430.17042	32680491
+eic n0 12	5.9955512	6.6294239	6.3142063	447.09318	5.812E+09
+eic n0 13	8.2446237	8.905213	8.5714396	466.24602	3.894E+10
+eic n0 13	8.9209174	9.4884061	9.1696072	466.24602	2.423E+10
+eic n0 13	7.9017216	8.229195	8.229195	466.24602	96711882
+eic n0 13	34.00696	34.670287	34.33868	466.24602	86543001
+eic n0 13	27.360046	28.035732	27.701317	466.24602	9139101.8
+eic n0 14	3.7046136	4.3224948	4.0134259	486.26232	1.204E+10
+eic n0 14	16.137439	16.8222	16.483176	486.26232	2.139E+09
+eic n0 14	32.259635	32.896422	32.572726	486.26232	15539760
+eic n0 15	9.1232038	9.7756419	9.4426532	516.19583	5.371E+10
+eic n0 15	9.791053	10.28787	9.958761	516.19583	5.139E+09
+eic n0 15	10.663682	11.240996	10.896862	516.19583	1.944E+09
+eic n0 15	11.623635	11.992104	11.655338	516.19583	5.639E+08
+eic n0 15	12.008717	12.391399	12.058181	516.19583	3.736E+08
+eic n0 15	31.271782	31.898567	31.585079	516.19583	1.697E+08
+eic n0 15	13.311797	13.978267	13.642598	516.19583	3.540E+08
+eic n0 15	24.724795	25.234798	24.888036	516.19583	48547389
+eic n0 15	33.257404	33.912295	33.583528	516.19583	1.182E+08
+eic n0 15	8.7770606	9.1077848	9.1077848	516.19583	6911097.1
+eic n0 15	16.760462	17.450798	17.102267	516.19583	88159052
+eic n0 15	15.96153	16.647011	16.30613	516.19583	1.197E+08
+eic n0 15	27.950189	28.560762	28.212172	516.19583	73072615
+eic n0 15	28.811539	29.275364	29.151444	516.19583	48351131
+eic n0 15	20.425738	20.851601	20.494253	516.19583	14357365
+eic n0 15	25.250739	25.611367	25.267247	516.19583	25891406
+eic n0 16	23.367004	24.099081	23.742331	560.20828	1.660E+10
+eic n0 16	4.2635586	4.8863261	4.5741809	560.20828	1.343E+08
+eic n0 16	6.1022979	6.7347894	6.4187547	560.20828	4324000.6
+eic n0 16	13.736535	14.415718	14.073788	560.20828	640858.28
+eic n0 17	25.581033	26.268005	25.926805	974.3052	2.608E+08
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref_mz_rt.tab	Tue Apr 27 14:52:17 2021 +0000
@@ -0,0 +1,6 @@
+	m/z	RT
+compose1	136.0616	3.61
+compose2	136.0616	15
+compose3	229.1350	3.5
+compose4	229.1350	5
+compose5	230.00	10