Mercurial > repos > jcb-mpl > features_check_references
changeset 0:4660bf9c8059 draft
Uploaded
author | jcb-mpl |
---|---|
date | Tue, 27 Apr 2021 14:52:17 +0000 |
parents | |
children | c1008b92a6c8 |
files | 2018_07_check_peaks.xml src/mz_9_compares_peaks.sci src/mz_9_map_differences_export.sci test-data/Peaks_details.mat test-data/Peaks_list.tabular test-data/ref_mz_rt.tab |
diffstat | 6 files changed, 375 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: 2018_07_check_peaks.xml (added in changeset 0:4660bf9c8059).
     Galaxy tool wrapper that matches observed features against a table of
     reference features by running the Scilab function mz_9_compares_peaks. -->
<tool id="2021_05_checks_ref_features" name="Features -> checks references" version="0.0.1">

  <description> </description>

  <!-- The only declared dependency is the "scilab" package. -->
  <requirements>
    <requirement type="package" >scilab</requirement>
  </requirements>


  <!-- Any process exit code of 1 or above is reported as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>


  <!-- Runs scilab-cli from $__root_dir__/packages/scilab-6.1.0 when that
       directory exists, otherwise the scilab-cli found on the PATH. The
       generated script (configfile "script_file" below) is fed on standard
       input. -->
  <command>
    <![CDATA[
      if [ -d $__root_dir__/packages/scilab-6.1.0 ]; then $__root_dir__/packages/scilab-6.1.0/bin/scilab-cli -nb -quit -f $* < ${script_file}; else scilab-cli -nb -quit -f $* < ${script_file}; fi
    ]]>
  </command>


  <!-- Scilab script generated for each job. Steps, in order:
       1. exec src/mz_9_compares_peaks.sci from the tool directory;
       2. read the feature list and the reference table with glx_tab2div,
          and "load" the detailed-features file;
       3. call mz_9_compares_peaks(peaks_list,peaks_detail,x836,diff_mz,k);
       4. write the result with div2tab;
       5. if lasterror(%f) is not empty, write lasterror() to unit 0.
       Every line ends with the Scilab continuation marker "...".
       NOTE(review): glx_tab2div and div2tab are not defined in the file
       exec'ed here; presumably they come from the Scilab installation or a
       startup toolbox. Confirm. -->
  <configfiles>
    <configfile name="script_file">
      <![CDATA[ exec("$__tool_directory__/src/mz_9_compares_peaks.sci",-1); ...
      lasterror(); ...
      ... //load "${peaks_list}"; ... // 22avril21
      peaks_list=glx_tab2div("${peaks_list}"); ...
      ...
      load "${peaks_detail}"; ...
      x836=glx_tab2div("${peaks_reference}"); ...
      diff_mz=${diff_mz}; ...
      k=${k}; ...
      x_peaks_final=mz_9_compares_peaks(peaks_list,peaks_detail,x836,diff_mz,k); ...
      ...
      div2tab(x_peaks_final,"${x_peaks_final}"); ...
      if ~isempty(lasterror(%f)); ...
        write(0,lasterror()); ...
      end; ]]>
    </configfile>
  </configfiles>


  <!-- Three datasets (feature list, detailed features, reference table) and
       two numeric parameters (m/z threshold, number of neighbours k). -->
  <inputs>
    <param name="peaks_list" format="tabular" type="data" label="Features list" help="from function EIC->peaks:features" />
    <param name="peaks_detail" format="mat" type="data" label="Detailed features" help="from function EIC->peaks:features" />
    <param name="peaks_reference" format="tabular" type="data" label="Reference features" help="2columns: mz and RT, with column and line headers" />
    <param name="diff_mz" value="0.0050" type="float" label="Threshold for m/z values" />
    <param name="k" value="1" type="integer" label="Number of extracted features for each reference feature" help="the closest to the reference peak" />

  </inputs>


  <!-- Single tabular output, written by div2tab in the generated script. -->
  <outputs>
    <data name="x_peaks_final" format="tabular" label="Identified features" />
  </outputs>


  <!-- One functional test on the bundled test-data files; it only checks
       that the output contains the texts 136.0616 and 229.135. -->
  <tests>

    <test>
      <param name="peaks_list" value="Peaks_list.tabular"/>
      <param name="peaks_detail" value="Peaks_details.mat"/>
      <param name="peaks_reference" value="ref_mz_rt.tab"/>
      <param name="diff_mz" value="0.0050"/>
      <param name="k" value="1"/>
      <output name="x_peaks_final">
        <assert_contents>
          <has_text text="136.0616"/>
          <has_text text="229.135"/>
        </assert_contents>
      </output>
    </test>

  </tests>


<help>


**Author** Jean-Claude Boulet (INRA).


---------------------------------------------------

==================================
FEATURES EXPERIMENTAL VS REFERENCE
==================================


-----------
Description
-----------

This function checks reference features among experimental features.

-----------
Input files
-----------

**Features list**

The tabular file yielded by the function: EICs -> peaks:features


**Detailed features**

The HDF5-scilab file yielded by the function: EICs -> peaks:features


**Reference features**

A tabular file containing 2 columns: m/z values then retention times

Row labels are the names of the reference features, e.g. chemical compounds



----------
Parameters
----------

**Threshold for m/z values**

Observed features are associated to the reference features if 1) the difference in m/z values is under the threshold; 2) the reference RT falls within the range of the observed RTs.

Default: 0.0050

**Number of extracted features for each reference feature**

e.g. if the number is 2, the algorithm will extract the 2 experimental features closest to each reference feature.


------
Output
------


**Identified features**

A tabular file with the columns:

- name of the reference feature

- number of the reference feature in the input file: Reference features

- number of the experimental feature in the input files: Features List and Detailed features

- number of identifications= the number of reference features each experimental feature has been attributed to; target = 1

- m/z value of the reference feature

- m/z value of the experimental feature

- difference of m/z values

- RT of the reference feature

- RT of the experimental feature

- difference of RT

- signal of the experimental feature


</help>


<citations>

</citations>


</tool>

// File: src/mz_9_compares_peaks.sci
function x_peaks_final=mz_9_compares_peaks(peaks_list,peaks_detail,x836,diff_mz,k)

    // For every reference feature, selects the k observed features that are
    // closest to it and returns all matches in one table (k consecutive rows
    // per reference feature, in the order of the reference table).
    //
    // Inputs
    //   peaks_list   : observed features; peaks_list.d is read with the columns
    //                  TRmin / TRmax / TRmedian / mz / signal
    //                  (obtained with 2018_07_mz_eics_to_features_v2)
    //   peaks_detail : kept in the signature for callers; not read here
    //   x836         : reference features, passed through div(); .d holds two
    //                  columns (mz then RT), .i holds the row labels
    //   diff_mz      : m/z tolerance; default 0.0050 when fewer than 4
    //                  arguments are given
    //   k            : number of nearest observed features kept for each
    //                  reference feature; default 1 when fewer than 5
    //                  arguments are given
    //
    // Output
    //   x_peaks_final: result of div(); row labels are the reference labels
    //                  (each repeated k times); columns are selected in the order
    //                  no_ref, n° in peak_list, nbr of identifications, mz-ref,
    //                  mz-obs, diff_mz, RT-ref, RT-obs, diff_RT, signal
    //
    // Distance used for the neighbour search (squared):
    //   (TRmedian - RT_ref)^2 + p^2, with p = 0 when |mz - mz_ref| < diff_mz
    //   and p = 10 otherwise. Features outside the m/z tolerance are therefore
    //   penalised, not excluded.
    //
    // Raises an error when k is below 1 or above the number of observed
    // features.
    //
    // Original working directory noted by the author:
    //   tampon2020/projet_publi_eics/jan20
    // NOTE(review): div() is defined outside this file; it is assumed to build
    // or validate a structure with fields .d (data), .i (row labels) and
    // .v (column labels). Confirm.

    x836=div(x836);

    if argn(2)<4 then
        diff_mz=0.0050;
    end

    if argn(2)<5 then
        k=1;
    end

    // Sorting the references by increasing m/z was disabled on 11 Feb 2020:
    // they are processed in their input order.
    n=size(x836.d,1);               // number of reference features
    n_obs=size(peaks_list.d,1);     // number of observed features

    // Explicit check instead of an obscure indexing failure further down.
    if k<1 | k>n_obs then
        error(msprintf("mz_9_compares_peaks: k=%d must lie between 1 and the number of observed features (%d)",k,n_obs));
    end

    // Loop-invariant columns of the observed features.
    mz_obs=peaks_list.d(:,4);       // mz
    rt_obs=peaks_list.d(:,3);       // TRmedian

    // One row per (reference, neighbour) pair:
    // mz-ref, mz-obs, RT-ref, RT-obs, signal, row number in peaks_list
    matched=zeros(k*n,6);

    // nearest-neighbour identification
    for i=1:n
        mz_gap=abs(mz_obs-x836.d(i,1));
        mz_penalty=mz_obs;
        mz_penalty(find(mz_gap<diff_mz))=0;     // inside the tolerance: no penalty
        mz_penalty(find(mz_gap>=diff_mz))=10;   // outside the tolerance: large distance
        d=(rt_obs-x836.d(i,2)).^2 + mz_penalty.^2;
        [d_sorted,order]=gsort(d,'g','i');      // increasing distance
        nearest=order(1:k);                     // keep the k closest features
        rows=(i-1)*k+(1:k);
        // NOTE(review): RT-obs is taken from column 2 of peaks_list.d (TRmax)
        // while the distance above uses column 3 (TRmedian). Kept as in the
        // original; confirm which retention time should be reported.
        matched(rows,:)=[ones(k,1)*x836.d(i,1) peaks_list.d(nearest,4) ones(k,1)*x836.d(i,2) peaks_list.d(nearest,2) peaks_list.d(nearest,5) nearest];
    end

    // layout + absolute differences, then rounding
    // (4 decimals for the m/z columns, 2 decimals for the RT columns)
    xout2.d=[matched(:,1:2) abs(matched(:,1)-matched(:,2)) matched(:,3:4) abs(matched(:,3)-matched(:,4)) matched(:,5:6)];
    xout2.d(:,1:3)=0.0001*round(10000*xout2.d(:,1:3));
    xout2.d(:,4:6)=0.01*round(100*xout2.d(:,4:6));
    xout2.v=['mz-ref';'mz-obs';'diff_mz';'RT-ref';'RT-obs';'diff_RT';'signal';'n° in peak_list'];

    x_peaks_final=div(xout2);

    // Is the same observed feature attached to several references?
    // nrepet(i) = number of result rows sharing the observed feature of row i.
    obs_index=xout2.d(:,8);
    nrepet=ones(k*n,1);
    for i=1:k*n
        nrepet(i)=size(find(obs_index==obs_index(i)),'*');
    end

    // row labels: each reference label repeated k times
    label_temp=x836.i;
    label_temp=repmat(label_temp,[1,k]);
    label_temp=label_temp';
    label_temp=matrix(label_temp,[k*n,1]);
    x_peaks_final.d=[x_peaks_final.d nrepet];
    x_peaks_final.v=[x_peaks_final.v;'nbr of identifications'];
    x_peaks_final.i=label_temp;
    x_peaks_final=div(x_peaks_final);

    // reference numbers: 1..n, each repeated k times
    no_ref=matrix(ones(k,1)*(1:n),[k*n,1]);

    x_peaks_final.d=[no_ref x_peaks_final.d];
    x_peaks_final.v=['no_ref';x_peaks_final.v];

    // final column order
    x_peaks_final=x_peaks_final(:,[1,9,10,2:8]);

endfunction
// File: src/mz_9_map_differences_export.sci
function mz_9_map_differences_export(x_peaks_final)

    // Plots x_peaks_final in a new figure to help spot atypical points.
    // Opens a figure, calls map(x_peaks_final,3,6) and then sets the
    // "background" property of the current figure to -2.
    // NOTE(review): map() is defined outside this file; the arguments 3 and 6
    // presumably select the columns to plot against each other. Confirm.
    figure;
    map(x_peaks_final,3,6)
    fig=gcf();
    set(fig,"background",-2);

endfunction
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Peaks_list.tabular Tue Apr 27 14:52:17 2021 +0000 @@ -0,0 +1,74 @@ + TRmin TRmax TRmedian m/z signal +eic n0 1 0.202032 0.8740362 0.5691918 136.06209 3.556E+09 +eic n0 1 3.2942345 3.9105387 3.6019219 136.06209 1.518E+08 +eic n0 1 28.889865 29.59448 29.238288 136.06209 4218687.2 +eic n0 1 21.76055 22.452547 22.09671 136.06209 1967262.4 +eic n0 2 3.2942345 3.9105387 3.6019219 229.13399 4.727E+09 +eic n0 2 0.2203602 0.8885694 0.5836671 229.13399 8.588E+08 +eic n0 2 31.57015 32.19944 31.883678 229.13399 73354605 +eic n0 3 4.0719704 4.6930267 4.3814585 275.2123 3.761E+10 +eic n0 3 7.8410016 8.4922502 8.1668772 275.2123 11200733 +eic n0 3 6.6445351 7.2859162 6.9615174 275.2123 9878986.4 +eic n0 4 10.83308 11.512692 11.177614 302.24825 2.781E+10 +eic n0 4 10.490792 10.817532 10.817532 302.24825 711138.34 +eic n0 5 20.964748 21.634493 21.303928 318.27957 8.708E+10 +eic n0 5 21.649694 21.98315 21.649694 318.27957 2.661E+09 +eic n0 5 35.625329 36.071582 35.750755 318.27957 1.877E+08 +eic n0 5 31.181441 31.496686 31.496686 318.27957 1.179E+08 +eic n0 5 22.372591 22.80939 22.46783 318.27957 2.306E+08 +eic n0 5 20.596537 20.948644 20.948644 318.27957 20296829 +eic n0 6 12.487673 13.167396 12.827867 337.21793 9.548E+09 +eic n0 7 30.078848 30.777516 30.430385 358.08479 1.037E+10 +eic n0 7 4.0866653 4.7078728 4.3962109 358.08479 1560648 +eic n0 7 24.184739 24.904519 24.556609 358.08479 3039078 +eic n0 8 3.4405547 4.0573026 3.7486354 373.15288 1.091E+10 +eic n0 8 16.932438 17.629305 17.281601 373.15288 26949890 +eic n0 8 29.399213 30.096888 29.746012 373.15288 6217413.8 +eic n0 9 3.0012317 3.6165913 3.3088748 391.14794 1.788E+09 +eic n0 9 26.877499 27.572949 27.225672 391.14794 9360463.8 +eic n0 9 31.778297 32.409135 32.094236 391.14794 2861116.6 +eic n0 9 11.528541 12.205029 11.862015 391.14794 632626.12 +eic n0 10 26.498709 27.189621 26.845006 408.1375 1.326E+10 +eic n0 10 19.247534 19.958805 19.599651 408.1375 
1.234E+10 +eic n0 10 27.983646 28.679486 28.324254 408.1375 8.067E+09 +eic n0 10 31.659594 32.289745 31.973724 408.1375 97701083 +eic n0 10 25.565822 26.25218 25.909456 408.1375 1.035E+08 +eic n0 10 7.3773693 8.0254553 7.7049089 408.1375 1.176E+08 +eic n0 10 28.695694 29.327883 28.969055 408.1375 99605304 +eic n0 10 15.071408 15.768963 15.423549 408.1375 60721749 +eic n0 10 15.83192 16.51566 16.170364 408.1375 3247112.8 +eic n0 11 29.84457 30.550006 30.195642 430.17042 5.379E+10 +eic n0 11 30.581591 31.256804 30.92318 430.17042 2.108E+09 +eic n0 11 31.973724 32.602898 32.289745 430.17042 7.452E+08 +eic n0 11 28.811539 29.514597 29.151444 430.17042 3.208E+08 +eic n0 11 29.530717 29.828981 29.828981 430.17042 32680491 +eic n0 12 5.9955512 6.6294239 6.3142063 447.09318 5.812E+09 +eic n0 13 8.2446237 8.905213 8.5714396 466.24602 3.894E+10 +eic n0 13 8.9209174 9.4884061 9.1696072 466.24602 2.423E+10 +eic n0 13 7.9017216 8.229195 8.229195 466.24602 96711882 +eic n0 13 34.00696 34.670287 34.33868 466.24602 86543001 +eic n0 13 27.360046 28.035732 27.701317 466.24602 9139101.8 +eic n0 14 3.7046136 4.3224948 4.0134259 486.26232 1.204E+10 +eic n0 14 16.137439 16.8222 16.483176 486.26232 2.139E+09 +eic n0 14 32.259635 32.896422 32.572726 486.26232 15539760 +eic n0 15 9.1232038 9.7756419 9.4426532 516.19583 5.371E+10 +eic n0 15 9.791053 10.28787 9.958761 516.19583 5.139E+09 +eic n0 15 10.663682 11.240996 10.896862 516.19583 1.944E+09 +eic n0 15 11.623635 11.992104 11.655338 516.19583 5.639E+08 +eic n0 15 12.008717 12.391399 12.058181 516.19583 3.736E+08 +eic n0 15 31.271782 31.898567 31.585079 516.19583 1.697E+08 +eic n0 15 13.311797 13.978267 13.642598 516.19583 3.540E+08 +eic n0 15 24.724795 25.234798 24.888036 516.19583 48547389 +eic n0 15 33.257404 33.912295 33.583528 516.19583 1.182E+08 +eic n0 15 8.7770606 9.1077848 9.1077848 516.19583 6911097.1 +eic n0 15 16.760462 17.450798 17.102267 516.19583 88159052 +eic n0 15 15.96153 16.647011 16.30613 516.19583 1.197E+08 +eic n0 
15 27.950189 28.560762 28.212172 516.19583 73072615 +eic n0 15 28.811539 29.275364 29.151444 516.19583 48351131 +eic n0 15 20.425738 20.851601 20.494253 516.19583 14357365 +eic n0 15 25.250739 25.611367 25.267247 516.19583 25891406 +eic n0 16 23.367004 24.099081 23.742331 560.20828 1.660E+10 +eic n0 16 4.2635586 4.8863261 4.5741809 560.20828 1.343E+08 +eic n0 16 6.1022979 6.7347894 6.4187547 560.20828 4324000.6 +eic n0 16 13.736535 14.415718 14.073788 560.20828 640858.28 +eic n0 17 25.581033 26.268005 25.926805 974.3052 2.608E+08