Mercurial > repos > recetox > test_recetox_aplcms_unsupervised
annotate main.R @ 0:be51059c2384 draft default tip
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
| author | recetox | 
|---|---|
| date | Fri, 18 Jun 2021 16:36:23 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 1 library(recetox.aplcms) | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 2 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write the per-sample feature columns of `df` to a Parquet file.
#
# df:       object coercible with as.data.frame(); must provide the columns
#           "mz", "pos", "sd1", "sd2" and "area".
# filename: path of the Parquet file to write.
save_extracted_features <- function(df, filename) {
  feature_columns <- c("mz", "pos", "sd1", "sd2", "area")
  # Coerce first so that matrix-like input can be subset by column name.
  features <- as.data.frame(df)[feature_columns]
  arrow::write_parquet(features, filename)
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 8 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write the feature/sample columns of `df` to a Parquet file.
#
# df:       data frame providing the columns "feature", "mz", "rt", "sample",
#           "sample_rt" and "sample_intensity".
# filename: path of the Parquet file to write.
save_feature_sample_table <- function(df, filename) {
  selected <- df[
    c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
  ]
  arrow::write_parquet(selected, filename)
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 13 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Column names of the known-features table, in the order in which they are
# read and written by read_known_table() / save_known_table().
#
# Returns an unnamed character vector of 18 column names.
known_table_columns <- function() {
  compound_columns <- c(
    "chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
    "m.z"
  )
  detection_columns <- c("Number_profiles_processed", "Percent_found")
  mz_columns <- c("mz_min", "mz_max")
  rt_columns <- c("RT_mean", "RT_sd", "RT_min", "RT_max")
  intensity_columns <- c(
    "int_mean(log)", "int_sd(log)", "int_min(log)", "int_max(log)"
  )
  c(
    compound_columns, detection_columns, mz_columns, rt_columns,
    intensity_columns
  )
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 20 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write the known-features table to a Parquet file, keeping only the columns
# listed by known_table_columns() (in that order).
#
# df:       data frame containing every column named by known_table_columns().
# filename: path of the Parquet file to write.
save_known_table <- function(df, filename) {
  arrow::write_parquet(df[known_table_columns()], filename)
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 25 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Read the known-features table from a Parquet file, loading only the columns
# listed by known_table_columns().
#
# filename: path of the Parquet file to read.
# Returns whatever arrow::read_parquet() returns for that selection.
read_known_table <- function(filename) {
  arrow::read_parquet(
    file = filename,
    col_select = known_table_columns()
  )
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 29 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write a two-column pairing table as text with the header "new" "old".
#
# df:       two-column table; its own column names are replaced in the output
#           by "new" and "old".
# filename: path of the text file to write.
save_pairing <- function(df, filename) {
  pairing_header <- c("new", "old")
  write.table(
    df,
    file = filename,
    row.names = FALSE,
    col.names = pairing_header
  )
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 33 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write one Parquet file per sample into the "extracted" directory.
#
# dfs:       list of feature tables, paired element-wise with `filenames`.
# filenames: sample file paths; each output is named after the input's base
#            name with its extension replaced by ".parquet".
# Returns the result of mapply() over save_extracted_features().
save_all_extracted_features <- function(dfs, filenames) {
  out_dir <- "extracted"
  sample_names <- tools::file_path_sans_ext(basename(filenames))
  out_paths <- file.path(out_dir, paste0(sample_names, ".parquet"))

  # Create the directory only when it is missing: an unconditional
  # dir.create() warns on every rerun in the same working directory, while a
  # genuine creation failure still surfaces its own warning here.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }

  mapply(save_extracted_features, dfs, out_paths)
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 41 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Write one Parquet file per sample into the "corrected" directory.
#
# dfs:       list of feature tables, paired element-wise with `filenames`.
# filenames: sample file paths; each output is named after the input's base
#            name with its extension replaced by ".parquet".
# Returns the result of mapply() over save_extracted_features().
save_all_corrected_features <- function(dfs, filenames) {
  out_dir <- "corrected"
  sample_names <- tools::file_path_sans_ext(basename(filenames))
  out_paths <- file.path(out_dir, paste0(sample_names, ".parquet"))

  # Create the directory only when it is missing: an unconditional
  # dir.create() warns on every rerun in the same working directory, while a
  # genuine creation failure still surfaces its own warning here.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }

  # The tables are written with save_extracted_features(), i.e. with the same
  # column set as the extracted features.
  mapply(save_extracted_features, dfs, out_paths)
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 49 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Entry point for the unsupervised workflow: run it on the sample files and
# write every result table to disk.
#
# sample_files:   paths of the input sample files.
# aligned_file:   output path for the aligned feature/sample table.
# recovered_file: output path for the recovered feature/sample table.
# ...:            forwarded unchanged to unsupervised().
#
# NOTE(review): sort_samples_by_acquisition_number() and unsupervised() are not
# defined in this file; presumably they come from recetox.aplcms - confirm.
unsupervised_main <- function(sample_files, aligned_file, recovered_file, ...) {
  ordered_samples <- sort_samples_by_acquisition_number(sample_files)
  result <- unsupervised(filenames = ordered_samples, ...)

  # Per-sample tables go to "extracted/" and "corrected/".
  save_all_extracted_features(result$extracted_features, ordered_samples)
  save_all_corrected_features(result$corrected_features, ordered_samples)

  # Combined tables go to the caller-supplied paths.
  save_feature_sample_table(result$aligned_feature_sample_table, aligned_file)
  save_feature_sample_table(
    result$recovered_feature_sample_table,
    recovered_file
  )
}
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
changeset | 61 | 
| 
be51059c2384
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 65d42862f9265e8ba3783368ac0bddb154e3a427-dirty"
 recetox parents: diff
# Entry point for the hybrid workflow: run it on the sample files together with
# a known-features table and write every result table to disk.
#
# sample_files:             paths of the input sample files.
# known_table_file:         Parquet file holding the known-features table.
# updated_known_table_file: output path for the updated known-features table.
# pairing_file:             output path for the feature/known-table pairing.
# aligned_file:             output path for the aligned feature/sample table.
# recovered_file:           output path for the recovered feature/sample table.
# ...:                      forwarded unchanged to hybrid().
#
# NOTE(review): sort_samples_by_acquisition_number() and hybrid() are not
# defined in this file; presumably they come from recetox.aplcms - confirm.
hybrid_main <- function(sample_files,
                        known_table_file,
                        updated_known_table_file,
                        pairing_file,
                        aligned_file,
                        recovered_file,
                        ...) {
  ordered_samples <- sort_samples_by_acquisition_number(sample_files)

  known_table <- read_known_table(known_table_file)
  result <- hybrid(filenames = ordered_samples, known_table = known_table, ...)

  # Known-table outputs.
  save_known_table(result$updated_known_table, updated_known_table_file)
  save_pairing(result$features_known_table_pairing, pairing_file)

  # Per-sample tables go to "extracted/" and "corrected/".
  save_all_extracted_features(result$extracted_features, ordered_samples)
  save_all_corrected_features(result$corrected_features, ordered_samples)

  # Combined tables go to the caller-supplied paths.
  save_feature_sample_table(result$aligned_feature_sample_table, aligned_file)
  save_feature_sample_table(
    result$recovered_feature_sample_table,
    recovered_file
  )
}
