Mercurial > repos > recetox > matchms_formatter
annotate formatter.py @ 11:803a430e9f0b draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f7bab98744e338dcdbdc9cf6f9de287632c76ea2
| author | recetox | 
|---|---|
| date | Tue, 18 Oct 2022 13:20:25 +0000 | 
| parents | 5c0e5344edf3 | 
| children | 2f0545b02020 | 
| rev | line source | 
|---|---|
| 0 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 1 import click | 
| 10 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
changeset | 2 from matchms.importing import scores_from_json | 
| 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
changeset | 3 from pandas import DataFrame | 
| 0 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 4 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 5 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
def create_long_table(data: DataFrame, value_id: str) -> DataFrame:
    """Reshape a wide table into long format.

    The table is transposed first, so the column labels of ``data`` become
    the (repeated) index of the result, while the row labels of ``data`` are
    written into a column called ``compound``. See DataFrame.melt(...).

    Args:
        data (DataFrame): The wide table to reshape.
        value_id (str): Name of the column that receives the cell values.

    Returns:
        DataFrame: Long table with the columns ``compound`` and ``value_id``.
    """
    transposed = data.transpose()
    # ignore_index=False keeps the transposed index instead of a fresh RangeIndex.
    long_table = transposed.melt(
        value_name=value_id,
        var_name='compound',
        ignore_index=False,
    )
    return long_table
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 18 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 19 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
def join_df(x: DataFrame, y: DataFrame, on=None, how="inner") -> DataFrame:
    """Shortcut function to join two dataframes on their index and on columns.

    Both tables are re-indexed by their current index followed by the columns
    listed in ``on`` (those columns are moved into the index), and are then
    joined index-on-index.

    Args:
        x (DataFrame): Table X (left side of the join).
        y (DataFrame): Table Y (right side of the join).
        on (list | str, optional): Column name(s) on which to join in addition
            to the index. A single name may be given as a plain string.
            Defaults to None, meaning the tables are joined on the index only.
        how (str, optional): Join method, see DataFrame.join(...). Defaults to "inner".

    Returns:
        DataFrame: Joined dataframe.
    """
    # None replaces the former mutable ``[]`` default; it still means "no columns".
    if on is None:
        key_columns = []
    elif isinstance(on, str):
        # A bare string would otherwise fail in the list concatenation below.
        key_columns = [on]
    else:
        key_columns = list(on)

    df_x = x.set_index([x.index] + key_columns)
    df_y = y.set_index([y.index] + key_columns)
    return df_x.join(df_y, how=how)
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 36 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 37 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
def get_top_k_matches(data: DataFrame, k: int) -> DataFrame:
    """Keep only the k highest-scoring rows of every group.

    Rows are grouped by the first level of the index; within each group the
    k rows with the largest ``score`` values are retained.

    Args:
        data (DataFrame): A table with a ``score`` column.
        k (int): Number of top scores to retrieve per group.

    Returns:
        DataFrame: Table containing only the top k rows of each group.
    """
    def _best_of_group(group: DataFrame) -> DataFrame:
        return group.nlargest(n=k, columns=['score'])

    # group_keys=False: do not prepend the group label as an extra index level.
    grouped = data.groupby(level=0, group_keys=False)
    return grouped.apply(_best_of_group)
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 49 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 50 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
def filter_thresholds(data: DataFrame, t_score: float, t_matches: float) -> DataFrame:
    """Drop every row that does not exceed both thresholds.

    Both comparisons are strict: a row is kept only if its ``score`` is
    greater than ``t_score`` and its ``matches`` is greater than ``t_matches``.

    Args:
        data (DataFrame): Table with ``score`` and ``matches`` columns.
        t_score (float): Score threshold (exclusive).
        t_matches (float): Matches threshold (exclusive).

    Returns:
        DataFrame: The rows of ``data`` passing both thresholds.
    """
    # First narrow down by score, then by number of matches on the remainder.
    passed_score = data.loc[data['score'] > t_score]
    passed_both = passed_score.loc[passed_score['matches'] > t_matches]
    return passed_both
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 65 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 66 | 
| 10 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
def scores_to_dataframes(scores):
    """Unpack a matchms scores object into one table of scores and one of matches.

    Row labels are the ``compound_name`` metadata of ``scores.references`` and
    column labels are the ``compound_name`` metadata of ``scores.queries``.
    Every entry of ``scores.scores`` supplies one table row via its ``"score"``
    and ``"matches"`` fields.

    Args:
        scores (matchms.scores): matchms.scores object.

    Returns:
        DataFrame: Scores
        DataFrame: Matches
    """
    def _compound_names(spectra_list):
        return [spectrum.metadata['compound_name'] for spectrum in spectra_list]

    queries = _compound_names(scores.queries)
    references = _compound_names(scores.references)

    def _table_for(field):
        # One row per entry of scores.scores, labelled by reference name.
        rows = [entry[field] for entry in scores.scores]
        return DataFrame(data=rows, index=references, columns=queries)

    score_table = _table_for("score")
    match_table = _table_for("matches")
    return score_table, match_table
| 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
changeset | 84 | 
| 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
changeset | 85 | 
| 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized scores and build one long table of scores and matches.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Scores and matches in long format, inner-joined on the
            index and the ``compound`` column.
    """
    loaded_scores = scores_from_json(scores_filename)
    score_table, match_table = scores_to_dataframes(loaded_scores)

    # Bring both wide tables into long format before joining them.
    long_scores = create_long_table(score_table, 'score')
    long_matches = create_long_table(match_table, 'matches')

    return join_df(long_matches, long_scores, on=['compound'], how='inner')
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 103 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 104 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
# Command group: loads the scores file once and shares the resulting table with
# the sub-commands through the click context object. ``output_filename`` is not
# used in this function. (Kept as comments rather than a docstring, because
# click would show a docstring as the --help text.)
@click.group()
@click.option(
    '--sf', 'scores_filename',
    type=click.Path(exists=True),
    required=True,
)
@click.option(
    '--o', 'output_filename',
    type=click.Path(writable=True),
    required=True,
)
@click.pass_context
def cli(ctx, scores_filename, output_filename):
    ctx.ensure_object(dict)
    loaded_table = load_data(scores_filename)
    ctx.obj['data'] = loaded_table
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 113 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 114 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
# Sub-command: keep only rows of the shared table whose score and matches
# exceed the given thresholds. The filtered table is returned as the command
# result. (Comments instead of a docstring so the click --help text stays
# unchanged.)
@cli.command()
@click.option('--st', 'scores_threshold', type=float, required=True)
@click.option('--mt', 'matches_threshold', type=float, required=True)
@click.pass_context
def get_thresholded_data(ctx, scores_threshold, matches_threshold):
    table = ctx.obj['data']
    return filter_thresholds(table, scores_threshold, matches_threshold)
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 122 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 123 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
# Sub-command: keep the k best-scoring rows per group of the shared table and
# return them as the command result. (Comments instead of a docstring so the
# click --help text stays unchanged.)
@cli.command()
@click.option('--k', 'k', type=int, required=True)
@click.pass_context
def get_top_k_data(ctx, k):
    table = ctx.obj['data']
    return get_top_k_matches(table, k)
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 130 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 131 | 
| 10 
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
 recetox parents: 
9diff
@cli.result_callback()
def write_output(result: DataFrame, scores_filename, output_filename):
    """Write the table returned by a sub-command to a tab-separated file.

    The index is turned back into regular columns; ``level_0`` is renamed to
    ``query`` and ``compound`` to ``reference`` before writing.

    Args:
        result (DataFrame): Table returned by the executed sub-command.
        scores_filename: Not used in this function.
        output_filename: Path of the file to write.
    """
    flat = result.reset_index()
    flat = flat.rename(columns={'level_0': 'query', 'compound': 'reference'})
    # index=False: the former index already lives in the columns after reset_index().
    flat.to_csv(output_filename, sep="\t", index=False)
| 0 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 136 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
changeset | 137 | 
| 
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
 recetox parents: diff
# Script entry point: hand click an empty dict as context object; the group
# callback stores the loaded table in it under the key 'data'.
if __name__ == '__main__':
    cli(obj=dict())
