Mercurial > repos > pieterlukasse > prims_metabolomics
comparison match_library.py @ 1:071a185c2ced
new tools
| author | pieter.lukasse@wur.nl |
|---|---|
| date | Fri, 24 Oct 2014 12:52:56 +0200 |
| parents | 4b94bb2d381c |
| children |
comparison
equal
deleted
inserted
replaced
| 0:4b94bb2d381c | 1:071a185c2ced |
|---|---|
| 15 ''' | 15 ''' |
| 16 Returns a Galaxy formatted list of tuples containing all possibilities for the | 16 Returns a Galaxy formatted list of tuples containing all possibilities for the |
| 17 GC-column types. Used by the library_lookup.xml tool | 17 GC-column types. Used by the library_lookup.xml tool |
| 18 @param library_file: given library file from which the list of GC-column types is extracted | 18 @param library_file: given library file from which the list of GC-column types is extracted |
| 19 ''' | 19 ''' |
| 20 (data, header) = read_library(library_file) | 20 if library_file == "": |
| 21 | 21 galaxy_output = [("", "", False)] |
| 22 if 'columntype' not in header: | 22 else: |
| 23 raise IOError('Missing columns in ', library_file) | 23 (data, header) = read_library(library_file) |
| 24 | 24 |
| 25 # Filter data on column type | 25 if 'columntype' not in header: |
| 26 column_type = header.index("columntype") | 26 raise IOError('Missing columns in ', library_file) |
| 27 amounts_in_list_dict = count_occurrence([row[column_type] for row in data]) | 27 |
| 28 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()] | 28 # Filter data on column type |
| 29 column_type = header.index("columntype") | |
| 30 amounts_in_list_dict = count_occurrence([row[column_type] for row in data]) | |
| 31 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()] | |
| 32 | |
| 29 return(galaxy_output) | 33 return(galaxy_output) |
| 30 | 34 |
| 31 | 35 |
| 32 def filter_column(library_file, column_type_name): | 36 def filter_column(library_file, column_type_name): |
| 33 ''' | 37 ''' |
| 34 Filters the Retention Index database on column type | 38 Filters the Retention Index database on column type |
| 35 @param library_file: file containing the database | 39 @param library_file: file containing the database |
| 36 @param column_type_name: column type to filter on | 40 @param column_type_name: column type to filter on |
| 37 ''' | 41 ''' |
| 38 (data, header) = read_library(library_file) | 42 if library_file == "": |
| 39 | 43 galaxy_output = [("", "", False)] |
| 40 if ('columntype' not in header or | 44 else: |
| 41 'columnphasetype' not in header): | 45 (data, header) = read_library(library_file) |
| 42 raise IOError('Missing columns in ', library_file) | 46 |
| 43 | 47 if ('columntype' not in header or |
| 44 column_type = header.index("columntype") | 48 'columnphasetype' not in header): |
| 45 statphase = header.index("columnphasetype") | 49 raise IOError('Missing columns in ', library_file) |
| 46 | 50 |
| 47 # Filter data on colunn type name | 51 column_type = header.index("columntype") |
| 48 statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name] | 52 statphase = header.index("columnphasetype") |
| 49 amounts_in_list_dict = count_occurrence(statphase_list) | 53 |
| 50 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | 54 # Filter data on colunn type name |
| 55 statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name] | |
| 56 amounts_in_list_dict = count_occurrence(statphase_list) | |
| 57 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | |
| 58 | |
| 51 return(sorted(galaxy_output)) | 59 return(sorted(galaxy_output)) |
| 52 | 60 |
| 53 | 61 |
| 54 def filter_column2(library_file, column_type_name, statphase): | 62 def filter_column2(library_file, column_type_name, statphase): |
| 55 ''' | 63 ''' |
| 56 Filters the Retention Index database on column type | 64 Filters the Retention Index database on column type |
| 57 @param library_file: file containing the database | 65 @param library_file: file containing the database |
| 58 @param column_type_name: column type to filter on | 66 @param column_type_name: column type to filter on |
| 59 @param statphase: stationary phase of the column to filter on | 67 @param statphase: stationary phase of the column to filter on |
| 60 ''' | 68 ''' |
| 61 (data, header) = read_library(library_file) | 69 if library_file == "": |
| 62 | 70 galaxy_output = [("", "", False)] |
| 63 if ('columntype' not in header or | 71 else: |
| 64 'columnphasetype' not in header or | 72 (data, header) = read_library(library_file) |
| 65 'columnname' not in header): | 73 |
| 66 raise IOError('Missing columns in ', library_file) | 74 if ('columntype' not in header or |
| 67 | 75 'columnphasetype' not in header or |
| 68 column_type_column = header.index("columntype") | 76 'columnname' not in header): |
| 69 statphase_column = header.index("columnphasetype") | 77 raise IOError('Missing columns in ', library_file) |
| 70 column_name_column = header.index("columnname") | 78 |
| 71 | 79 column_type_column = header.index("columntype") |
| 72 # Filter data on given column type name and stationary phase | 80 statphase_column = header.index("columnphasetype") |
| 73 statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and | 81 column_name_column = header.index("columnname") |
| 74 line[statphase_column] == statphase] | 82 |
| 75 amounts_in_list_dict = count_occurrence(statphase_list) | 83 # Filter data on given column type name and stationary phase |
| 76 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | 84 statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and |
| 85 line[statphase_column] == statphase] | |
| 86 amounts_in_list_dict = count_occurrence(statphase_list) | |
| 87 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | |
| 88 | |
| 77 return(sorted(galaxy_output)) | 89 return(sorted(galaxy_output)) |
| 78 | 90 |
| 79 | 91 |
| 80 def read_library(filename): | 92 def read_library(filename): |
| 81 ''' | 93 ''' |
| 94 returns the list of .txt files found as a dictionary | 106 returns the list of .txt files found as a dictionary |
| 95 with file name and full path so that it can | 107 with file name and full path so that it can |
| 96 fill a Galaxy drop-down combo box. | 108 fill a Galaxy drop-down combo box. |
| 97 | 109 |
| 98 ''' | 110 ''' |
| 99 files = glob.glob(dir_name + "/*.txt") | 111 files = glob.glob(dir_name + "/*.*") |
| 100 if len(files) == 0: | 112 if len(files) == 0: |
| 101 raise Exception("Configuration error: no library files found in <galaxy-home-dir>/" + dir_name) | 113 # Configuration error: no library files found in <galaxy-home-dir>/" + dir_name : |
| 114 galaxy_output = [("Configuration error: expected file not found in <galaxy-home-dir>/" + dir_name, "", False)] | |
| 102 else: | 115 else: |
| 103 galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files] | 116 galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files] |
| 104 return(galaxy_output) | 117 return(galaxy_output) |
| 105 | 118 |
| 106 def get_file_name_no_ext(full_name): | 119 def get_file_name_no_ext(full_name): |
