prims_metabolomics: match_library.py comparison

comparison match_library.py @ 1:071a185c2ced

new tools

author	pieter.lukasse@wur.nl
date	Fri, 24 Oct 2014 12:52:56 +0200
parents	4b94bb2d381c
children

comparison

Legend: equal | deleted | inserted | replaced

-:4b94bb2d381c
+:071a185c2ced
 '''
 Returns a Galaxy formatted list of tuples containing all possibilities for the
 GC-column types. Used by the library_lookup.xml tool
 @param library_file: given library file from which the list of GC-column types is extracted
 '''
-(data, header) = read_library(library_file)
+if library_file == "":
+galaxy_output = [("", "", False)]
-if 'columntype' not in header:
+else:
-raise IOError('Missing columns in ', library_file)
+(data, header) = read_library(library_file)
-# Filter data on column type
+if 'columntype' not in header:
-column_type = header.index("columntype")
+raise IOError('Missing columns in ', library_file)
-amounts_in_list_dict = count_occurrence([row[column_type] for row in data])
-galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()]
+# Filter data on column type
+column_type = header.index("columntype")
+amounts_in_list_dict = count_occurrence([row[column_type] for row in data])
+galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()]
 return(galaxy_output)
 def filter_column(library_file, column_type_name):
 '''
 Filters the Retention Index database on column type
 @param library_file: file containing the database
 @param column_type_name: column type to filter on
 '''
-(data, header) = read_library(library_file)
+if library_file == "":
+galaxy_output = [("", "", False)]
-if ('columntype' not in header or
+else:
-'columnphasetype' not in header):
+(data, header) = read_library(library_file)
-raise IOError('Missing columns in ', library_file)
+if ('columntype' not in header or
-column_type = header.index("columntype")
+'columnphasetype' not in header):
-statphase = header.index("columnphasetype")
+raise IOError('Missing columns in ', library_file)
-# Filter data on column type name
+column_type = header.index("columntype")
-statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name]
+statphase = header.index("columnphasetype")
-amounts_in_list_dict = count_occurrence(statphase_list)
-galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
+# Filter data on column type name
+statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name]
+amounts_in_list_dict = count_occurrence(statphase_list)
+galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
 return(sorted(galaxy_output))
 def filter_column2(library_file, column_type_name, statphase):
 '''
 Filters the Retention Index database on column type
 @param library_file: file containing the database
 @param column_type_name: column type to filter on
 @param statphase: stationary phase of the column to filter on
 '''
-(data, header) = read_library(library_file)
+if library_file == "":
+galaxy_output = [("", "", False)]
-if ('columntype' not in header or
+else:
-'columnphasetype' not in header or
+(data, header) = read_library(library_file)
-'columnname' not in header):
-raise IOError('Missing columns in ', library_file)
+if ('columntype' not in header or
+'columnphasetype' not in header or
-column_type_column = header.index("columntype")
+'columnname' not in header):
-statphase_column = header.index("columnphasetype")
+raise IOError('Missing columns in ', library_file)
-column_name_column = header.index("columnname")
+column_type_column = header.index("columntype")
-# Filter data on given column type name and stationary phase
+statphase_column = header.index("columnphasetype")
-statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and
+column_name_column = header.index("columnname")
-line[statphase_column] == statphase]
-amounts_in_list_dict = count_occurrence(statphase_list)
+# Filter data on given column type name and stationary phase
-galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
+statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and
+line[statphase_column] == statphase]
+amounts_in_list_dict = count_occurrence(statphase_list)
+galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()]
 return(sorted(galaxy_output))
 def read_library(filename):
 '''
 returns the list of .txt files found as a dictionary
 with file name and full path so that it can
 fill a Galaxy drop-down combo box.
 '''
-files = glob.glob(dir_name + "/*.txt")
+files = glob.glob(dir_name + "/*.*")
 if len(files) == 0:
-raise Exception("Configuration error: no library files found in <galaxy-home-dir>/" + dir_name)
+# Configuration error: no library files found in <galaxy-home-dir>/" + dir_name :
+galaxy_output = [("Configuration error: expected file not found in <galaxy-home-dir>/" + dir_name, "", False)]
 else:
 galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files]
 return(galaxy_output)
 def get_file_name_no_ext(full_name):

Mercurial > repos > pieterlukasse > prims_metabolomics

comparison match_library.py @ 1:071a185c2ced