Mercurial > repos > prog > lcmsmatching
comparison list-file-cols.py @ 6:b8f70d8216b3 draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
| author | prog |
|---|---|
| date | Mon, 27 Mar 2017 06:27:29 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:18254e8d1b72 | 6:b8f70d8216b3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # vi: fdm=marker | |
| 3 | |
| 4 import csv | |
| 5 import re | |
| 6 import argparse | |
| 7 | |
| 8 # Get file cols {{{1 | |
| 9 ################################################################ | |
| 10 | |
def get_file_cols(file, preferred):
    """Return the header columns of a TAB-separated file as option tuples.

    The first line of the file is read as the header. Columns are ordered
    so that those matching one of the preferred names come first, and
    exactly one entry is flagged as the default.

    Args:
        file: Path of the file as a string, or any object exposing a
            ``get_file_name()`` method that returns the path.
        preferred: Comma separated list of preferred column names. Each
            name is first compared for equality with the header columns
            and then used as a regular expression anchored at the start
            of the column name (``re.match``). Empty names are ignored.

    Returns:
        A list of ``(name, name, is_default)`` tuples, in this order:
        perfectly matching columns, partially matching columns, the
        remaining header columns, and finally a trailing ``'NA'`` entry.
        Each matched or remaining column is listed only once. The first
        entry is the default when at least one column matched; otherwise
        the trailing ``'NA'`` entry is the default.
    """
    path = file if isinstance(file, str) else file.get_file_name()

    # Only the header is needed, so the file is closed right after it is read.
    with open(path, 'r') as f:
        reader = csv.reader(f, delimiter="\t", quotechar='"')
        # The next() builtin works on both Python 2.6+ and Python 3, whereas
        # reader.next() only exists on Python 2. An empty file gives no header.
        header = next(reader, [])

    # An empty name would compile to an empty regex that matches every column.
    wanted = [p for p in preferred.split(',') if p]

    perfect_matches = []
    partial_matches = []
    for p in wanted:
        for c in header:
            if c == p:
                perfect_matches.append(c)
            elif re.match(p, c):
                # Kept as a fallback, ranked after every perfect match.
                partial_matches.append(c)

    has_match = bool(perfect_matches or partial_matches)

    # Matches first, then the rest of the header; a column matching several
    # preferred names must still appear only once.
    ordered_cols = []
    seen = set()
    for c in perfect_matches + partial_matches + header:
        if c not in seen:
            seen.add(c)
            ordered_cols.append(c)
    ordered_cols.append('NA')

    # Best match is selected by default; without any match fall back to 'NA'.
    default = 0 if has_match else len(ordered_cols) - 1

    return [(c, c, i == default) for i, c in enumerate(ordered_cols)]
| 48 | |
| 49 # Main {{{1 | |
| 50 ################################################################ | |
| 51 | |
if __name__ == '__main__':

    # Command-line interface: both the file and the preferred names are mandatory.
    parser = argparse.ArgumentParser(
        description='Script for getting column names in a csv file.')
    parser.add_argument(
        '-f', dest='file', required=True,
        help='CSV File (separator must be TAB)')
    parser.add_argument(
        '-p', dest='preferred', required=True,
        help='List (comma separated values) of preferred column names for default one.')
    args = parser.parse_args()

    # Print the list of column tuples computed from the file header.
    print(get_file_cols(file=args.file, preferred=args.preferred))
