Mercurial > repos > prog > lcmsmatching
annotate list-file-cols.py @ 7:882f2f20028b draft
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit 608d9e59a0d2dcf85a037968ddb2c61137fb9bce
| author | prog |
|---|---|
| date | Sun, 09 Apr 2017 10:44:21 -0400 |
| parents | b8f70d8216b3 |
| children |
| rev | line source |
|---|---|
|
6
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
2 # vi: fdm=marker |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
3 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
4 import csv |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
5 import re |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
6 import argparse |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
7 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
8 # Get file cols {{{1 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
9 ################################################################ |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
10 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
def get_file_cols(file, preferred):
    """List the columns of a TAB-separated file as (name, value, selected) tuples.

    The first row of the file is read as the header. Columns are ordered as
    follows: columns equal to one of the preferred names, then columns whose
    name matches a preferred name used as a regular expression (anchored at
    the start, see ``re.match``), then the remaining header columns in file
    order, and finally the literal entry ``'NA'``.

    Exactly one tuple has its third element set to True: the first entry when
    at least one preferred name matched, otherwise the trailing ``'NA'``.

    Args:
        file: Path of the file, or an object exposing ``get_file_name()``
            that returns the path (presumably a Galaxy dataset wrapper --
            verify against the caller).
        preferred: Comma-separated list of preferred column names. Each name
            is also used as a regular expression for partial matching.

    Returns:
        A list of ``(column, column, is_default)`` tuples.

    Raises:
        IOError / OSError: if the file cannot be opened.
        re.error: if a preferred name is not a valid regular expression.
    """

    # Duck-type the path holder instead of testing for `str`, so that other
    # string types (e.g. Python 2 unicode) are still treated as paths.
    path = file.get_file_name() if hasattr(file, 'get_file_name') else file

    with open(path, 'r') as f:
        reader = csv.reader(f, delimiter="\t", quotechar='"')
        # next(reader, []) works on both Python 2 and 3 (reader.next() exists
        # only on Python 2) and gives an empty header for an empty file
        # instead of raising StopIteration.
        header = next(reader, [])

    wanted = preferred.split(',')

    # Collect matches, keeping each column at most once per category so that
    # a column matching several preferred names is not listed repeatedly.
    perfect_matches = []
    partial_matches = []
    for p in wanted:
        for c in header:
            if c == p:
                if c not in perfect_matches:
                    perfect_matches.append(c)
            elif re.match(p, c):
                if c not in partial_matches:
                    partial_matches.append(c)

    # Perfect matches come first; a partial match that is also a perfect
    # match for another preferred name must not appear a second time.
    ordered_cols = list(perfect_matches)
    for c in partial_matches + header:
        if c not in ordered_cols:
            ordered_cols.append(c)
    has_match = bool(ordered_cols) and bool(perfect_matches or partial_matches)

    # 'NA' is always offered as the last choice and is the default when no
    # preferred column was found.
    ordered_cols.append('NA')
    default = 0 if has_match else len(ordered_cols) - 1

    return [(c, c, i == default) for i, c in enumerate(ordered_cols)]
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
48 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
49 # Main {{{1 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
50 ################################################################ |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
51 |
|
b8f70d8216b3
planemo upload for repository https://github.com/workflow4metabolomics/lcmsmatching.git commit b3a4e34cf9356447ae3507cc6fe2ff6a1f24afbc-dirty
prog
parents:
diff
changeset
|
if __name__ == '__main__':

    # Describe the command line interface: both options are mandatory.
    cli = argparse.ArgumentParser(description='Script for getting column names in a csv file.')
    cli.add_argument('-f', dest='file', required=True,
                     help='CSV File (separator must be TAB)')
    cli.add_argument('-p', dest='preferred', required=True,
                     help='List (comma separated values) of preferred column names for default one.')
    options = cli.parse_args()

    # Print the list of (name, value, selected) tuples for the given file.
    print(get_file_cols(file=options.file, preferred=options.preferred))
