Mercurial > repos > stevecassidy > wrassp
annotate cut_timeseries.py @ 3:f188eb0b526d draft
planemo upload commit 2d22bc12c66a3d1949d4bf4c84e7ce7c4610383c-dirty
| author | stevecassidy |
|---|---|
| date | Wed, 14 Dec 2016 21:53:57 -0500 |
| parents | 6f4db0e89117 |
| children | 0026cd452d0e |
| rev | line source |
|---|---|
|
2
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
1 import csv |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
2 import argparse |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
3 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
4 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
5 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
def parser():
    """Define the command-line options and parse the process arguments.

    Every option is mandatory.  ``--cutat`` is converted to ``float``; the
    remaining options are kept as strings.

    Returns the ``argparse`` namespace holding ``segment_list``,
    ``timeseries``, ``identifier``, ``cutat`` and ``output_path``.
    """
    cli = argparse.ArgumentParser(description="Cut data for a segment from a timeseries")

    # (option flag, value type, help text) -- registered in this order so the
    # generated usage/help output keeps the same layout.
    options = (
        ('--segment_list', str, "File containing list of item URLs"),
        ('--timeseries', str, "time series data (comma separated file names)"),
        ('--identifier', str, "Time series dataset identifiers (comma separated)"),
        ('--cutat', float, "cut point 0-1"),
        ('--output_path', str, "Path to output file"),
    )
    for flag, value_type, help_text in options:
        cli.add_argument(flag, required=True, action="store", type=value_type, help=help_text)

    return cli.parse_args()
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
14 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
15 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
def read_segment_list(filename):
    """Read a segment list from a file.

    The file should be tab-separated with a header line naming the
    columns start, end, label, duration, identifier.

    Returns a list with one dictionary per data row, keyed by column
    name (all values are strings as read from the file).  Returns None
    if the file is empty or its header has no 'identifier' column.
    """

    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')

        # DictReader.fieldnames is None when the file has no header line at
        # all; a membership test against None would raise TypeError, so an
        # empty file is reported the same way as a missing column.
        fieldnames = csvreader.fieldnames
        if not fieldnames or 'identifier' not in fieldnames:
            return None

        return list(csvreader)
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
34 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
def get_tsfile(ident, tsfiles):
    """Find the time series file belonging to a segment identifier.

    tsfiles is iterated as (dataset identifier, file name) pairs.  The
    file name of the first pair whose dataset identifier contains ident
    is returned; if no pair matches, the empty string is returned.
    """

    # Lazy scan: stops at the first matching pair.
    candidates = (dsname for tsid, dsname in tsfiles if ident in tsid)
    return next(candidates, '')
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
43 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
44 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
def cut(tsfiles, segfile, cutpoint):
    """Cut one row of time series data for each segment in a segment list.

    For every segment read from segfile, the time series file matching
    the segment's identifier is scanned, the rows whose first column
    (time) lies strictly between the segment's start and end are
    collected, and the row at the relative position cutpoint within
    those rows is selected.

    tsfiles  -- iterable of (dataset identifier, file name) pairs
    segfile  -- path of the tab-separated segment list; its rows must
                provide 'start', 'end', 'label' and 'identifier'
    cutpoint -- relative position inside the segment, nominally 0-1;
                positions outside the collected rows are clamped to
                the first/last row

    Segments with no matching time series file, or with no time series
    rows inside their time span, are skipped.

    Return a tuple (headers, rows): headers is ['identifier', 'label']
    followed by the header row of the last time series file that had
    one (nothing extra if none was read); each row is
    [identifier, label] followed by the selected time series row.

    Raises ValueError if the segment list cannot be read (it is empty
    or lacks an 'identifier' column).
    """

    segments = read_segment_list(segfile)
    if segments is None:
        # read_segment_list signals an unusable file with None; iterating it
        # would only produce an opaque TypeError.
        raise ValueError("segment list %r is empty or has no 'identifier' column" % (segfile,))

    # tsfiles is searched once per segment, so it must be re-iterable; a
    # one-shot iterator (e.g. zip() on Python 3) would be exhausted by the
    # first lookup.
    tsfiles = list(tsfiles)

    headers = ['identifier', 'label']
    tsheader = []  # stays empty if no time series file with a header is read
    result = []

    for seg in segments:

        start = float(seg['start'])
        end = float(seg['end'])
        label = seg['label']
        ident = seg['identifier']
        tsfile = get_tsfile(ident, tsfiles)

        if tsfile == '':
            continue

        collect = []
        with open(tsfile, 'r') as fd:
            reader = csv.reader(fd, dialect=csv.excel_tab)
            for row in reader:
                if not row:
                    # blank line: nothing to index
                    continue
                if row[0] == 'time':
                    tsheader = row
                    continue
                time = float(row[0])
                if start < time < end:
                    collect.append(row)

        if not collect:
            # no samples inside this segment, so there is nothing to cut
            continue

        # grab the row at the cut point, clamped so that cutpoint == 1.0
        # selects the last row instead of indexing past the end
        n = int(cutpoint * len(collect))
        n = max(0, min(n, len(collect) - 1))
        row = [ident, label]
        row.extend(collect[n])
        result.append(row)

    headers.extend(tsheader)
    return (headers, result)
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
84 |
|
6f4db0e89117
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff
changeset
|
if __name__ == '__main__':

    args = parser()

    # get the list of timeseries files and their dataset identifiers
    tsfiles = args.timeseries.split(',')
    tsidents = args.identifier.split(',')

    # Pair each identifier with its file.  The pairs are searched once per
    # segment, so build a real list: on Python 3 a bare zip() is a one-shot
    # iterator that would be used up by the first segment's lookup.
    tspairs = list(zip(tsidents, tsfiles))

    headers, rows = cut(tspairs, args.segment_list, args.cutat)

    # write the header line followed by one line per cut segment
    with open(args.output_path, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(headers)
        writer.writerows(rows)
