annotate cut_timeseries.py @ 5:0026cd452d0e draft default tip

planemo upload commit 72cee9103c0ae4acb5794afaed179bea2c729f2c-dirty
author stevecassidy
date Sat, 11 Mar 2017 21:37:57 -0500
parents f188eb0b526d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
1 import csv
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
2 import argparse
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
3
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
4
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
5
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
def parser(argv=None):
    """Parse the command-line options of the time-series cutting script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads ``sys.argv[1:]``, which is what the
            zero-argument call ``parser()`` has always done.

    Returns:
        An ``argparse.Namespace`` with the attributes ``segment_list``,
        ``timeseries``, ``identifier``, ``cutat`` (a float) and
        ``output_path``. All five options are required; argparse exits with
        a usage message when one is missing or ``--cutat`` is not a number.
    """
    # Named arg_parser so the local no longer shadows this function's name.
    arg_parser = argparse.ArgumentParser(description="Cut data for a segment from a timeseries")
    arg_parser.add_argument('--segment_list', required=True, action="store", type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--timeseries', required=True, action="store", type=str, help="time series data (comma separated file names)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str, help="Time series dataset identifiers (comma separated)")
    arg_parser.add_argument('--cutat', required=True, action="store", type=float, help="cut point 0-1")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
14
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
15
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
def read_segment_list(filename):
    """Read a segment list from a tab-separated file with a header row.

    Only the presence of an ``identifier`` column is checked here; the
    other columns are passed through untouched.

    Args:
        filename: Path of the tab-separated segment file.

    Returns:
        A list with one dict per data row, keyed by the header names, in
        file order. Returns None when the header has no ``identifier``
        column, which includes the case of a completely empty file.
    """
    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')
        # fieldnames is None when the file has no header line at all; treat
        # that like a missing column instead of failing on `in None`.
        fieldnames = csvreader.fieldnames or []
        if 'identifier' not in fieldnames:
            return None

        return list(csvreader)
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
34
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
def get_tsfile(ident, tsfiles):
    """Return the time-series file whose dataset identifier contains ident.

    Args:
        ident: Identifier to look for.
        tsfiles: Iterable of ``(tsid, dsname)`` pairs.

    Returns:
        The ``dsname`` of the first pair whose ``tsid`` contains ``ident``
        as a substring, or ``''`` when no pair matches or ``ident`` is
        empty or None.
    """
    # An empty string is a substring of every tsid (it would always select
    # the first dataset) and None cannot be tested with `in` against a
    # string, so neither can identify a dataset.
    if not ident:
        return ''

    for tsid, dsname in tsfiles:
        if ident in tsid:
            return dsname

    return ''
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
43
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
44
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
def cut(tsfiles, segfile, cutpoint):
    """Pick one time-series row per segment at a relative position in it.

    For every segment read from ``segfile`` the matching time-series file is
    looked up with ``get_tsfile`` and read as a tab-separated file. Rows
    whose first column lies strictly between the segment's ``start`` and
    ``end`` are collected, and the row at index
    ``int(cutpoint * number_of_collected_rows)`` is selected.

    Args:
        tsfiles: Iterable of ``(tsid, dsname)`` pairs, as accepted by
            ``get_tsfile``.
        segfile: Path of the segment list, read by ``read_segment_list``.
        cutpoint: Relative position inside the segment. The computed index
            is clamped to the collected rows, so 0 selects the first
            collected row and 1 the last one.

    Returns:
        A tuple ``(headers, result)``. ``headers`` is
        ``['identifier', 'label']`` followed by the most recently read
        time-series header row (a row whose first cell is ``'time'``), if
        any. ``result`` has one list per segment:
        ``[identifier, label] + selected_row``. Segments with no matching
        time-series file or no rows inside their interval are skipped.

    Raises:
        ValueError: If a segment's start/end, or the first cell of a
            non-header time-series row, is not a number.
    """
    # The pairs are scanned once per segment; a one-shot iterator (such as
    # the result of zip() on Python 3) would be used up by the first lookup.
    tsfiles = list(tsfiles)

    # read_segment_list returns None when the file has no 'identifier'
    # column; there is nothing to cut in that case.
    segments = read_segment_list(segfile) or []

    headers = ['identifier', 'label']
    tsheader = []  # stays empty when no time-series header row is ever seen
    result = []

    for seg in segments:

        start = float(seg['start'])
        end = float(seg['end'])
        label = seg['label']
        ident = seg['identifier']
        tsfile = get_tsfile(ident, tsfiles)

        if tsfile == '':
            continue

        collect = []
        with open(tsfile, 'r') as fd:
            reader = csv.reader(fd, dialect=csv.excel_tab)
            for row in reader:
                if not row:
                    # blank line: csv.reader yields an empty list
                    continue
                if row[0] == 'time':
                    tsheader = row
                elif start < float(row[0]) < end:
                    collect.append(row)

        if not collect:
            # no samples fall inside this segment, so there is no row to pick
            continue

        # grab the row at the cut point; clamp so that cutpoint == 1 selects
        # the last row instead of indexing one past the end
        n = min(max(int(cutpoint * len(collect)), 0), len(collect) - 1)
        row = [ident, label]
        row.extend(collect[n])
        result.append(row)

    headers.extend(tsheader)
    return (headers, result)
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
84
6f4db0e89117 planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
stevecassidy
parents:
diff changeset
if __name__ == '__main__':

    args = parser()

    # --timeseries and --identifier are parallel comma-separated lists:
    # the i-th identifier names the i-th time-series file.
    tsfiles = args.timeseries.split(',')
    tsidents = args.identifier.split(',')

    # Build a real list of (identifier, file) pairs: cut() scans the pairs
    # once per segment, and a bare zip() is a one-shot iterator on Python 3.
    headers, rows = cut(list(zip(tsidents, tsfiles)), args.segment_list, args.cutat)

    # Write the header line followed by one tab-separated line per row.
    with open(args.output_path, 'w') as out:
        writer = csv.writer(out, dialect=csv.excel_tab)
        writer.writerow(headers)
        writer.writerows(rows)