Mercurial > repos > stevecassidy > wrassp
comparison cut_timeseries.py @ 2:6f4db0e89117 draft
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
| author | stevecassidy |
|---|---|
| date | Thu, 08 Dec 2016 01:45:31 -0500 |
| parents | |
| children | f188eb0b526d |
comparison
equal
deleted
inserted
replaced
| 1:dbcf9bc275e3 | 2:6f4db0e89117 |
|---|---|
| 1 import csv | |
| 2 import argparse | |
| 3 | |
| 4 | |
| 5 | |
def parser(argv=None):
    """Parse the command-line options for the cut tool.

    argv -- optional list of argument strings; when None (the default)
            argparse reads the arguments from sys.argv[1:], which is what
            the script entry point relies on.

    Return the argparse namespace with the attributes segment_list,
    timeseries, identifier, cutat (float) and output_path.
    All five options are required.
    """
    arg_parser = argparse.ArgumentParser(description="Cut data for a segment from a timeseries")
    arg_parser.add_argument('--segment_list', required=True, action="store", type=str, help="File containing list of item URLs")
    arg_parser.add_argument('--timeseries', required=True, action="store", type=str, help="time series data (comma separated file names)")
    arg_parser.add_argument('--identifier', required=True, action="store", type=str, help="Time series dataset identifiers (comma separated)")
    arg_parser.add_argument('--cutat', required=True, action="store", type=float, help="cut point 0-1")
    arg_parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return arg_parser.parse_args(argv)
| 14 | |
| 15 | |
def read_segment_list(filename):
    """Read a segment list from a tab-separated file.

    The file is expected to have a header row; callers use the columns
    start, end, label and identifier.

    Return a list with one dictionary per data row, keyed by the column
    names from the header.  Return None when the file has no
    'identifier' column, which includes a completely empty file.
    """

    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')
        # fieldnames is None for an empty file; treat that the same as a
        # header without the required column instead of raising TypeError.
        fieldnames = csvreader.fieldnames
        if not fieldnames or 'identifier' not in fieldnames:
            return None

        return list(csvreader)
| 34 | |
def get_tsfile(ident, tsfiles):
    """Look up the time-series file for an identifier.

    tsfiles is an iterable of (dataset identifier, file name) pairs.
    The first pair whose dataset identifier contains ident as a
    substring wins; its file name is returned.  An empty string is
    returned when no pair matches.
    """
    matches = (dsname for tsid, dsname in tsfiles if ident in tsid)
    return next(matches, '')
| 43 | |
| 44 | |
def cut(tsfiles, segfile, cutpoint):
    """Pick one time-series sample per segment at a relative cut point.

    tsfiles  -- iterable of (dataset identifier, file name) pairs
    segfile  -- path of the tab-separated segment list
    cutpoint -- relative position inside the segment, 0 (first sample
                in the segment) to 1 (last sample in the segment)

    For every segment, the matching time-series file is read and the
    rows whose first column (the sample time) lies strictly between the
    segment's start and end are collected.  The row at the cut point is
    then prefixed with the segment identifier and label.

    Return a tuple (headers, rows).  headers is ['identifier', 'label']
    followed by the most recently seen 'sampletime' header row (nothing
    is added if no such row was seen).  Segments without a matching
    time-series file, or without any sample inside the segment, are
    skipped.

    Raise ValueError when cutpoint is outside 0-1 or when the segment
    list has no 'identifier' column.
    """
    if not 0.0 <= cutpoint <= 1.0:
        raise ValueError("cutpoint must be between 0 and 1, got %r" % (cutpoint,))

    segments = read_segment_list(segfile)
    if segments is None:
        raise ValueError("segment list %r has no 'identifier' column" % (segfile,))

    # The pairs are searched once per segment, so a one-shot iterator
    # (e.g. zip() on Python 3) has to be materialised first.
    tsfiles = list(tsfiles)

    headers = ['identifier', 'label']
    tsheader = []
    result = []

    for seg in segments:

        start = float(seg['start'])
        end = float(seg['end'])
        label = seg['label']
        ident = seg['identifier']
        tsfile = get_tsfile(ident, tsfiles)

        if tsfile == '':
            continue

        collect = []
        with open(tsfile, 'r') as fd:
            reader = csv.reader(fd, dialect=csv.excel_tab)
            for row in reader:
                if not row:
                    # blank line in the data file
                    continue
                if row[0] == 'sampletime':
                    tsheader = row
                    continue
                sampletime = float(row[0])
                if start < sampletime < end:
                    collect.append(row)

        if not collect:
            # no samples fall inside this segment, nothing to cut
            continue

        # grab the row at the cut point; a cut point of exactly 1 would
        # index one past the end, so clamp to the last collected row
        n = min(int(cutpoint * len(collect)), len(collect) - 1)
        row = [ident, label]
        row.extend(collect[n])
        result.append(row)

    headers.extend(tsheader)
    return (headers, result)
| 84 | |
if __name__=='__main__':

    args = parser()

    # Pair each dataset identifier with its time series file.  The pairs
    # are searched once per segment, so build a real list: on Python 3 a
    # bare zip() is a one-shot iterator and would be used up by the
    # first lookup.
    tsfiles = args.timeseries.split(',')
    tsidents = args.identifier.split(',')
    pairs = list(zip(tsidents, tsfiles))

    headers, rows = cut(pairs, args.segment_list, args.cutat)

    # Write the header line followed by one line per cut segment.
    with open(args.output_path, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(headers)
        writer.writerows(rows)
