Mercurial > repos > stevecassidy > wrassp
view cut_timeseries.py @ 5:0026cd452d0e draft default tip
planemo upload commit 72cee9103c0ae4acb5794afaed179bea2c729f2c-dirty
| author | stevecassidy | 
|---|---|
| date | Sat, 11 Mar 2017 21:37:57 -0500 | 
| parents | f188eb0b526d | 
| children | 
line wrap: on
 line source
import csv import argparse def parser(): parser = argparse.ArgumentParser(description="Cut data for a segment from a timeseries") parser.add_argument('--segment_list', required=True, action="store", type=str, help="File containing list of item URLs") parser.add_argument('--timeseries', required=True, action="store", type=str, help="time series data (comma separated file names)") parser.add_argument('--identifier', required=True, action="store", type=str, help="Time series dataset identifiers (comma separated)") parser.add_argument('--cutat', required=True, action="store", type=float, help="cut point 0-1") parser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file") return parser.parse_args() def read_segment_list(filename): """Read an segment list from a file which should be a tabular formatted file with columns start, end, label, duration, identifier Return a dictionary with the 'identifier' field as keys and a dictionary of other values as the values. """ segments = [] with open(filename) as fd: csvreader = csv.DictReader(fd, dialect='excel-tab') if 'identifier' not in csvreader.fieldnames: return None for row in csvreader: segments.append(row) return segments def get_tsfile(ident, tsfiles): """Get the tsfile that matches the identifier """ for tsid, dsname in tsfiles: if ident in tsid: return dsname return '' def cut(tsfiles, segfile, cutpoint): """Cut data from tsfile corresponding to the cutpoint (0-1) for the segment with the id in segs. Return... """ segments = read_segment_list(segfile) headers = ['identifier', 'label'] result = [] for seg in segments: start = float(seg['start']) end = float(seg['end']) label = seg['label'] ident = seg['identifier'] tsfile = get_tsfile(ident, tsfiles) if tsfile == '': continue collect = [] with open(tsfile, 'r') as fd: reader = csv.reader(fd, dialect=csv.excel_tab) for row in reader: if row[0] == 'time': tsheader = row elif float(row[0]) > start and float(row[0]) < end: collect.append(row) # grab the row at the cut point(s) n = int(cutpoint * len(collect)) row = [ident, label] row.extend(collect[n]) result.append(row) headers.extend(tsheader) return (headers, result) if __name__=='__main__': args = parser() # get the list of timeseries files tsfiles = args.timeseries.split(',') tsidents = args.identifier.split(',') headers,rows = cut(zip(tsidents, tsfiles), args.segment_list, args.cutat) with open(args.output_path, 'w') as out: writer = csv.writer(out, dialect=csv.excel_tab) writer.writerow(headers) for row in rows: writer.writerow(row)
