Mercurial > repos > stevecassidy > wrassp
changeset 2:6f4db0e89117 draft
planemo upload commit 2acf1e5f5efe0ee3ef51a611a3f7c94ce73d3b89-dirty
| author | stevecassidy |
|---|---|
| date | Thu, 08 Dec 2016 01:45:31 -0500 |
| parents | dbcf9bc275e3 |
| children | f188eb0b526d |
| files | cut_timeseries.py cut_timeseries.xml |
| diffstat | 2 files changed, 133 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# b/cut_timeseries.py (new file in this changeset, Thu Dec 08 01:45:31 2016 -0500)
"""Cut one sample per segment out of tab-separated time series files.

For every segment in a segment list, the time series file whose identifier
contains the segment identifier is read, the samples lying inside the
segment are collected, and the sample at a relative cut point (0-1) is
written to the output together with the segment identifier and label.
"""
import csv
import argparse


def parser():
    """Parse and return the command line arguments of the script."""
    # The local is not called ``parser`` so it does not shadow this function.
    argparser = argparse.ArgumentParser(description="Cut data for a segment from a timeseries")
    argparser.add_argument('--segment_list', required=True, action="store", type=str, help="File containing list of item URLs")
    argparser.add_argument('--timeseries', required=True, action="store", type=str, help="time series data (comma separated file names)")
    argparser.add_argument('--identifier', required=True, action="store", type=str, help="Time series dataset identifiers (comma separated)")
    argparser.add_argument('--cutat', required=True, action="store", type=float, help="cut point 0-1")
    argparser.add_argument('--output_path', required=True, action="store", type=str, help="Path to output file")
    return argparser.parse_args()


def read_segment_list(filename):
    """Read a segment list from a tab-separated file.

    The first row of the file names the columns; ``cut`` reads the
    columns start, end, label and identifier.

    Return a list with one dictionary (column name -> value) per segment,
    in file order, or None if the file has no 'identifier' column
    (which includes a completely empty file).
    """
    with open(filename) as fd:
        csvreader = csv.DictReader(fd, dialect='excel-tab')
        # fieldnames is None for an empty file; treat that like a missing column
        if 'identifier' not in (csvreader.fieldnames or []):
            return None
        return list(csvreader)


def get_tsfile(ident, tsfiles):
    """Return the file name of the time series that matches an identifier.

    tsfiles is an iterable of (identifier, filename) pairs.  A pair matches
    when ident is a substring of its identifier; the first match wins.
    Return '' if nothing matches.
    """
    for tsid, dsname in tsfiles:
        if ident in tsid:
            return dsname

    return ''


def _read_samples(tsfile, start, end):
    """Return (header, rows) for tsfile.

    header is the last row whose first field is 'sampletime' (None if there
    is no such row); rows are the data rows whose sample time lies strictly
    between start and end.
    """
    header = None
    rows = []
    with open(tsfile, 'r') as fd:
        for row in csv.reader(fd, dialect=csv.excel_tab):
            if not row:
                # blank line: nothing to index into
                continue
            if row[0] == 'sampletime':
                header = row
                continue
            if start < float(row[0]) < end:
                rows.append(row)
    return header, rows


def cut(tsfiles, segfile, cutpoint):
    """Cut the sample at a relative position out of every segment.

    tsfiles  -- iterable of (identifier, filename) pairs of time series files
    segfile  -- name of the segment list file (see read_segment_list)
    cutpoint -- relative position inside the segment, 0 selects the first
                sample of the segment and 1 the last one

    Return a tuple (headers, result).  headers is
    ['identifier', 'label'] followed by the header row of the last time
    series file that had one; result has one row per segment, made of the
    segment identifier, its label and the fields of the selected sample.
    Segments without a matching time series file, or without any sample
    between their start and end, are left out.

    Raise ValueError if the segment list has no 'identifier' column.
    """
    segments = read_segment_list(segfile)
    if segments is None:
        raise ValueError("segment list %r has no 'identifier' column" % (segfile,))

    # The caller may hand in a zip() iterator, which would be used up by the
    # first lookups; materialise it so every segment sees every file.
    tsfiles = list(tsfiles)

    headers = ['identifier', 'label']
    tsheader = []
    result = []

    for seg in segments:
        start = float(seg['start'])
        end = float(seg['end'])
        label = seg['label']
        ident = seg['identifier']

        tsfile = get_tsfile(ident, tsfiles)
        if tsfile == '':
            continue

        file_header, collect = _read_samples(tsfile, start, end)
        if file_header is not None:
            tsheader = file_header
        if not collect:
            # no sample inside the segment, so there is nothing to cut
            continue

        # grab the row at the cut point, clamped so that a cut point of 1
        # (or anything out of range) still selects an existing sample
        n = int(cutpoint * len(collect))
        n = max(0, min(n, len(collect) - 1))
        result.append([ident, label] + collect[n])

    headers.extend(tsheader)
    return (headers, result)


if __name__ == '__main__':

    args = parser()

    # get the list of timeseries files and their identifiers
    tsfiles = args.timeseries.split(',')
    tsidents = args.identifier.split(',')

    headers, rows = cut(list(zip(tsidents, tsfiles)), args.segment_list, args.cutat)

    # NOTE(review): the rows are written comma separated (csv default
    # dialect) while the inputs are read as tab separated -- confirm which
    # delimiter consumers of the output expect before changing it.
    with open(args.output_path, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(headers)
        for row in rows:
            writer.writerow(row)
<!-- b/cut_timeseries.xml (new file in this changeset, Thu Dec 08 01:45:31 2016 -0500) -->
<tool id="cut_timeseries" name="Cut data from a timeseries" version="0.1">
    <description>at a relative position within each segment</description>

    <!-- The dataset paths and their element identifiers are passed as two
         parallel comma separated lists; the script pairs them up by position. -->
    <command interpreter="python">
        cut_timeseries.py --timeseries "${",".join(map(str, $timeseries))}" --identifier "${",".join(map(str, [t.element_identifier for t in $timeseries]))}" --cutat ${cutat} --segment_list '${segment_list}' --output_path '$output'
    </command>

    <inputs>
        <param name="timeseries" type="data" multiple="true" format="tabular" label="Time Series"/>
        <!-- min/max enforce the 0-1 range the label promises -->
        <param name="cutat" type="float" label="Cut point (0-1)" value="0.5" min="0" max="1"/>
        <param name="segment_list" type="data" format="tabular" label="Segment List"/>
    </inputs>
    <outputs>
        <data format="tabular" name="output"/>
    </outputs>
    <tests>
        <test>
            <param name="cutat" value="0.5"/>
            <param name="timeseries" value="1_1119_2_22_001-formants.dat"/>
            <param name="segment_list" value="segmentlist.dat"/>
            <output name="output">
                <assert_contents>
                    <has_text text="sampletime"/>
                    <has_text text="@"/>
                    <has_text text="1_1119_2_22_001"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

For every row of the segment list, this tool looks for the time series
dataset whose name contains the segment's ``identifier``, collects the
samples whose ``sampletime`` lies between the segment's ``start`` and
``end``, and outputs the sample found at the cut point.

**Inputs**

- *Time Series*: one or more tabular datasets whose header row starts with
  ``sampletime``.
- *Cut point*: relative position inside the segment, from 0 (start of the
  segment) to 1 (end of the segment).
- *Segment List*: a tabular dataset with a header row naming at least the
  columns ``start``, ``end``, ``label`` and ``identifier``.

**Output**

One row per segment: the segment identifier, its label, and the fields of
the selected time series sample.
    ]]></help>
</tool>
