annotate repmatch_gff3.py @ 0:d33030c8e2cc draft

Uploaded
author greg
date Tue, 17 Nov 2015 14:26:08 -0500
parents
children 6df81aade62c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d33030c8e2cc Uploaded
greg
parents:
diff changeset
1 # repmatch.py
d33030c8e2cc Uploaded
greg
parents:
diff changeset
2 #
d33030c8e2cc Uploaded
greg
parents:
diff changeset
3 # Replicate matching - matches paired peaks from two or more replicates
d33030c8e2cc Uploaded
greg
parents:
diff changeset
4 #
d33030c8e2cc Uploaded
greg
parents:
diff changeset
5 # Input: one or more gff files (simple output from cwpair2), each a list of paired peaks from a replicate
d33030c8e2cc Uploaded
greg
parents:
diff changeset
6 #
d33030c8e2cc Uploaded
greg
parents:
diff changeset
7 # Output: list of matched groups and list of unmatched orphans
d33030c8e2cc Uploaded
greg
parents:
diff changeset
8 # Files: key.tabular (file to replicate ID), summary.tabular, detail.tabular, orphans.tabular
d33030c8e2cc Uploaded
greg
parents:
diff changeset
9
d33030c8e2cc Uploaded
greg
parents:
diff changeset
10 import argparse
d33030c8e2cc Uploaded
greg
parents:
diff changeset
11 import repmatch_gff3_util
d33030c8e2cc Uploaded
greg
parents:
diff changeset
12
d33030c8e2cc Uploaded
greg
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options below and forward them,
    # positionally, to repmatch_gff3_util.process_files.
    parser = argparse.ArgumentParser()

    # --input may be repeated; each occurrence takes exactly two values, which
    # are consumed further down as (dataset path, hid).  It is marked required
    # because without it args.inputs would be None and the unpacking below
    # would fail with an opaque TypeError instead of a usage message.
    parser.add_argument('--input', dest='inputs', action='append', nargs=2, required=True,
                        help="Input datasets")
    parser.add_argument('--method', dest='method', default='closest',
                        help='Method of finding match')
    parser.add_argument('--distance', dest='distance', type=int, default=50,
                        help='Maximum distance between peaks in different replicates to allow merging')
    parser.add_argument('--step', dest='step', type=int, default=0,
                        help='Step size of distance for each iteration')
    parser.add_argument('--replicates', dest='replicates', type=int, default=2,
                        help='Minimum number of replicates that must be matched for merging to occur')
    parser.add_argument('--low_limit', dest='low_limit', type=int, default=-1000,
                        help='Lower limit for c-w distance filter')
    parser.add_argument('--up_limit', dest='up_limit', type=int, default=1000,
                        help='Upper limit for c-w distance filter')
    parser.add_argument('--output_files', dest='output_files', default='simple',
                        help='Restrict output dataset collections.')
    parser.add_argument('--plot_format', dest='plot_format', default=None,
                        help='Output format for graph')
    parser.add_argument('--output_summary', dest='output_summary',
                        help='Matched groups in gff format')
    parser.add_argument('--output_orphan', dest='output_orphan', default=None,
                        help='Orphans in tabular format')
    parser.add_argument('--output_detail', dest='output_detail', default=None,
                        help='Details in tabular format')
    parser.add_argument('--output_key', dest='output_key', default=None,
                        help='Keys in tabular format')
    parser.add_argument('--output_histogram', dest='output_histogram', default=None,
                        help='Histogram in plot_format')

    args = parser.parse_args()

    # Split the (path, hid) pairs into two parallel lists, preserving the
    # order in which the --input options were given.
    dataset_paths = [dataset_path for dataset_path, _ in args.inputs]
    hids = [hid for _, hid in args.inputs]

    # NOTE(review): up_limit is passed before low_limit, the reverse of the
    # order in which the two options are declared above; confirm this matches
    # the parameter order of process_files.
    repmatch_gff3_util.process_files(dataset_paths,
                                     hids,
                                     args.method,
                                     args.distance,
                                     args.step,
                                     args.replicates,
                                     args.up_limit,
                                     args.low_limit,
                                     args.output_files,
                                     args.plot_format,
                                     args.output_summary,
                                     args.output_orphan,
                                     args.output_detail,
                                     args.output_key,
                                     args.output_histogram)