annotate name_changer.py @ 2:abc729336fd0 draft default tip

Uploaded bug fix
author brenninc
date Fri, 06 May 2016 09:46:28 -0400
parents c28a790f2566
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
1 #!/usr/bin/env python
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
2
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
3 import optparse
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
4 import os.path
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
5
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
6
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
def fix_header_line(start_header, header_line, new_names):
    """Return a header line whose per-file column names are replaced.

    The header is expected to be tab separated: first the fixed columns
    listed in start_header, then one column per counted file.

    Parameters:
        start_header: list of the fixed leading column names, in order.
        header_line: the raw header line read from the input file.
        new_names: replacement names, one for each per-file column.

    Returns:
        The fixed columns followed by new_names, tab separated and
        terminated by a newline.

    Raises:
        Exception: if the line has no per-file columns, does not start with
            start_header, or the number of per-file columns differs from
            the number of new names.
    """
    header_parts = header_line.split("\t")
    fixed_count = len(start_header)
    if len(header_parts) <= fixed_count:
        raise Exception("Only found {0} columns in second (header) line expected at least {1}.".format(len(header_parts), (fixed_count + 1)))
    data_headers = header_parts[:fixed_count]
    if data_headers != start_header:
        # Report both sides so the user can see which column is wrong.
        raise Exception("Unexpected start to second (header) line Found: {0} Expected: {1}".format(data_headers, start_header))
    # Only the number of per-file columns matters here; their old names
    # (and the newline still attached to the last one) are discarded.
    file_headers = header_parts[fixed_count:]
    if len(file_headers) != len(new_names):
        raise Exception("Found {0} file columns in header line, but {1} new_name paramters provided.".format(len(file_headers), len(new_names)))
    return "\t".join(list(start_header) + list(new_names)) + "\n"
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
23
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
24
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
def clean_names(prefix, old_names):
    """Shorten names by dropping the text they all share, then add prefix.

    Each name is stripped of surrounding whitespace. When more than one name
    is given, the longest leading text and the longest trailing text common
    to every name are removed. A single name (or an empty list) is only
    stripped and prefixed.

    Trimming is capped so that every name keeps at least one character.
    Without the cap, inputs where one name is a prefix of another (for
    example "sample1.bam" and "sample10.bam") or where names are identical
    would be cut down to empty strings. When the cap applies, the shared
    suffix is removed in preference to the shared prefix.

    Parameters:
        prefix: text put in front of every resulting name.
        old_names: list of the original names.

    Returns:
        A new list with one shortened, prefixed name per entry of old_names,
        in the same order.
    """
    cleaned = [name.strip() for name in old_names]
    start = 0
    suffix_len = 0
    if len(cleaned) > 1:
        shared_start = os.path.commonprefix(cleaned)
        # The common prefix of the reversed names is the common suffix.
        shared_end = os.path.commonprefix([name[::-1] for name in cleaned])
        # Most characters that may be removed while leaving the shortest
        # name with at least one character.
        budget = max(min(len(name) for name in cleaned) - 1, 0)
        suffix_len = min(len(shared_end), budget)
        start = min(len(shared_start), budget - suffix_len)
    # Slice with an explicit stop index so suffix_len == 0 needs no special case.
    return [prefix + name[start:len(name) - suffix_len] for name in cleaned]
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
48
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
49
c28a790f2566 Uploaded first version
brenninc
parents:
diff changeset
def _copy_with_new_header(raw_path, fixed_path, start_header, new_names, lines_before_header=0):
    """Copy raw_path to fixed_path with the per-file header columns renamed.

    The first lines_before_header lines are read and dropped. The next line
    is rewritten by fix_header_line and every remaining line is copied
    unchanged. The header is validated before the output file is opened, so
    a bad header does not leave an empty output file behind.
    """
    with open(raw_path, "r") as input_file:
        for _ in range(lines_before_header):
            input_file.readline()
        header_line = fix_header_line(start_header, input_file.readline(), new_names)
        with open(fixed_path, "w") as output_file:
            output_file.write(header_line)
            for line in input_file:
                output_file.write(line)


def main():
    """Rename the per-file columns of a counts file and its summary file.

    The new names come either from --names_file (one name per line, cleaned
    by clean_names with --names_prefix) or from repeated --new_name options,
    which are used exactly as given. The counts file has one line before its
    header, which is not copied to the output; the summary file starts with
    its header.

    Missing or invalid command line options end the program through
    parser.error before any output is written.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("--raw_count_file", action="store", type="string", default=None, help="path to file original with the counts")
    parser.add_option("--fixed_count_file", action="store", type="string", default=None, help="new path for renamaned counts file")
    parser.add_option("--raw_summary_file", action="store", type="string", default=None, help="path to file original with the summary")
    parser.add_option("--fixed_summary_file", action="store", type="string", default=None, help="new path for renamaned summary file")
    parser.add_option("--names_file", action="store", type="string", default=None, help="path to file which contains the names.")
    parser.add_option("--new_name", action="append", type="string", default=None,
                      help="Names to be used. Must be the same length as in the raw_count_file")
    parser.add_option("--names_prefix", action="store", type="string", default="", help="Prefix to add in from of every name.")

    (options, args) = parser.parse_args()

    # All four paths are passed to open() below; report a missing one as a
    # usage error instead of failing later with a TypeError.
    for option_name in ("raw_count_file", "fixed_count_file", "raw_summary_file", "fixed_summary_file"):
        if not getattr(options, option_name):
            parser.error("No {0} parameter provided.".format(option_name))
    if not os.path.exists(options.raw_count_file):
        parser.error("Unable to find raw_count_file {0}.".format(options.raw_count_file))
    # Checked up front so the counts file is not written when the summary
    # cannot be processed.
    if not os.path.exists(options.raw_summary_file):
        parser.error("Unable to find raw_summary_file {0}.".format(options.raw_summary_file))

    if options.names_file:
        if options.new_name:
            parser.error("names_file parameter clashes with new_names paramter(s)")
        if not os.path.exists(options.names_file):
            parser.error("Unable to find names_file {0}.".format(options.names_file))
        with open(options.names_file, "r") as names_file:
            # Blank lines (typically a trailing one) are not names.
            new_names = [line.strip() for line in names_file if line.strip()]
        new_names = clean_names(options.names_prefix, new_names)
    else:
        if not options.new_name:
            parser.error("No names_file or new_name paraters provided.")
        new_names = options.new_name

    # Single-argument print call works on Python 2 and 3; the two spaces
    # reproduce the output of the former print statement.
    print("Changing column names to  {0}".format(new_names))

    # The counts file starts with a job line that precedes the header.
    _copy_with_new_header(options.raw_count_file, options.fixed_count_file,
                          ["Geneid", "Chr", "Start", "End", "Strand", "Length"],
                          new_names, lines_before_header=1)
    _copy_with_new_header(options.raw_summary_file, options.fixed_summary_file,
                          ["Status"], new_names)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()