Mercurial > repos > yating-l > regtools_junctions_extract
comparison validator.py @ 2:cf258ca024ff draft
planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
author | yating-l |
---|---|
date | Mon, 19 Dec 2016 14:49:37 -0500 |
parents | 2626c5b4c665 |
children | 24fc8a8efe19 |
comparison
Legend: equal | deleted | inserted | replaced
1:e9dcea52d079 | 2:cf258ca024ff |
---|---|
5 create a column and move the score column to that column. | 5 create a column and move the score column to that column. |
6 """ | 6 """ |
7 def checkAndFixBed(bedfile, revised_file): | 7 def checkAndFixBed(bedfile, revised_file): |
8 # Store the lines that have been removed | 8 # Store the lines that have been removed |
9 removedLines = [] | 9 removedLines = [] |
10 scoreLines = [] | |
10 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 | 11 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 |
11 with open(revised_file, 'w') as tmp: | 12 with open(revised_file, 'w') as tmp: |
12 with open(bedfile, 'r') as f: | 13 with open(bedfile, 'r') as f: |
13 lines = f.readlines() | 14 lines = f.readlines() |
14 i = 1 | 15 i = 1 |
15 for line in lines: | 16 for line in lines: |
16 fields = line.split() | 17 fields = line.split() |
17 strand = fields[5] | 18 strand = fields[5] |
18 score = fields[4] | 19 score = fields[4] |
19 fields[4] = '1000' | 20 if (int(fields[4]) > 1000): |
20 fields.append(score) | 21 scoreLines.append("line" + str(i) + ":" + line) |
22 fields[4] = '1000' | |
21 if (strand == '+' or strand == '-'): | 23 if (strand == '+' or strand == '-'): |
22 tmp.write('\t'.join(map(str, fields))) | 24 tmp.write('\t'.join(map(str, fields))) |
23 tmp.write("\n") | 25 tmp.write("\n") |
24 else: | 26 else: |
25 removedLines.append("line" + str(i) + ": " + line) | 27 removedLines.append("line" + str(i) + ": " + line) |
26 i = i+1 | 28 i = i+1 |
27 | 29 |
28 return removedLines | 30 return removedLines, scoreLines |
29 | 31 |
30 def main(): | 32 def main(): |
31 inputfile = str(sys.argv[1]) | 33 inputfile = str(sys.argv[1]) |
32 outputfile = str(sys.argv[2]) | 34 outputfile = str(sys.argv[2]) |
33 removed = checkAndFixBed(inputfile, outputfile) | 35 removed, changed = checkAndFixBed(inputfile, outputfile) |
34 if (removed != []): | 36 if (removed != []): |
35 print "\nRemoved invalid lines: \n" | 37 print "\nRemoved invalid lines: \n" |
36 print "\n".join(removed) | 38 print "\n".join(removed) |
39 if (changed != []): | |
40 print "\nThe following lines have scores > 1000, so they are changed to 1000:\n" | |
41 print "\n".join(changed) | |
37 | 42 |
38 if __name__ == "__main__": | 43 if __name__ == "__main__": |
39 main() | 44 main() |