Mercurial > repos > yating-l > regtools_junctions_extract
annotate validator.py @ 2:cf258ca024ff draft
planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
author | yating-l |
---|---|
date | Mon, 19 Dec 2016 14:49:37 -0500 |
parents | 2626c5b4c665 |
children | 24fc8a8efe19 |
rev | line source |
---|---|
0
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
1 import sys |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
2 |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
3 """ |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
4 Call checkAndFixBed to check the integrity of a BED file. If the strand is not "+" or "-", remove that line and report it to the user. |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
5 Scores greater than 1000 are capped at 1000, and the affected lines are reported. |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
6 """ |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
def checkAndFixBed(bedfile, revised_file):
    """Copy a BED file, dropping invalid records and capping scores at 1000.

    Every line of ``bedfile`` is split on whitespace. A record is written to
    ``revised_file`` (tab-separated, newline-terminated) only when it has at
    least six fields, an integer in the 5th field (score) and "+" or "-" in
    the 6th field (strand). A score greater than 1000 is replaced by 1000
    before the record is written. Blank lines are skipped silently.

    Args:
        bedfile: path of the BED file to read.
        revised_file: path of the cleaned file to write (overwritten).

    Returns:
        A tuple ``(removedLines, scoreLines)``:
        ``removedLines`` holds ``"line<N>: <original line>"`` for every
        record that was dropped (bad strand, too few fields, or a score
        that is not an integer); ``scoreLines`` holds
        ``"line<N>:<original line>"`` for every record whose score exceeded
        1000. ``N`` is the 1-based line number in ``bedfile`` and the
        original line keeps its trailing newline.
    """
    # Records that were dropped from the output.
    removedLines = []
    # Records whose score was above 1000.
    scoreLines = []
    # Open the input first so a missing input does not leave behind an
    # empty/truncated output file.
    with open(bedfile, 'r') as src, open(revised_file, 'w') as dst:
        for lineno, line in enumerate(src, 1):
            fields = line.split()
            if not fields:
                # Blank line: nothing to validate or report.
                continue
            label = "line" + str(lineno)
            try:
                score = int(fields[4])
                strand = fields[5]
            except (IndexError, ValueError):
                # Too few columns or a non-integer score: the record cannot
                # be validated, so treat it like any other invalid line.
                removedLines.append(label + ": " + line)
                continue
            if score > 1000:
                # Reported even if the record is later dropped for its
                # strand, matching the original reporting behaviour.
                scoreLines.append(label + ":" + line)
                fields[4] = '1000'
            if strand in ('+', '-'):
                dst.write('\t'.join(fields) + "\n")
            else:
                removedLines.append(label + ": " + line)

    return removedLines, scoreLines
0
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
31 |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
def main():
    """Command-line entry point: validate a BED file and report the changes.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``, runs ``checkAndFixBed`` on them, and prints the lines
    that were removed and the lines whose score was changed to 1000, if any.
    Exits with a usage message when fewer than two paths are given.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <input.bed> <output.bed>")
    inputfile = str(sys.argv[1])
    outputfile = str(sys.argv[2])
    removed, changed = checkAndFixBed(inputfile, outputfile)
    # Single-argument print(...) emits the same text under Python 2 and 3.
    if removed:
        print("\nRemoved invalid lines: \n")
        print("\n".join(removed))
    if changed:
        print("\nThe following lines have scores > 1000, so they are changed to 1000:\n")
        print("\n".join(changed))
0
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
42 |
2626c5b4c665
planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff
changeset
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()