annotate validator.py @ 2:cf258ca024ff draft

planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
author yating-l
date Mon, 19 Dec 2016 14:49:37 -0500
parents 2626c5b4c665
children 24fc8a8efe19
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
1 import sys
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
2
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
3 """
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
4 Call checkAndFixBed to check the integrity of a BED file. If the strand of a line is not "+" or "-", remove that line and report it to the user.
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
5 Scores greater than 1000 are capped at 1000, and the affected lines are reported to the user too.
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
6 """
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
def checkAndFixBed(bedfile, revised_file):
    """Validate a BED file and write a cleaned copy of it.

    Every line of ``bedfile`` is split on whitespace.  The fifth field is
    read as the score and the sixth field as the strand:

    * a score greater than 1000 is replaced by 1000 and the line is reported;
    * a line whose strand is "+" or "-" is written to ``revised_file`` with
      its fields joined by tabs;
    * any other line is left out of ``revised_file`` and reported.

    Lines that cannot be checked at all (fewer than six fields, or a score
    that is not an integer) are reported as removed instead of aborting the
    whole run.  Blank lines are skipped silently.

    Args:
        bedfile: Path of the BED file to read.
        revised_file: Path of the cleaned file to write; it is overwritten.

    Returns:
        A tuple ``(removedLines, scoreLines)`` of two lists of strings.
        ``removedLines`` entries look like ``"line<N>: <original line>"`` and
        ``scoreLines`` entries like ``"line<N>:<original line>"``, where N is
        the 1-based line number in ``bedfile``.  The original line is kept
        verbatim, including its trailing newline.
    """
    # Lines left out of the revised file.
    removedLines = []
    # Lines whose score was capped at 1000.
    scoreLines = []

    # Open the input first so that an unreadable input does not leave a
    # freshly truncated output file behind.
    with open(bedfile, 'r') as source:
        with open(revised_file, 'w') as tmp:
            # Stream the file instead of loading every line into memory.
            for number, line in enumerate(source, 1):
                fields = line.split()
                if not fields:
                    # Blank line: nothing to validate or to report.
                    continue

                try:
                    score = int(fields[4])
                    strand = fields[5]
                except (IndexError, ValueError):
                    # Too few columns or a non-integer score: the line
                    # cannot be validated, so treat it as invalid.
                    removedLines.append("line" + str(number) + ": " + line)
                    continue

                if score > 1000:
                    scoreLines.append("line" + str(number) + ":" + line)
                    fields[4] = '1000'

                if strand in ('+', '-'):
                    tmp.write('\t'.join(fields) + "\n")
                else:
                    removedLines.append("line" + str(number) + ": " + line)

    return removedLines, scoreLines
0
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
31
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
def main():
    """Command-line entry point: clean a BED file and report what changed.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``, runs ``checkAndFixBed`` on them, and prints the lines
    that were removed and the lines whose score was capped, if any.

    Exits with a usage message when fewer than two paths are given.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: %s <input.bed> <output.bed>" % sys.argv[0])

    inputfile = str(sys.argv[1])
    outputfile = str(sys.argv[2])
    removed, changed = checkAndFixBed(inputfile, outputfile)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    if removed:
        print("\nRemoved invalid lines: \n")
        print("\n".join(removed))
    if changed:
        print("\nThe following lines have scores > 1000, so they are changed to 1000:\n")
        print("\n".join(changed))
0
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
42
2626c5b4c665 planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
yating-l
parents:
diff changeset
# Run the validator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()