diff validator.py @ 0:2626c5b4c665 draft

planemo upload commit db634d933827ebc78981c7a0aa18205a85fa42e6-dirty
author yating-l
date Mon, 19 Dec 2016 13:07:32 -0500
parents
children cf258ca024ff
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/validator.py	Mon Dec 19 13:07:32 2016 -0500
@@ -0,0 +1,39 @@
+import sys
+
+"""
+checkAndFixBed checks the integrity of a BED file: lines whose strand is not "+" or "-" are dropped and reported to the user.
+For the remaining lines, the original score is moved to a new last column and the score column is set to 1000.
+"""
+def checkAndFixBed(bedfile, revised_file):  # Filter bedfile into revised_file; return the list of rejected lines.
+    # Rejected lines, each recorded as "line<N>: <original text>" with N counted from 1.
+    removedLines = []
+    # Keep only lines whose 6th field (strand) is "+" or "-". Kept lines are rewritten tab-separated with the 5th field (score) set to 1000 and the original score appended as a new last field.
+    with open(revised_file, 'w') as tmp:
+        with open(bedfile, 'r') as f:
+            lines = f.readlines()  # reads the entire input into memory
+            i = 1  # 1-based line number used in the report
+            for line in lines:  
+                fields = line.split()  # splits on any whitespace, not only tabs
+                strand = fields[5]  # NOTE(review): IndexError if the line has fewer than 6 fields (e.g. a blank line)
+                score = fields[4]
+                fields[4] = '1000'  
+                fields.append(score)
+                if (strand == '+' or strand == '-'):
+                    tmp.write('\t'.join(map(str, fields)))
+                    tmp.write("\n")
+                else:
+                    removedLines.append("line" + str(i) + ": " + line)  # line is stored as read, including any trailing newline
+                i = i+1
+
+    return removedLines 
+
+def main():  # Command-line entry point: argv[1] is the input file path, argv[2] the output file path.
+    inputfile = str(sys.argv[1])
+    outputfile = str(sys.argv[2])
+    removed = checkAndFixBed(inputfile, outputfile)
+    if (removed != []):  # report only when at least one line was rejected
+        print "\nRemoved invalid lines: \n"  # NOTE(review): Python 2 print statement; a SyntaxError on Python 3
+        print "\n".join(removed)
+
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+    main()