diff scripts/format_transdecoder_headers.sh @ 0:f3600c96e961 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f30d34a6decb05f6b7803f6d677dad4c50f9cebe
author abims-sbr
date Fri, 27 Oct 2017 10:38:52 -0400
parents
children 3f862f346967
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/format_transdecoder_headers.sh	Fri Oct 27 10:38:52 2017 -0400
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Rewrites the TransDecoder output headers of FILE, in place, to the original AdaptSearch format.
+# Usage: format_transdecoder_headers.sh FILE
+# Example input : OG0007971: m.35 g.35  ORF g.35 m.35 type_internal len_307 _+_ Th132_1/1_1.000_923_1-924_+_
+# Example output: >Th132_1/1_1.000_923
+
+set -e  # stop at the first failing sed rather than keep editing a partially rewritten file
+: "${1:?usage: format_transdecoder_headers.sh FILE}"  # abort with a usage message if FILE is missing or empty
+
+# Remove the first 'OGxxxxxxx: ' tag (OG, seven digits, colon, one whitespace character) on each line
+sed -i -E 's/OG[0-9]{7}:\s//' -- "$1"
+# Replace every '_+_' by '(+)' (original author's note: '_' causes bugs)
+sed -i 's/_+_/(+)/g' -- "$1"
+# Replace everything from 'm.<digits>' through the next '(+)' and any whitespace after it by '>'
+sed -i -E 's/m\.[0-9]{1,}[^()]+\(\+\)\s*/>/g' -- "$1"
+# Remove the remaining '(+)' markers (the terminal one in the example above)
+sed -i 's/(+)//g' -- "$1"
+# Remove every '_<digits>-<digits>' range suffix (e.g. '_1-924')
+sed -i -E 's/\_[0-9]{1,}-[0-9]{1,}//g' -- "$1"