view scripts/format_transdecoder_headers.sh @ 0:f3600c96e961 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f30d34a6decb05f6b7803f6d677dad4c50f9cebe
author abims-sbr
date Fri, 27 Oct 2017 10:38:52 -0400
parents
children 3f862f346967
line wrap: on
line source

#!/bin/bash
#
# Rewrites, in place, the headers produced by TransDecoder back to the
# original AdaptSearch format.
#
# Usage: format_transdecoder_headers.sh FILE
#
# Example :
#   OG0007971: m.35 g.35  ORF g.35 m.35 type_internal len_307 _+_ Th132_1/1_1.000_923_1-924_+_
# Becomes :
#   >Th132_1/1_1.000_923
#
# Requires GNU sed ('-i' without a backup suffix and '-E').

#######################################
# Reformat the TransDecoder headers of a file in place.
# Arguments: $1 - path to the file to rewrite (further arguments are ignored)
# Outputs:   diagnostics on stderr; FILE is modified in place
# Returns:   0 on success, 2 on missing argument, 1 if FILE is not a
#            regular file, otherwise the exit status of sed
#######################################
format_transdecoder_headers() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s FILE\n' "${0##*/}" >&2
    return 2
  fi

  local file=$1
  if [[ ! -f "$file" ]]; then
    printf '%s: not a regular file: %s\n' "${0##*/}" "$file" >&2
    return 1
  fi

  # The expressions are applied in order to every line. Running them in a
  # single sed call rewrites the file once, so a failure cannot leave it
  # half-converted between two passes.
  local -a sed_script=(
    # Remove the first 'OGxxxxxxx: ' prefix of the line.
    -e 's/OG[0-9]{7}:[[:space:]]//'
    # Replace '_+_' by '(+)' because '_' would be ambiguous in the next steps.
    -e 's/_\+_/(+)/g'
    # Replace everything from 'm.<digits>' up to the next '(+)' (and the
    # whitespace that follows it) by '>'.
    -e 's/m\.[0-9]+[^()]+\(\+\)[[:space:]]*/>/g'
    # Remove the remaining (terminal) '(+)'.
    -e 's/\(\+\)//g'
    # Remove the trailing '_<digits>-<digits>' coordinates.
    -e 's/_[0-9]+-[0-9]+//g'
  )

  # Quote the path and end option parsing with '--' so names containing
  # spaces, glob characters or a leading '-' are handled correctly.
  sed -i -E "${sed_script[@]}" -- "$file"
}

# Last command of the script: its status becomes the script's exit status.
format_transdecoder_headers "$@"