Mercurial > repos > abims-sbr > orthogroups_tool
diff scripts/format_transdecoder_headers.sh @ 1:3f862f346967 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f-dirty
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 09:02:12 -0500 |
| parents | f3600c96e961 |
| children |
line wrap: on
line diff
--- a/scripts/format_transdecoder_headers.sh Fri Oct 27 10:38:52 2017 -0400
+++ b/scripts/format_transdecoder_headers.sh Wed Jan 17 09:02:12 2018 -0500
@@ -1,19 +1,27 @@
 #/bin/bash
+# v2 - this script modifies the 'Orthogroups.txt' file in order to make it easily readable by the following script, filter_orthofinder.py
+ #Example :
+ #OG0000001: Gene.117__As119_1/1_1.000_543__g.117__m.117 Gene.157__As170_1/1_1.000_1203__g.157__m.157
+ #Becomes :
+ #As119_1/1_1.000_543 As170_1/1_1.000_1203
+
+# removes 'OGxxxxxxx: '
+sed -E 's/OG[0-9]{7,}:\s//' $1 > $2
+# removes things like Gene.119__
+sed -i -E 's/Gene\.[0-9]{1,}\_\_/>/g' $2
+# removes things like __g.117__m.117
+sed -i -E 's/\_\_g\.[0-9]{1,}\_\_m\.[0-9]{1,}//g' $2
-#This script contains regex to re-write the outputs of transdecoder to the original AdaptSearch format
-#Example :
-#OG0007971: m.35 g.35 ORF g.35 m.35 type_internal len_307 _+_ Th132_1/1_1.000_923_1-924_+_
-#Becomes :
-#Th132_1/1_1.000_923
+# Old version
 # removes 'OGxxxxxxx '
-sed -i -E 's/OG[0-9]{7}:\s//' $1
+#sed -E 's/OG[0-9]{7}:\s//' $1 > $2
 # replace _+_ by (+) because '_' causes bugs
-sed -i 's/_+_/(+)/g' $1
+#sed -i 's/_+_/(+)/g' $2
 # Replaces everything by '>'
-sed -i -E 's/m\.[0-9]{1,}[^()]+\(\+\)\s*/>/g' $1
+#sed -i -E 's/m\.[0-9]{1,}[^()]+\(\+\)\s*/>/g' $2
 # Removes terminal '(+)'
-sed -i 's/(+)//g' $1
+#sed -i 's/(+)//g' $2
 # Removes last suite of unwanted numbers, underscore and dash
-sed -i -E 's/\_[0-9]{1,}-[0-9]{1,}//g' $1
+#sed -i -E 's/\_[0-9]{1,}-[0-9]{1,}//g' $2
