Mercurial > repos > abims-sbr > orthogroups_tool
changeset 3:82cc91f8cacb draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit fe249b6d8df132b24f70207d6dd002dbf6942c2e
| author | abims-sbr |
|---|---|
| date | Fri, 19 Jan 2018 09:51:12 -0500 |
| parents | d557212b575b |
| children | 427dd144d915 |
| files | orthogroups_tool.xml scripts/filter_orthofinder.py |
| diffstat | 2 files changed, 18 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/orthogroups_tool.xml Wed Jan 17 11:32:14 2018 -0500 +++ b/orthogroups_tool.xml Fri Jan 19 09:51:12 2018 -0500 @@ -1,4 +1,4 @@ -<tool name="Orthogroups_Tool" id="orthogroups_tool" version="1.0"> +<tool name="Orthogroups_Tool" id="orthogroups_tool" version="1.0.1"> <description> This tool takes Orthogroups found by OrthoFinder and proceeds to retrieve nucleic sequences back, then write each orthogroups in its own fasta file. @@ -93,6 +93,8 @@ <![CDATA[ +The script of this tool has been written by Victor Mataigne. + ------------------------------------------- **Description**
--- a/scripts/filter_orthofinder.py Wed Jan 17 11:32:14 2018 -0500 +++ b/scripts/filter_orthofinder.py Fri Jan 19 09:51:12 2018 -0500 @@ -4,7 +4,7 @@ ## This script takes an output file of OrthoFinder (Orthogroups.txt), which contains a set of orthogroups, ## and rewrite it to split each orthogroup into a single fasta file. -import os, string, glob, argparse, csv +import os, string, glob, argparse, csv, itertools import numpy as np import pandas as pd @@ -15,18 +15,13 @@ def hashSequences(path): hashTable = {} # WARNING : sequences are expected to be on one line. If not, biopython can do it - for file in path: - originFile = open(file, "r") + for file in path: gene = "" sequence = "" - with originFile: - while (1): # Not the best way to do - gene = originFile.readline() - if not gene: - break - gene = gene[:-1] - sequence = originFile.readline() - sequence = sequence[:-1] + with open(file, "r") as origin: + for line1,line2 in itertools.izip_longest(*[origin]*2): + gene=line1.strip("\r\n ") + sequence=line2.strip(" \r\n ") hashTable[gene] = sequence return hashTable @@ -107,12 +102,12 @@ i,j = 1,1 for group in list_orthogroups: group = string.split(group, " ") # list of lists - group.sort() - if verbose or paralogs: - if len(group) >= mini: - list_orthogroups_withpara.append(group) - writeOutputFile(group, hashTable, j, True) - j += 1 + group.sort() + if verbose and len(group) >= mini: + list_orthogroups_withpara.append(group) + if paralogs and len(group) >= mini: + writeOutputFile(group, hashTable, j, True) + j += 1 new_group = [] rang=-1 # Keep only one paralogs per species (1st encounter) @@ -157,7 +152,7 @@ # Build hashtable print " Building hashTable IDs/sequences ...\n" - path = glob.glob('*.fasta') + path = glob.glob('*.fasta') hashTable = hashSequences(path) # Open txt file with orthogroups @@ -179,7 +174,8 @@ os.system("mv {} filtered_orthogroups".format(file)) print " \nFiltered orthogroups are written in the directory 'filtered_orthogroups'" - print " 
\nFull orthogroups files are written in the directory 'orthogroups_withParalogs'\n" + if args.paralogs: + print " \nFull orthogroups files are written in the directory 'orthogroups_withParalogs'\n" if __name__ == "__main__": main()
