# HG changeset patch # User abims-sbr # Date 1519738925 18000 # Node ID 427dd144d915deb7322bab83406487cc722f2fd5 # Parent 82cc91f8cacb5a4fba36341c0280ecbcc63a25fc planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 17acd02c547bd1f7661a846661aa99de9743efe9 diff -r 82cc91f8cacb -r 427dd144d915 adaptsearch_picture_helps.png Binary file adaptsearch_picture_helps.png has changed diff -r 82cc91f8cacb -r 427dd144d915 orthogroups_tool.xml --- a/orthogroups_tool.xml Fri Jan 19 09:51:12 2018 -0500 +++ b/orthogroups_tool.xml Tue Feb 27 08:42:05 2018 -0500 @@ -18,7 +18,7 @@ - - + + @@ -85,6 +85,24 @@ + + + + + + + + + + + + + + + + + + diff -r 82cc91f8cacb -r 427dd144d915 scripts/filter_orthofinder.py --- a/scripts/filter_orthofinder.py Fri Jan 19 09:51:12 2018 -0500 +++ b/scripts/filter_orthofinder.py Tue Feb 27 08:42:05 2018 -0500 @@ -21,8 +21,9 @@ with open(file, "r") as origin: for line1,line2 in itertools.izip_longest(*[origin]*2): gene=line1.strip("\r\n ") - sequence=line2.strip(" \r\n ") - hashTable[gene] = sequence + sequence=line2.strip("\r\n ") + hashTable[gene] = sequence + return hashTable ## PART 2 : Create orthogroups file (one file per orthogroup) @@ -76,7 +77,7 @@ name = "orthogroup_{}_{}_sequences.fasta".format(i, length) result = open(name, "w") with result: - for locus in orthogroup: + for locus in orthogroup: result.write("{}\n".format(locus)) # write geneID. ">%s\n" before result.write("{}\n".format(hashTable[locus])) # write sequence @@ -127,12 +128,12 @@ # STEP 3 - Print summaries ---------------------------------------------------- if verbose: print " Summary before paralogous filtering : \n" - frame1 = asFrame(countings(list_orthogroups_withpara, nbspecs)) - print frame1 + df1 = asFrame(countings(list_orthogroups_withpara, nbspecs)) + print df1.loc[df1.ne(0).any(1),df1.ne(0).any()] #print " Summary before paralogous filtering : \n",countings(list_orthogroups_withpara, nbspecs),"\n" - print " Summary after paralogous filtering : \n" - frame2= asFrame(countings(list_orthogroups_format, nbspecs)) - print frame2 + print "\n Summary after paralogous filtering : \n" + df2 = asFrame(countings(list_orthogroups_format, nbspecs)) + print df2.loc[df2.ne(0).any(1),df2.ne(0).any()] return len(list_orthogroups_format) #list_orthogroups_no_para @@ -156,7 +157,7 @@ hashTable = hashSequences(path) # Open txt file with orthogroups - print " Reading Orthogroups.txt and wrting orthogroups to separated files..." + print " Reading Orthogroups.txt and writing orthogroups to separated files..." print " (Dropping orthogroups of less than {} loci.)\n".format(args.minspec) list_orthogroups = formatAndFilter(args.files, args.minspec, args.nbspec, hashTable, args.verbose, args.paralogs) print "\n{} filtered orthogroups have been written in separated files".format(list_orthogroups) diff -r 82cc91f8cacb -r 427dd144d915 static/images/adaptsearch_picture_helps.png Binary file static/images/adaptsearch_picture_helps.png has changed diff -r 82cc91f8cacb -r 427dd144d915 test-data/2_orthogroups_tool.output --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_orthogroups_tool.output Tue Feb 27 08:42:05 2018 -0500 @@ -0,0 +1,26 @@ + +-This script works on the 'Orthogroups' file output of Orthofinder to split each orthogroup in a single fasta file. +-It also gets rid of orthogroups with less sequences than the number specified by the user. + Building hashTable IDs/sequences ... + + Reading Orthogroups.txt and writing orthogroups to separated files... + (Dropping orthogroups of less than 2 loci.) + + Summary before paralogous filtering : + + 2 sps 3 sps 4 sps +2 seqs 1 0 0 +3 seqs 1 3 0 +5 seqs 0 0 1 +8 seqs 0 1 0 + + Summary after paralogous filtering : + + 2 sps 3 sps 4 sps +2 seqs 2 0 0 +3 seqs 0 4 0 +4 seqs 0 0 1 + +7 filtered orthogroups have been written in separated files + +Filtered orthogroups are written in the directory 'filtered_orthogroups' diff -r 82cc91f8cacb -r 427dd144d915 test-data/orthogroups_tool.output --- a/test-data/orthogroups_tool.output Fri Jan 19 09:51:12 2018 -0500 +++ b/test-data/orthogroups_tool.output Tue Feb 27 08:42:05 2018 -0500 @@ -3,27 +3,23 @@ -It also gets rid of orthogroups with less sequences than the number specified by the user. Building hashTable IDs/sequences ... - Reading Orthogroups.txt and wrting orthogroups to separated files... + Reading Orthogroups.txt and writing orthogroups to separated files... (Dropping orthogroups of less than 2 loci.) Summary before paralogous filtering : - 1 sps 2 sps 3 sps 4 sps -1 seqs 0 0 0 0 -2 seqs 0 1 0 0 -3 seqs 0 1 3 0 -4 seqs 0 0 0 0 -5 seqs 0 0 0 1 -6 seqs 0 0 0 0 -7 seqs 0 0 0 0 -8 seqs 0 0 1 0 + 2 sps 3 sps 4 sps +2 seqs 1 0 0 +3 seqs 1 3 0 +5 seqs 0 0 1 +8 seqs 0 1 0 + Summary after paralogous filtering : - 1 sps 2 sps 3 sps 4 sps -1 seqs 0 0 0 0 -2 seqs 0 2 0 0 -3 seqs 0 0 4 0 -4 seqs 0 0 0 1 + 2 sps 3 sps 4 sps +2 seqs 2 0 0 +3 seqs 0 4 0 +4 seqs 0 0 1 7 filtered orthogroups have been written in separated files