diff scripts/S11_post_processing_of_pairwise.py @ 4:6709645eff5d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:53:53 -0500
parents c8af52875b0f
children
line wrap: on
line diff
--- a/scripts/S11_post_processing_of_pairwise.py	Wed Sep 27 10:01:55 2017 -0400
+++ b/scripts/S11_post_processing_of_pairwise.py	Wed Jan 17 08:53:53 2018 -0500
@@ -4,88 +4,13 @@
 
 MINIMUM_LENGTH = 1
 
-# DEF1: get_pairs -- read a FASTA file as consecutive 4-line records
-# (query header, query sequence, match header, match sequence) and
-# return them as a list of [q_name, q_seq, m_name, m_seq] lists.
-def get_pairs(fasta_file_path):
-    F2 = open(fasta_file_path, "r")    # NOTE(review): stays open if a read below raises
-    list_pairwises = []
-    while 1:
-        next2 = F2.readline()
-        if not next2:    # readline() returns "" only at end of file
-            break
-        if next2[0] == ">":    # a ">" line opens a record; any other line is ignored
-            fasta_name_query = next2[:-1]    # [:-1] drops the last character (the newline, when there is one)
-            next3 = F2.readline()
-            fasta_seq_query = next3[:-1]
-            next3 = F2.readline()    ## taken as the match header as-is; a blank line here is NOT skipped
-            fasta_name_match = next3[:-1]
-            next3 = F2.readline()
-            fasta_seq_match = next3[:-1]
-            pairwise = [fasta_name_query,fasta_seq_query,fasta_name_match,fasta_seq_match]
-            
-            ## every record is appended; no filtering happens here
-            list_pairwises.append(pairwise)
-    F2.close()
-    return(list_pairwises)
-##############################################
-
-
-# DEF2: extract_length -- convert a "start...end" string into the integer
-# end - start (e.g. "57...902" -> 845). Uses string.split / string.atoi
-# from the `string` module (Python 2 function forms).
-def extract_length(length_string):   # expected format of length_string: 57...902
-    l3 = string.split(length_string, "...")    # [start, end] as strings
-    n1 = string.atoi(l3[0])
-    n2 = string.atoi(l3[1])
-    length = n2-n1    # plain difference, no +1 for an inclusive end
-    return(length)
-##############################################
-
-
-# DEF3: filter_redondancy -- for each (query, match) short-name pair keep
-# only the record with the largest matched length, then return the kept
-# records whose length is strictly greater than MIN_LENGTH.
-def filter_redondancy(list_paireu, MIN_LENGTH):
-    # list_paireu: iterable of [query_name, query_seq, match_name, match_seq]
-    bash1 = {}    # "query_match" key -> best record seen so far, length stored last
-    list_pairout = []
-    
-    for pair in list_paireu:
-         query_name = pair[0]
-         query_seq = pair[1]
-         match_name = pair[2]
-         match_seq = pair[3]
-
-         l1 = string.split(query_name, "||")    # header fields are separated by "||"
-         short_query_name = l1[0][1:]    # first field minus its first character (presumably the ">" -- confirm)
-         length_matched =  extract_length(l1[1])          ### DEF2: second header field is the "start...end" span ###
-         l2 = string.split(match_name, "||")
-         short_match_name = l2[0][1:]
-         binom = "%s_%s" %(short_query_name, short_match_name)    # key identifying the query/match pair
-         
-         if binom not in bash1.keys():    # first record for this pair
-             bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-         else:
-             old_length = bash1[binom][-1]
-             if length_matched > old_length:    # strictly longer replaces; on a tie the earlier record stays
-                 bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-
-    # second pass: apply the minimum-length cut to the per-pair winners
-    for bino in bash1.keys():    # output follows the dict's key order, which need not be the input order
-        length = bash1[bino][-1]
-        if length > MIN_LENGTH:    # strict: length == MIN_LENGTH is dropped
-            list_pairout.append(bash1[bino])    # 5-item list, matched length is the last item
-
-    return(list_pairout)
-##############################################
-
-
 #######################
 ##### RUN RUN RUN #####
 #######################
 import string, os, time, re, sys
+from functions import get_pairs, extract_length, filter_redondancy
 
+## 1 ## INPUT/OUTPUT
 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db
 
 F_IN = "%s/17_ReciprocalHits_%s.fasta" %(SHORT_FILE, SHORT_FILE)