diff scripts/S06_post_processing_of_pairwise.py @ 4:6709645eff5d draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:53:53 -0500
parents c8af52875b0f
children
line wrap: on
line diff
--- a/scripts/S06_post_processing_of_pairwise.py	Wed Sep 27 10:01:55 2017 -0400
+++ b/scripts/S06_post_processing_of_pairwise.py	Wed Jan 17 08:53:53 2018 -0500
@@ -2,96 +2,13 @@
 ## AUTHOR: Eric Fontanillas
 ## LAST VERSION: 14/08/14 by Julie BAFFARD
 
-MINIMUM_LENGTH = 1    #bp
-
-
-############################
-##### DEF1 : Get Pairs #####
-############################
-def get_pairs(fasta_file_path):
-    F2 = open(fasta_file_path, "r")
-    list_pairwises = []
-    while 1:
-        next2 = F2.readline()
-        if not next2:
-            break
-        if next2[0] == ">":
-            fasta_name_query = next2[:-1]
-            next3 = F2.readline()
-            fasta_seq_query = next3[:-1]
-            next3 = F2.readline()    ## jump one empty line (if any after the sequence)
-            fasta_name_match = next3[:-1]
-            next3 = F2.readline()
-            fasta_seq_match = next3[:-1]
-            pairwise = [fasta_name_query,fasta_seq_query,fasta_name_match,fasta_seq_match]
-            
-            ## ADD pairwise with condition
-            list_pairwises.append(pairwise)
-    F2.close()
-
-    return(list_pairwises)
-##############################################
-
-#################################
-##### DEF2 : Extract length #####
-#################################
-def extract_length(length_string):   # format length string = 57...902
-    l3 = string.split(length_string, "...")
-    n1 = string.atoi(l3[0])
-    n2 = string.atoi(l3[1])
-    length = n2-n1
-
-    return(length)
-##############################################
-
-
-####################################
-##### DEF3 : Remove Redondancy ##### 
-####################################
-def filter_redondancy_and_length(list_paireu, MIN_LENGTH):
-    
-    bash1 = {}
-    list_pairout = []
-    
-    for pair in list_paireu:
-         query_name = pair[0]
-         query_seq = pair[1]
-         match_name = pair[2]
-         match_seq = pair[3]
-
-         l1 = string.split(query_name, "||")
-         short_query_name = l1[0][1:]
-         length_matched =  extract_length(l1[1])               ### DEF2 ###
-         l2 = string.split(match_name, "||")
-         short_match_name = l2[0][1:]
-         binom = "%s_%s" %(short_query_name, short_match_name)
-
-         ## TEST FOR REDONDANCY
-         ## REDONDANCY OF BINOME!!!! => MATCHE BETWEEN THE SAME 2 CONTIGS, BUT AT DIFFERENT POSITIONS ON THE CONTIG
-         ## REDONDANCY NOT REMOVED HERE:
-         ## 1/ Several "TERA" match with one "APN"  (Counted in script "09_formatMatch_getBackNucleotides.py")
-         ## 2/ Several "APN" match with one "TERA" (Counted
-         if binom not in bash1.keys():
-             bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-         else:
-             old_length = bash1[binom][-1]
-             if length_matched > old_length:
-                 bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-
-    
-    for bino in bash1.keys():
-        length = bash1[bino][-1]
-        if length > MIN_LENGTH:
-            list_pairout.append(bash1[bino])
-
-    return(list_pairout)
-##############################################
-
+MINIMUM_LENGTH = 1
 
 #######################
 ##### RUN RUN RUN #####
 #######################
 import string, os, time, re, sys
+from functions import get_pairs, extract_length, filter_redondancy
 
 ## 1 ## INPUT/OUTPUT
 SHORT_FILE = sys.argv[1] #short-name-query_short-name-db
@@ -106,7 +23,7 @@
 
 ## 2 ## RUN
 list_pairwises = get_pairs(F_IN)           ### DEF1 ###
-list_pairwises_filtered1 = filter_redondancy_and_length(list_pairwises, MINIMUM_LENGTH)                ### DEF3 ###
+list_pairwises_filtered1 = filter_redondancy(list_pairwises, MINIMUM_LENGTH)                ### DEF3 ###
 
 i = 0
 for pair in list_pairwises_filtered1: