Mercurial > repos > bebatut > format_cd_hit_output
diff format_cd_hit_output.py @ 1:4ba41bcee051 draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
author | bebatut |
---|---|
date | Tue, 26 Apr 2016 08:54:26 -0400 |
parents | bbd903996900 |
children |
line wrap: on
line diff
--- a/format_cd_hit_output.py Wed Jan 27 03:28:42 2016 -0500 +++ b/format_cd_hit_output.py Tue Apr 26 08:54:26 2016 -0400 @@ -54,13 +54,14 @@ string = "A file with category distribution is expected but " string += "no mapping information are available" raise ValueError(string) - output_category_distribution_file = open( - args.output_category_distribution, 'w') - output_category_distribution_file.write('Cluster\tSequence_number') + output_cat_distri_file = open(args.output_category_distribution, 'w') + output_cat_distri_file.write('Cluster\tSequence_number') for category in categories: - output_category_distribution_file.write('\t' + category) + output_cat_distri_file.write('\t' + category) - output_category_distribution_file.write('\n') + output_cat_distri_file.write('\n') + else: + output_cat_distri_file = None with open(args.input_cluster_info,'r') as cluster_info_file: cluster_name = '' @@ -71,7 +72,7 @@ if line[0] == '>': flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster, cluster_category_distribution, categories, - output_category_distribution_file, cluster_seq_number) + output_cat_distri_file, cluster_seq_number) cluster_name = line[1:-1] cluster_name = cluster_name.replace(' ','_') cluster_category_distribution = init_category_distribution(categories) @@ -84,7 +85,7 @@ if categories != None: seq_count = 1 - if args.number_sum == 'false': + if args.number_sum != None: if seq_name.find('size') != -1: substring = seq_name[seq_name.find('size'):-1] seq_count = int(substring.split('=')[1]) @@ -93,7 +94,6 @@ raise ValueError(string) category = mapping_info[seq_name] cluster_category_distribution[category] += seq_count - if seq_info[-1] == '*': if cluster_ref_seq != '': @@ -104,11 +104,11 @@ cluster_ref_seq = seq_name flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster, - cluster_category_distribution, categories, - output_category_distribution_file, cluster_seq_number) + cluster_category_distribution, categories, output_cat_distri_file, + cluster_seq_number) if args.output_category_distribution != None: - output_category_distribution_file.close() + output_cat_distri_file.close() return ref_seq_cluster