format_cd_hit_output: format_cd_hit

comparison format_cd_hit_output.py @ 1:4ba41bcee051 draft default tip

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty

author	bebatut
date	Tue, 26 Apr 2016 08:54:26 -0400
parents	bbd903996900
children

comparison

equal deleted inserted replaced

-:bbd903996900
+:4ba41bcee051
 if args.output_category_distribution != None:
 if mapping_info == None or categories == None:
 string = "A file with category distribution is expected but "
 string += "no mapping information are available"
 raise ValueError(string)
-output_category_distribution_file = open(
+output_cat_distri_file = open(args.output_category_distribution, 'w')
-args.output_category_distribution, 'w')
+output_cat_distri_file.write('Cluster\tSequence_number')
-output_category_distribution_file.write('Cluster\tSequence_number')
 for category in categories:
-output_category_distribution_file.write('\t' + category)
+output_cat_distri_file.write('\t' + category)
-output_category_distribution_file.write('\n')
+output_cat_distri_file.write('\n')
+else:
+output_cat_distri_file = None
 with open(args.input_cluster_info,'r') as cluster_info_file:
 cluster_name = ''
 cluster_category_distribution = init_category_distribution(categories)
 cluster_ref_seq = ''
 cluster_seq_number = 0
 for line in cluster_info_file.readlines():
 if line[0] == '>':
 flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,
 cluster_category_distribution, categories,
-output_category_distribution_file, cluster_seq_number)
+output_cat_distri_file, cluster_seq_number)
 cluster_name = line[1:-1]
 cluster_name = cluster_name.replace(' ','_')
 cluster_category_distribution = init_category_distribution(categories)
 cluster_ref_seq = ''
 cluster_seq_number = 0
 seq_name = seq_info[1][1:-3]
 cluster_seq_number += 1
 if categories != None:
 seq_count = 1
-if args.number_sum == 'false':
+if args.number_sum != None:
 if seq_name.find('size') != -1:
 substring = seq_name[seq_name.find('size'):-1]
 seq_count = int(substring.split('=')[1])
 if not mapping_info.has_key(seq_name):
 string = seq_name + " not found in mapping"
 raise ValueError(string)
 category = mapping_info[seq_name]
 cluster_category_distribution[category] += seq_count
 if seq_info[-1] == '*':
 if cluster_ref_seq != '':
 string = "A reference sequence (" + cluster_ref_seq
 string += ") already found for cluster " + cluster_name
 string += " (" + seq_name + ")"
 raise ValueError(string)
 cluster_ref_seq = seq_name
 flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,
-cluster_category_distribution, categories,
+cluster_category_distribution, categories, output_cat_distri_file,
-output_category_distribution_file, cluster_seq_number)
+cluster_seq_number)
 if args.output_category_distribution != None:
-output_category_distribution_file.close()
+output_cat_distri_file.close()
 return ref_seq_cluster
 def rename_representative_sequences(args, ref_seq_cluster):
 with open(args.input_representative_sequences,'r') as input_sequences:

Mercurial > repos > bebatut > format_cd_hit_output

comparison format_cd_hit_output.py @ 1:4ba41bcee051 draft default tip