repmatch_gff3: repmatch_gff3_util.py comparison

comparison repmatch_gff3_util.py @ 8:d10ae3aeebc8 draft

Uploaded

author	greg
date	Wed, 02 Dec 2015 16:15:35 -0500
parents	53cbf79396d7
children

comparison

Legend: equal, deleted, inserted, replaced

-:1807688a8a5f
+:d10ae3aeebc8
 os.close(fd)
 return name
 def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
-output_summary, output_orphan, output_detail, output_key, output_histogram):
+output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
-output_histogram_file = output_files in ["all"] and method in ["all"]
+output_statistics_histogram_file = output_files in ["all"] and method in ["all"]
 if len(dataset_paths) < 2:
 return
 if method == 'all':
 match_methods = METHODS.keys()
 else:
 step,
 replicates,
 up_limit,
 low_limit,
 output_files,
-output_summary,
+output_matched_peaks,
-output_orphan,
+output_unmatched_peaks,
 output_detail,
-output_key,
+output_statistics_table,
-output_histogram)
+output_statistics_histogram)
-if output_histogram_file:
+if output_statistics_histogram_file:
-tmp_histogram_path = get_temporary_plot_path()
+tmp_statistics_histogram_path = get_temporary_plot_path()
 frequency_histogram([stat['distribution'] for stat in [statistics]],
-tmp_histogram_path,
+tmp_statistics_histogram_path,
 METHODS.keys())
-shutil.move(tmp_histogram_path, output_histogram)
+shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
 def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
-output_summary, output_orphan, output_detail, output_key, output_histogram):
+output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
 output_detail_file = output_files in ["all"] and output_detail is not None
-output_key_file = output_files in ["all"] and output_key is not None
+output_statistics_table_file = output_files in ["all"] and output_statistics_table is not None
-output_orphan_file = output_files in ["all", "simple_orphan"] and output_orphan is not None
+output_unmatched_peaks_file = output_files in ["all", "matched_peaks_unmatched_peaks"] and output_unmatched_peaks is not None
-output_histogram_file = output_files in ["all"] and output_histogram is not None
+output_statistics_histogram_file = output_files in ["all"] and output_statistics_histogram is not None
 replicates = []
 for i, dataset_path in enumerate(dataset_paths):
 try:
 galaxy_hid = galaxy_hids[i]
 r = Replicate(galaxy_hid, dataset_path)
 'median midpoint',
 'median midpoint+1',
 'c-w sum',
 'c-w distance',
 'replicate id')
-summary_output = td_writer(output_summary)
+matched_peaks_output = td_writer(output_matched_peaks)
-if output_key_file:
+if output_statistics_table_file:
-key_output = td_writer(output_key)
+statistics_table_output = td_writer(output_statistics_table)
-key_output.writerow(('data', 'median read count'))
+statistics_table_output.writerow(('data', 'median read count'))
 if output_detail_file:
 detail_output = td_writer(output_detail)
 detail_output.writerow(labels)
-if output_orphan_file:
+if output_unmatched_peaks_file:
-orphan_output = td_writer(output_orphan)
+unmatched_peaks_output = td_writer(output_unmatched_peaks)
-orphan_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
+unmatched_peaks_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
 # Perform filtering
 if up_limit < 1000 or low_limit > -1000:
 for replicate in replicates:
 replicate.filter(up_limit, low_limit)
 # Actually merge the peaks
 peak_groups = []
-orphans = []
+unmatched_peaks = []
 freq = FrequencyDistribution()
 def do_match(reps, distance):
 # Copy list because we will mutate it, but keep replicate references.
 reps = reps[:]
 else:
 for d in range(0, distance, step):
 do_match(replicates, d)
 for group in peak_groups:
 freq.add(group.num_replicates)
-# Collect together the remaining orphans
+# Collect the remaining unmatched peaks
 for replicate in replicates:
 for chromosome in replicate.chromosomes.values():
 for peak in chromosome.peaks:
 freq.add(1)
-orphans.append(peak)
+unmatched_peaks.append(peak)
-# Average the orphan count in the graph by # replicates
+# Average the unmatched peak count in the graph by the number of replicates
 med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
 for replicate in replicates:
 replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate == replicate])
-key_output.writerow((replicate.id, replicate.median))
+statistics_table_output.writerow((replicate.id, replicate.median))
 for group in peak_groups:
-# Output summary (matched pairs).
+# Output matched peaks (matched pairs).
-summary_output.writerow(gff_row(cname=group.chrom,
+matched_peaks_output.writerow(gff_row(cname=group.chrom,
 start=group.midpoint,
 end=group.midpoint+1,
 source='repmatch',
 score=group.normalized_value(med),
 attrs={'median_distance': group.median_distance,
 'replicates': group.num_replicates,
 'value_sum': group.value_sum}))
 if output_detail_file:
-summary = (group.chrom,
+matched_peaks = (group.chrom,
 group.midpoint,
 group.midpoint+1,
 group.normalized_value(med),
 group.num_replicates,
 group.median_distance,
 group.value_sum)
 for peak in group.peaks.values():
-summary += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
+matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
-detail_output.writerow(summary)
+detail_output.writerow(matched_peaks)
-if output_orphan_file:
+if output_unmatched_peaks_file:
-for orphan in orphans:
+for unmatched_peak in unmatched_peaks:
-orphan_output.writerow((orphan.chrom,
+unmatched_peaks_output.writerow((unmatched_peak.chrom,
-orphan.midpoint,
+unmatched_peak.midpoint,
-orphan.midpoint+1,
+unmatched_peak.midpoint+1,
-orphan.value,
+unmatched_peak.value,
-orphan.distance,
+unmatched_peak.distance,
-orphan.replicate.id))
+unmatched_peak.replicate.id))
-if output_histogram_file:
+if output_statistics_histogram_file:
-tmp_histogram_path = get_temporary_plot_path()
+tmp_statistics_histogram_path = get_temporary_plot_path()
-frequency_histogram([freq], tmp_histogram_path)
+frequency_histogram([freq], tmp_statistics_histogram_path)
-shutil.move(tmp_histogram_path, output_histogram)
+shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
 return {'distribution': freq}

Mercurial > repos > greg > repmatch_gff3

comparison repmatch_gff3_util.py @ 8:d10ae3aeebc8 draft