changeset 6:063348f7c3e4 draft

Deleted selected files
author bzonnedda
date Mon, 06 Feb 2017 11:10:16 -0500
parents 5748dc3b41de
children 5dcedbe7edff
files c_calls.xml c_plotcalls.xml c_rpkm.xml conifer.py conifer_functions.py conifer_wrapper.pl
diffstat 6 files changed, 0 insertions(+), 1301 deletions(-) [+]
line wrap: on
line diff
--- a/c_calls.xml	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<tool id="conifer_call" name="CoNIFER CNVs caller" version="1.0">
-  <description></description>
-  <command>
-       python \${CONIFER_PATH}/conifer.py call
-	--input $input
-	--output temporary
-    --threshold $threshold
-	2&gt;&amp;1
-    &amp;&amp; awk '{print \$2,\$3,\$4,\$5,\$1}' OFS="\t" temporary &gt; $outputFile
-  </command>
-  <inputs>
-    <param format="hdf5" name="input" type="data" label="CoNIFER output analysis file (HDF5)" />
-    <param name="threshold" type="float" value="1.5" label="Threshold" />
-  </inputs>
-  <outputs>
-    <data format="bed" name="outputFile" label="${tool.name} on ${on_string}" />
-  </outputs>
- <help>
-**What it does**
-
-This tool is a CNVs caller using SVD-ZRPKM thresholding.
-
-**License and citation**
-
-This Galaxy tool is Copyright © 2014 `CRS4 Srl.`_ and is released under the `MIT license`_.
-
-.. _CRS4 Srl.: http://www.crs4.it/
-.. _MIT license: http://opensource.org/licenses/MIT
-
-You can use this tool only if you agree to the license terms of: `CoNIFER`_.
-
-.. _CoNIFER: http://conifer.sourceforge.net/
-
-If you use this tool, please cite:
-
-- |Cuccuru2014|_
-- |Krumm2012|_.
-
-.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
-.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
-.. |Krumm2012| replace::  Krumm, N., Sudmant, P. H., Ko, A., O'Roak, B. J., Malig, M., Coe, B. P., NHLBI Exome Sequencing Project, Quinlan, A. R., Nickerson, D. A., Eichler, E. E. (2012) Copy number variation detection and genotyping from exome sequence data. *Genome Res.* 22(8), 1525-1532
-.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525
-  </help>
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btu135</citation>
-    <citation type="doi">10.1101/gr.138115.112</citation>
-  </citations>
-</tool>
--- a/c_plotcalls.xml	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="conifer_plotcalls" name="CoNIFER plot caller" version="1.0">
-	<description>cnv caller</description>
-	<command interpreter="perl">
-    conifer_wrapper.pl --input $input --regions $plot_option.regions --html_file $html_file --html_folder $html_file.files_path
-    #if str($plot_option.plot_option_select) == "single"
-        --sample "$plot_option.sample"
-    #else
-        --window $plot_option.window
-        --multiple
-    #end if
-    2&gt;&amp;1
-	</command>
-	<inputs>
-		<param format="hdf5" name="input" type="data" label="CoNIFER output analysis file (hdf5)"/>
-		<conditional name="plot_option">
-			<param name="plot_option_select" type="select" label="Single or multiple regions?">
-				<option value="single" selected="true">Single region</option>
-				<option value="multiple">Multiple regions</option>
-			</param>
-			<when value="single">
-				<param name="regions" type="text" size="50" optional="false" label="Region" help="Region for the plot (i.e. chr1:1000-2000)"/>
-				<param name="sample" type="text" size="50" optional="true" value="" label="Sample" help="Sample names to be highlighted in the plot (optional)"/>
-			</when>
-			<when value="multiple">
-				<param format="tabular" name="regions" type="data" label="CoNIFER output calls file (tabular)"/>
-                <param name="window" size="10" type="integer" value="50" label="Window"/>
-			</when>
-		</conditional>
-    </inputs>
-    <outputs>
-        <data format="html" name="html_file" label="${tool.name} on ${on_string}" />
-    </outputs>
-<help>
-**What it does**
-
-Draw a HTML page with CNV plots. It accepts as input either a single region or a file with CoNIFER calls.
-
-**License and citation**
-
-This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
-
-.. _CRS4 Srl.: http://www.crs4.it/
-.. _MIT license: http://opensource.org/licenses/MIT
-
-If you use this tool in Galaxy, please cite |Cuccuru2014|_.
-
-.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics*, accepted
-.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/early/2014/03/10/bioinformatics.btu135
-
-This tool uses CoNIFER, which is licensed separately. Please cite:
-
-- |Krumm2012|_.
-
-.. |Krumm2012| replace::  (Krumm et al., 2012) Copy number variation detection and genotyping from exome sequence data. Genome research.
-.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525.full?sid=4a7a300a-b960-4544-8611-effc3315411c
-  </help>
-</tool>
--- a/c_rpkm.xml	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-<tool id="conifer_rpkm" name="CoNIFER RPKM and Analyze" version="1.0">
-  <description></description>
-  <command>
-     mkdir rpkm_dir;
-#for $input_bam in $inputs:
-	#if str($input_bam.label.value) != "":
-	     ln -s ${input_bam.inputFile} ${$input_bam.label.value}.bam;
-	     ln -s ${input_bam.inputFile.metadata.bam_index} ${$input_bam.label.value}.bam.bai;
-	     python conifer.py rpkm 
-	     --probes $probesFile 
-	     --input ${$input_bam.label.value}.bam
-	     --output rpkm_dir/${$input_bam.label.value}.txt;
-	#else
-	     ln -s ${input_bam.inputFile} ${input_bam.inputFile.dataset.name}.bam;
-	     ln -s ${input_bam.inputFile.metadata.bam_index} ${input_bam.inputFile.dataset.name}.bam.bai;
-	     python conifer.py rpkm 
-	     --probes $probesFile 
-	     --input ${input_bam.inputFile.dataset.name}.bam
-	     --output rpkm_dir/${input_bam.inputFile.dataset.name}.txt;
-	#end if
-#end for
-     python conifer.py analyze 
-     --probes $probesFile 
-     --rpkm_dir rpkm_dir  
-     --output $outputFile 
-     --svd $svd 
-     2&gt;&amp;1;
-  </command>
-  <inputs>
-    <param format="bed" name="probesFile" type="data" label="Target regions (BED)" />
-    <repeat name="inputs" title="BAM" min="2" help="Need to add more files? Use controls below.">
-      <param format="bam" name="inputFile" type="data" label="BAM file" />
-      <param name="label" type="text" size="30" value="" label="Label" help="Label to use in the output. If not given, the dataset name will be used instead">
-        <validator type="regex" message="Spaces are not allowed">^\S*$</validator>
-      </param>
-    </repeat>
-    <param name="svd" type="integer" value="2" label="SVD" />
-  </inputs>
-  <outputs>
-    <data format="hdf5" name="outputFile" label="${tool.name} on ${on_string}: hdf5" />
-  </outputs>
-  <help>
-
-.. class:: warningmark
-
-**Warning about SVD**
-
-The SVD value must be less than the number of samples.
-
-E.g.: if the number of samples is 3, the value of SVD must be 2 or less.
-
-**What it does**
-
-This tool calculates RPKM for each BAM file and analyzes them, afterward it creates a unique HDF5 file containing all SVD-ZRPKM values,
-probes and samples for downstream analysis with 'CoNIFER CNV caller' and 'CoNIFER plot'.
-
-**License and citation**
-
-This Galaxy tool is Copyright © 2015 `CRS4 Srl.`_ and is released under the `MIT license`_.
-
-.. _CRS4 Srl.: http://www.crs4.it/
-.. _MIT license: http://opensource.org/licenses/MIT
-
-You can use this tool only if you agree to the license terms of: `CoNIFER`_.
-
-.. _CoNIFER: http://conifer.sourceforge.net/
-
-If you use this tool, please cite:
-
-- |Cuccuru2014|_
-- |Krumm2012|_.
-
-.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
-.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
-.. |Krumm2012| replace::  Krumm, N., Sudmant, P. H., Ko, A., O'Roak, B. J., Malig, M., Coe, B. P., NHLBI Exome Sequencing Project, Quinlan, A. R., Nickerson, D. A., Eichler, E. E. (2012) Copy number variation detection and genotyping from exome sequence data. *Genome Res.* 22(8), 1525-1532
-.. _Krumm2012: http://genome.cshlp.org/content/22/8/1525
-  </help>
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btu135</citation>
-    <citation type="doi">10.1101/gr.138115.112</citation>
-  </citations>
-</tool>
--- a/conifer.py	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,692 +0,0 @@
-#######################################################################
-#######################################################################
-# CoNIFER: Copy Number Inference From Exome Reads
-# Developed by Niklas Krumm (C) 2012
-# nkrumm@gmail.com
-# 
-# homepage: http://conifer.sf.net
-# This program is described in:
-# Krumm et al. 2012. Genome Research. doi:10.1101/gr.138115.112
-#
-# This file is part of CoNIFER.
-# CoNIFER is free software:  you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#######################################################################
-#######################################################################
-
-import argparse
-import os, sys, copy
-import glob
-import csv
-import conifer_functions as cf
-import operator
-from tables import *
-import numpy  as np
-
-def CF_analyze(args):
-	# do path/file checks:
-	try: 
-		# read probes table
-		probe_fn = str(args.probes[0])
-		probes = cf.loadProbeList(probe_fn)
-		num_probes = len(probes)
-		print '[INIT] Successfully read in %d probes from %s' % (num_probes, probe_fn)
-	except IOError as e: 
-		print '[ERROR] Cannot read probes file: ', probe_fn
-		sys.exit(0)
-	
-	try: 
-		svd_outfile_fn = str(args.output)
-		# h5file_out = openFile(svd_outfile_fn, mode='w')
-		h5file_out = open_file(svd_outfile_fn, mode='w')
-		# probe_group = h5file_out.createGroup("/","probes","probes")
-		probe_group = h5file_out.create_group("/","probes","probes")
-	except IOError as e: 
-		print '[ERROR] Cannot open SVD output file for writing: ', svd_outfile_fn
-		sys.exit(0)
-	
-	if args.write_svals != "":
-		sval_f = open(args.write_svals,'w')
-	
-	if args.plot_scree != "":
-		try:
-			import matplotlib
-			matplotlib.use('Agg')
-			import matplotlib.pyplot as plt
-			import pylab as P
-			from matplotlib.lines import Line2D
-			from matplotlib.patches import Rectangle
-		except:
-			print "[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?"
-			sys.exit(0)
-		
-		plt.gcf().clear()
-		fig = plt.figure(figsize=(10,5))
-		ax = fig.add_subplot(111)
-	
-	rpkm_dir = str(args.rpkm_dir[0])
-	rpkm_files = glob.glob(rpkm_dir + "/*")
-	if len(rpkm_files) == 0:
-		print '[ERROR] Cannot find any files in RPKM directory (or directory path is incorrect): ', rpkm_dir
-		sys.exit(0)
-	elif len(rpkm_files) == 1:
-		print '[ERROR] Found only 1 RPKM file (sample). CoNIFER requires multiple samples (8 or more) to run. Exiting.'
-		sys.exit(0)
-	elif len(rpkm_files) < 8:
-		print '[WARNING] Only found %d samples... this is less than the recommended minimum, and CoNIFER may not analyze this dataset correctly!' % len(rpkm_files)
-	elif len(rpkm_files) <= int(args.svd):
-		print '[ERROR] The number of SVD values specified (%d) must be less than the number of samples (%d). Either add more samples to the analysis or reduce the --svd parameter! Exiting.' % (len(rpkm_files), int(args.svd))
-		sys.exit(0)
-	else:
-		print '[INIT] Found %d RPKM files in %s' % (len(rpkm_files), rpkm_dir)
-	
-	# read in samples names and generate file list
-	samples = {}
-	for f in rpkm_files:
-		s = '.'.join(f.split('/')[-1].split('.')[0:-1])
-		print "[INIT] Mapping file to sampleID: %s --> %s" % (f, s)
-		samples[s] = f
-	
-	#check uniqueness and total # of samples
-	if len(set(samples)) != len(set(rpkm_files)):
-		print '[ERROR] Could not successfully derive sample names from RPKM filenames. There are probably non-unique sample names! Please rename files using <sampleID>.txt format!'
-		sys.exit(0)
-	
-	# LOAD RPKM DATA
-	RPKM_data = np.zeros([num_probes,len(samples)], dtype=np.float)
-	failed_samples = 0
-	
-	for i,s in enumerate(samples.keys()):
-		t = np.loadtxt(samples[s], dtype=np.float, delimiter="\t", skiprows=0, usecols=[2])
-		if len(t) != num_probes:
-			print "[WARNING] Number of RPKM values for %s in file %s does not match number of defined probes in %s. **This sample will be dropped from analysis**!" % (s, samples[s], probe_fn)
-			_ = samples.pop(s)
-			failed_samples += 1
-		else:
-			RPKM_data[:,i] = t
-			print "[INIT] Successfully read RPKM data for sampleID: %s" % s	
-	
-	RPKM_data = RPKM_data[:,0:len(samples)]
-	print "[INIT] Finished reading RPKM files. Total number of samples in experiment: %d (%d failed to read properly)" % (len(samples), failed_samples)
-	
-	if len(samples) <= int(args.svd):
-		print '[ERROR] The number of SVD values specified (%d) must be less than the number of samples (%d). Either add more samples to the analysis or reduce the --svd parameter! Exiting.' % (int(args.svd), len(samples))
-		sys.exit(0)
-	
-	# BEGIN 
-	chrs_to_process = set(map(operator.itemgetter("chr"),probes))
-	chrs_to_process_str = ', '.join([cf.chrInt2Str(c) for c in chrs_to_process])
-	print '[INIT] Attempting to process chromosomes: ', chrs_to_process_str
-	
-	
-	
-	for chr in chrs_to_process:
-		print "[RUNNING: chr%d] Now on: %s" %(chr, cf.chrInt2Str(chr))
-		chr_group_name = "chr%d" % chr
-		# chr_group = h5file_out.createGroup("/",chr_group_name,chr_group_name)
-		chr_group = h5file_out.create_group("/",chr_group_name,chr_group_name)
-		
-		chr_probes = filter(lambda i: i["chr"] == chr, probes)
-		num_chr_probes = len(chr_probes)
-		start_probeID = chr_probes[0]['probeID']
-		stop_probeID = chr_probes[-1]['probeID']
-		print "[RUNNING: chr%d] Found %d probes; probeID range is [%d-%d]" % (chr, len(chr_probes), start_probeID-1, stop_probeID) # probeID is 1-based and slicing is 0-based, hence the start_probeID-1 term
-		
-		rpkm = RPKM_data[start_probeID:stop_probeID,:]
-		
-		print "[RUNNING: chr%d] Calculating median RPKM" % chr
-		median = np.median(rpkm,1)
-		sd = np.std(rpkm,1)
-		probe_mask = median >= float(args.min_rpkm)
-		print "[RUNNING: chr%d] Masking %d probes with median RPKM < %f" % (chr, np.sum(probe_mask==False), float(args.min_rpkm))
-		
-		rpkm = rpkm[probe_mask, :]
-		num_chr_probes = np.sum(probe_mask)
-		
-		if num_chr_probes <= len(samples):
-			print "[ERROR] This chromosome has fewer informative probes than there are samples in the analysis! There are probably no mappings on this chromosome. Please remove these probes from the probes.txt file"
-			sys.exit(0)
-		
-		probeIDs = np.array(map(operator.itemgetter("probeID"),chr_probes))[probe_mask]
-		probe_starts = np.array(map(operator.itemgetter("start"),chr_probes))[probe_mask]
-		probe_stops = np.array(map(operator.itemgetter("stop"),chr_probes))[probe_mask]	
-		gene_names =  np.array(map(operator.itemgetter("name"),chr_probes))[probe_mask]	
-		
-		dt = np.dtype([('probeID',np.uint32),('start',np.uint32),('stop',np.uint32), ('name', np.str_, 20)])
-		
-		out_probes = np.empty(num_chr_probes,dtype=dt)
-		out_probes['probeID'] = probeIDs
-		out_probes['start'] = probe_starts
-		out_probes['stop'] = probe_stops
-		out_probes['name'] = gene_names
-		# probe_table = h5file_out.createTable(probe_group,"probes_chr%d" % chr,cf.probe,"chr%d" % chr)
-		probe_table = h5file_out.create_table(probe_group,"probes_chr%d" % chr,cf.probe,"chr%d" % chr)
-		probe_table.append(out_probes)
-		
-		print "[RUNNING: chr%d] Calculating ZRPKM scores..." % chr
-		rpkm = np.apply_along_axis(cf.zrpkm, 0, rpkm, median[probe_mask], sd[probe_mask])
-		
-		# svd transform
-		print "[RUNNING: chr%d] SVD decomposition..." % chr
-		components_removed = int(args.svd)
-		
-		U, S, Vt = np.linalg.svd(rpkm,full_matrices=False)
-		new_S = np.diag(np.hstack([np.zeros([components_removed]),S[components_removed:]]))
-		
-		if args.write_svals != "":
-			sval_f.write('chr' + str(chr) + '\t' + '\t'.join([str(_i) for _i in S]) + "\n")
-		
-		if args.plot_scree != "":
-			ax.plot(S, label='chr' + str(chr),lw=0.5)
-		
-		# reconstruct data matrix
-		rpkm = np.dot(U, np.dot(new_S, Vt))
-		
-		
-		# save to HDF5 file
-		print "[RUNNING: chr%d] Saving SVD-ZRPKM values" % chr
-		
-		for i,s in enumerate(samples):
-			out_data = np.empty(num_chr_probes,dtype='u4,f8')
-			out_data['f0'] = probeIDs
-			out_data['f1'] = rpkm[:,i]
-			# sample_tbl = h5file_out.createTable(chr_group,"sample_" + str(s),cf.rpkm_value,"%s" % str(s))
-			sample_tbl = h5file_out.create_table(chr_group,"sample_" + str(s),cf.rpkm_value,"%s" % str(s))
-			sample_tbl.append(out_data)
-	
-	
-	print "[RUNNING] Saving sampleIDs to file..."
-	# sample_group = h5file_out.createGroup("/","samples","samples")
-	# sample_table = h5file_out.createTable(sample_group,"samples",cf.sample,"samples")
-	sample_group = h5file_out.create_group("/","samples","samples")
-	sample_table = h5file_out.create_table(sample_group,"samples",cf.sample,"samples")
-	dt = np.dtype([('sampleID',np.str_,100)])
-	out_samples = np.empty(len(samples.keys()),dtype=dt)
-	out_samples['sampleID'] = np.array(samples.keys())
-	sample_table.append(out_samples)
-	
-	
-	if args.write_sd != "":
-		print "[RUNNING] Calculating standard deviations for all samples (this can take a while)..."
-		
-		sd_file = open(args.write_sd,'w')
-		
-		for i,s in enumerate(samples):
-			# collect all SVD-ZRPKM values
-			count = 1
-			for chr in chrs_to_process:
-				if count == 1:
-					sd_out = h5file_out.root._f_getChild("chr%d" % chr)._f_getChild("sample_%s" % s).read(field="rpkm").flatten()
-				else:
-					sd_out = np.hstack([sd_out,out.h5file_out.root._f_getChild("chr%d" % chr)._f_getChild("sample_%s" % s).read(field="rpkm").flatten()])
-				
-				sd = np.std(sd_out)
-			sd_file.write("%s\t%f\n" % (s,sd))
-		
-		sd_file.close()
-	
-	if args.plot_scree != "":
-		plt.title("Scree plot")
-		if len(samples) < 50:
-			plt.xlim([0,len(samples)])
-			plt.xlabel("S values")
-		else:
-			plt.xlim([0,50])
-			plt.xlabel("S values (only first 50 plotted)")
-		plt.ylabel("Relative contributed variance")		
-		plt.savefig(args.plot_scree)
-	
-	print "[FINISHED]"
-	h5file_out.close()
-	sys.exit(0)
-
-def CF_export(args):
-	try: 
-		h5file_in_fn = str(args.input)
-		# h5file_in = openFile(h5file_in_fn, mode='r')
-		h5file_in = open_file(h5file_in_fn, mode='r')
-	except IOError as e: 
-		print '[ERROR] Cannot open CoNIFER input file for reading: ', h5file_in_fn
-		sys.exit(0)	
-	
-	# read probes
-	probes = {}
-	for probes_chr in h5file_in.root.probes:
-		probes[probes_chr.title] = probes_chr.read()
-	
-	if args.sample =='all':
-		all_samples = list(h5file_in.root.samples.samples.read(field="sampleID"))
-		
-		out_path = os.path.abspath(args.output)
-		
-		print "[INIT] Preparing to export all samples (%d samples) to %s" % (len(all_samples), out_path)
-		for sample in all_samples:
-			try:
-				outfile_fn = out_path + "/" + sample + ".bed"
-				outfile_f = open(outfile_fn,'w')
-			except IOError as e:
-				print '[ERROR] Cannot open output file for writing: ', outfile_fn
-				sys.exit(0)
-			print "[RUNNING] Exporting %s" % sample
-			
-			cf.export_sample(h5file_in,sample,probes,outfile_f)
-			outfile_f.close()
-	
-	elif len(args.sample) == 1:
-		out_path = os.path.abspath(args.output)
-		sample = args.sample[0]
-		print "[INIT] Preparing to export sampleID %s to %s" % (args.sample[0], out_path)
-		try:
-			if os.path.isdir(out_path):
-				outfile_fn = out_path + "/" + sample + ".bed"
-			else:
-				outfile_fn = out_path
-			outfile_f = open(outfile_fn,'w')
-		except IOError as e:
-			print '[ERROR] Cannot open output file for writing: ', outfile_fn
-			sys.exit(0)
-		print "[RUNNING] Exporting %s to %s" % (sample, outfile_fn)
-		
-		cf.export_sample(h5file_in,sample,probes,outfile_f)
-		outfile_f.close()
-	
-	else:
-		out_path = os.path.abspath(args.output)
-		print "[INIT] Preparing to export %d samples to %s" % (len(args.sample), out_path)
-		for sample in args.sample:
-			try:
-				if os.path.isdir(out_path):
-					outfile_fn = out_path + "/" + sample + ".bed"
-				else:
-					outfile_fn = out_path
-				outfile_f = open(outfile_fn,'w')
-			except IOError as e:
-				print '[ERROR] Cannot open output file for writing: ', outfile_fn
-				sys.exit(0)
-			print "[RUNNING] Exporting %s to %s" % (sample, outfile_fn)
-			
-			cf.export_sample(h5file_in,sample,probes,outfile_f)
-			outfile_f.close()		
-	sys.exit(0)
-
-def CF_call(args):
-	try: 
-		h5file_in_fn = str(args.input)
-		# h5file_in = openFile(h5file_in_fn, mode='r')
-		h5file_in = open_file(h5file_in_fn, mode='r')
-	except IOError as e: 
-		print '[ERROR] Cannot open CoNIFER input file for reading: ', h5file_in_fn
-		sys.exit(0)		
-	
-	try: 
-		callfile_fn = str(args.output)
-		callfile_f = open(callfile_fn, mode='w')
-	except IOError as e: 
-		print '[ERROR] Cannot open output file for writing: ', callfile_fn
-		sys.exit(0)
-	
-	chrs_to_process = []
-	for chr in h5file_in.root:
-		if chr._v_title not in ('probes','samples'):
-			chrs_to_process.append(chr._v_title.replace("chr",""))
-	
-	h5file_in.close()
-	
-	print '[INIT] Initializing caller at threshold = %f' % (args.threshold)
-	
-	r = cf.rpkm_reader(h5file_in_fn)
-	
-	all_calls = []
-	
-	for chr in chrs_to_process:
-		print '[RUNNING] Now processing chr%s' % chr
-		data = r.getExonValuesByRegion(chr)
-		
-		#raw_data = copy.copy(data)
-		_ = data.smooth()
-		
-		mean= np.mean(data.rpkm,axis=1)
-		sd =  np.std(data.rpkm,axis=1)
-		
-		for sample in r.getSampleList():
-			sample_data = data.getSample([sample]).flatten()
-			#sample_raw_data = raw_data.getSample([sample]).flatten()
-			
-			dup_mask = sample_data >= args.threshold
-			del_mask = sample_data <= -1*args.threshold
-			
-			dup_bkpoints = cf.getbkpoints(dup_mask) #returns exon coordinates for this chromosome (numpy array coords)
-			del_bkpoints = cf.getbkpoints(del_mask)
-			
-			
-			dups = []
-			for start,stop in dup_bkpoints:
-				try: new_start =  np.max(np.where(sample_data[:start] < (mean[:start] + 3*sd[:start])))
-				except ValueError: new_start = 0
-				try: new_stop = stop + np.min(np.where(sample_data[stop:] < (mean[stop:] + 3*sd[stop:])))
-				except ValueError: new_stop = data.shape[1]-1
-				dups.append({"sampleID":sample,"chromosome":  cf.chrInt2Str(chr), "start":data.exons[new_start]["start"], "stop": data.exons[new_stop]["stop"], "state": "dup"})
-			
-			dels = []
-			for start,stop in del_bkpoints:	
-				try: new_start =  np.max(np.where(sample_data[:start] > (-1*mean[:start] - 3*sd[:start])))
-				except ValueError: new_start = 0
-				try: new_stop = stop + np.min(np.where(sample_data[stop:] > (-1*mean[stop:] - 3*sd[stop:])))
-				except ValueError: new_stop = data.shape[1]-1
-				dels.append({"sampleID":sample,"chromosome": cf.chrInt2Str(chr), "start":data.exons[new_start]["start"], "stop": data.exons[new_stop]["stop"], "state": "del"})
-			
-			dels = cf.mergeCalls(dels) #merges overlapping calls
-			dups = cf.mergeCalls(dups)
-			
-			#print sampleID, len(dels), len(dups)
-			
-			all_calls.extend(list(dels))
-			all_calls.extend(list(dups))
-	
-	# print calls to file
-	header = ['sampleID','chromosome','start','stop','state']
-	
-	callfile_f.write('\t'.join(header) + "\n")
-	for call in all_calls:
-		print "%s\t%s\t%d\t%d\t%s" % (call["sampleID"], call["chromosome"], call["start"], call["stop"], call["state"])
-		callfile_f.write("%s\t%s\t%d\t%d\t%s\n" % (call["sampleID"], call["chromosome"], call["start"], call["stop"], call["state"]))
-	
-	sys.exit(0)
-
-def CF_plot(args):
-	try:
-		import locale
-		import matplotlib
-		matplotlib.use('Agg')
-		import matplotlib.pyplot as plt
-		import pylab as P
-		from matplotlib.lines import Line2D
-		from matplotlib.patches import Rectangle
-		_ = locale.setlocale(locale.LC_ALL, '')
-	except:
-		print "[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?"
-		sys.exit(0)
-		
-		
-	chr, start, stop = cf.parseLocString(args.region)
-	
-	r = cf.rpkm_reader(args.input)
-	
-	data = r.getExonValuesByRegion(chr,start,stop)
-	_ = data.smooth()
-	
-	plt.gcf().clear()
-	fig = plt.figure(figsize=(10,5))
-	ax = fig.add_subplot(111)
-	
-	
-	ax.plot(data.rpkm, linewidth = 0.3, c='k')
-	
-	
-	if args.sample != 'none':
-		cnt = 1
-		coloriter = iter(['r','b','g','y'])
-		for sample in args.sample:
-			try:
-				color, sampleID = sample.split(":")
-			except:
-				color =coloriter.next()
-				sampleID = sample
-			
-			ax.plot(data.getSample([sampleID]), linewidth = 1, c=color, label = sampleID)
-			
-			if cnt == 1:
-				cf.plotRawData(ax, r.getExonValuesByRegion(chr,start,stop,sampleList=[sampleID]).getSample([sampleID]),color=color)
-			cnt +=1
-		plt.legend(prop={'size':10},frameon=False)
-		
-	cf.plotGenes(ax, data)
-	cf.plotGenomicCoords(plt,data)
-	plt.xlim(0,data.shape[1])
-	plt.ylim(-3,3)
-	
-	plt.title("%s: %s - %s" % (cf.chrInt2Str(chr),locale.format("%d",start, grouping=True),locale.format("%d",stop, grouping=True)))
-	plt.xlabel("Position")
-	plt.ylabel("SVD-ZRPKM Values")
-	
-	plt.savefig(args.output)
-	
-	sys.exit(0)
-
-def CF_plotcalls(args):
-	try:
-		import matplotlib
-		matplotlib.use('Agg')
-		import matplotlib.pyplot as plt
-		import pylab as P
-		from matplotlib.lines import Line2D
-		from matplotlib.patches import Rectangle
-	except:
-		print "[ERROR] One or more of the required modules for plotting cannot be loaded! Are matplotlib and pylab installed?"
-		sys.exit(0)
-	
-	import locale	
-	try:
-		_ = locale.setlocale(locale.LC_ALL, 'en_US')
-	except:
-		_ = locale.setlocale(locale.LC_ALL, '')
-	
-	try: 
-		callfile_fn = str(args.calls)
-		callfile_f = open(callfile_fn, mode='r')
-	except IOError as e: 
-		print '[ERROR] Cannot open call file for reading: ', callfile_fn
-		sys.exit(0)
-	
-	all_calls = []
-	header = callfile_f.readline()
-	
-	for line in callfile_f:
-		sampleID, chr, start, stop, state = line.strip().split()
-		chr = cf.chrStr2Int(chr)
-		all_calls.append({"chromosome":int(chr), "start":int(start), "stop":int(stop), "sampleID":sampleID})
-	
-	r = cf.rpkm_reader(args.input)
-	
-	for call in all_calls:
-		chr = call["chromosome"]
-		start = call["start"]
-		stop = call["stop"]
-		sampleID = call["sampleID"]
-		
-		exons = r.getExonIDs(chr,int(start),int(stop))
-		
-		
-		window_start = max(exons[0]-args.window,0)
-		window_stop = exons[-1]+args.window
-		
-		data = r.getExonValuesByExons(chr,window_start, window_stop)
-		_ = data.smooth()
-		
-		plt.gcf().clear()
-		fig = plt.figure(figsize=(10,5))
-		ax = fig.add_subplot(111)
-		
-		
-		ax.plot(data.rpkm, linewidth = 0.3, c='k')
-		
-		
-		ax.plot(data.getSample([sampleID]), linewidth = 1, c='r', label = sampleID)
-		cf.plotRawData(ax, r.getExonValuesByExons(chr,window_start, window_stop,sampleList=[sampleID]).getSample([sampleID]),color='r')
-		
-		plt.legend(prop={'size':10},frameon=False)
-		
-		cf.plotGenes(ax, data)
-		cf.plotGenomicCoords(plt,data)
-		
-		exon_start = np.where(data.exons["start"] == start)[0][0]
-		exon_stop = np.where(data.exons["stop"] == stop)[0][0]
-		_ = ax.add_line(matplotlib.lines.Line2D([exon_start,exon_stop],[2,2],color='k',lw=6,linestyle='-',alpha=1,solid_capstyle='butt'))
-		
-		_ = plt.xlim(0,data.shape[1])
-		_ = plt.ylim(-3,3)
-		
-		plt.title("%s: %s - %s" % (cf.chrInt2Str(chr),locale.format("%d",start, grouping=True),locale.format("%d",stop, grouping=True)))
-		plt.xlabel("Position")
-		plt.ylabel("SVD-ZRPKM Values")
-		outfile = "%s_%d_%d_%s.png" % (cf.chrInt2Str(chr), start, stop, sampleID)
-		plt.savefig(args.outputdir + "/" + outfile)
-
-def CF_bam2RPKM(args):
-	try:
-		import pysam
-	except:
-		print '[ERROR] Cannot load pysam module! Make sure it is insalled'
-		sys.exit(0)
-	try: 
-		# read probes table
-		probe_fn = str(args.probes[0])
-		probes = cf.loadProbeList(probe_fn)
-		num_probes = len(probes)
-		print '[INIT] Successfully read in %d probes from %s' % (num_probes, probe_fn)
-	except IOError as e: 
-		print '[ERROR] Cannot read probes file: ', probe_fn
-		sys.exit(0)
-	
-	try:
-		rpkm_f = open(args.output[0],'w')
-	except IOError as e:
-		print '[ERROR] Cannot open rpkm file for writing: ', args.output
-		sys.exit(0)
-	
-	print "[RUNNING] Counting total number of reads in bam file..."
-	total_reads = float(pysam.view("-c", args.input[0])[0].strip("\n"))
-	print "[RUNNING] Found %d reads" % total_reads
-	
-	f = pysam.Samfile(args.input[0], "rb" )	
-	
-	# if not f._hasIndex():
-	if not f.has_index():
-		print "[ERROR] No index found for bam file (%s)!\n[ERROR] You must first index the bam file and include the .bai file in the same directory as the bam file!" % args.input[0]
-		sys.exit(0)
-    
-	# will be storing values in these arrays
-	readcount = np.zeros(num_probes)
-	exon_bp = np.zeros(num_probes)
-	probeIDs = np.zeros(num_probes)
-	counter = 0
-	
-	# detect contig naming scheme here # TODO, add an optional "contigs.txt" file or automatically handle contig naming
-	bam_contigs = f.references
-	probes_contigs = [str(p) for p in set(map(operator.itemgetter("chr"),probes))]
-	
-	probes2contigmap = {}
-	
-	for probes_contig in probes_contigs:
-		if probes_contig in bam_contigs:
-			probes2contigmap[probes_contig] = probes_contig
-		elif cf.chrInt2Str(probes_contig) in bam_contigs:
-			probes2contigmap[probes_contig] = cf.chrInt2Str(probes_contig)
-		elif cf.chrInt2Str(probes_contig).replace("chr","") in bam_contigs:
-			probes2contigmap[probes_contig] = cf.chrInt2Str(probes_contig).replace("chr","")
-		else:
-			print "[ERROR] Could not find contig '%s' from %s in bam file! \n[ERROR] Perhaps the contig names for the probes are incompatible with the bam file ('chr1' vs. '1'), or unsupported contig naming is used?" % (probes_contig, probe_fn)
-			sys.exit(0)
-	
-	print "[RUNNING] Calculating RPKM values..."
-	
-	# loop through each probe	
-	for p in probes:
-		
-		# f.fetch is a pysam method and returns an iterator for reads overlapping interval
-		
-		p_chr = probes2contigmap[str(p["chr"])]
-		
-		p_start = p["start"]
-		p_stop = p["stop"]
-		try:
-			iter = f.fetch(p_chr,p_start,p_stop)
-		except:
-			print "[ERROR] Could not retrieve mappings for region %s:%d-%d. Check that contigs are named correctly and the bam file is properly indexed" % (p_chr,p_start,p_stop)
-			sys.exit(0)
-		
-		for i in iter:
-			if i.pos+1 >= p_start: #this checks to make sure a read actually starts in an interval
-				readcount[counter] += 1
-		
-		exon_bp[counter] = p_stop-p_start
-		probeIDs[counter] = counter +1 #probeIDs are 1-based
-		counter +=1
-	
-	#calcualte RPKM values for all probes
-	rpkm = (10**9*(readcount)/(exon_bp))/(total_reads)
-	
-	out = np.vstack([probeIDs,readcount,rpkm])
-	
-	np.savetxt(rpkm_f,out.transpose(),delimiter='\t',fmt=['%d','%d','%f'])
-	
-	rpkm_f.close()
-
-	
-
-VERSION = "0.2.2"
-parser = argparse.ArgumentParser(prog="CoNIFER", description="This is CoNIFER %s (Copy Number Inference From Exome Reads), designed to detect and genotype CNVs and CNPs from exome sequence read-depth data. See Krumm et al., Genome Research (2012) doi:10.1101/gr.138115.112 \nNiklas Krumm, 2012\n nkrumm@uw.edu" % VERSION)
-parser.add_argument('--version', action='version', version='%(prog)s ' + VERSION)
-subparsers = parser.add_subparsers(help='Command to be run.')
-
-# rpkm command
-rpkm_parser= subparsers.add_parser('rpkm', help='Create an RPKM file from a BAM file')
-rpkm_parser.add_argument('--probes',action='store', required=True, metavar='/path/to/probes_file.txt',  nargs=1,help="Probe definition file")
-rpkm_parser.add_argument('--input',action='store', required=True, metavar='sample.bam',nargs=1,  help="Aligned BAM file")
-rpkm_parser.add_argument('--output',action='store', required=True, metavar='sample.rpkm.txt',nargs=1,  help="RPKM file to write")
-rpkm_parser.set_defaults(func=CF_bam2RPKM)
-
-# analyze command
-analyze_parser= subparsers.add_parser('analyze', help='Basic CoNIFER analysis. Reads a directory of RPKM files and a probe list and outputs a HDF5 file containing SVD-ZRPKM values.')
-analyze_parser.add_argument('--probes',action='store', required=True, metavar='/path/to/probes_file.txt',  nargs=1,help="Probe definition file")
-analyze_parser.add_argument('--rpkm_dir',action='store', required=True, metavar='/path/to/folder/containing/rpkm_files/',nargs=1,  help="Location of folder containing RPKM files. Folder should contain ONLY contain RPKM files, and all readable RPKM files will used in analysis.")
-analyze_parser.add_argument('--output','-o', required=True, metavar='/path/to/output_file.hdf5', type=str, help="Output location of HDF5 file to contain SVD-ZRPKM values")
-analyze_parser.add_argument('--svd', metavar='12', type=int, nargs='?', default = 12,help="Number of components to remove")
-analyze_parser.add_argument('--min_rpkm', metavar='1.00', type=float, nargs="?", default = 1.00,help="Minimum population median RPKM per probe.")
-analyze_parser.add_argument('--write_svals', metavar='SingularValues.txt', type=str, default= "", help="Optional file to write singular values (S-values). Used for Scree Plot.")
-analyze_parser.add_argument('--plot_scree', metavar='ScreePlot.png', type=str, default= "", help="Optional graphical scree plot. Requires matplotlib.")
-analyze_parser.add_argument('--write_sd', metavar='StandardDeviations.txt', type=str, default= "", help="Optional file with sample SVD-ZRPKM standard deviations. Used to filter noisy samples.")
-analyze_parser.set_defaults(func=CF_analyze)
-
-# export command
-export_parser= subparsers.add_parser('export', help='Export SVD-ZRPKM values from a HDF5 file to bed or vcf format.')
-export_parser.add_argument('--input','-i',action='store', required=True, metavar='CoNIFER_SVD.hdf5',help="HDF5 file from CoNIFER 'analyze' step")
-export_parser.add_argument('--output','-o',action='store', required=False, default='.', metavar='output.bed',help="Location of output file[s]. When exporting multiple samples, top-level directory of this path will be used.")
-export_parser.add_argument('--sample','-s',action='store', required=False, metavar='sampleID', default='all', nargs="+",help="SampleID or comma-separated list of sampleIDs to export. Default: export all samples")
-export_parser.set_defaults(func=CF_export)
-
-# plot command
-plot_parser= subparsers.add_parser('plot', help='Plot SVD-ZRPKM values using matplotlib')
-plot_parser.add_argument('--input','-i',action='store', required=True, metavar='CoNIFER_SVD.hdf5',help="HDF5 file from CoNIFER 'analyze' step")
-plot_parser.add_argument('--region',action='store', required=True, metavar='chr#:start-stop',help="Region to plot")
-plot_parser.add_argument('--output',action='store', required=True, metavar='image.png',help="Output path and filetype. PDF, PNG, PS, EPS, and SVG are supported.")
-plot_parser.add_argument('--sample',action='store', required=False, metavar='sampleID',nargs="+",default='none',help="Sample[s] to highlight. The following optional color spec can be used: <color>:<sampleID>. Available colors are r,b,g,y,c,m,y,k. The unsmoothed SVD-ZRPKM values for the first sample in this list will be drawn. Default: No samples highlighted.")
-plot_parser.set_defaults(func=CF_plot)
-
-# make calls command
-call_parser= subparsers.add_parser('call', help='Very rudimentary caller for CNVs using SVD-ZRPKM thresholding.')
-call_parser.add_argument('--input','-i',action='store', required=True, metavar='CoNIFER_SVD.hdf5',help="HDF5 file from CoNIFER 'analyze' step")
-call_parser.add_argument('--output',action='store', required=True, metavar='calls.txt',help="Output file for calls")
-call_parser.add_argument('--threshold', metavar='1.50', type=float, nargs='?', required=False, default = 1.50,help="+/- Threshold for calling (minimum SVD-ZRPKM)")
-call_parser.set_defaults(func=CF_call)
-
-# plotcalls command
-plotcalls_parser= subparsers.add_parser('plotcalls', help='Make basic plots from call file from "call" command.')
-plotcalls_parser.add_argument('--input','-i',action='store', required=True, metavar='CoNIFER_SVD.hdf5',help="HDF5 file from CoNIFER 'analyze' step")
-plotcalls_parser.add_argument('--calls',action='store', required=True, metavar='calls.txt',help="File with calls from 'call' command.")
-plotcalls_parser.add_argument('--outputdir',action='store', required=True, metavar='/path/to/directory',help="Output directory for plots")
-plotcalls_parser.add_argument('--window',action='store', required=False, metavar='50',default=50,help="In exons, the amount of padding to plot around each call")
-plotcalls_parser.set_defaults(func=CF_plotcalls)
-
-args = parser.parse_args()
-args.func(args)
--- a/conifer_functions.py	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,371 +0,0 @@
-#######################################################################
-#######################################################################
-# CoNIFER: Copy Number Inference From Exome Reads
-# Developed by Niklas Krumm (C) 2012
-# nkrumm@gmail.com
-# 
-# homepage: http://conifer.sf.net
-# This program is described in:
-# Krumm et al. 2012. Genome Research. doi:10.1101/gr.138115.112
-#
-# This file is part of CoNIFER.
-# CoNIFER is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#######################################################################
-#######################################################################
-
-import csv
-from tables import *
-import numpy as np
-import operator
-
-class rpkm_value(IsDescription):
-	probeID = UInt32Col(pos=0)
-	rpkm = FloatCol(pos=1)
-
-class probe(IsDescription):
-	probeID = 	UInt32Col(pos=0)
-	start = 	UInt32Col(pos=1) # start of probe
-	stop = 		UInt32Col(pos=2) # stop of probe
-	name = 		StringCol(20,pos=3)   # 20-character String
-
-def chrInt2Str(chromosome_int):
-	if int(chromosome_int) == 23:
-		return 'chrX'
-	elif int(chromosome_int) == 24:
-		return 'chrY' 
-	else:
-		return 'chr' + str(chromosome_int)
-
-
-def chrStr2Int(chromosome_str):
-	chr = chromosome_str.replace('chr','')
-	if chr == 'X':
-		return 23
-	elif chr == 'Y':
-		return 24 
-	else:
-		return int(chr)
-
-def parseLocString(locstr):
-	try:
-		chr,locstr = locstr.split(":")
-		start, stop = locstr.split("-")
-	except:
-		chr, start, stop = locstr.split("\t")
-	
-	chr = chrStr2Int(chr)	
-	start = int(start)
-	stop = int(stop)
-	return (chr,start,stop)	
-
-def zrpkm(rpkm,median,sd):
-	return (rpkm - median) / sd
-
-
-
-class sample(IsDescription):
-	sampleID = 	StringCol(100,pos=0) # 20-char string (sampleID)
-
-def loadProbeList(CF_probe_filename):
-	# Load data files
-	probefile = open(CF_probe_filename, 'rb')
-	s = csv.Sniffer()
-	header = s.has_header(probefile.read(1024))
-	probefile.seek(0)
-	dialect = s.sniff(probefile.read(1024))
-	probefile.seek(0)
-	if header:
-		r = csv.DictReader(probefile, dialect=dialect)
-	else:
-		r = csv.DictReader(probefile, dialect=dialect, fieldnames=['chr','start','stop','name'])
-	
-	probes = []
-	probeID = 1
-	for row in r:
-		probes.append({'probeID': probeID, 'chr':chrStr2Int(row['chr']),'start':int(row['start']),'stop':int(row['stop']), 'name':row['name']})
-		probeID +=1
-	
-	if len(probes) == 0:
-		raise Exception("No probes in probe file")
-		
-	return probes
-
-
-def export_sample(h5file_in,sample,probes,outfile_f):
-	dt = np.dtype([('chr','|S10'),('start', '<u4'), ('stop', '<u4'), ('name', '|S20'),('SVDZRPKM',np.float)])
-	for chr in h5file_in.root:
-		if chr._v_title in ('probes','samples'):
-			continue
-		
-		out_data = np.empty(len(probes[chr._v_title]),dtype=dt)
-		out_data["SVDZRPKM"] = chr._f_getChild("sample_" + sample).read(field='rpkm')
-		out_data["chr"] = np.repeat(chr._v_title,len(out_data))
-		out_data["start"] = probes[chr._v_title]["start"]
-		out_data["stop"] = probes[chr._v_title]["stop"]
-		out_data["name"] = probes[chr._v_title]["name"]
-		np.savetxt(outfile_f, out_data,fmt=["%s","%d","%d","%s","%f"], delimiter="\t")
-
-
-
-def plotGenes(axis, rpkm_data, levels=5,x_pos=-2,text_pos='right',line_spacing=0.1,text_offset=0.25,data_range=None):
-	from matplotlib.lines import Line2D
-	counter = 0
-	prev_gene = ""
-	if data_range is not None:
-		exon_set = rpkm_data.exons[data_range]
-	else:
-		exon_set = rpkm_data.exons
-	for gene in exon_set["name"]:
-		if gene == prev_gene:
-			continue
-		elif gene == 'None':
-			continue
-		start = np.min(np.where(exon_set["name"] == gene)) 
-		stop = np.max(np.where(exon_set["name"] == gene)) + 1
-		_ = axis.add_line(Line2D([start-0.5,stop-0.5],[x_pos - (counter * line_spacing),x_pos - (counter * line_spacing)],color=(102/255.,33/255.,168/255.,0.6),linewidth=5,linestyle='-',alpha=0.5,solid_capstyle='butt'))
-		_ = axis.text(stop+text_offset, x_pos - (counter * line_spacing), gene, ha='left',va='center',fontsize=6)
-		counter +=1
-		prev_gene = gene
-		if counter > 5:
-			counter = 0
-
-
-def plotGenomicCoords(plt, rpkm_data,fontsize=10,rotation=0):
-	import operator
-	import locale
-	exon_set = rpkm_data.exons
-	genomic_coords = np.array(map(operator.itemgetter("start"),exon_set))
-	
-	ticks = range(0,len(exon_set),len(exon_set)/5)
-	
-	ticks[-1] -= 1 # the last tick is going to be off the chart, so we estimate it as the second to last genomic coord.
-	labels = [locale.format("%d", genomic_coords[i], grouping=True) for i in ticks if i < len(genomic_coords)]
-	if rotation != 0:
-		ha = "right"
-	else:
-		ha = "center"
-	_ = plt.xticks(ticks,labels,fontsize=fontsize,rotation=rotation,ha=ha)
-
-
-def plotRawData(axis, rpkm_data, color='r',linewidth=0.7):
-	zero_stack = np.zeros(len(rpkm_data))
-	positions = np.repeat(np.arange(0,len(rpkm_data)),3)
-	logr = np.vstack([zero_stack,rpkm_data.flatten(),zero_stack]).transpose().ravel()
-	axis.plot(positions,logr,color=color,marker=None,linewidth=1)
-
-def getbkpoints(mask):
-	bkpoints = np.nonzero(np.logical_xor(mask[0:-1],mask[1:]))[0]+1
-	if mask[0] == 1:
-		bkpoints = np.hstack([0,bkpoints])
-	if mask[-1] == 1:
-		bkpoints = np.hstack([bkpoints,len(mask)])
-	return bkpoints.reshape(len(bkpoints)/2,2)
-
-def mergeCalls(calls):
-	if len(calls) == 0:
-		return []
-	
-	out_calls = []
-	calls=np.array(calls)[np.argsort(np.array(map(operator.itemgetter("start"),calls),dtype=np.int))]
-	pstart = calls[0]["start"]
-	pstop = calls[0]["stop"]
-	for d in calls:
-		if d["start"] <= pstop:
-			pstop = max(d["stop"],pstop)
-		else:
-			out_calls.append({"sampleID": d["sampleID"], "chromosome": d["chromosome"], "start":pstart, "stop":pstop, "state": d["state"]})
-			pstart = d["start"]
-			pstop = d["stop"]
-	
-	out_calls.append({"sampleID": d["sampleID"], "chromosome": d["chromosome"], "start":pstart, "stop":pstop, "state": d["state"]})
-	return out_calls
-
-class rpkm_data:
-	def __init__(self):
-		self.rpkm = None
-		self.samples = None
-		self.exons = None
-		self.isGenotype = False
-		self.calls = []
-		self.refined_calls = []
-		
-	def smooth(self, window = 15, padded = False): #todo, fix the padding here
-		if self.isGenotype:
-			print "Warning: the data in this rpkm_data container are single genotype values. Smoothing will have no effect!"
-			return self.rpkm
-		
-		if window > 0:
-			weightings=np.blackman(window)
-			weightings = weightings/weightings.sum()
-			smoothed_data = np.array([])
-			for row in self.rpkm.transpose():
-				smoothed = np.convolve(row, weightings)[(window-1)/2:-((window-1)/2)]
-				if len(smoothed_data) == 0:
-					smoothed_data  = smoothed
-				else:
-					smoothed_data  = np.vstack([smoothed_data,smoothed])
-			
-			self.rpkm = smoothed_data.transpose()
-			return self.rpkm
-		else:
-			return self.rpkm
-	
-	def getSample(self, sampleIDs):
-		sample_array = np.array(self.samples)
-		if isinstance(sampleIDs,list):
-			mask = np.zeros(len(sample_array),dtype=np.bool)
-			for sampleID in sampleIDs:
-				mask = np.logical_or(mask, sample_array == str(sampleID))
-			
-			return self.rpkm[:,mask]
-		else:		
-			mask = sample_array == str(sampleID)
-			return self.rpkm[:,mask]
-	
-	def getSamples(self, sampleIDs):
-		return self.getSample(sampleIDs)
-	
-	@property
-	def shape(self):
-		if self.isGenotype:
-			return [len(self.samples), 1]
-		else:
-			return [len(self.samples), len(self.exons)]
-
-
-class rpkm_reader:
-	def __init__(self, rpkm_fn=None):
-		"""Initialize an rpkm_reader instance. Specify the location of the data file"""
-		
-		if rpkm_fn == None:
-			print "Must specify RPKM HDF5 file!"
-			return 0
-		# set up file access
-		# self.h5file = openFile(rpkm_fn, mode='r')
-		self.h5file = open_file(rpkm_fn, mode='r')
-		self.sample_table = self.h5file.root.samples.samples
-		
-	def __del__(self):
-		self.h5file.close()
-	
-	def getExonValuesByExons(self, chromosome, start_exon, stop_exon, sampleList=None,genotype=False):
-		
-		probe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))
-		#table_rows = probe_tbl.getWhereList('(start >= %d) & (stop <= %d)' % (start,stop))
-		start_exon = max(start_exon,0)
-		stop_exon = min(stop_exon, probe_tbl.nrows)
-		#print start_exon, stop_exon
-		table_rows = np.arange(start_exon,stop_exon,1)
-		data_tbl  = self.h5file.root._f_getChild("chr" + str(chromosome))
-		
-		if sampleList == None:
-			num_samples = data_tbl._v_nchildren
-			samples = data_tbl	
-		else:
-			num_samples = len(sampleList)
-			samples = [data_tbl._f_getChild("sample_" + s) for s in sampleList]
-		
-		data = np.empty([num_samples,len(table_rows)],dtype=np.float)
-		
-		out_sample_list = []
-		cnt = 0
-		for sample_tbl in samples:
-			d = sample_tbl.readCoordinates(table_rows,field="rpkm")
-			data[cnt,:] = d
-			cnt +=1
-			out_sample_list.append(sample_tbl.title)
-		
-		d = rpkm_data()
-		if genotype: # return average #todo-- implement median and SD?
-			d.rpkm = data.transpose().mean(axis=0)
-			d.isGenotype = True
-		else: #return all data points
-			d.rpkm = data.transpose()
-		d.samples = out_sample_list
-		d.exons = probe_tbl.readCoordinates(table_rows)
-		
-		return d
-	
-	def getExonValuesByRegion(self, chromosome, start=None, stop=None, sampleList=None,genotype=False):
-		probe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))
-		if (start is not None) and (stop is not None):
-			table_rows = probe_tbl.getWhereList('(start >= %d) & (stop <= %d)' % (start,stop))
-		else:
-			table_rows = probe_tbl.getWhereList('(start >= 0) & (stop <= 1000000000)')
-		
-		data_tbl  = self.h5file.root._f_getChild("chr" + str(chromosome))
-		
-		if sampleList == None:
-			num_samples = data_tbl._v_nchildren
-			samples = data_tbl	
-		else:
-			num_samples = len(sampleList)
-			samples = [data_tbl._f_getChild("sample_" + s) for s in sampleList]
-		
-		data = np.empty([num_samples,len(table_rows)],dtype=np.float)
-		
-		out_sample_list = []
-		cnt = 0
-		for sample_tbl in samples:
-			d = sample_tbl.readCoordinates(table_rows,field="rpkm")
-			data[cnt,:] = d
-			cnt +=1
-			out_sample_list.append(sample_tbl.title)
-		
-		d = rpkm_data()
-		if genotype: # return average #todo-- implement median and SD?
-			d.rpkm = data.transpose().mean(axis=0)
-			d.isGenotype = True
-		else: #return all data points
-			d.rpkm = data.transpose()
-		d.samples = out_sample_list
-		d.exons = probe_tbl.readCoordinates(table_rows)
-		
-		return d
-	
-	def getSampleList(self,cohort=None,sex=None,ethnicity=None,custom=None):
-		"""Return a list of available samples in the current data file. Specifying no arguments will return all available samples"""
-		
-		readWhereStr = ""
-		if custom != None:
-			readWhereStr = custom
-		else:
-			if cohort != None:
-				if isinstance(cohort,list):
-					for c in cohort:
-						readWhereStr += "(cohort=='%s') | " % c
-					readWhereStr = readWhereStr.strip(" |")
-					readWhereStr += " & "
-				else:
-					readWhereStr += "(cohort=='%s') " % cohort
-			if sex != None:
-				if sex not in ['M','F']:	
-					sex = sex.upper()[0]
-				readWhereStr += " (sex=='%s') &" % sex
-			if ethnicity != None:
-				readWhereStr += " (ethnicity=='%s') &" % ethnicity
-			
-			readWhereStr = readWhereStr.strip(" &") # remove leading or trailing characters
-		if readWhereStr != "":
-			#print readWhereStr
-			sampleIDs = self.sample_table.readWhere(readWhereStr,field='sampleID')
-		else:
-			sampleIDs = self.sample_table.read(field='sampleID')
-		
-		return sampleIDs
-	
-	def getExonIDs(self, chromosome, start, stop):
-		probe_tbl = self.h5file.root.probes._f_getChild("probes_chr" + str(chromosome))
-		exons = probe_tbl.getWhereList('(start >= %d) & (stop <= %d)' % (start,stop))
-		return exons
--- a/conifer_wrapper.pl	Mon Feb 06 10:56:20 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#!/usr/bin/env perl
-
-# Execute CoNIFER plotcalls and
-# returns a HTML page with links to PNG plots
-
-use strict;
-use warnings;
-use Getopt::Long;
-use File::Basename; 
-use File::Path qw(make_path remove_tree);
-
-my $command;
-my $dir=$ENV{'CONIFER_PATH'};
-
-our ($multiple, $input, $regions, $sample, $window, $html_file, $html_folder, $verbose);
-
-GetOptions('multiple'=>\$multiple, 'input=s'=>\$input, 'regions=s'=>\$regions,
-    'sample:s'=>\$sample, 'window:i'=>\$window, 'verbose'=>\$verbose,
-    'html_file=s'=>\$html_file, 'html_folder=s'=>\$html_folder);
-
-make_path($html_folder);
-
-# Build command
-if ($multiple){
-    # Reformat file with regions as required by CoNIFER plotcalls
-    system("awk '{print \$5,\$1,\$2,\$3,\$4}' OFS=\"\t\" $regions > regions_sorted");
-    
-    $command = "python ".$dir."/conifer.py plotcalls --input $input --calls regions_sorted --window $window --outputdir $html_folder 2>&1";
-}else{
-    my $sample_command = ($sample eq "") ? "" : "--sample $sample";
-    my $plot_name = $regions;
-    $plot_name =~ s/[:-]/_/g;
-    $command = "python ".$dir."/conifer.py plot --input $input --region $regions $sample_command --output $html_folder/$plot_name.png 2>&1";
-}
-
-# Run CoNIFER
-system($command);
-$verbose and print $command,"\n";
-
-# Write HTML file
-open(HTML, ">$html_file");
-print HTML "<html><head><title>CoNIFER: Copy Number Analysis for Targeted Resequencing</title></head><body><h3>CoNIFER Output Files:</h3><p><ul>\n";
-opendir(DIR, $html_folder);
-
-my @FILES= grep { /png$/ }  readdir(DIR); 
-closedir(DIR);
-foreach my $file (@FILES) {
-    print HTML "<li><a href=$file>$file</a><img src=\"$file\" height=\"50\" width=\"100\"></li>\n";
-}
-print HTML "</ul></p></body></html>\n";
-close(HTML);