Mercurial > repos > bcrain-completegenomics > testing4

--- a/scripts/datatypes_conf.xml	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-  <datatype_files>
-    <datatype_file name="completegenomics.py"/>
-  </datatype_files>
-
-  <registration>
-    <!--
-      Add the following section to datatypes_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
-    -->
-    <!-- Start Complete Genomics Datatypes -->
-    <datatype extension="cg_var" type="galaxy.datatypes.tabular:CG_Var" display_in_upload="true" />
-    <datatype extension="cg_mastervar" type="galaxy.datatypes.tabular:CG_MasterVar" display_in_upload="true" />
-    <datatype extension="cg_gene" type="galaxy.datatypes.tabular:CG_Gene" display_in_upload="true" />
-    <!-- End Complete Genomics Datatypes -->
-  </registration>
-  <sniffers>
-  </sniffers>
-</datatypes>
--- a/scripts/tool-data/cg_crr_files.loc.sample	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use .crr reference files.  You will need to download or create
-#the .crr reference files and then create a cg_crr_files.loc file
-#similar to this one (store it in this directory) that points to
-#the location of the files. The cg_crr_files.loc
-#file has this format (white space characters are TAB characters):
-#
-#<value>	<dbkey>	<name>	<path>
-#
-#hg19	hg19	hg19.crr	/Users/bcrain/Documents/hg19.crr
-
--- a/scripts/tool_data_table_conf.xml.sample	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<tables>
-	<!--
-			 Add the following section to tool_data_table_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
-	-->
-	<!-- Start location of cgatools crr files -->
-	<table name="cg_crr_files" comment_char="#">
-			<columns>value, dbkey, name, path</columns>
-			<file path="tool-data/cg_crr_files.loc" />
-	</table>
-	<!-- End Location of cgatools crr files -->
-</tables>
--- a/scripts/tools/cg_scripts/Calculate_TestVariants_Variant_Frequencies.xml	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<tool id="pl_calculatefreq" name="Calculate_Variant_Frequencies" version="0.0.1">
-
-  <description>in cgatools-testvariants file</description> <!--adds description in toolbar-->
-
-  <command interpreter="perl">
-  Calculate_TestVariants_Variant_Frequencies_0_1_0.pl
-  --Input $input
-  --First_Genome_Field_Nr $first_col
-  --Last_Genome_Field_Nr $last_col
-  --Output1 $output1
-	--Output2 $output2
-  </command>
-
-  <outputs>
-  	<data format="tabular" name="output1" label="TestVariant_Frequencies on "/>
-  	<data format="tabular" name="output2" label="TestVariant_Frequencies_Short on "/>
-  </outputs>
-
-  <inputs>
-    <param name="input" type="data" format="tabular" label="TestVariants input file">
-      <validator type="unspecified_build" />
-    </param>
-    <param name="first_col" type="text" label="What column number is the first genome"/>
-    <param name="last_col" type="text" label="What column number is the last genome"/>
-  </inputs>
-
-
-  <help>
-
-**What it does**
-
-This tool calculates the allele frequencies for all variants present in the testvariant file.
-
------
-
-**Instructions**::
-
-	Calculate the frequencies of variants in a testvariants output file
-	Two values calculated:
-		Frequency vs all alleles
-		Frequency vs called alleles
-
-	Input: testvariants file
-	Outputs:
-		All data to *-Freq.tsv, including scores and quals
-		vars and freqs to *-Freq_Short.tsv
-		Exceptions to *-Freq_Log
-		Stats to *-Freq_Stats
-
-
-	perl Calculate_TestVariants_Variant_Frequencies_0_1_0.pl \
-	--Input input_file \
-	--First_Genome_Field_Nr col_nr1 \
-	--Last_Genome_Field_Nr col_nr2 \
-	--Output1 output1 \
-	--Output2 output_short
-	eg
-	perl Calculate_TestVariants_Variant_Frequencies_0_1_0.pl \
-	--Input /data/Family_Quartet_testvariants.tsv \
-	--First_Genome_Field_Nr 9 \
-	--Last_Genome_Field_Nr 11 \
-	--Output1 /data/Family_Quartet_testvariants \
-	--Output2 /data/Family_Quartet_testvariants_short
-
-  </help>
-</tool>
--- a/scripts/tools/cg_scripts/Calculate_TestVariants_Variant_Frequencies_0_1_0.pl	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,271 +0,0 @@
-#!/usr/bin/perl
-use strict;
-#use feature "say";
-#use File::Basename;
-$| = 1;
-
-# Get_TestVariants_Variant_Frequencies
-# Calculate the frequencies of variants in a testvariants output file
-# Two values calculated:
-#	Frequency vs all alleles
-# 	Frequency vs called alleles
-
-# Input is a testvariants file
-# Outputs:
-# All data to *-Freq.tsv, including scores and quals
-# vars and freqs to *-Freq_Short.tsv
-# Exceptions to *-Freq_Log
-# Stats to *-Freq_Stats
-
-# Format:
-# perl prog file dir
-# ie
-# perl Get_TestVariants_Variant_Frequencies \
-# --Input input_file \
-# --First_Genome_Field_Nr col_nr1 \
-# --Last_Genome_Field_Nr col_nr2 \
-# --Output1 output1 \
-# --Output2 output2
-
-# eg
-# perl /perl/Get_TestVariants_Variant_Frequencies_0_0_1.pl \
-# --Input /data/Family_Quartet_testvariants.tsv \
-# --First_Genome_Field_Nr 9 \
-# --Last_Genome_Field_Nr 11 \
-# --Output1 output1 \
-# --Output2 output2
-
-
-# Rick Tearle 2010-11
-
-# Elapsed-time bookkeeping: hold the negated start time so that adding time()
-# at the end of the run leaves the elapsed seconds in $Time
-my $Time = 0 - time();
-my $Debug = 0; # debug switch; in the visible code only commented-out diagnostics refer to it
-
-# Echo the command line, then collect the --Key value pairs from @ARGV
-# (GetEnteredParams lower-cases the keys)
-print "$0 @ARGV\nProcessing input parameters\n";
-my %ExpectedParams = GetExpectedParams();
-my %EnteredParams = GetEnteredParams();
-
-# The testvariants input must be an existing plain file
-my $FileIn = $EnteredParams{input};
-if (! -f $FileIn)
-{
-	die "Testvariants input file $FileIn not found\n";
-}
-#my $FileOut = $EnteredParams{output}; #
-#$DirectoryOut =~ s/\/$//; # remove trailing slash if present
-#unless (-d $DirectoryOut) {die "Output directory $DirectoryOut not found\n";} # requires existing file
-#print "$FileIn\n$DirectoryOut\n";
-#$FileIn =~ /(^.+\/)(.+?)\./; # get filename without path and without extensions
-
-# my $FileOut1 = $FileOut."-Freq.tsv";
-# my $FileOut2 = $FileOut."-Freq_Short.tsv";
-# my $FileOut3 = $FileOut."-Freq_Stats.tsv";
-# my $FileOut4 = $FileOut."-Freq_Log.tsv";
-
-print "\nOpening Input File:\n\t$FileIn\n";
-my $IN = OpenFile ($FileIn); # OpenFile dies if the input cannot be opened
-
-# Both output paths are mandatory: fail early with a clear message rather than
-# silently printing to a handle that was never opened
-foreach my $Param ("output1", "output2")
-{
-	unless (defined $EnteredParams{$Param} and length $EnteredParams{$Param}) {die "No output file given for --$Param\n";}
-}
-
-#print "\nOpening Output Files:\n\t$FileOut1\n\t$FileOut2\n\t$FileOut3\n\t$FileOut4\n"; #exit;
-open my $OUT1, ">", $EnteredParams{output1} or die "$!: can't open output file $EnteredParams{output1}\n"; # full table plus frequencies
-open my $OUT2, ">", $EnteredParams{output2} or die "$!: can't open output file $EnteredParams{output2}\n"; # short table plus frequencies
-#open my $OUT3, ">", $EnteredParams{output3};
-#open my $OUT4, ">", $EnteredParams{output4};
-
-# Get col header and genomes fields
-my $ColHeader = <$IN>; # get col header
-unless (defined $ColHeader) {die "Input file $FileIn is empty, no column header found\n";}
-chomp $ColHeader;
-my @ColHeader = split /\t/, $ColHeader;
-my $StartGenomes = $EnteredParams{first_genome_field_nr} - 1; # first column with testvariants data, 1 based -> 0 based
-my $StopGenomes = $EnteredParams{last_genome_field_nr} - 1; # last column with testvariants data, 1 based -> 0 based
-# a missing or non-numeric entry evaluates to 0 above and so ends up as -1 here
-if ($StartGenomes < 0) {die "No valid entry for First_Genome_Field_Nr, must be 1 or greater\n";}
-if ($StopGenomes < 0) {die "No valid entry for Last_Genome_Field_Nr, must be 1 or greater\n";}
-if ($StartGenomes > $StopGenomes) {die "Last_Genome_Field_Nr must be greater than or equal to First_Genome_Field_Nr\n";}
-# the indexes are 0 based at this point, so the largest valid index is (number of fields - 1)
-if ($StartGenomes >= int @ColHeader) {die "First_Genome_Field_Nr > number of fields in column header\n";}
-if ($StopGenomes >= int @ColHeader) {die "Last_Genome_Field_Nr > number of fields in column header\n";}
-my $NrGenomes = $StopGenomes - $StartGenomes + 1;
-
-# print column headers
-print $OUT1 join("\t",@ColHeader),"\tAllFreq\tCalledFreq\n"; # full output: every input column plus the two frequencies
-print $OUT2 join("\t",@ColHeader[0..7]),"\tAllFreq\tCalledFreq\n"; # short output: first 8 columns only plus the two frequencies
-print join("\t",@ColHeader),"\n";
-print "First Genome Field: $ColHeader[$StartGenomes]\n";
-print "Last Genome Field: $ColHeader[$StopGenomes]\n";
-print "Nr Genomes: $NrGenomes\n\n";
-
-print "\nProcessing Variants....\n";
-my $VariantCount = 0; # variant locus counter, not used
-my %AllFreqCounts; # storing histogram of all freq counts
-my %CalledFreqCounts; # storing histogram of called freq counts
-my $Warnings; # only referenced by the disabled "too many warnings" check below
-while (<$IN>)
-{
-	# testvariants fields: variantId chromosome begin end varType reference alleleSeq xRef GS000000XX1-ASM GS000000XX2-ASM [GS000000XXN-ASM]
-	my $Line = $_; # save line for output below
-	chomp $Line;
-	my @F = split /\t/, $Line; # split in to fields
-	$VariantCount++; # increment variant counter
-	my $UseFields = join ("",@F[$StartGenomes..$StopGenomes]); # get genome fields as string, to count 0s and 1s
-	my $Count1 = ($UseFields =~ tr/1//); # count the number of 1s
-	my $Count0 = ($UseFields =~ tr/0//); # count the number of 0s
-	my $CountN = ($UseFields =~ tr/N//); # count the number of Ns
-	my $NrAlleles = $Count1 + $Count0 + $CountN; # total count
-	unless ($NrAlleles == $NrGenomes *2 or $NrAlleles == $NrGenomes) # count does not match expected for diploid/haploid locus
-	{
-		print "$NrAlleles alleles for variant ",join(" ",@F[0..7]),"\n"; # log warning
-		#print "Expected $NrGenomes or ",$NrGenomes*2," alleles depending on ploidy of locus\n";
-		#if ($Warnings++ > 10) {die "Have found $Warnings exceptions for this file, terminating processing\n";} # terminate if too many warnings
-	}
-	# a line without any 0/1/N call (eg a blank or truncated line) has no alleles at all;
-	# report a frequency of 0 for it instead of dying with a division by zero
-	my $AllFreq = sprintf("%0.3f",$NrAlleles ? $Count1/$NrAlleles : 0); # calculate freq of 1s vs all alleles
-	my $CalledFreq = sprintf("%0.3f",0);
-	if ($Count1+$Count0) {$CalledFreq = sprintf("%0.3f",$Count1/($Count1+$Count0));} # calculate freq of 1s vs called alleles, if there are any
-	$AllFreqCounts{$AllFreq}++; # increment all freq histogram
-	$CalledFreqCounts{$CalledFreq}++; # increment called freq histogram
-	#print "$Line\n$AlleleCount\t$Count1\t$Count0\t$AllFreq\t$CalledFreq\n"; #exit;
-	print $OUT1 "$Line\t$AllFreq\t$CalledFreq\n"; # output full testvariants plus frequencies for this var
-	print $OUT2 join("\t",@F[0..7]),"\t$AllFreq\t$CalledFreq\n"; # output just var info plus frequencies for this var
-	#exit if $VariantCount > 20;
-}
-# buffered write errors (eg disk full) only surface when the handle is closed
-close $OUT1 or die "$!: can't close output file $EnteredParams{output1}\n";
-close $OUT2 or die "$!: can't close output file $EnteredParams{output2}\n";
-
-# Report, on STDOUT, how many variants fell on each frequency value,
-# first for the all-allele frequency and then for the called-allele frequency
-print "Nr Variants at each Frequency (All):\nFreq\tCount\n"; # header
-for my $Bin (sort {$a <=> $b} keys %AllFreqCounts)
-{
-	print "$Bin\t$AllFreqCounts{$Bin}\n";
-}
-
-print "\nNr Variants at each Frequency (Called):\nFreq\tCount\n"; # header
-for my $Bin (sort {$a <=> $b} keys %CalledFreqCounts)
-{
-	print "$Bin\t$CalledFreqCounts{$Bin}\n";
-}
-
-# $Time holds the negated start time, so adding the current time gives elapsed seconds
-$Time = $Time + time();
-print "\ntime $Time\n";
-
-###########################################################################
-#                                   SUBS                                  #
-###########################################################################
-
-# GetExpectedParams
-# Returns a hash (as a flat key/value list) of parameter names, each mapped to -1.
-# Takes no arguments.
-sub GetExpectedParams
-{
-	my %Expected;
-	foreach my $Name ("input", "output_dir")
-	{
-		$Expected{$Name} = -1; # -1 is the initial value stored for every parameter
-	}
-	return %Expected;
-}
-
-# GetEnteredParams
-# Parses @ARGV into a hash of "--Key value" pairs and returns it as a flat list.
-#   - a token starting with -- opens a new key; the key is lower-cased
-#   - all following tokens, up to the next -- token, are joined with single
-#     spaces to form the value; trailing whitespace is removed
-#   - a key without a value is stored as undef
-#   - tokens in front of the first -- token are ignored
-#   - a repeated key overwrites the earlier value
-# Only a leading -- marks a key, so values that merely contain "--" (such as a
-# path like /data/run--1/file.tsv) are kept intact.
-sub GetEnteredParams
-{
-	# Processing @ARGV
-	my %Hash;
-	my $Key; # key currently being filled, undef until the first -- token is seen
-	my @Vals; # value tokens collected so far for $Key
-
-	# store the pending key/value pair, if there is one
-	my $Store = sub
-	{
-		return unless defined $Key;
-		my $Val = join (" ", @Vals);
-		$Val =~ s/\s+$//; # remove any trailing spaces
-		$Hash{$Key} = length $Val ? $Val : undef; # make a hash entry out of key and val
-	};
-
-	foreach my $Arg (@ARGV)
-	{
-		if ($Arg =~ /^--(.*)\z/s) # start of a new parameter
-		{
-			my $Entry = $1;
-			$Store->(); # finish the previous parameter first
-			# a single token may carry "Key value"; put first element into key, the rest into val
-			my ($Name, $Rest) = split / /, $Entry, 2;
-			$Key = lc (defined $Name ? $Name : "");
-			@Vals = defined $Rest ? ($Rest) : ();
-		}
-		elsif (defined $Key)
-		{
-			push @Vals, $Arg; # another piece of the current value
-		}
-	}
-	$Store->(); # finish the last parameter
-	return %Hash; # hash now has each -- entry param, with associated values
-}
-
-# SaveArrayAsString
-# Writes the elements of an array as one tab-separated line.
-#   arg 1: filehandle to print to
-#   arg 2: reference to the array of fields
-sub SaveArrayAsString
-{
-	my ($Handle, $ListRef) = @_;
-	my $Row = join ("\t", @{$ListRef});
-	print {$Handle} $Row, "\n";
-}
-
-# ConcatenateVariants
-# Stub: walks the input records without doing anything with them and always
-# returns a reference to a new, empty array.
-#   arg 1: reference to the array of input records
-#   arg 2: number of the field to process (currently unused)
-sub ConcatenateVariants
-{
-	my ($Records, $StateFieldNr) = @_;
-	my @Merged; # records out; nothing is ever added
-	for my $Record (@{$Records})
-	{
-		# no processing implemented
-	}
-	return \@Merged; # return ptr to array
-}
-
-# LoadStateRecord
-# Fills a state record (hash ref) from one array of fields.
-#   arg 1: record to fill (hash ref)
-#   arg 2: reference to the array of input fields; fields 1, 2 and 3 supply
-#          Chr, Begin and End
-#   arg 3: index of the field that supplies State
-# The Records counter of the record is incremented by one.
-sub LoadStateRecord
-{
-	my ($Out, $In, $StateFieldNr) = @_;
-
-	# state, chr, begin and end of the state range in one slice assignment
-	@{$Out}{"State", "Chr", "Begin", "End"} = @{$In}[$StateFieldNr, 1, 2, 3];
-	$Out->{Records}++; # record added to new count
-}
-
-# OpenFile
-# Opens a file for reading and returns the filehandle; dies if it cannot be opened.
-#   arg 1: path of the file
-# Uses 3-arg open so the path is always treated as a plain filename: characters
-# such as a leading '>' or a trailing '|' cannot switch the open mode or start
-# a command.
-sub OpenFile
-{
-	my $File = shift;
-	open (my $FH, "<", $File) or die ("$!: can't open file $File");
-	return $FH;
-}
-
-# OpenFileold
-# Opens a .bz2, .gz or .tsv file for reading, chosen by file extension, and
-# returns the filehandle. Dies if the extension is not recognised or the open fails.
-#   arg 1: path of the file
-# Compressed files are read through bzcat / gunzip -c. The commands are started
-# with list-form pipe opens so the path never passes through a shell.
-sub OpenFileold
-{
-	my $File = shift;
-	my $FH;
-
-	if ($File =~ /\.bz2$/) # the dot is escaped so that only a real extension matches
-	{
-		open ($FH, "-|", "bzcat", "--", $File) or die ("$!: can't open file $File");
-	}
-	elsif ($File =~ /\.gz$/)
-	{
-		open ($FH, "-|", "gunzip", "-c", "--", $File) or die ("$!: can't open file $File");
-	}
-	elsif ($File =~ /\.tsv$/)
-	{
-		open ($FH, "<", $File) or die ("$!: can't open file $File"); # plain text, no helper process needed
-	}
-	else
-	{
-		die ("do not recognise file type $File"); # no system error is involved here, so $! is not reported
-	}
-	return $FH;
-}
-
-# LoadNewRecord
-# Copies the Chr, State, Begin, End and Records entries from one record to another.
-#   arg 1: source record (hash ref)
-#   arg 2: destination record (hash ref); its other entries are left untouched
-sub LoadNewRecord
-{
-	my ($In, $Out) = @_;
-	foreach my $Field ("Chr", "State", "Begin", "End")
-	{
-		$Out->{$Field} = $In->{$Field};
-	}
-	$Out->{Records} = $In->{Records};
-}
-
-# NewStateRecord
-# Returns a reference to a new state record hash holding the initial values:
-# empty strings for Chr and State, -1 for Begin, End and Length, and 0 for the
-# Records, MIEs and StateErrors counters.
-sub NewStateRecord
-{
-	my %Fresh =
-	(
-		Chr => "", State => "", # text fields start empty
-		Begin => -1, End => -1, Length => -1, # positions and length start at -1
-		Records => 0, MIEs => 0, StateErrors => 0, # counters start at zero
-	);
-	return \%Fresh;
-}
--- a/scripts/tools/cg_scripts/List_Unique_Variants.xml	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,323 +0,0 @@
-<tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">
-
-  <description>with annotations from gene or var files</description> <!--adds description in toolbar-->
-
-  <command interpreter="perl"> <!--run executable-->
-		#if $file_types.file_type =="var2"
-			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
-			--Var_Type $file_types.variants
-			$file_types.scoresVAF
-			$file_types.scoresEAF
-			$file_types.varQuality
-			#if $file_types.data_sources.data_source == "in"
-				#for $v in $file_types.data_sources.varfiles <!--get each var file-->
-				--Input_File ${v.input}
-				#end for
-			#else
-				`cat $file_types.data_sources.varlist`
-			#end if
-
-		#else if $file_types.file_type =="var1"
-			List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
-			--Var_Type $file_types.variants
-			$file_types.scores
-			#if $file_types.data_sources.data_source == "in"
-				#for $v in $file_types.data_sources.varfiles <!--get each var file-->
-				--Input_File ${v.input}
-				#end for
-			#else
-				`cat $file_types.data_sources.varlist`
-			#end if
-
-		#else if $file_types.file_type =="gene"
-			List_Unique_Variants_2_1_0.pl --File_Type G --Output_File $output
-			--Var_Type $file_types.variants
-			--Component $file_types.component
-			--Impact $file_types.impact
-			#if $file_types.data_sources.data_source == "in"
-				#for $g in $file_types.data_sources.genefiles <!--get each var file-->
-				--Input_File ${g.input}
-				#end for
-			#else
-				`cat $file_types.data_sources.genelist`
-			#end if
-		#end if
-  </command>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <inputs>
-    <conditional name="file_types">
-			<!--form field to select file type-->
-			<param name="file_type" type="select" label="Select the input file type">
-				<option value="var2" selected="True">var files, format 2.x</option>
-				<option value="var1">var files, format 1.x</option>
-				<option value="gene">gene files</option>
-			</param>
-
-			<when value="var2">
-				<!--form field to select all variant types to annotate-->
-				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
-					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
-					<option value="All" selected="true">All</option>
-					<option value="snp">snp</option>
-					<option value="ins">ins</option>
-					<option value="del">del</option>
-					<option value="sub">sub</option>
-					<option value="ref">ref</option>
-				</param>
-
-				<!--form field to select varScoresVAF-->
-				<param name="scoresVAF" type="select" label="Include varScoreVAF?">
-					<option value="--Scores_VAF yes" selected="true">yes</option>
-					<option value="--Scores_VAF no">no</option>
-				</param>
-				<!--form field to select varScoresEAF-->
-				<param name="scoresEAF" type="select" label="Include varScoreEAF?">
-					<option value="--Scores_EAF yes" selected="true">yes</option>
-					<option value="--Scores_EAF no">no</option>
-				</param>
-				<!--form field to select varQuality-->
-				<param name="varQuality" type="select" label="Include varQuality?">
-					<option value="--Score_Qualities yes" selected="true">yes</option>
-					<option value="--Score_Qualities no">no</option>
-				</param>
-
-				<!--conditional to select variant file input-->
-				<conditional name="data_sources">
-					<param name="data_source" type="select" label="Where are the input var files?">
-						<option value="in" selected="true">imported into Galaxy</option>
-						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
-					</param>
-					<when value="in">
-						<!--form field to select variant files-->
-						<repeat name="varfiles" title="Variant files">
-							<param name="input" type="data" format="cg_var" label="Dataset">
-								<validator type="unspecified_build" />
-								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
-								 metadata_name="dbkey" metadata_column="1"
-								 message="cgatools is not currently available for this build."/>
-							</param>
-						</repeat>
-					</when>
-					<when value="out">
-						<!--form field to select crr file-->
-						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
-					</when>
-				</conditional>
-			</when>
-
-			<when value="var1">
-				<!--form field to select all variant types to annotate-->
-				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
-					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
-					<option value="All" selected="true">All</option>
-					<option value="snp">snp</option>
-					<option value="ins">ins</option>
-					<option value="del">del</option>
-					<option value="sub">sub</option>
-					<option value="ref">ref</option>
-				</param>
-
-				<!--form field to select scores-->
-				<param name="scores" type="select" label="Include totalScore?">
-					<option value="--Scores yes" selected="true">yes</option>
-					<option value="--Scores no">no</option>
-				</param>
-
-				<!--conditional to select variant file input-->
-				<conditional name="data_sources">
-					<param name="data_source" type="select" label="Where are the input var files?">
-						<option value="in" selected="true">imported into Galaxy</option>
-						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
-					</param>
-					<when value="in">
-						<!--form field to select variant files-->
-						<repeat name="varfiles" title="Variant files">
-							<param name="input" type="data" format="cg_var" label="Dataset">
-								<validator type="unspecified_build" />
-								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
-								 metadata_name="dbkey" metadata_column="1"
-								 message="cgatools is not currently available for this build."/>
-							</param>
-						</repeat>
-					</when>
-					<when value="out">
-						<!--form field to select crr file-->
-						<param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
-					</when>
-				</conditional>
-			</when>
-
-			<when value="gene">
-				<!--form field to select all variant types to annotate-->
-				<param name="variants" label="Select variant types to include" type="select" multiple="true" >
-					<!--<validator type="no_options" message="Please select at least one variant type."/>-->
-					<option value="All" selected="true">All</option>
-					<option value="snp">snp</option>
-					<option value="ins">ins</option>
-					<option value="del">del</option>
-					<option value="sub">sub</option>
-					<option value="ref">ref</option>
-				</param>
-
-				<!--form field to select component in gene file-->
-				<param name="component" type="select" label="Select component types to include" multiple="true" >
-				  <option value="All" selected="true">All</option>
-					<option value="CDS">CDS</option>
-					<option value="INTRON">INTRON</option>
-					<option value="DONOR">DONOR</option>
-					<option value="ACCEPTOR">ACCEPTOR</option>
-					<option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
-					<option value="SPAN5">SPAN5</option>
-					<option value="SPAN3">SPAN3</option>
-					<option value="SPAN">SPAN</option>
-					<option value="UTR5">UTR5</option>
-					<option value="UTR3">UTR3</option>
-					<option value="UTR">UTR</option>
-				</param>
-
-				<!--form field to select impact in gene file-->
-				<!-- Option values are handed to the script via the Impact option of the command; per the tool help they are values from the impact field of the gene file, so they must be spelled as they appear there -->
-				<param name="impact" type="select" label="Select impact types to include" multiple="true" >
-					<option value="All" selected="true">All</option>
-					<option value="NO-CHANGE">NO-CHANGE</option>
-					<option value="SYNONYMOUS">SYNONYMOUS</option>
-					<option value="MISSENSE">MISSENSE</option>
-					<option value="NONSENSE">NONSENSE</option>
-					<option value="NONSTOP">NONSTOP</option>
-					<option value="DELETE">DELETE</option>
-					<option value="INSERT">INSERT</option>
-					<option value="DELETE+">DELETE+</option>
-					<option value="INSERT+">INSERT+</option>
-					<option value="FRAMESHIFT">FRAMESHIFT</option>
-					<option value="MISSTART">MISSTART</option>
-					<option value="DISRUPT">DISRUPT</option>
-					<option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
-					<option value="UNKNOWN-INC">UNKNOWN-INC</option>
-					<option value="UNKNOWN-TR">UNKNOWN-TR</option>
-				</param>
-
-				<!--conditional to select gene file input-->
-				<conditional name="data_sources">
-					<param name="data_source" type="select" label="Where are the input gene files?">
-						<option value="in" selected="true">imported into Galaxy</option>
-						<option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
-					</param>
-					<when value="in">
-						<!--form field to select variant files-->
-						<repeat name="genefiles" title="Gene files">
-							<param name="input" type="data" format="cg_gene" label="Dataset">
-								<validator type="unspecified_build" />
-								<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
-								 metadata_name="dbkey" metadata_column="1"
-								 message="cgatools is not currently available for this build."/>
-							</param>
-						</repeat>
-					</when>
-					<when value="out">
-						<!--form field to select crr file-->
-						<param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
-					</when>
-				</conditional>
-			</when>
-
-		</conditional>
-  </inputs>
-
-
-  <help>
-
-**What it does**
-
-This tool identifies all called variants present in the var or gene files and generates annotated variant list.
-
------
-
-**Instructions**::
-
-		List Unique Variants for Pipeline 1.x and 2.x
-		[Uses header if available, checks for position of xref field if not]
-		Take one or more var or gene files
-		Extract a non-redundant set of variants
-
-		For var files:
-		The fields used to define non-redundant variants are:
-			chromosome begin end varType reference alleleSeq xRef
-		User can nominate class(es) of varType to filter on
-		Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
-			them off (separately)
-		Scores and qualities stored in separate fields, all values for a variant across
-			a set of genomes.
-		Values for different genomes separated by ':', for two hom entries for the same
-			genome by '|'
-		Output is accepted by testvariants to generate a variant table, all fields kept
-			in testvariants output
-
-		For gene files:
-		The fields used to define non-redundant gene variants are:
-			chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
-			orientation component componentIndex codingRegionKnown impact nucleotidePos
-			proteinPos annotationRefSequence sampleSequence genomeRefSequence
-		User can nominate class(es) of varType, component or impact to filter on
-		All gene entries kept ie  multiple entries if multiple transcripts
-
-		NB Now treating xref as a separate component in var recs, as it is not consistent
-			between X and Y vars
-		Not fixed for gene recs yet
-
-		perl List_Unique_Variants_2_1_0.pl
-		--File_Type [V|G]
-		--Input_File input_file_1 [set of var or gene files]
-		--Input_File input_file_2
-		...
-		--Input_File input_file_n
-		--Output_File filename
-		--Var_Type [For both file types, 'All' or any value from the varType field,
-				multiple values allowed, separated by comma]
-		--Component [Gene file specific, 'All' or any value from component field of gene
-				file, multiple allowed; 'All' is default]
-		--Impact [Gene file specific, 'All' or any value from impact field of gene
-				file, multiple allowed; 'All' is default]
-		--Scores [1.x var file specific, yes|no, yes is default]
-		--Scores_VAF [2.0 var file specific, yes|no, yes is default]
-		--Scores_EAF [2.0 var file specific, yes|no, yes is default]
-		--Score_Qualities [yes|no, yes is default]
-		eg
-		perl List_Unique_Variants_2_1_0.pl \
-		--File_Type V \
-		--Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
-		--Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
-		--Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
-		--Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
-		--Var_Type All \
-		--Component All \
-		--Impact All \
-		--Scores_VAF yes \
-		--Scores_EAF yes \
-		--Score_Qualities yes
-
-		var fields
-		1.x	locus ploidy haplotype chromosome begin end varType reference alleleSeq
-				totalScore hapLink xRef
-		2.0	locus ploidy allele chromosome begin end varType reference alleleSeq
-				varScoreVAF varScoreEAF varQuality hapLink xRef
-
-		gene fields
-		1.x index locus allele chromosome begin end varType reference call xRef geneId
-				mrnaAcc proteinAcc symbol orientation component componentIndex
-				codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
-				sampleSequence genomeRefSequence
-		2.0 index locus allele chromosome begin end varType reference call xRef geneId
-				mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
-				impact nucleotidePos proteinPos annotationRefSequence sampleSequence
-				genomeRefSequence pfam
-
-		Parsing and storing input parameters
-		Only input_file fields can be repeated
-		input parameters are case insensitive
-
-
-  </help>
-</tool>
--- a/scripts/tools/cg_scripts/List_Unique_Variants_2_1_0.pl	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,583 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use File::Basename;
-$| = 1;
-
-# List Unique Variants for Pipeline 1.x and 2.x
-# [Uses header if available, checks for position of xref field if not]
-# Take one or more var or gene files
-# Extract a non-redundant set of variants
-
-# For var files:
-# The fields used to define non-redundant variants are:
-# chromosome begin end varType reference alleleSeq xRef
-# User can nominate class(es) of varType to filter on
-# Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn them off (separately)
-# Scores and qualities stored in separate fields, all values for a variant across a set of genomes.
-# Values for different genomes separated by ':', for two hom entries for the same genome by '|'
-# Output is accepted by testvariants to generate a variant table, all fields kept in testvariants output
-
-# For gene files:
-# The fields used to define non-redundant gene variants are:
-# chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation component componentIndex codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence
-# User can nominate class(es) of varType, component or impact to filter on
-# All gene entries kept ie  multiple entries if multiple transcripts
-
-# NB Now treating xref as a separate component in var recs, as it is not consistent between X and Y vars
-# Not fixed for gene recs yet
-
-# perl List_Unique_Variants_2_1_0.pl
-# --File_Type [V|G]
-# --Input_File input_file_1 [set of var or gene files]
-# --Input_File input_file_2
-# ...
-# --Input_File input_file_n
-# --Output_File filename
-# --Var_Type [For both file types, 'All' or any value from the varType field, multiple values allowed, separated by comma]
-# --Component [Gene file specific,'All' or any value from component field of gene file, multiple allowed; 'All' is default]
-# --Impact All [Gene file specific,'All' or any value from impact field of gene file, multiple allowed; 'All' is default]
-# --Scores [1.x var file specific, yes|no, yes is default]
-# --Scores_VAF [2.0 var file specific, yes|no, yes is default]
-# --Scores_EAF [2.0 var file specific, yes|no, yes is default]
-# --Score_Quality [2.0 var file specific, yes|no, yes is default]
-# eg
-# perl /Users/rtearle/Documents/Programming/Perl/Scripts/Dev/List_Unique_Variants_2_0_4 \
-# --File_Type V \
-# --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
-# --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
-# --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
-# --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
-# --Var_Type All \
-# --Component All \
-# --Impact All \
-# --Scores_VAF yes \
-# --Scores_EAF yes \
-# --Score_Quality yes
-
-# var fields
-# 1.x
-# locus ploidy haplotype chromosome begin end varType reference alleleSeq totalScore hapLink xRef
-# 2.0
-# locus ploidy allele chromosome begin end varType reference alleleSeq varScoreVAF varScoreEAF varQuality hapLink xRef
-
-# gene fields
-# 1.x index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation
-#  component componentIndex codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence
-# 2.0 index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation
-# component componentIndex hasCodingRegion impact nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence pfam
-
-# Parsing and storing input parameters
-# Only input_file fields can be repeated
-# Input parameters are case insensitive
-
-# Parse the command line, then validate the file type and the list of input files
-print "$0 @ARGV\nProcessing input parameters\n";
-my $NrParams; # nr of expected params, set as a side effect of GetExpectedParams
-my %ExpectedParams =  GetExpectedParams (); # expected params with their default values
-my %EnteredParams = GetEnteredParams (); # params entered on the command line
-
-# Input Files
-my $FileType = $EnteredParams{file_type};
-if ($FileType ne "V" and $FileType ne "G") {die "File Type must be 'V' or 'G', not '$FileType'\n";}
-my $FilesIn = $EnteredParams{input_file}; # ptr to list of input files, undef if none were entered
-unless ($FilesIn and @$FilesIn) {die "No input files specified, use --Input_File\n";} # avoids dereferencing undef under strict
-print "File Type: $FileType\nInput Files:\n";
-my $NrInputFiles = int(@$FilesIn);
-foreach my $File (@$FilesIn) {print "$File\n";} # list all files before checking them
-my %SeenFiles; # file name => nr of times seen so far, finds duplicates in a single pass
-foreach my $File (@$FilesIn)
-{
-	unless (-f $File) {die "Input file $File not found\n";} # requires existing files
-	if ($SeenFiles{$File}++) {die "File $File is repeated in input file list\n";} # same name entered twice
-}
-
-# Output Dir
-#my $DirectoryOut = $EnteredParams{output_dir}; # output dir
-#$DirectoryOut =~ s/\/$//; # remove trailing slash if present
-#unless (-d $DirectoryOut) {mkdir $DirectoryOut or die "Cannot find/create output directory $DirectoryOut\n";} # uses existing dir or makes a new dir if it can
-
-# Output File
-# Splits --Output_File into a directory part (kept with its trailing slash) and a file name
-my $FileOut = $EnteredParams{output_file}; # output file, optionally with a leading path
-unless (defined $FileOut and length $FileOut) {die "No output file specified, use --Output_File\n";}
-print "FileOut: $FileOut\n";
-my $DirectoryOut = ""; # stays empty when no path was given, so $DirectoryOut.$FileOut is always usable
-if ($FileOut =~ /^(.+\/)(.+)$/) # only use $1 and $2 after a successful match
-{
-	$DirectoryOut = $1; # assign path, including trailing slash
-	$FileOut = $2; # assign file name
-}
-print "File: $FileOut\nDir: $DirectoryOut\n"; #exit;
-# if (-f $DirectoryOut.$FileOut) # ouput file exists, create a new one based on the name
-# {
-# 	print "Output file $FileOut exists, modifying to unique file name ";
-# 	$FileOut =~ /^(.+?)\./; # find name without extensions
-# 	my $Stub = $1; # set stub to name without extensions
-# 	$FileOut =~ /(\..+)?$/; # get extension(s)
-# 	my $Ext = $1; # set ext to extensions
-# 	my $n = 1; # n will increment to find a unique name
-# 	my $Suff = ""; # suff tracks n
-# 	while (-f $DirectoryOut.$Stub.$Suff.$Ext) {$Suff = "-$n"; $n++;} # loop till we have a new unique filename
-# 	$FileOut = $Stub.$Suff.$Ext; # file out now has same name, same extensions, but also -n at the end of the name, making it unique
-# 	print "$FileOut\n";
-# }
-
-#print "Files\n",join("\n",@$FilesIn),"\n\n";
-#print "Ouput Dir\n$DirectoryOut\n";
-#print "Ouput File\n$FileOut\n";
-
-# Extract Header & Column Header
-# Only the first input file is inspected here; every file is opened again in the processing loop
-my $IN = OpenFile ($$FilesIn[0]); # open the first file
-my $Header = GetHeaderAsString ($IN); # get header
-unless ($Header) {close $IN; $IN = OpenFile ($$FilesIn[0]);} # if there is no header, close and reopen file, ie start file again
-my $ColHeader = <$IN>; # get col header, first remaining line
-close $IN; # nothing more is read from this handle
-unless (defined $ColHeader) {die "No column header found in file $$FilesIn[0]\n";} # empty file or header only
-chomp $ColHeader;
-
-# Get version if filetype is var - needed because there are new fields in 2.0 and posn of xRef changed
-my ($Version, $XrefField);
-if ($FileType eq "V")
-{
-	($Version, $XrefField) = GetVersion ($Header, $ColHeader);
-	#print "$Version $XrefField\n"; exit;
-	unless ($Version) {die "Cannot determine format version of first file in list\nNeed either a native Complete header or a native Complete Column Header with an xRef field\n";}
-}
-
-# Shared input params
-# The entered list of var types is turned into a regex alternation, eg "snp, del" becomes "snp|del"
-my $OutputVarTypes = lc $EnteredParams{var_type} || $ExpectedParams{var_type}; # var types listed in file in lc
-$OutputVarTypes =~ s/\s+//g; # remove all white space
-$OutputVarTypes = join "|", grep {length} split /,/, $OutputVarTypes; # commas to |, dropping empty entries that would match everything
-$OutputVarTypes = $ExpectedParams{var_type} unless length $OutputVarTypes; # nothing usable entered, use default
-
-# Input Params for var file
-# Each Keep flag is a real boolean: 1 only when the value is "yes", 0 for anything else (eg "no")
-my ($KeepScoresVAF, $KeepScoresEAF, $KeepQuals, $KeepScores, $VarExtras);
-if ($FileType eq "V")
-{
-	if ($Version == 2)
-	{
-		# both spellings --Score_Quality and --Score_Qualities are accepted
-		my $Quals = $EnteredParams{score_quality} || $EnteredParams{score_qualities};
-		$KeepQuals = (lc ($Quals) || $ExpectedParams{score_quality}) eq "yes" ? 1 : 0; # keep varQuality for 2.0
-		$KeepScoresVAF = (lc ($EnteredParams{scores_vaf}) || $ExpectedParams{scores_vaf}) eq "yes" ? 1 : 0; # keep varScoreVAF for 2.0
-		$KeepScoresEAF = (lc ($EnteredParams{scores_eaf}) || $ExpectedParams{scores_eaf}) eq "yes" ? 1 : 0; # keep varScoreEAF for 2.0
-	}
-	else # Version 1
-	{
-		$KeepScores = (lc ($EnteredParams{scores}) || $ExpectedParams{scores}) eq "yes" ? 1 : 0; # keep totalScore for 1.x
-	}
-	$VarExtras = 1; # always collect scores and xRef for var files, the Keep flags only select which score columns are written
-}
-
-# Gene file filters: entered value in upper case, or the default when nothing was entered
-my $OutputComponents = uc ($EnteredParams{component}) || $ExpectedParams{component}; # compared against the component field
-my $OutputImpacts = uc ($EnteredParams{impact}) || $ExpectedParams{impact}; # compared against the impact field
-
-# Chromosome names in the order they are written to the output: chr1 to chr22, then chrX, chrY, chrM
-my @ChrNames = ((map { 'chr' . $_ } 1 .. 22), 'chrX', 'chrY', 'chrM');
-
-# One empty hash per chromosome, filled with that chromosome's records while the files are read
-my %Vars = map { ($_ => {}) } @ChrNames;
-#print "\n"; #exit;
-
-# Create output col header
-# $ColHeader (col header of the first input file) is replaced by the col header of the output file
-if ($FileType eq "V")
-{
-	# 1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq totalScore hapLink xRef
-	# 2.0 locus ploidy allele chromosome begin end varType reference alleleSeq varScoreVAF varScoreEAF varQuality hapLink xRef
-	my @Fields = split "\t", $ColHeader;
-	$ColHeader = join("\t",@Fields[3..8])."\t".$Fields[$XrefField]; # chromosome to alleleSeq, then xRef
-	if ($Version == 2) # 2.0
-	{
-		if ($KeepScoresVAF) {$ColHeader .= "\tvarScoreVAF";}
-		if ($KeepScoresEAF) {$ColHeader .= "\tvarScoreEAF";}
-		if ($KeepQuals) {$ColHeader .= "\tvarQuality";}
-	}
-	else # 1.x
-	{
-		if ($KeepScores) {$ColHeader .= "\ttotalScore";}
-	}
-}
-elsif ($FileType eq "G")
-{
-	# 1.x index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation
-	#  component componentIndex codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence
-	# 2.0 index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol orientation
-	# component componentIndex hasCodingRegion impact nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence pfam
-	my @Fields = split "\t", $ColHeader;
-	# cols 3..24 are the same 22 cols that ExtractGeneFields keeps for each rec; a missing col gives an empty name
-	$ColHeader = join("\t", map {defined $_ ? $_ : ""} @Fields[3..24]);
-
-	# Turn the entered lists into regex alternations, eg "CDS, INTRON" becomes "CDS|INTRON"
-	$OutputComponents =~ s/\s+//g; # remove all white space
-	$OutputComponents = join "|", grep {length} split /,/, $OutputComponents; # commas to |, dropping empty entries
-	$OutputComponents = $ExpectedParams{component} unless length $OutputComponents; # nothing usable entered, use default
-	$OutputImpacts =~ s/\s+//g; # remove all white space
-	$OutputImpacts = join "|", grep {length} split /,/, $OutputImpacts; # commas to |, dropping empty entries
-	$OutputImpacts = $ExpectedParams{impact} unless length $OutputImpacts; # nothing usable entered, use default
-	#print "OutputVarTypes: $OutputVarTypes\nOutputComponents: $OutputComponents\nOutputImpacts: $OutputImpacts\n";
-}
-else
-{
-	die "FileType $FileType not understood\n"; # redundant
-}
-
-# Set up Processing Subs
-# The processing loop calls ExtractSub2Use and AddRecSub2Use; they are aliased here
-# to the gene subs for file type 'G' and to the var subs for any other file type
-my $UseGeneSubs = $FileType eq "G";
-*ExtractSub2Use = $UseGeneSubs ? \&ExtractGeneFields : \&ExtractVarFields; # extracts the wanted fields of a rec
-*AddRecSub2Use = $UseGeneSubs ? \&AddGeneRec : \&AddVarRec; # stores a rec in a chr hash
-#print "Gene: ",\&ExtractGeneFields," ",\&AddGeneRec," Var: ",\&ExtractVarFields," ",\&AddVarRec," Using: ",\&ExtractSub2Use," ",\&AddRecSub2Use,"\n"; exit;
-
-# Process Files
-# Every rec that passes the filters is stored in the hash of its chromosome
-my $RecCount = 0; # total nr recs that passed the filters, across all files
-my $FileCount = 0; # nr of the file being processed, starting at 1, used to format eg scores field
-foreach my $File (@$FilesIn)
-{
-	print "Processing file $File\n";
-	$FileCount++; # counting nr files
-
-	# Open file, process header, col header
-	my $IN = OpenFile ($File); # open the file
-	my $Header = GetHeaderAsString ($IN); # get header
-	unless ($Header) {close $IN; $IN = OpenFile ($File);} # if no header, close and reopen this same file, ie start file again
-	my $ColHeader = <$IN>; # get col header, first remaining line
-	$ColHeader = "" unless defined $ColHeader; # file has no lines left
-	unless ($ColHeader =~ /^>/) {print "Suspect column header for file $File:\n$ColHeader\n";}
-
-	my $Count = 0; # nr recs for this file that passed the filters
-	while (my $Line = <$IN>) # loop through remainder of file ie data
-	{
-		# sub extracts wanted fields in rec as string, chr, other fields optionally
-		my ($Rec, $Chr, $ScoreVAF, $ScoreEAF, $ScoreQual, $XRef) = ExtractSub2Use ($Line, $XrefField);
-		next unless $Rec; # rec was filtered out
-		# NB %Vars only has entries for the chrs in @ChrNames, recs on any other chr are counted but never written
-		AddRecSub2Use ($Rec, $Vars{$Chr}, $ScoreVAF, $ScoreEAF, $ScoreQual, $FileCount, $XRef);
-		$Count++; # increment count of matched recs
-	}
-	print "Nr matched records for this file: $Count\n";
-	$RecCount += $Count; # add this file's count to total count
-	close $IN;
-}
-print "Nr matched records across all files:\t $RecCount\n"; # total count
-
-# Open file out, write col header
-print "Sorting and Saving to file $DirectoryOut$FileOut ...\n";
-open my $OUT, ">", $DirectoryOut . $FileOut or die "could not write to $DirectoryOut$FileOut: $!\n";
-if ($FileType eq "V")
-{
-	print $OUT "variantId\t"; # first col header for var file
-}
-else
-{
-	$ColHeader =~ s/\tcall\t/\talleleSeq\t/; # the 'call' col is named alleleSeq in the output
-	print $OUT "index\t"; # first col header for gene file
-}
-print $OUT "$ColHeader\n"; # remainder of col header
-
-# Write the recs chr by chr, in the order of @ChrNames, each rec preceded by a running nr
-$RecCount = 0; # reuse total count for nr of non-redundant vars
-$FileCount--; # reduce by one, used below to add missing delimiters
-foreach my $Chr (@ChrNames)
-{
-	foreach my $Rec (sort {SortStringsasArrays ($a, $b)} keys %{$Vars{$Chr}}) # using sub to sort on begin, end fields
-	{
-		next unless $Rec;
-		my $Stored = $Vars{$Chr}->{$Rec}; # what the add sub stored for this rec, only used for var files below
-		$RecCount++; # increment count of saved recs
-		print $OUT "$RecCount\t$Rec"; # printing count and rec
-		print $OUT "\t",$Stored->[4] if $FileType eq "V"; # print xref if var files
-
-		if ($VarExtras)
-		{
-			# Pad with ':' for the files processed after the last file that had this var.
-			# Counting is done on slot 1, which is filled for 1.x and 2.0 files alike
-			my $FieldDelimiterCount = () = $Stored->[1] =~ /:/g;
-			my $Addition = ":" x ($FileCount - $FieldDelimiterCount);
-			if ($Version == 2)
-			{
-				print $OUT "\t",$Stored->[1],$Addition if $KeepScoresVAF;
-				print $OUT "\t",$Stored->[2],$Addition if $KeepScoresEAF;
-				print $OUT "\t",$Stored->[3],$Addition if $KeepQuals;
-			}
-			else
-			{
-				print $OUT "\t",$Stored->[1],$Addition if $KeepScores;
-			}
-		}
-		print $OUT "\n";
-	}
-}
-close $OUT or die "could not finish writing to $DirectoryOut$FileOut: $!\n"; # buffered write errors show up at close
-print "Nr saved records:\t $RecCount\n"; # count of non-redundant vars, c/f all vars above
-
-
-###########################################################################
-#                                   SUBS                                  #
-###########################################################################
-
-sub GetExpectedParams
-{
-	# Returns the expected params as a hash of param name (lower case) => default value.
-	# Also sets $NrParams to the nr of expected params.
-	my @Defaults =
-	(
-		"file_type" => -1,
-		"input_file" => [],
-		"output_file" => -1,
-		"var_type" => "all",
-		"component" => "ALL",
-		"impact" => "ALL",
-		"scores" => "yes",
-		"scores_eaf" => "yes",
-		"scores_vaf" => "yes",
-		"score_quality" => "yes",
-	);
-	my %Params = @Defaults; # name => default
-	$NrParams = scalar keys %Params;
-	return %Params;
-}
-
-sub GetEnteredParams
-{
-	# Parses @ARGV into a hash of param name (lower case) => value.
-	# A param starts at an argument that begins with '--'; the arguments that follow, up to
-	# the next param, are joined with spaces to form its value (undef if there are none).
-	# input_file may be repeated, its values are collected in an array ref.
-	# A '--' inside a value, eg in a file name, does not start a new param.
-	my %Hash;
-	my @Params; # one array per param: name, possibly with a value after a space, followed by further value arguments
-	foreach my $Arg (@ARGV)
-	{
-		if ($Arg =~ /^--(.+)$/s) {push @Params, [$1];} # start of a new param
-		elsif (@Params) {push @{$Params[-1]}, $Arg;} # part of the value of the current param
-		# anything before the first param is ignored
-	}
-	foreach my $Param (@Params)
-	{
-		my $Entry = join (" ", @$Param);
-		$Entry =~ s/\s+$//; # remove any trailing spaces
-		my ($Key, $Val) = split / /, $Entry, 2; # put first element into key, any other elements into val
-		$Key = lc $Key; # make lower case, ie case insensitive
-		if ($Key eq "input_file") # multiple entries expected, setting up array
-		{
-			push @{$Hash{$Key}}, $Val; # add input file to list
-		}
-		else
-		{
-			$Hash{$Key} = $Val; # make a hash entry out of key and val
-		}
-	}
-	return %Hash; # hash now has each --entry param, with associated values
-}
-
-sub OpenFile
-{
-	# Opens a file for reading and returns the file handle, dies if the file cannot be opened.
-	# The three argument form of open is used so that the file name is never read as a mode or a command.
-	my $File = shift;
-	open my $FH, "<", $File or die "$!: can't open file $File\n";
-	return $FH;
-}
-
-sub OpenFileold
-{
-	# Opens a file for reading and returns the file handle, dies if the open fails.
-	# Files ending in .bz2 are read through bzcat, files ending in .gz through gunzip -c,
-	# all other files are opened directly.
-	# The commands are run with the list form of open, so the file name is passed
-	# as a single argument and never interpreted by a shell.
-	my $File = shift;
-	my $FH;
-
-	if ($File =~ /\.bz2$/)
-	{
-		open ($FH, "-|", "bzcat", $File) or die ("$!: can't open file $File");
-	}
-	elsif ($File =~ /\.gz$/)
-	{
-		open ($FH, "-|", "gunzip", "-c", $File) or die ("$!: can't open file $File");
-	}
-	else
-	{
-		unless ($File =~ /\.tsv$/ or $File =~ /\.txt$/)
-		{
-			print ("Do not recognise file type for file $File.\nOpening as text file\n");
-		}
-		open ($FH, "<", $File) or die ("$!: can't open file $File");
-	}
-	return $FH;
-}
-
-sub GetHeaderAsString
-{
-	# Reads lines from an open file handle up to the first empty line and returns
-	# them as one string, the lines joined without a separator.
-	# Returns an empty string if no empty line is found within the first lines of
-	# the file (the check stops after 52 lines) or before the end of the file;
-	# the caller treats that as 'no header'.
-	# The handle is left positioned after the last line read.
-	my $FH = shift;
-	my $Header = "";
-	my $Count = 0;
-	while (my $Line = <$FH>)
-	{
-		chomp $Line;
-		return $Header if $Line eq ""; # empty line marks the end of the header
-		$Header .= $Line;
-		return "" if $Count++ > 50; # too many lines for a header, must be no header
-	}
-	return ""; # end of file reached without an empty line, no header
-}
-
-sub GetVersion
-{
-	# Works out the format version of a var file and the position of its xRef field.
-	# Params: the header as a string (may be empty) and the column header line.
-	# Returns (version, xref field nr): (1, 11) for 1.x and (2, 13) for 2.x.
-	# Version is 0 if it cannot be determined; xref field nr is -1 if no xRef col was found.
-	# Only var files ('V') are examined, for other file types (0, -1) is returned.
-	my $Header = shift;
-	my $ColHeader = shift;
-
-	my $Version = 0; # need to know if it is 1.x or 2.x
-	my $XrefField = -1;
-
-	if ($FileType eq "V")
-	{
-		if ($Header)
-		{
-			my $FormatVersion = $Header =~ /#FORMAT_VERSION\t(\d)/ ? $1 : 0; # only use $1 after a successful match
-			if ($FormatVersion == 1) {$Version = 1; $XrefField = 11;}
-			elsif ($FormatVersion == 2) {$Version = 2; $XrefField = 13;}
-			else {print "Warning: Format Version not found in Header\n";} # not in header
-		}
-		unless ($Version) # no usable header, look for the xRef col in the col header
-		{
-			my @ColHeader = split /\t/, $ColHeader;
-			for my $n (0..$#ColHeader)
-			{
-				if ($ColHeader[$n] eq "xRef") # compare each col name, not the whole line
-				{
-					$XrefField = $n;
-					if ($n == 11) 	 {$Version = 1;}
-					elsif ($n == 13) {$Version = 2;}
-					last;
-				}
-			}
-		}
-	}
-	return ($Version, $XrefField);
-}
-
-sub ExtractGeneFields
-{
-	# Takes one rec (line) of a gene file.
-	# Returns (rec, chr): rec is cols 3 to 24 of the line joined with tabs, ie without the
-	# file specific index, locus and allele cols; chr is col 3.
-	# Returns ("","") if the rec does not pass the var type, component or impact filter.
-	# gene fields
-	# >index locus allele chromosome begin end varType reference call xRef geneId mrnaAcc
-	# proteinAcc symbol orientation component componentIndex hasCodingRegion impact
-	# nucleotidePos proteinPos annotationRefSequence sampleSequence genomeRefSequence pfam
-	my ($Line) = @_;
-	chomp $Line; # remove return
-	my @Cols = split "\t", $Line;
-
-	# each filter passes everything when set to all, otherwise the col must match the regex
-	if ($OutputVarTypes ne "all" and $Cols[6] !~ /$OutputVarTypes/) {return ("","");} # varType
-	if ($OutputComponents ne "ALL" and $Cols[15] !~ /$OutputComponents/) {return ("","");} # component
-	if ($OutputImpacts ne "ALL" and $Cols[18] !~ /$OutputImpacts/) {return ("","");} # impact
-
-	return (join("\t",@Cols[3..24]), $Cols[3]);
-}
-
-sub ExtractVarFields
-{
-	# Takes one rec (line) of a var file and the nr of its xRef field.
-	# Returns a six element list (rec, chr, score 1, score 2, quality, xref):
-	#   rec is cols 3 to 8 (chromosome to alleleSeq) joined with tabs, chr is col 3
-	#   2.0 with scores: score 1 is varScoreVAF, score 2 is varScoreEAF, quality is varQuality
-	#   1.x with scores: score 1 is totalScore, score 2 and quality are undef
-	#   without scores: all three are undef
-	# The xref is always the sixth element, which is where the caller reads it.
-	# Returns ("","") if the rec does not pass the var type filter.
-	my $Rec = shift;
-	my $XrefField = shift;
-	# var fields
-	# 1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq totalScore hapLink xRef
-	# 2.0 locus ploidy allele chromosome begin end varType reference alleleSeq varScoreVAF varScoreEAF varQuality hapLink xRef
-
-	chomp $Rec; # remove return
-	my @Fields = split "\t", $Rec;
-	unless ($OutputVarTypes eq "all" or $Fields[6] =~ /$OutputVarTypes/) {return ("","");} # nominated vals not found, leave
-	my $Chr = $Fields[3]; # assign chr
-	my $XRef = $Fields[$XrefField]; # assign xref
-	$Rec = join("\t",@Fields[3..8]);
-
-	return ($Rec, $Chr, undef, undef, undef, $XRef) unless $VarExtras; # no scores wanted
-	return ($Rec, $Chr, $Fields[9], $Fields[10], $Fields[11], $XRef) if $Version == 2;
-	return ($Rec, $Chr, $Fields[9], undef, undef, $XRef); # $Version == 1
-}
-
-sub AddVarRec
-{
-	# Adds one var rec to the hash of its chromosome.
-	# Params: rec (used as hash key), ref to the chr hash, score 1 (varScoreVAF for 2.0,
-	# totalScore for 1.x), score 2 (varScoreEAF), quality, nr of the current file, xref.
-	# Each hash value is an array:
-	#   [0] nr of the last file that had this var
-	#   [1] [2] [3] score 1, score 2 and quality strings, only filled when scores are collected
-	#   [4] xref, the longest one seen for this var
-	# In the score strings values of different files are separated by ':', with an empty
-	# value for a file that did not have the var; a second entry from the same file is separated by '|'.
-	my ($Rec, $RecHash, $ScoreVAF, $ScoreEAF, $ScoreQual, $FileCount, $XRef) = @_;
-	return unless $RecHash; # no hash for this chr, the rec cannot be stored
-	$XRef = "" unless defined $XRef;
-
-	my $Delimiter;
-	if ($RecHash->{$Rec}) # hash entry for this var already exists
-	{
-		if ($RecHash->{$Rec}->[0] == $FileCount)
-		{
-			$Delimiter = "|"; # same file, var is hom, use |
-		}
-		else # diff file, use :
-		{
-			# count on slot 1, it is filled for 1.x and 2.0 files alike
-			my $FieldDelimiterCount = () = $RecHash->{$Rec}->[1] =~ /:/g; # count nr field delims
-			$Delimiter = ":" x ($FileCount - $FieldDelimiterCount - 1); # delimiters for any processed files, that didnt have this var
-		}
-		my $StoredXRef = $RecHash->{$Rec}->[4];
-		$StoredXRef = "" unless defined $StoredXRef;
-		$RecHash->{$Rec}->[4] = $XRef if length ($XRef) > length ($StoredXRef); # replace xref if new xref is longer
-	}
-	else  # new var
-	{
-		$RecHash->{$Rec} = []; # create array to hold it
-		$Delimiter = ":" x ($FileCount - 1); # delimiters for prev processed files, that didnt have this var
-		$RecHash->{$Rec}->[4] = $XRef; # add xref
-	}
-	$RecHash->{$Rec}->[0] = $FileCount;
-
-	return unless $VarExtras; # scores not collected, the xref is all that is kept
-
-	if ($Version == 2)
-	{
-		$RecHash->{$Rec}->[1] .= $Delimiter.$ScoreVAF; # add delimiter, varScoreVAF
-		$RecHash->{$Rec}->[2] .= $Delimiter.$ScoreEAF; # add delimiter, varScoreEAF
-		$RecHash->{$Rec}->[3] .= $Delimiter.($ScoreQual eq "VQHIGH" ? "H" : "L"); # add delimiter, qual
-	}
-	else
-	{
-		$RecHash->{$Rec}->[1] .= $Delimiter.$ScoreVAF; # add delimiter, totalScore
-	}
-}
-
-sub AddGeneRec
-{
-	# Counts a gene rec in the hash of its chromosome: the rec is the key, the value is
-	# the nr of times the rec has been added
-	my ($GeneRec, $ChrHash) = @_;
-	$ChrHash->{$GeneRec} += 1;
-}
-
-sub SortStringsasArrays
-{
-	# Compares two tab delimited recs for sorting, numerically on field 1 (begin) and,
-	# when the begins are equal, on field 2 (end).
-	# Returns -1, 0 or 1 in the same way as the <=> operator
-	my ($Left, $Right) = @_;
-
-	my ($Begin1, $End1) = (split "\t", $Left)[1, 2];
-	my ($Begin2, $End2) = (split "\t", $Right)[1, 2];
-
-	return $Begin1 <=> $Begin2 || $End1 <=> $End2;
-}
--- a/tool_data_table_conf.xml.sample	Wed Jun 13 14:06:45 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<tables>
-	<!--
-			 Add the following section to tool_data_table_conf.xml file in your Galaxy distribution if you are adding Complete Genomics tools manually to your Galaxy instance
-	-->
-	<!-- Start location of cgatools crr files -->
-	<table name="cg_crr_files" comment_char="#">
-			<columns>value, dbkey, name, path</columns>
-			<file path="tool-data/cg_crr_files.loc" />
-	</table>
-	<!-- End Location of cgatools crr files -->
-</tables>