annotate Calculate_TestVariants_Variant_Frequencies_0_1_0.pl @ 3:a2b8590be75e draft

Deleted selected files
author bcrain-completegenomics
date Thu, 24 May 2012 15:17:00 -0400
parents 3a4894be7df2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
1 #!/usr/bin/perl
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
2 use strict;
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
3 #use feature "say";
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
4 #use File::Basename;
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
5 $| = 1;
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
6
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
7 # Get_TestVariants_Variant_Frequencies
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
8 # Calculate the frequencies of variants in a testvariants output file
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
9 # Two values calculated:
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
10 # Frequency vs all alleles
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
11 # Frequency vs called alleles
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
12
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
13 # Input is a testvariants file
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
14 # Outputs:
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
15 # All data to *-Freq.tsv, including scores and quals
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
16 # vars and freqs to *-Freq_Short.tsv
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
17 # Exceptions to *-Freq_Log
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
18 # Stats to *-Freq_Stats
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
19
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
20 # Format:
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
21 # perl prog file dir
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
22 # ie
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
23 # perl Get_TestVariants_Variant_Frequencies \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
24 # --Input input_file \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
25 # --First_Genome_Field_Nr col_nr1 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
26 # --Last_Genome_Field_Nr col_nr2 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
27 # --Output1 output1 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
28 # --Output2 output2
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
29
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
30 # eg
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
31 # perl /perl/Get_TestVariants_Variant_Frequencies_0_0_1.pl \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
32 # --Input /data/Family_Quartet_testvariants.tsv \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
33 # --First_Genome_Field_Nr 9 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
34 # --Last_Genome_Field_Nr 11 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
35 # --Output1 output1 \
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
36 # --Output2 output2
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
37
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
38
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
39 # Rick Tearle 2010-11
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
40
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main program.
# Reads a testvariants file, counts the 1/0/N allele calls found in the
# genome columns of every row and writes two tab-separated outputs:
#   Output1 - the full input row plus AllFreq and CalledFreq
#   Output2 - the first eight columns plus AllFreq and CalledFreq
# Progress messages, ploidy warnings and the two frequency histograms are
# printed to STDOUT.
# ---------------------------------------------------------------------------
my $Time = -time; # negated start time; adding time() at the end gives elapsed seconds
my $Debug = 0;

# Parsing and storing input parameters
print "$0 @ARGV\nProcessing input parameters\n";
my %ExpectedParams = GetExpectedParams (); # NOTE(review): fetched but never consulted below
my %EnteredParams = GetEnteredParams ();

# Setting up prog paras from input paras
my $FileIn = defined $EnteredParams{input} ? $EnteredParams{input} : "";
unless (-f $FileIn) {die "Testvariants input file $FileIn not found\n";} # requires existing file

# Output names are mandatory: a 3-arg open on an undefined file name silently
# creates an anonymous temporary file, so the results would simply vanish.
foreach my $Param ("output1", "output2")
{
    unless (defined $EnteredParams{$Param} and length $EnteredParams{$Param})
    {
        die "No valid entry for ", ucfirst $Param, ", an output file name is required\n";
    }
}

print "\nOpening Input File:\n\t$FileIn\n";
my $IN = OpenFile ($FileIn); # dies if the file cannot be opened

open my $OUT1, ">", $EnteredParams{output1}
    or die "$!: can't open output file $EnteredParams{output1}\n";
open my $OUT2, ">", $EnteredParams{output2}
    or die "$!: can't open output file $EnteredParams{output2}\n";

# Get col header and genomes fields
my $ColHeader = <$IN>; # get col header
unless (defined $ColHeader) {die "Testvariants input file $FileIn is empty\n";}
chomp $ColHeader;
my @ColHeader = split /\t/, $ColHeader;
my $StartGenomes = $EnteredParams{first_genome_field_nr} - 1; # first column with testvariants data, 1 based -> 0 based
my $StopGenomes = $EnteredParams{last_genome_field_nr} - 1; # last column with testvariants data, 1 based -> 0 based
if ($StartGenomes < 0) {die "No valid entry for First_Genome_Field_Nr, must be 1 or greater\n";}
if ($StopGenomes < 0) {die "No valid entry for Last_Genome_Field_Nr, must be 1 or greater\n";}
if ($StartGenomes > $StopGenomes) {die "Last_Genome_Field_Nr must be greater than or equal to First_Genome_Field_Nr\n";}
# The indices are 0 based, so the largest valid index is (number of fields - 1);
# comparing with ">" would let a field number one past the header through.
if ($StartGenomes >= scalar @ColHeader) {die "First_Genome_Field_Nr > number of fields in column header\n";}
if ($StopGenomes >= scalar @ColHeader) {die "Last_Genome_Field_Nr > number of fields in column header\n";}
my $NrGenomes = $StopGenomes - $StartGenomes + 1;

# print column headers
print $OUT1 join("\t",@ColHeader),"\tAllFreq\tCalledFreq\n";
print $OUT2 join("\t",@ColHeader[0..7]),"\tAllFreq\tCalledFreq\n"; # first eight columns are the variant description
print join("\t",@ColHeader),"\n";
print "First Genome Field: $ColHeader[$StartGenomes]\n";
print "Last Genome Field: $ColHeader[$StopGenomes]\n";
print "Nr Genomes: $NrGenomes\n\n";

print "\nProcessing Variants....\n";
my $VariantCount = 0; # variant locus counter, not used
my %AllFreqCounts; # storing histogram of all freq counts
my %CalledFreqCounts; # storing histogram of called freq counts
while (my $Line = <$IN>)
{
    # testvariants fields: variantId chromosome begin end varType reference alleleSeq xRef GS000000XX1-ASM GS000000XX2-ASM [GS000000XXN-ASM]
    chomp $Line;
    my @F = split /\t/, $Line; # split in to fields
    $VariantCount++; # increment variant counter

    # Genome fields as one string, to count 0s, 1s and Ns. Short rows yield
    # undefined slice elements, which are skipped.
    my $UseFields = join ("", grep {defined} @F[$StartGenomes..$StopGenomes]);
    my $Count1 = ($UseFields =~ tr/1//); # count the number of 1s
    my $Count0 = ($UseFields =~ tr/0//); # count the number of 0s
    my $CountN = ($UseFields =~ tr/N//); # count the number of Ns
    my $NrAlleles = $Count1 + $Count0 + $CountN; # total count
    unless ($NrAlleles == $NrGenomes * 2 or $NrAlleles == $NrGenomes) # count does not match expected for diploid/haploid locus
    {
        print "$NrAlleles alleles for variant ",join(" ",@F[0..7]),"\n"; # log warning
    }

    # Freq of 1s vs all alleles. A row without any allele call (already
    # reported by the warning above) gets 0 instead of dividing by zero.
    my $AllFreq = sprintf("%0.3f", $NrAlleles ? $Count1/$NrAlleles : 0);
    # Freq of 1s vs called (0 or 1) alleles, 0 when nothing was called.
    my $Called = $Count1 + $Count0;
    my $CalledFreq = sprintf("%0.3f", $Called ? $Count1/$Called : 0);

    $AllFreqCounts{$AllFreq}++; # increment all freq histogram
    $CalledFreqCounts{$CalledFreq}++; # increment called freq histogram
    print $OUT1 "$Line\t$AllFreq\t$CalledFreq\n"; # output full testvariants plus frequencies for this var
    print $OUT2 join("\t",@F[0..7]),"\t$AllFreq\t$CalledFreq\n"; # output just var info plus frequencies for this var
}
close $IN;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $OUT1 or die "$!: can't close output file $EnteredParams{output1}\n";
close $OUT2 or die "$!: can't close output file $EnteredParams{output2}\n";

# Print frequency histograms
print "Nr Variants at each Frequency (All):\nFreq\tCount\n"; # header
foreach my $Freq (sort {$a <=> $b} keys %AllFreqCounts) {print "$Freq\t$AllFreqCounts{$Freq}\n";}

print "\nNr Variants at each Frequency (Called):\nFreq\tCount\n"; # header
foreach my $Freq (sort {$a <=> $b} keys %CalledFreqCounts) {print "$Freq\t$CalledFreqCounts{$Freq}\n";}

$Time += time; # elapsed seconds
print "\ntime $Time\n";
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
142
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
143 ###########################################################################
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
144 # SUBS #
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
145 ###########################################################################
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
146
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# GetExpectedParams
# Returns a hash (as a flat list) of parameter names, each mapped to -1.
# NOTE(review): the names listed here ("input", "output_dir") do not match
# the options the main program actually reads (input, output1, output2,
# first_genome_field_nr, last_genome_field_nr); the values are kept as they
# were because nothing in this file consults the returned hash.
sub GetExpectedParams
{
    my %Hash =
    (
        "input" => -1,
        "output_dir" => -1,
    ); # store parameters and values
    return %Hash;
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
156
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# GetEnteredParams
# Parses @ARGV into a hash of option name => value and returns it as a list.
#   * An argument that starts with "--" opens a new option.
#   * The option name is the text up to the first space, lower-cased.
#   * Everything that follows, up to the next "--" argument, is joined with
#     single spaces and becomes the value (undef when there is none).
#   * Arguments that appear before the first "--" option are ignored.
#   * A later occurrence of an option overwrites an earlier one.
# Arguments are examined one at a time, so a value that merely contains
# "--" (for example a file called a--b.tsv) is no longer torn apart.
sub GetEnteredParams
{
    my %Hash;
    my @Chunks; # one "name value value ..." string per option

    foreach my $Arg (@ARGV)
    {
        if ($Arg =~ /^--(.*)$/s)
        {
            push @Chunks, $1; # start of a new option
        }
        elsif (@Chunks)
        {
            $Chunks[-1] .= " $Arg"; # continuation of the current option's value
        }
    }

    foreach my $Chunk (@Chunks)
    {
        $Chunk =~ s/\s+$//; # remove any trailing spaces
        my ($Key, $Val) = split / /, $Chunk, 2; # put first element into key, any other elements into val
        next unless defined $Key and length $Key; # a bare "--" names no option
        $Hash{lc $Key} = $Val; # make a hash entry out of key and val
    }
    return %Hash; # hash now has each -- entry param, with associated values
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
179
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# SaveArrayAsString
# Writes the elements of an array as one tab-separated line.
#   $FH     - filehandle open for writing
#   $Fields - reference to the array of field values
# Returns the result of print (true on success).
sub SaveArrayAsString
{
    my ($FH, $Fields) = @_;
    return print {$FH} join("\t", @$Fields), "\n";
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
187
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# ConcatenateVariants
# Unimplemented stub: the loop over the input records had an empty body, so
# the result is always a reference to an empty array.
#   $ArrayIn      - reference to the array of input records (currently unused)
#   $StateFieldNr - number of the field to process (currently unused)
# Returns a reference to a new, empty array.
sub ConcatenateVariants
{
    my $ArrayIn = shift; # ptr to array
    my $StateFieldNr = shift; # field to process
    my @ArrayOut; # array to store records out
    return \@ArrayOut; # return ptr to array
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
200
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# LoadStateRecord
# Fills a state record (hash ref) from one row of fields (array ref).
#   $Out          - hash ref to update: State, Chr, Begin, End are overwritten
#                   and Records is incremented
#   $In           - array ref holding the row; elements 1, 2 and 3 are used as
#                   chr, begin and end
#   $StateFieldNr - index in @$In of the element to store as State
# Returns the value Records had before the increment (a side effect of the
# original final statement, preserved for compatibility).
sub LoadStateRecord
{
    my ($Out, $In, $StateFieldNr) = @_;

    $Out->{State} = $In->[$StateFieldNr]; # get state for new record
    $Out->{Chr}   = $In->[1];             # get chr
    $Out->{Begin} = $In->[2];             # get begin of state range
    $Out->{End}   = $In->[3];             # get current end of state range
    return $Out->{Records}++;             # record added to new count
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
213
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# OpenFile
# Opens a file for reading and returns the filehandle.
#   $File - path of the file to open
# Dies with the system error message if the file cannot be opened.
# The three-argument form of open is used so that characters in the file
# name (a leading ">" or "<", a trailing "|", surrounding spaces) can never
# be interpreted as an open mode or a command.
sub OpenFile
{
    my $File = shift;
    my $FH;
    open ($FH, "<", $File) or die ("$!: can't open file $File");
    return $FH;
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
221
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# OpenFileold
# Legacy opener that picks the reader from the file extension.
#   .bz2 - read through "bzcat"
#   .gz  - read through "gunzip -c"
#   .tsv - opened directly for reading
#   $File - path of the file to open
# Returns a filehandle open for reading; dies if the extension is not
# recognised or the open fails.
# The external programs are started with the list form of a pipe open, so
# the file name is passed as a single argument and never parsed by a shell.
# NOTE: for the piped cases a failure of the external program itself (for
# example a corrupt archive) is only reported when the handle is closed.
sub OpenFileold
{
    my $File = shift;
    my $FH;

    if ($File =~ /\.bz2$/)
    {
        open ($FH, "-|", "bzcat", $File) or die ("$!: can't open file $File");
    }
    elsif ($File =~ /\.gz$/)
    {
        open ($FH, "-|", "gunzip", "-c", $File) or die ("$!: can't open file $File");
    }
    elsif ($File =~ /\.tsv$/)
    {
        open ($FH, "<", $File) or die ("$!: can't open file $File");
    }
    else
    {
        # no system call has failed here, so $! would carry no useful information
        die ("do not recognise file type $File");
    }
    return $FH;
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
245
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# LoadNewRecord
# Copies the Chr, State, Begin, End and Records entries from one record to
# another; any other keys of the destination are left untouched.
#   $In  - hash ref to copy from
#   $Out - hash ref to copy into
# Returns the copied Records value (the value of the original final
# assignment, preserved for compatibility).
sub LoadNewRecord
{
    my ($In, $Out) = @_;
    foreach my $Field (qw(Chr State Begin End Records))
    {
        $Out->{$Field} = $In->{$Field};
    }
    return $Out->{Records};
}
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
256
3a4894be7df2 Uploaded
bcrain-completegenomics
parents:
diff changeset
# NewStateRecord
# Creates a state record with its default values and returns a reference to
# it. Every call returns a fresh, independent hash.
#   Chr, State                 - empty strings
#   Begin, End, Length         - -1
#   Records, MIEs, StateErrors - 0
sub NewStateRecord
{
    return
    {
        Chr         => "",
        Begin       => -1,
        End         => -1,
        State       => "",
        Records     => 0,
        MIEs        => 0,
        StateErrors => 0,
        Length      => -1,
    };
}