changeset 1:66cb7175c473

Modify defuse create_reference_dataset.pl to filter out GTF lines that do not have a gene_name property, needed for CanFam3.1
author Jim Johnson <jj@umn.edu>
date Tue, 15 Oct 2013 15:51:47 -0500
parents 370c0672e531
children 252b7103d152
files tool_dependencies.xml
diffstat 1 files changed, 1 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/tool_dependencies.xml	Tue Sep 03 11:55:14 2013 -0400
+++ b/tool_dependencies.xml	Tue Oct 15 15:51:47 2013 -0500
@@ -23,6 +23,7 @@
                    sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/intronEst.txt.gz", $est_alignments);##' |
                    sed 's#^\(my .*ensembl_genome_version.*config.*get_value.*;\)#\1Qmy $ensembl_organism = $config->get_value("ensembl_organism");Qmy $ensembl_prefix = $config->get_value("ensembl_prefix");Qmy $ncbi_organism = $config->get_value("ncbi_organism");Qmy $ncbi_prefix = $config->get_value("ncbi_prefix");#' |
                    sed 's/^\(sub wget_gunzip\)/sub try_wgetQ{Q my $url = shift;Q my $filename = shift;Q my $filename_gz = $filename.".gz";Q my $rslt = system "wget $url -O $filename_gz";Q if($rslt == 0)Q {Q  $rslt = system "gunzip $filename_gz";Q }Q return $rslt;Q}QQ\1/' |
+                   sed 's#^\(.*/gtf/.*\)$#\1Q        system_with_check("if `grep -v gene_name $gene_models > /dev/null`; then mv $gene_models ${gene_models}.orig; grep gene_name  ${gene_models}.orig &gt; $gene_models; fi");#' | 
                    tr 'Q' '\n' |
                    awk 'BEGIN{pfx="p1";fn="p2";}/if \(not -e \$repeats_filename\)/{pfx="rmsk";fn="repeats_filename";} /if \(not -e \$est_alignments\)/{pfx="intronEst";fn="est_alignments"} /ucsc_genome_version eq "hg18"/{printf("\tif (try_wget(\"ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/%s.txt.gz\", \$%s) != 0)\n",pfx,fn);} $0 !~ /ucsc_genome_version eq "hg18/{print $0;}' |
                    sed 's#UniGene/Homo_sapiens#UniGene/$ncbi_organism#' |