# HG changeset patch # User Jim Johnson # Date 1381870307 18000 # Node ID 66cb7175c47317730ac18ba331d559458a364913 # Parent 370c0672e531ad458c307ebe9914302861fb9b51 Modify defuse create_reference_dataset.pl to filter out GTF lines that do not have a gene_name property, needed for CanFam3.1 diff -r 370c0672e531 -r 66cb7175c473 tool_dependencies.xml --- a/tool_dependencies.xml Tue Sep 03 11:55:14 2013 -0400 +++ b/tool_dependencies.xml Tue Oct 15 15:51:47 2013 -0500 @@ -23,6 +23,7 @@ sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/intronEst.txt.gz", $est_alignments);##' | sed 's#^\(my .*ensembl_genome_version.*config.*get_value.*;\)#\1Qmy $ensembl_organism = $config->get_value("ensembl_organism");Qmy $ensembl_prefix = $config->get_value("ensembl_prefix");Qmy $ncbi_organism = $config->get_value("ncbi_organism");Qmy $ncbi_prefix = $config->get_value("ncbi_prefix");#' | sed 's/^\(sub wget_gunzip\)/sub try_wgetQ{Q my $url = shift;Q my $filename = shift;Q my $filename_gz = $filename.".gz";Q my $rslt = system "wget $url -O $filename_gz";Q if($rslt == 0)Q {Q $rslt = system "gunzip $filename_gz";Q }Q return $rslt;Q}QQ\1/' | + sed 's#^\(.*/gtf/.*\)$#\1Q system_with_check("if `grep -v gene_name $gene_models > /dev/null`; then mv $gene_models ${gene_models}.orig; grep gene_name ${gene_models}.orig > $gene_models; fi");#' | tr 'Q' '\n' | awk 'BEGIN{pfx="p1";fn="p2";}/if \(not -e \$repeats_filename\)/{pfx="rmsk";fn="repeats_filename";} /if \(not -e \$est_alignments\)/{pfx="intronEst";fn="est_alignments"} /ucsc_genome_version eq "hg18"/{printf("\tif (try_wget(\"ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/%s.txt.gz\", \$%s) != 0)\n",pfx,fn);} $0 !~ /ucsc_genome_version eq "hg18/{print $0;}' | sed 's#UniGene/Homo_sapiens#UniGene/$ncbi_organism#' |