Mercurial > repos > mahtabm > ensembl
view variant_effect_predictor/Bio/EnsEMBL/Pipeline/FASTA/WuBlastIndexer.pm @ 3:d30fa12e4cc5 default tip
Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author | devteam <devteam@galaxyproject.org> |
---|---|
date | Mon, 13 Jan 2014 10:38:30 -0500 |
parents | 1f6dce3d34e0 |
children |
line wrap: on
line source
=pod =head1 LICENSE Copyright (c) 1999-2012 The European Bioinformatics Institute and Genome Research Limited. All rights reserved. This software is distributed under a modified Apache license. For license details, please see http://www.ensembl.org/info/about/code_licence.html =head1 CONTACT Please email comments or questions to the public Ensembl developers list at <dev@ensembl.org>. Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>. =head1 NAME Bio::EnsEMBL::Pipeline::FASTA::WuBlastIndexer =head1 DESCRIPTION Creates WUBlast indexes of the given GZipped file. The resulting index is created under the parameter location I<base_path> in blast and then in a directory defined by the type of dump. The type of dump also changes the file name generated. Genomic dumps have their release number replaced with the last repeat masked date. Allowed parameters are: =over 8 =item file - The file to index =item program - The location of the xdformat program =item molecule - The type of molecule to index. I<dna> and I<pep> are allowed =item type - Type of index we are creating. I<genomic> and I<genes> are allowed =item base_path - The base of the dumps =item release - Required for correct DB naming =back =cut package Bio::EnsEMBL::Pipeline::FASTA::WuBlastIndexer; use strict; use warnings; use base qw/Bio::EnsEMBL::Pipeline::FASTA::Indexer/; use Bio::EnsEMBL::Utils::Exception qw/throw/; use File::Copy qw/copy/; use File::Spec; use POSIX qw/strftime/; sub param_defaults { my ($self) = @_; return { program => 'xdformat', # molecule => 'pep', #pep or dna # type => 'genes' #genes or genomic }; } sub fetch_input { my ($self) = @_; my $mol = $self->param('molecule'); if($mol ne 'dna' && $mol ne 'pep') { throw "param 'molecule' must be set to 'dna' or 'pep'"; } my $type = $self->param('type'); if($type ne 'genomic' && $type ne 'genes') { throw "param 'type' must be set to 'genomic' or 'genes'"; } $self->assert_executable($self->param('program')); $self->assert_executable('gunzip'); } sub write_output { my ($self) = @_; $self->dataflow_output_id({ species => $self->param('species'), type => $self->param('type'), molecule => $self->param('molecule'), index_base => $self->param('index_base') }, 1); return; } sub index_file { my ($self, $file) = @_; my $molecule_arg = ($self->param('molecule') eq 'dna') ? '-n' : '-p' ; my $silence = ($self->debug()) ? 0 : 1; my $target_dir = $self->target_dir(); my $target_file = $self->target_file($file); my $db_title = $self->db_title($file); my $date = $self->db_date(); my $cmd = sprintf(q{cd %s && %s %s -q%d -I -t %s -d %s -o %s %s }, $target_dir, $self->param('program'), $molecule_arg, $silence, $db_title, $date, $target_file, $file); $self->info('About to run "%s"', $cmd); my $output = `$cmd 2>&1`; my $rc = $? >> 8; throw "Cannot run program '$cmd'. Return code was ${rc}. Program output was $output" if $rc; unlink $file or throw "Cannot remove the file '$file' from the filesystem: $!"; $self->param('index_base', $target_file); return; } sub target_file { my ($self, $file) = @_; my $target_dir = $self->target_dir(); my $target_filename = $self->target_filename($file); return File::Spec->catfile($target_dir, $target_filename); } # Produce a dir like /nfs/path/to/blast/genes/XXX && /nfs/path/to/blast/dna/XXX sub target_dir { my ($self) = @_; return $self->get_dir('blast', $self->param('type')); } sub db_title { my ($self, $source_file) = @_; my ($vol, $dir, $file) = File::Spec->splitpath($source_file); my $release = $self->param('release'); my $title = $file; $title =~ s/$release\.//; return $title; } sub db_date { my ($self) = @_; return strftime('%d-%m-%Y', gmtime()); } #Source like Homo_sapiens.GRCh37.68.dna.toplevel.fa #Filename like Homo_sapiens.GRCh37.20090401.dna.toplevel.fa sub target_filename { my ($self, $source_file) = @_; my ($vol, $dir, $file) = File::Spec->splitpath($source_file); if($self->param('type') eq 'genomic') { my @split = split(/\./, $file); my $rm_date = $self->repeat_mask_date(); $split[-4] = $rm_date; return join(q{.}, @split); } return $file; } 1;