Mercurial > repos > greg > snpeff_v2_from_pablo
view snpEff_2_1a/scripts/download.sh @ 0:f8eaa3f8194b default tip
Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author | greg |
---|---|
date | Fri, 20 Apr 2012 14:47:09 -0400 |
parents | |
children |
line wrap: on
line source
#!/bin/sh
#
# download.sh - fetch Ensembl files for one release, arrange them under
# data/<genome>/ and emit the snpEff build queue.
#
# Current state: every step except the two directory changes is disabled
# (commented out). As written, the script only enters ./download and then
# returns to the directory it was started from. The disabled steps are kept
# verbatim so they can be re-enabled one section at a time.
#
# NOTE(review): before re-enabling the disabled sections, be aware that they
# rely on unquoted expansions, backticks, parsing `ls` output and `echo -e`,
# none of which are robust/portable under /bin/sh.

# Abort on the first failing command. Set here rather than in the shebang so
# that it also applies when the script is run as `sh download.sh`.
set -e

# Ensembl release number used to build the FTP URLs below.
RELEASE=66

# mkdir download

# The download directory is expected to exist already (its creation above is
# disabled), so fail with an explicit message when it does not.
cd download || {
	echo "download.sh: cannot change to directory 'download'" >&2
	exit 1
}

#---
# Download
#---

# # Download GTF files (annotations)
# wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
#
# # Download FASTA files (reference genomes)
# wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
#
# # Download CDS sequences
# wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
#
# # Download PROTEIN sequences
# wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"

#---
# Create directory structure
#---

# # Move all downloaded files to this directory
# mv `find ftp.ensembl.org -type f` .
#
# # Gene annotations files
# for gtf in *.gtf.gz
# do
# 	short=`../scripts/file2GenomeName.pl $gtf | cut -f 5`
# 	echo ANNOTATIONS: $short
#
# 	mkdir -p data/$short
# 	cp $gtf data/$short/genes.gtf.gz
# done
#
# # Reference genomes files
# mkdir -p data/genomes
# for fasta in *.dna.toplevel.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
# 	echo REFERENCE: $genome
#
# 	cp $fasta data/genomes/$genome.fa.gz
# done
#
# # CDS genomes files
# for fasta in *.cdna.all.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
# 	echo CDS: $genome
#
# 	cp $fasta data/$genome/cds.fa.gz
# done
#
# # Protein sequence files
# for pep in *.pep.all.fa.gz
# do
# 	short=`../scripts/file2GenomeName.pl $pep | cut -f 5`
# 	echo PROTEIN: $short
#
# 	mkdir -p data/$short
# 	cp $pep data/$short/protein.fa.gz
# done

#---
# Config file entries
#---

# for fasta in *.cdna.all.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4`
# 	short=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
#
# 	# Individual genome entry
# 	echo -e "$short.genome : $genome"
# 	echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
# 	echo
# done

# Back to parent dir (`cd -` prints the directory it switches to; discard it)
cd - > /dev/null

#---
# Create build queue entries
#---

# rm -vf queue_build.txt
#
# # Build from TXT files
# for genes in data/*/genes.txt*
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v $genomeName"
# done | sort >> queue_build.txt
#
# # Build from GFF2 files
# echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt
#
# # Build from GFF3 files
# for genes in `ls data/*/genes.gff* | grep -v amel2`
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName"
# done | sort >> queue_build.txt
#
# # Build from GTF22 files
# for genes in data/*/genes.gtf*
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName"
# done | sort >> queue_build.txt