view snpEff_2_1a/scripts/download.sh @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
line wrap: on
line source

#!/bin/sh

# Abort on the first failing command. The option is set here instead of on
# the shebang line because interpreter options given there are ignored when
# the script is started as `sh download.sh`.
set -e

# Ensembl release number substituted into the ftp.ensembl.org URLs below.
RELEASE=66

# The download directory is expected to exist already (creation is disabled).
# mkdir download
cd download

#---
# Download
#---

# # Download GTF files (annotations)
# wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
# 
# # Download FASTA files (reference genomes)
# wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
# 
# # Download CDS sequences
# wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
# 
# # Download PROTEIN sequences
# wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"

#---
# Create directory structure
#---

# # Move all downloaded files to this directory
# mv `find ftp.ensembl.org -type f` .
 
# # Gene annotations files
# for gtf in *.gtf.gz
# do
# 	short=`../scripts/file2GenomeName.pl $gtf | cut -f 5`
# 	echo ANNOTATIONS: $short
# 
# 	mkdir -p data/$short
# 	cp $gtf data/$short/genes.gtf.gz
# done
#  
# # Reference genomes files
# mkdir -p data/genomes
# for fasta in *.dna.toplevel.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
# 	echo REFERENCE: $genome
# 
# 	cp $fasta data/genomes/$genome.fa.gz
# done
# 
# # CDS sequence files
# for fasta in *.cdna.all.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
# 	echo CDS: $genome
# 
# 	cp $fasta data/$genome/cds.fa.gz
# done
# 
# # Protein sequence files
# for pep in *.pep.all.fa.gz
# do
# 	short=`../scripts/file2GenomeName.pl $pep | cut -f 5`
# 	echo PROTEIN: $short
# 
# 	mkdir -p data/$short
# 	cp $pep data/$short/protein.fa.gz
# done

#---
# Config file entries
#---

# for fasta in *.cdna.all.fa.gz
# do
# 	genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4`
# 	short=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
# 
# 	# Individual genome entry
# 	echo -e "$short.genome : $genome"
# 	echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
# 	echo
# done

# Return to the directory we were in before entering download/.
# `cd -` echoes the directory it switches to, so that output is discarded.
cd - >/dev/null

#---
# Create build queue entries
#---

# rm -vf queue_build.txt
# 
# # Build from TXT files
# for genes in data/*/genes.txt*
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v $genomeName"
# done | sort >> queue_build.txt
# 
# # Build from GFF2 files
# echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt
# 
# # Build from GFF3 files
# for genes in `ls data/*/genes.gff* | grep -v amel2`
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName"
# done | sort >> queue_build.txt
# 
# # Build from GTF22 files
# for genes in data/*/genes.gtf*
# do
# 	dir=`dirname $genes`
# 	genomeName=`basename $dir`
# 	echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName"
# done | sort >> queue_build.txt