annotate sra_fetch.py @ 23:064f4fb1af40 draft

added vdb-validate tool
author Matt Shirley <mdshw5@gmail.com>
date Wed, 19 Jun 2013 16:44:33 -0400
parents 93a60318b9ca
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
1 import sys
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
2 import os
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
3 from ftplib import FTP
14
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
4 try:
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
5 import argparse
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
6 except ImportError:
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
7 import getopt
0
ffdd41766195 Initial version - still need to test if datatype works correctly, and implement scripted download of SRA binaries.
matt-shirley <mdshw5@gmail.com>
parents:
diff changeset
8
13
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
def main(args, max_attempts=10, retry_delay=1.0):
    """Download one SRA run archive from the NCBI FTP server to a local file.

    Args:
        args: Either an ``argparse`` namespace or a plain dict (the
            ``getopt`` fallback).  Must provide ``accession`` (for example
            ``SRR000001``) and ``out`` (path of the file to write).
        max_attempts: How many times to try changing into the remote
            directory before giving up.
        retry_delay: Seconds to wait between those attempts.

    Raises:
        ValueError: If the accession or the output path is missing/empty.
            This is checked before any network connection is opened.
        Exception: Any of ``ftplib.all_errors`` raised by the FTP session
            once ``max_attempts`` is exhausted or the transfer fails.
    """
    import ftplib
    import time

    # Accept both the argparse namespace and the getopt dict without
    # depending on which parser happens to be installed.
    if not isinstance(args, dict):
        args = vars(args)

    accession = (args.get('accession') or '').strip()
    out_path = args.get('out')
    if not accession:
        raise ValueError("an SRA accession is required (ex: SRR000001)")
    if not out_path:
        raise ValueError("an output file name is required")

    ftp_dir, filename = _sra_ftp_location(accession)

    ftp = FTP('ftp-trace.ncbi.nih.gov')
    try:
        ftp.login('ftp')
        # The server occasionally refuses the directory change, so retry,
        # but give up eventually instead of spinning forever on a
        # non-existent accession.
        attempts = max(1, max_attempts)
        for attempt in range(1, attempts + 1):
            try:
                ftp.cwd(ftp_dir)
                break
            except ftplib.all_errors:
                if attempt == attempts:
                    raise
                time.sleep(retry_delay)
        # Only create the output file once the remote directory is reached.
        with open(out_path, 'wb') as sra:
            ftp.retrbinary('RETR ' + filename, sra.write)
    finally:
        # Always release the connection, even when the transfer failed.
        try:
            ftp.quit()
        except ftplib.all_errors:
            ftp.close()


def _sra_ftp_location(accession):
    """Return ``(remote_directory, file_name)`` for a run accession.

    The directory is built from the first three characters, the first six
    characters and the whole accession.  The whole accession is used (not
    only its first nine characters) so longer run accessions are not
    truncated.
    NOTE(review): layout for accessions longer than nine characters follows
    NCBI's documented ByRun layout -- confirm against the server.
    """
    import posixpath  # FTP paths always use '/', whatever the local OS is.

    directory = posixpath.join('/sra/sra-instant/reads/ByRun/sra/',
                               accession[0:3],
                               accession[0:6],
                               accession)
    return directory, accession + '.sra'
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
35
14
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
def module_exists(module_name):
    """Tell whether ``module_name`` can be imported.

    The module is actually imported as a side effect of the check.

    Returns:
        True when the import succeeds, False when it raises ImportError.
    """
    try:
        __import__(module_name)
        return True
    except ImportError:
        return False
082fd6374582 fallback to getopt for fetch_sra.py
Matt Shirley <mdshw5@gmail.com>
parents: 13
diff changeset
43
13
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
def arguments(argv=None):
    """Parse the command line with argparse.

    Args:
        argv: Optional list of argument strings.  When None (the default),
            argparse reads ``sys.argv[1:]``.

    Returns:
        The argparse namespace, with ``accession`` and ``out`` attributes.

    Both options are required: without them the download cannot proceed,
    so argparse reports the problem and exits instead of handing ``None``
    values to the caller.
    """
    parser = argparse.ArgumentParser(description="Download an SRA from the NCBI SRA FTP")
    parser.add_argument('-a', '--accession', type=str, required=True,
                        help="SRA accession ex: SRR000001")
    parser.add_argument('-o', '--out', type=str, required=True,
                        help="Name for SRA file ")
    return parser.parse_args(argv)
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
50
45031bbf6b27 better sra_fetch code, compliant (?) datatype, updated dependencies in tool wrappers
Matt Shirley <mdshw5@gmail.com>
parents: 0
diff changeset
if __name__ == "__main__":
    if module_exists('argparse'):
        args = arguments()
    else:
        # Fall back to getopt for python < 2.7, where argparse is missing.
        usage = "usage: %s -a ACCESSION -o OUT\n" % os.path.basename(sys.argv[0])
        try:
            options, remainder = getopt.getopt(sys.argv[1:], 'a:o:',
                                               ['accession=', 'out='])
        except getopt.GetoptError as err:
            # Unknown option or missing option value: report it and stop
            # instead of dying with a traceback.
            sys.stderr.write("%s\n%s" % (err, usage))
            sys.exit(2)
        args = dict()
        for opt, arg in options:
            if opt in ('-a', '--accession'):
                args['accession'] = arg
            elif opt in ('-o', '--out'):
                args['out'] = arg
        # Both values are needed by main(); fail early with a clear message
        # when either is absent.
        missing = [name for name in ('accession', 'out') if not args.get(name)]
        if missing:
            sys.stderr.write("missing required option(s): %s\n%s"
                             % (', '.join(missing), usage))
            sys.exit(2)

    main(args)