Mercurial > repos > bcclaywell > argo_navis
annotate venv/bin/dynamodb_load @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
rev | line source |
---|---|
0
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
1 #!/Users/bclaywel/workspace/argo-navis/venv/bin/python2.7 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
2 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
3 import argparse |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
4 import os |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
5 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
6 import boto |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
7 from boto.compat import json |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
8 from boto.dynamodb.schema import Schema |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
9 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
10 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
11 DESCRIPTION = """Load data into one or more DynamoDB tables. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
12 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
13 For each table, data is read from two files: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
14 - {table_name}.metadata for the table's name, schema and provisioned |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
15 throughput (only required if creating the table). |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
16 - {table_name}.data for the table's actual contents. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
17 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
18 Both files are searched for in the current directory. To read them from |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
19 somewhere else, use the --in-dir parameter. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
20 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
21 This program does not wipe the tables prior to loading data. However, any |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
22 items present in the data files will overwrite the table's contents. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
23 """ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
24 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
25 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
def _json_iterload(fd):
    """Lazily load newline-separated JSON objects from a file-like object.

    A single JSON value may span several lines: lines are accumulated until
    the accumulated text decodes successfully, at which point the decoded
    value is yielded and accumulation starts over.

    :param fd: Any iterable of text lines (an open file, a ``StringIO``,
        a list of strings, ...).
    :returns: A generator yielding each decoded JSON value in file order.
    :raises ValueError: If the input ends while the accumulated text is
        non-blank and still not valid JSON (i.e. the file is malformed).
    """
    # Use the iterator protocol through the next() builtin instead of the
    # Python-2-only ``fd.next()`` method, so this runs on Python 2.6+ and 3
    # and accepts any iterable of lines.
    lines = iter(fd)
    pending = ""
    eof = False
    while not eof:
        try:
            # Add a line to the pending text.
            pending += next(lines)
        except StopIteration:
            # Don't let the exception bubble up yet: whatever is still
            # pending must get one last decoding attempt.
            eof = True

        candidate = pending.strip()
        if not candidate:
            # Only whitespace so far (blank line or empty file): nothing to
            # decode. At EOF the loop condition ends the generator.
            continue

        try:
            json_object = json.loads(candidate)
        except ValueError:
            if eof:
                # No more lines to load and the pending text is not valid
                # JSON: the file is, in fact, malformed.
                raise
            # Incomplete JSON value so far: load more lines.
            continue

        # Success: everything pending was decoded.
        pending = ""
        yield json_object
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
53 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
54 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
def create_table(metadata_fd):
    """Create a table from a metadata file-like object.

    Placeholder: the body is empty, so calling this does nothing with
    ``metadata_fd`` and returns ``None``.
    """
    return None
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
57 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
58 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
def load_table(table, in_fd):
    """Load items into a table from a file-like object.

    Every JSON object read from ``in_fd`` is stored as one item via
    ``table.new_item(attrs=...).put()``.

    :param table: Table object exposing ``new_item(attrs=...)``; the
        returned item must expose ``put()``.
    :param in_fd: Iterable of text lines holding newline-separated JSON
        objects (see ``_json_iterload``).
    """
    for item in _json_iterload(in_fd):
        # JSON has no set type, so any list value is converted back to a
        # set before storing. ``items()`` (rather than the Python-2-only
        # ``iteritems()``) keeps this working on Python 2 and 3.
        attrs = {
            key: set(value) if isinstance(value, list) else value
            for key, value in item.items()
        }
        table.new_item(attrs=attrs).put()
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
70 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
71 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
def dynamodb_load(tables, in_dir, create_tables):
    """Load data files into the named DynamoDB tables.

    For each table name ``t``, items are read from ``<in_dir>/<t>.data``.
    When ``create_tables`` is true and the table does not exist yet, it is
    first created from ``<in_dir>/<t>.metadata`` (keys ``schema``,
    ``read_units`` and ``write_units``) and the call waits until the table
    is active.

    :param tables: Iterable of table names.
    :param in_dir: Directory containing the ``.metadata`` / ``.data`` files.
    :param create_tables: If true, create tables that don't exist already;
        if false, every table must already exist.
    """
    conn = boto.connect_dynamodb()

    # Only create tables that are actually missing, as the --create-tables
    # help text promises; creating an existing table would fail. The lookup
    # is done once, and only when creation was requested.
    existing = set(conn.list_tables()) if create_tables else set()

    for t in tables:
        metadata_file = os.path.join(in_dir, "%s.metadata" % t)
        data_file = os.path.join(in_dir, "%s.data" % t)

        if create_tables and t not in existing:
            with open(metadata_file) as meta_fd:
                metadata = json.load(meta_fd)
            table = conn.create_table(
                name=t,
                schema=Schema(metadata["schema"]),
                read_units=metadata["read_units"],
                write_units=metadata["write_units"],
            )
            # Block until the new table is ready to accept writes.
            table.refresh(wait_for_active=True)
        else:
            table = conn.get_table(t)

        with open(data_file) as in_fd:
            load_table(table, in_fd)
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
92 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
93 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse arguments and load the given tables.
    parser = argparse.ArgumentParser(
        prog="dynamodb_load",
        description=DESCRIPTION,
        # DESCRIPTION is pre-formatted (paragraphs and a bullet list); the
        # default formatter would reflow it into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help="Create the tables if they don't exist already (without this flag, attempts to load data into non-existing tables fail)."
    )
    parser.add_argument(
        "--in-dir",
        default=".",
        help="Directory to read the .metadata and .data files from (default: current directory)."
    )
    parser.add_argument(
        "tables",
        metavar="TABLES",
        nargs="+",
        help="Names of the tables to load data into."
    )

    namespace = parser.parse_args()

    dynamodb_load(namespace.tables, namespace.in_dir, namespace.create_tables)