Mercurial > repos > greg > validate_affy_metadata
comparison validate_affy_metadata.py @ 9:d15ae203013e draft
Uploaded
author | greg |
---|---|
date | Thu, 15 Nov 2018 10:20:49 -0500 |
parents | 0735cfcc7490 |
children | 96fbb0504ac9 |
comparison
legend: equal, deleted, inserted, replaced
8:0735cfcc7490 | 9:d15ae203013e |
---|---|
28 | 28 |
29 def stop_error(msg): | 29 def stop_error(msg): |
30 sys.exit(msg) | 30 sys.exit(msg) |
31 | 31 |
32 | 32 |
33 def string_as_boolean_string(string): | |
34 if str(string).lower() in ['true', 'yes', 'on', '1']: | |
35 return 'True' | |
36 else: | |
37 return 'False' | |
38 | |
39 | |
33 def validate_date_string(line_no, date_string, accumulated_msgs): | 40 def validate_date_string(line_no, date_string, accumulated_msgs): |
41 if len(date_string) == 0: | |
42 return accumulated_msgs | |
34 try: | 43 try: |
35 datetime.datetime.strptime(date_string, '%y/%m/%d') | 44 datetime.datetime.strptime(date_string, '%Y-%m-%d') |
36 return accumulated_msgs | 45 return accumulated_msgs |
37 except ValueError: | 46 except ValueError: |
38 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YY/MM/DD)." % (line_no, date_string)) | 47 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)) |
39 | 48 |
40 | 49 |
41 def validate_decimal(line_no, decimal_string, accumulated_msgs): | 50 def validate_decimal(line_no, decimal_string, accumulated_msgs): |
42 try: | 51 try: |
43 decimal.Decimal(decimal_string) | 52 decimal.Decimal(decimal_string) |
62 if i == 0: | 71 if i == 0: |
63 # Skip the header. | 72 # Skip the header. |
64 continue | 73 continue |
65 line = line.rstrip("\r\n") | 74 line = line.rstrip("\r\n") |
66 if i > 97: | 75 if i > 97: |
67 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 96 data lines.") | 76 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") |
68 stop_error(accumulated_msgs) | 77 stop_error(accumulated_msgs) |
69 items = line.split(",") | 78 items = line.split(",") |
70 if len(items) != 31: | 79 if len(items) != 29: |
71 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 31)." % (i, len(items))) | 80 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 29)." % (i, len(items))) |
72 stop_error(accumulated_msgs) | 81 stop_error(accumulated_msgs) |
73 # Required. | |
74 sample_id = items[0] | |
75 if len(sample_id) == 0: | |
76 accumulated_msgs = empty_value(i, "sample_id", accumulated_msgs) | |
77 # Required and validated. | 82 # Required and validated. |
78 date_entered_db = items[1] | 83 date_entered_db = items[0] |
79 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) | 84 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) |
80 # Required. | 85 # Required. |
81 user_specimen_id = items[2] | 86 user_specimen_id = items[1] |
82 if len(user_specimen_id) == 0: | 87 if len(user_specimen_id) == 0: |
83 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) | 88 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) |
84 # Optional. | 89 # Optional. |
85 duplicate_sample = items[3] | 90 field_call = items[2] |
86 # Optional. | 91 # Optional. |
87 matching_samples = items[4] | 92 bcoral_genet_id = items[3] |
88 # Optional. | 93 # Optional. |
89 field_call = items[5] | 94 bsym_genet_id = items[4] |
90 # Optional. | |
91 bcoral_genet_id = items[6] | |
92 # Optional. | |
93 bsym_genet_id = items[7] | |
94 # Required. | 95 # Required. |
95 reef = items[8] | 96 reef = items[5] |
96 if len(reef) == 0: | 97 if len(reef) == 0: |
97 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) | 98 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) |
98 # Required. | 99 # Required. |
99 region = items[9] | 100 region = items[6] |
100 if len(region) == 0: | 101 if len(region) == 0: |
101 accumulated_msgs = empty_value(i, "region", accumulated_msgs) | 102 accumulated_msgs = empty_value(i, "region", accumulated_msgs) |
102 # Required and validated. | 103 # Required and validated. |
103 latitude = items[10] | 104 latitude = items[7] |
104 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) | 105 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) |
105 # Required and validated. | 106 # Required and validated. |
106 longitude = items[11] | 107 longitude = items[8] |
107 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) | 108 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) |
108 # Optional. | 109 # Optional. |
109 geographic_origin = items[12] | 110 geographic_origin = items[9] |
110 # Optional. | 111 # Optional. |
111 sample_location = items[13] | 112 sample_location = items[10] |
112 # Optional. | 113 # Optional. |
113 latitude_outplant = items[14] | 114 latitude_outplant = items[11] |
114 # Optional. | 115 # Optional. |
115 longitude_outplant = items[15] | 116 longitude_outplant = items[12] |
116 # Optional. | 117 # Optional. |
117 depth = items[16] | 118 depth = items[13] |
118 # Optional. | 119 # Optional. |
119 dist_shore = items[17] | 120 dist_shore = items[14] |
120 # Optional. | 121 # Optional. |
121 disease_resist = items[18] | 122 disease_resist = items[15] |
122 # Optional. | 123 # Optional. |
123 bleach_resist = items[19] | 124 bleach_resist = items[16] |
124 # Optional. | 125 # Optional. |
125 mortality = items[20] | 126 mortality = items[17] |
126 # Optional. | 127 # Optional. |
127 tle = items[21] | 128 tle = items[18] |
128 # Optional. | 129 # Optional. |
129 spawning = items[22] | 130 spawning = string_as_boolean_string(items[19]) |
130 # Required. | 131 # Required. |
131 collector = items[23] | 132 collector_last_name = items[21] |
132 if len(collector) == 0: | 133 if len(collector_last_name) == 0: |
133 accumulated_msgs = empty_value(i, "collector", accumulated_msgs) | 134 accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs) |
134 # Required. | 135 # Required. |
135 org = items[24] | 136 collector_first_name = items[22] |
137 if len(collector_first_name) == 0: | |
138 accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs) | |
139 # Required. | |
140 org = items[23] | |
136 if len(org) == 0: | 141 if len(org) == 0: |
137 accumulated_msgs = empty_value(i, "org", accumulated_msgs) | 142 accumulated_msgs = empty_value(i, "org", accumulated_msgs) |
138 # Required and validated. | 143 # Required and validated. |
139 collection_date = items[25] | 144 collection_date = items[24] |
140 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) | 145 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) |
141 # Required and validated. | 146 # Required and validated. |
142 contact_email = items[26] | 147 contact_email = items[25] |
143 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) | 148 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) |
144 # Required. | 149 # Required. |
145 seq_facility = items[27] | 150 seq_facility = items[26] |
146 if len(seq_facility) == 0: | 151 if len(seq_facility) == 0: |
147 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) | 152 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) |
148 # Optional. | 153 # Optional. |
149 array_version = items[28] | 154 array_version = items[27] |
150 # Optional. | 155 # Optional. |
151 data_sharing = items[29] | 156 public = string_as_boolean_string(items[28]) |
152 # Optional. | 157 # Optional. |
153 data_hold = items[30] | 158 public_after_date = items[30] |
159 accumulated_msga = validate_date_string(public_after_date) | |
154 | 160 |
155 if len(accumulated_msgs) > 0: | 161 if len(accumulated_msgs) > 0: |
156 stop_error(accumulated_msgs) | 162 stop_error(accumulated_msgs) |
157 | 163 |
158 shutil.copyfile(args.input, args.output) | 164 shutil.copyfile(args.input, args.output) |