comparison validate_affy_metadata.py @ 9:d15ae203013e draft

Uploaded
author greg
date Thu, 15 Nov 2018 10:20:49 -0500
parents 0735cfcc7490
children 96fbb0504ac9
comparison
equal deleted inserted replaced
8:0735cfcc7490 9:d15ae203013e
28 28
29 def stop_error(msg): 29 def stop_error(msg):
30 sys.exit(msg) 30 sys.exit(msg)
31 31
32 32
33 def string_as_boolean_string(string):
34 if str(string).lower() in ['true', 'yes', 'on', '1']:
35 return 'True'
36 else:
37 return 'False'
38
39
33 def validate_date_string(line_no, date_string, accumulated_msgs): 40 def validate_date_string(line_no, date_string, accumulated_msgs):
41 if len(date_string) == 0:
42 return accumulated_msgs
34 try: 43 try:
35 datetime.datetime.strptime(date_string, '%y/%m/%d') 44 datetime.datetime.strptime(date_string, '%Y-%m-%d')
36 return accumulated_msgs 45 return accumulated_msgs
37 except ValueError: 46 except ValueError:
38 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YY/MM/DD)." % (line_no, date_string)) 47 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string))
39 48
40 49
41 def validate_decimal(line_no, decimal_string, accumulated_msgs): 50 def validate_decimal(line_no, decimal_string, accumulated_msgs):
42 try: 51 try:
43 decimal.Decimal(decimal_string) 52 decimal.Decimal(decimal_string)
62 if i == 0: 71 if i == 0:
63 # Skip the header. 72 # Skip the header.
64 continue 73 continue
65 line = line.rstrip("\r\n") 74 line = line.rstrip("\r\n")
66 if i > 97: 75 if i > 97:
67 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 96 data lines.") 76 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).")
68 stop_error(accumulated_msgs) 77 stop_error(accumulated_msgs)
69 items = line.split(",") 78 items = line.split(",")
70 if len(items) != 31: 79 if len(items) != 29:
71 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 31)." % (i, len(items))) 80 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 29)." % (i, len(items)))
72 stop_error(accumulated_msgs) 81 stop_error(accumulated_msgs)
73 # Required.
74 sample_id = items[0]
75 if len(sample_id) == 0:
76 accumulated_msgs = empty_value(i, "sample_id", accumulated_msgs)
77 # Required and validated. 82 # Required and validated.
78 date_entered_db = items[1] 83 date_entered_db = items[0]
79 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) 84 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs)
80 # Required. 85 # Required.
81 user_specimen_id = items[2] 86 user_specimen_id = items[1]
82 if len(user_specimen_id) == 0: 87 if len(user_specimen_id) == 0:
83 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) 88 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs)
84 # Optional. 89 # Optional.
85 duplicate_sample = items[3] 90 field_call = items[2]
86 # Optional. 91 # Optional.
87 matching_samples = items[4] 92 bcoral_genet_id = items[3]
88 # Optional. 93 # Optional.
89 field_call = items[5] 94 bsym_genet_id = items[4]
90 # Optional.
91 bcoral_genet_id = items[6]
92 # Optional.
93 bsym_genet_id = items[7]
94 # Required. 95 # Required.
95 reef = items[8] 96 reef = items[5]
96 if len(reef) == 0: 97 if len(reef) == 0:
97 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) 98 accumulated_msgs = empty_value(i, "reef", accumulated_msgs)
98 # Required. 99 # Required.
99 region = items[9] 100 region = items[6]
100 if len(region) == 0: 101 if len(region) == 0:
101 accumulated_msgs = empty_value(i, "region", accumulated_msgs) 102 accumulated_msgs = empty_value(i, "region", accumulated_msgs)
102 # Required and validated. 103 # Required and validated.
103 latitude = items[10] 104 latitude = items[7]
104 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) 105 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs)
105 # Required and validated. 106 # Required and validated.
106 longitude = items[11] 107 longitude = items[8]
107 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) 108 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs)
108 # Optional. 109 # Optional.
109 geographic_origin = items[12] 110 geographic_origin = items[9]
110 # Optional. 111 # Optional.
111 sample_location = items[13] 112 sample_location = items[10]
112 # Optional. 113 # Optional.
113 latitude_outplant = items[14] 114 latitude_outplant = items[11]
114 # Optional. 115 # Optional.
115 longitude_outplant = items[15] 116 longitude_outplant = items[12]
116 # Optional. 117 # Optional.
117 depth = items[16] 118 depth = items[13]
118 # Optional. 119 # Optional.
119 dist_shore = items[17] 120 dist_shore = items[14]
120 # Optional. 121 # Optional.
121 disease_resist = items[18] 122 disease_resist = items[15]
122 # Optional. 123 # Optional.
123 bleach_resist = items[19] 124 bleach_resist = items[16]
124 # Optional. 125 # Optional.
125 mortality = items[20] 126 mortality = items[17]
126 # Optional. 127 # Optional.
127 tle = items[21] 128 tle = items[18]
128 # Optional. 129 # Optional.
129 spawning = items[22] 130 spawning = string_as_boolean_string(items[19])
130 # Required. 131 # Required.
131 collector = items[23] 132 collector_last_name = items[21]
132 if len(collector) == 0: 133 if len(collector_last_name) == 0:
133 accumulated_msgs = empty_value(i, "collector", accumulated_msgs) 134 accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs)
134 # Required. 135 # Required.
135 org = items[24] 136 collector_first_name = items[22]
137 if len(collector_first_name) == 0:
138 accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs)
139 # Required.
140 org = items[23]
136 if len(org) == 0: 141 if len(org) == 0:
137 accumulated_msgs = empty_value(i, "org", accumulated_msgs) 142 accumulated_msgs = empty_value(i, "org", accumulated_msgs)
138 # Required and validated. 143 # Required and validated.
139 collection_date = items[25] 144 collection_date = items[24]
140 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) 145 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs)
141 # Required and validated. 146 # Required and validated.
142 contact_email = items[26] 147 contact_email = items[25]
143 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) 148 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs)
144 # Required. 149 # Required.
145 seq_facility = items[27] 150 seq_facility = items[26]
146 if len(seq_facility) == 0: 151 if len(seq_facility) == 0:
147 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) 152 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs)
148 # Optional. 153 # Optional.
149 array_version = items[28] 154 array_version = items[27]
150 # Optional. 155 # Optional.
151 data_sharing = items[29] 156 public = string_as_boolean_string(items[28])
152 # Optional. 157 # Optional.
153 data_hold = items[30] 158 public_after_date = items[30]
159 accumulated_msga = validate_date_string(public_after_date)
154 160
155 if len(accumulated_msgs) > 0: 161 if len(accumulated_msgs) > 0:
156 stop_error(accumulated_msgs) 162 stop_error(accumulated_msgs)
157 163
158 shutil.copyfile(args.input, args.output) 164 shutil.copyfile(args.input, args.output)