Mercurial > repos > greg > validate_affy_metadata
comparison validate_affy_metadata.py @ 9:d15ae203013e draft
Uploaded
| author | greg |
|---|---|
| date | Thu, 15 Nov 2018 10:20:49 -0500 |
| parents | 0735cfcc7490 |
| children | 96fbb0504ac9 |
comparison
equal
deleted
inserted
replaced
| 8:0735cfcc7490 | 9:d15ae203013e |
|---|---|
| 28 | 28 |
| 29 def stop_error(msg): | 29 def stop_error(msg): |
| 30 sys.exit(msg) | 30 sys.exit(msg) |
| 31 | 31 |
| 32 | 32 |
| 33 def string_as_boolean_string(string): | |
| 34 if str(string).lower() in ['true', 'yes', 'on', '1']: | |
| 35 return 'True' | |
| 36 else: | |
| 37 return 'False' | |
| 38 | |
| 39 | |
| 33 def validate_date_string(line_no, date_string, accumulated_msgs): | 40 def validate_date_string(line_no, date_string, accumulated_msgs): |
| 41 if len(date_string) == 0: | |
| 42 return accumulated_msgs | |
| 34 try: | 43 try: |
| 35 datetime.datetime.strptime(date_string, '%y/%m/%d') | 44 datetime.datetime.strptime(date_string, '%Y-%m-%d') |
| 36 return accumulated_msgs | 45 return accumulated_msgs |
| 37 except ValueError: | 46 except ValueError: |
| 38 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YY/MM/DD)." % (line_no, date_string)) | 47 return add_error_msg(accumulated_msgs, "Line %d contains an incorrect date format (%s must be YYYY-MM-DD)." % (line_no, date_string)) |
| 39 | 48 |
| 40 | 49 |
| 41 def validate_decimal(line_no, decimal_string, accumulated_msgs): | 50 def validate_decimal(line_no, decimal_string, accumulated_msgs): |
| 42 try: | 51 try: |
| 43 decimal.Decimal(decimal_string) | 52 decimal.Decimal(decimal_string) |
| 62 if i == 0: | 71 if i == 0: |
| 63 # Skip the header. | 72 # Skip the header. |
| 64 continue | 73 continue |
| 65 line = line.rstrip("\r\n") | 74 line = line.rstrip("\r\n") |
| 66 if i > 97: | 75 if i > 97: |
| 67 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 96 data lines.") | 76 accumulated_msgs = add_error_msg(accumulated_msgs, "The input file contains more than 97 lines (must be 1 header line and no more than 96 data lines).") |
| 68 stop_error(accumulated_msgs) | 77 stop_error(accumulated_msgs) |
| 69 items = line.split(",") | 78 items = line.split(",") |
| 70 if len(items) != 31: | 79 if len(items) != 29: |
| 71 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 31)." % (i, len(items))) | 80 accumulated_msgs = add_error_msg(accumulated_msgs, "Line %d contains %s columns, (must be 29)." % (i, len(items))) |
| 72 stop_error(accumulated_msgs) | 81 stop_error(accumulated_msgs) |
| 73 # Required. | |
| 74 sample_id = items[0] | |
| 75 if len(sample_id) == 0: | |
| 76 accumulated_msgs = empty_value(i, "sample_id", accumulated_msgs) | |
| 77 # Required and validated. | 82 # Required and validated. |
| 78 date_entered_db = items[1] | 83 date_entered_db = items[0] |
| 79 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) | 84 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) |
| 80 # Required. | 85 # Required. |
| 81 user_specimen_id = items[2] | 86 user_specimen_id = items[1] |
| 82 if len(user_specimen_id) == 0: | 87 if len(user_specimen_id) == 0: |
| 83 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) | 88 accumulated_msgs = empty_value(i, "user_specimen_id", accumulated_msgs) |
| 84 # Optional. | 89 # Optional. |
| 85 duplicate_sample = items[3] | 90 field_call = items[2] |
| 86 # Optional. | 91 # Optional. |
| 87 matching_samples = items[4] | 92 bcoral_genet_id = items[3] |
| 88 # Optional. | 93 # Optional. |
| 89 field_call = items[5] | 94 bsym_genet_id = items[4] |
| 90 # Optional. | |
| 91 bcoral_genet_id = items[6] | |
| 92 # Optional. | |
| 93 bsym_genet_id = items[7] | |
| 94 # Required. | 95 # Required. |
| 95 reef = items[8] | 96 reef = items[5] |
| 96 if len(reef) == 0: | 97 if len(reef) == 0: |
| 97 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) | 98 accumulated_msgs = empty_value(i, "reef", accumulated_msgs) |
| 98 # Required. | 99 # Required. |
| 99 region = items[9] | 100 region = items[6] |
| 100 if len(region) == 0: | 101 if len(region) == 0: |
| 101 accumulated_msgs = empty_value(i, "region", accumulated_msgs) | 102 accumulated_msgs = empty_value(i, "region", accumulated_msgs) |
| 102 # Required and validated. | 103 # Required and validated. |
| 103 latitude = items[10] | 104 latitude = items[7] |
| 104 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) | 105 accumulated_msgs = validate_decimal(i, latitude, accumulated_msgs) |
| 105 # Required and validated. | 106 # Required and validated. |
| 106 longitude = items[11] | 107 longitude = items[8] |
| 107 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) | 108 accumulated_msgs = validate_decimal(i, longitude, accumulated_msgs) |
| 108 # Optional. | 109 # Optional. |
| 109 geographic_origin = items[12] | 110 geographic_origin = items[9] |
| 110 # Optional. | 111 # Optional. |
| 111 sample_location = items[13] | 112 sample_location = items[10] |
| 112 # Optional. | 113 # Optional. |
| 113 latitude_outplant = items[14] | 114 latitude_outplant = items[11] |
| 114 # Optional. | 115 # Optional. |
| 115 longitude_outplant = items[15] | 116 longitude_outplant = items[12] |
| 116 # Optional. | 117 # Optional. |
| 117 depth = items[16] | 118 depth = items[13] |
| 118 # Optional. | 119 # Optional. |
| 119 dist_shore = items[17] | 120 dist_shore = items[14] |
| 120 # Optional. | 121 # Optional. |
| 121 disease_resist = items[18] | 122 disease_resist = items[15] |
| 122 # Optional. | 123 # Optional. |
| 123 bleach_resist = items[19] | 124 bleach_resist = items[16] |
| 124 # Optional. | 125 # Optional. |
| 125 mortality = items[20] | 126 mortality = items[17] |
| 126 # Optional. | 127 # Optional. |
| 127 tle = items[21] | 128 tle = items[18] |
| 128 # Optional. | 129 # Optional. |
| 129 spawning = items[22] | 130 spawning = string_as_boolean_string(items[19]) |
| 130 # Required. | 131 # Required. |
| 131 collector = items[23] | 132 collector_last_name = items[21] |
| 132 if len(collector) == 0: | 133 if len(collector_last_name) == 0: |
| 133 accumulated_msgs = empty_value(i, "collector", accumulated_msgs) | 134 accumulated_msgs = empty_value(i, "collector_last_name", accumulated_msgs) |
| 134 # Required. | 135 # Required. |
| 135 org = items[24] | 136 collector_first_name = items[22] |
| 137 if len(collector_first_name) == 0: | |
| 138 accumulated_msgs = empty_value(i, "collector_first_name", accumulated_msgs) | |
| 139 # Required. | |
| 140 org = items[23] | |
| 136 if len(org) == 0: | 141 if len(org) == 0: |
| 137 accumulated_msgs = empty_value(i, "org", accumulated_msgs) | 142 accumulated_msgs = empty_value(i, "org", accumulated_msgs) |
| 138 # Required and validated. | 143 # Required and validated. |
| 139 collection_date = items[25] | 144 collection_date = items[24] |
| 140 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) | 145 accumulated_msgs = validate_date_string(i, date_entered_db, accumulated_msgs) |
| 141 # Required and validated. | 146 # Required and validated. |
| 142 contact_email = items[26] | 147 contact_email = items[25] |
| 143 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) | 148 accumulated_msgs = validate_email(i, contact_email, accumulated_msgs) |
| 144 # Required. | 149 # Required. |
| 145 seq_facility = items[27] | 150 seq_facility = items[26] |
| 146 if len(seq_facility) == 0: | 151 if len(seq_facility) == 0: |
| 147 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) | 152 accumulated_msgs = empty_value(i, "seq_facility", accumulated_msgs) |
| 148 # Optional. | 153 # Optional. |
| 149 array_version = items[28] | 154 array_version = items[27] |
| 150 # Optional. | 155 # Optional. |
| 151 data_sharing = items[29] | 156 public = string_as_boolean_string(items[28]) |
| 152 # Optional. | 157 # Optional. |
| 153 data_hold = items[30] | 158 public_after_date = items[30] |
| 159 accumulated_msga = validate_date_string(public_after_date) | |
| 154 | 160 |
| 155 if len(accumulated_msgs) > 0: | 161 if len(accumulated_msgs) > 0: |
| 156 stop_error(accumulated_msgs) | 162 stop_error(accumulated_msgs) |
| 157 | 163 |
| 158 shutil.copyfile(args.input, args.output) | 164 shutil.copyfile(args.input, args.output) |
