Spaces:
Configuration error
Configuration error
| import json | |
| import re | |
def clean_mimic_json(messy_json, cleaned_output) -> None:
    """Flatten an image-to-captions JSON mapping into a list of records.

    Reads the JSON object stored at ``messy_json`` and writes a JSON array
    (``indent=2``) to ``cleaned_output`` holding one
    ``{"image_id": ..., "caption": ...}`` object per input entry, in the
    order the entries appear in the input.

    Args:
        messy_json: Path of the input JSON file. Its top level must be an
            object; each value is presumably a list of caption strings
            (verify against the producer of this file).
        cleaned_output: Path of the JSON file to create or overwrite.
    """
    # Explicit encoding: JSON files are UTF-8, while the platform default
    # used by a bare open() depends on the locale.
    with open(messy_json, "r", encoding="utf-8") as source:
        messy_data = json.load(source)

    clean_data = []
    for image_id, captions in messy_data.items():
        # str.join on a bare string would insert a space between every
        # character, so a single-string value is passed through unchanged.
        if isinstance(captions, str):
            caption_clean = captions
        else:
            caption_clean = " ".join(captions)

        clean_data.append(
            {
                # Keeps only the text before the FIRST "." of the key, so an
                # id that itself contains dots is truncated at the first one.
                "image_id": image_id.split(".")[0],
                "caption": caption_clean,
            }
        )

    with open(cleaned_output, "w", encoding="utf-8") as outfile:
        json.dump(clean_data, outfile, indent=2)
def clean_vqa_json(messy_json, cleaned_output) -> None:
    """Reduce each question/answer entry to its two essential fields.

    Reads the JSON object stored at ``messy_json``, whose values are
    iterables of objects, and writes a JSON object (``indent=4``) to
    ``cleaned_output`` with the same keys. Every entry in each value is
    replaced by ``{"question": ..., "answer": ...}``; any other fields the
    entry carried are dropped.

    Args:
        messy_json: Path of the input JSON file.
        cleaned_output: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If an entry lacks a ``"question"`` or ``"answer"`` field.
    """
    # Explicit encoding: JSON files are UTF-8, while the platform default
    # used by a bare open() depends on the locale.
    with open(messy_json, "r", encoding="utf-8") as source:
        # Loaded into its own name so the path argument is not overwritten.
        raw_entries = json.load(source)

    organized_json = {
        key: [
            {"question": item["question"], "answer": item["answer"]}
            for item in items
        ]
        for key, items in raw_entries.items()
    }

    with open(cleaned_output, "w", encoding="utf-8") as outfile:
        json.dump(organized_json, outfile, indent=4)
def clean_detection_json(messy_json, cleaned_output) -> None:
    """Extract one labelled bounding box per entry from tagged captions.

    Reads the JSON object stored at ``messy_json``. For every entry whose
    value is a non-empty list, only the first element is inspected. An entry
    is kept when that element is a string that:

    * contains exactly one ``<p>`` tag followed by non-blank text (the text
      up to the first ``</p>`` becomes the object label), and
    * contains exactly four ``<digits>`` tokens (converted to floats to form
      the box).

    Kept entries are written to ``cleaned_output`` as a JSON array
    (``indent=4``) of ``{"key": ..., "objects": [label], "bbox": [[...]]}``
    objects. Every other entry is skipped silently.

    Args:
        messy_json: Path of the input JSON file.
        cleaned_output: Path of the JSON file to create or overwrite.
    """
    # Compiled once, outside the per-entry loop.
    bbox_token = re.compile(r'<(\d+)>')

    # Explicit encoding: JSON files are UTF-8, while the platform default
    # used by a bare open() depends on the locale.
    with open(messy_json, "r", encoding="utf-8") as input_file:
        input_json = json.load(input_file)

    organized_data = []
    for key, value in input_json.items():
        if not (isinstance(value, list) and value):
            continue

        caption = value[0]
        # A non-string first element cannot hold the tags; skip it like any
        # other malformed entry instead of failing on .split().
        if not isinstance(caption, str):
            continue

        # Exactly one "<p>" is required; captions with none or several are
        # treated as having no usable label.
        pieces = caption.split("<p>")
        if len(pieces) != 2:
            continue
        object_part = pieces[1].split("</p>")[0].strip()
        if not object_part:
            continue

        bbox_match = bbox_token.findall(caption)
        if len(bbox_match) != 4:
            continue

        organized_data.append(
            {
                # Text before the first ".png" occurrence in the key.
                "key": key.split(".png")[0],
                "objects": [object_part],
                "bbox": [[float(val) for val in bbox_match]],
            }
        )

    with open(cleaned_output, "w", encoding="utf-8") as output_file:
        json.dump(organized_data, output_file, indent=4)