import json
import os
from collections import defaultdict
|
|
| original_json = json.load(open("mscoco_dataset/new_annotations/dataset_coco.json")) |
|
|
| subsets = ['train', 'val', 'test'] |
| savepath = "mscoco_dataset/new_annotations" |
|
|
| import os |
| if not os.path.exists(savepath): |
| os.makedirs(savepath) |
|
|
| savename = { |
| 'train': "captions_train113k.json", |
| 'val': "captions_val5k.json", |
| 'test': "captions_test5k.json", |
| } |
|
|
| imagefields = defaultdict(list) |
| annotationsfields = defaultdict(list) |
|
|
| for imagecaps in original_json['images']: |
| filepath = imagecaps['filepath'] |
| filename = imagecaps['filename'] |
| image_id = int(filename.split(".")[0].split('_')[-1]) |
| split = imagecaps['split'] |
| if split == 'restval': |
| split = 'train' |
| imagefields[split].append({ |
| "file_name": filename, |
| "file_path": filepath, |
| "id": image_id |
| }) |
| for sen in imagecaps['sentences']: |
| annotationsfields[split].append({ |
| "image_id": image_id, |
| "id": sen["sentid"], |
| "caption": sen["raw"], |
| }) |
| |
| for subset in subsets: |
| data = { |
| "images": imagefields[subset], |
| "annotations": annotationsfields[subset] |
| } |
| json.dump(data, open(os.path.join(savepath, savename[subset]), "w")) |
| pass |
|
|
|
|
|
|
|
|