import json
import os
from collections import defaultdict

import jsonlines


# Dataset splits to convert, in the order they are processed.
subsets = ['train', 'val', 'test']

# Directory that is created before conversion starts.
savepath = "flickr30k/annotations"

# Input JSON-lines file for each split.
set2jsonline = {
    'train': 'flickr30k/all_data_final_train_2014.jsonline',
    'val': 'flickr30k/all_data_final_val_set0_2014.jsonline',
    'test': 'flickr30k/all_data_final_test_set0_2014.jsonline',
}


# Create the annotations directory (and any missing parents) up front.
# exist_ok=True replaces the separate exists() check, so there is no window
# between checking for the directory and creating it.
os.makedirs(savepath, exist_ok=True)


# Output JSON file written for each split.
# NOTE(review): these paths sit directly under "flickr30k/", not under
# `savepath` ("flickr30k/annotations") -- confirm that this is intended.
savename = {
    'train': "flickr30k/captions_train.json",
    'val': "flickr30k/captions_val.json",
    'test': "flickr30k/captions_test.json",
}


# Convert each split's JSON-lines records into one JSON file holding an
# "images" list and an "annotations" list (one entry per caption sentence).
for subset in subsets:
    # Start from empty tables and a zeroed counter for every split so that
    # nothing carries over from the previous one.
    imagefield = []
    annotationfield = []
    sen_id = 0  # running caption id, unique within the current split
    with jsonlines.open(set2jsonline[subset]) as reader:
        for annotation in reader:
            sentences = annotation["sentences"]
            imagefield.append({
                "filename": annotation["img_path"],
                "id": annotation['id'],
            })
            # One caption entry per sentence, linked back to its image
            # through the record's own id.
            for sentence in sentences:
                annotationfield.append({
                    "image_id": annotation['id'],
                    "id": sen_id,
                    "caption": sentence,
                })
                sen_id += 1

    data = {
        "images": imagefield,
        "annotations": annotationfield,
    }
    # Write through a context manager so the file is flushed and closed
    # even if serialization raises part-way through.
    with open(savename[subset], "w") as out_file:
        json.dump(data, out_file)