# annotated_objects_open_images.py
  1. from collections import defaultdict
  2. from csv import DictReader, reader as TupleReader
  3. from pathlib import Path
  4. from typing import Dict, List, Any
  5. import warnings
  6. from taming.data.annotated_objects_dataset import AnnotatedObjectsDataset
  7. from taming.data.helper_types import Annotation, Category
  8. from tqdm import tqdm
  9. OPEN_IMAGES_STRUCTURE = {
  10. 'train': {
  11. 'top_level': '',
  12. 'class_descriptions': 'class-descriptions-boxable.csv',
  13. 'annotations': 'oidv6-train-annotations-bbox.csv',
  14. 'file_list': 'train-images-boxable.csv',
  15. 'files': 'train'
  16. },
  17. 'validation': {
  18. 'top_level': '',
  19. 'class_descriptions': 'class-descriptions-boxable.csv',
  20. 'annotations': 'validation-annotations-bbox.csv',
  21. 'file_list': 'validation-images.csv',
  22. 'files': 'validation'
  23. },
  24. 'test': {
  25. 'top_level': '',
  26. 'class_descriptions': 'class-descriptions-boxable.csv',
  27. 'annotations': 'test-annotations-bbox.csv',
  28. 'file_list': 'test-images.csv',
  29. 'files': 'test'
  30. }
  31. }
  32. def load_annotations(descriptor_path: Path, min_object_area: float, category_mapping: Dict[str, str],
  33. category_no_for_id: Dict[str, int]) -> Dict[str, List[Annotation]]:
  34. annotations: Dict[str, List[Annotation]] = defaultdict(list)
  35. with open(descriptor_path) as file:
  36. reader = DictReader(file)
  37. for i, row in tqdm(enumerate(reader), total=14620000, desc='Loading OpenImages annotations'):
  38. width = float(row['XMax']) - float(row['XMin'])
  39. height = float(row['YMax']) - float(row['YMin'])
  40. area = width * height
  41. category_id = row['LabelName']
  42. if category_id in category_mapping:
  43. category_id = category_mapping[category_id]
  44. if area >= min_object_area and category_id in category_no_for_id:
  45. annotations[row['ImageID']].append(
  46. Annotation(
  47. id=i,
  48. image_id=row['ImageID'],
  49. source=row['Source'],
  50. category_id=category_id,
  51. category_no=category_no_for_id[category_id],
  52. confidence=float(row['Confidence']),
  53. bbox=(float(row['XMin']), float(row['YMin']), width, height),
  54. area=area,
  55. is_occluded=bool(int(row['IsOccluded'])),
  56. is_truncated=bool(int(row['IsTruncated'])),
  57. is_group_of=bool(int(row['IsGroupOf'])),
  58. is_depiction=bool(int(row['IsDepiction'])),
  59. is_inside=bool(int(row['IsInside']))
  60. )
  61. )
  62. if 'train' in str(descriptor_path) and i < 14000000:
  63. warnings.warn(f'Running with subset of Open Images. Train dataset has length [{len(annotations)}].')
  64. return dict(annotations)
  65. def load_image_ids(csv_path: Path) -> List[str]:
  66. with open(csv_path) as file:
  67. reader = DictReader(file)
  68. return [row['image_name'] for row in reader]
  69. def load_categories(csv_path: Path) -> Dict[str, Category]:
  70. with open(csv_path) as file:
  71. reader = TupleReader(file)
  72. return {row[0]: Category(id=row[0], name=row[1], super_category=None) for row in reader}
  73. class AnnotatedObjectsOpenImages(AnnotatedObjectsDataset):
  74. def __init__(self, use_additional_parameters: bool, **kwargs):
  75. """
  76. @param data_path: is the path to the following folder structure:
  77. open_images/
  78. │ oidv6-train-annotations-bbox.csv
  79. ├── class-descriptions-boxable.csv
  80. ├── oidv6-train-annotations-bbox.csv
  81. ├── test
  82. │ ├── 000026e7ee790996.jpg
  83. │ ├── 000062a39995e348.jpg
  84. │ └── ...
  85. ├── test-annotations-bbox.csv
  86. ├── test-images.csv
  87. ├── train
  88. │ ├── 000002b66c9c498e.jpg
  89. │ ├── 000002b97e5471a0.jpg
  90. │ └── ...
  91. ├── train-images-boxable.csv
  92. ├── validation
  93. │ ├── 0001eeaf4aed83f9.jpg
  94. │ ├── 0004886b7d043cfd.jpg
  95. │ └── ...
  96. ├── validation-annotations-bbox.csv
  97. └── validation-images.csv
  98. @param: split: one of 'train', 'validation' or 'test'
  99. @param: desired image size (returns square images)
  100. """
  101. super().__init__(**kwargs)
  102. self.use_additional_parameters = use_additional_parameters
  103. self.categories = load_categories(self.paths['class_descriptions'])
  104. self.filter_categories()
  105. self.setup_category_id_and_number()
  106. self.image_descriptions = {}
  107. annotations = load_annotations(self.paths['annotations'], self.min_object_area, self.category_mapping,
  108. self.category_number)
  109. self.annotations = self.filter_object_number(annotations, self.min_object_area, self.min_objects_per_image,
  110. self.max_objects_per_image)
  111. self.image_ids = list(self.annotations.keys())
  112. self.clean_up_annotations_and_image_descriptions()
  113. def get_path_structure(self) -> Dict[str, str]:
  114. if self.split not in OPEN_IMAGES_STRUCTURE:
  115. raise ValueError(f'Split [{self.split} does not exist for Open Images data.]')
  116. return OPEN_IMAGES_STRUCTURE[self.split]
  117. def get_image_path(self, image_id: str) -> Path:
  118. return self.paths['files'].joinpath(f'{image_id:0>16}.jpg')
  119. def get_image_description(self, image_id: str) -> Dict[str, Any]:
  120. image_path = self.get_image_path(image_id)
  121. return {'file_path': str(image_path), 'file_name': image_path.name}