# Copyright (c) 2022-2024 InterDigital Communications, Inc
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Convert from MPEG/VCM format to OpenImageV6 that can be read with fiftyone
This is the official OpenImageV6 dir structure:
```
├── data [5000 entries exceeds filelimit, not opening dir]
├── labels
│ ├── classifications.csv # image-level annotations
│ ├── detections.csv # bbox annotations
│ ├── masks (several subdirectories)
| | 0/, 1/, 2/, .. each directory with lots of png images featuring the masks
| | A/, B/, C/, ..
| |
│ ├── relationships.csv
│ └── segmentations.csv # segmentations
| [MaskPath,ImageID,LabelName,BoxID,BoxXMin,BoxXMax,BoxYMin,BoxYMax,PredictedIoU,Clicks]
| MaskPath refers to png files in that masks directory (omitting directories 0/, 1/, etc.?)
|
└── metadata
├── attributes.csv
├── classes.csv # all classes for image level labels (and bboxes?)
├── hierarchy.json
├── image_ids.csv
└── segmentation_classes.csv # all classes for segmentation
```
For a minimal bbox detection problem, this is sufficient:
```
.
├── data -> ../../images
├── labels
│ └── detections.csv
└── metadata
└── classes.csv
```
"""
import glob
import os
import pathlib
import shutil
from pathlib import Path
def imageIdFileList(*args):
    """Combine the image ids listed in one or more ``.lst`` files.

    Just list the ``.lst`` files as arguments.  They will be combined together.

    ::

        imageIdFileList(first.lst, second.lst, ..)

    .lst file format is:

    ::

        bef50424c62d12c5.jpg
        c540d9c96b6a79a2.jpg
        a1b20ed591193c06.jpg
        945d6f685752e31b.jpg
        d18700eda95548c8.jpg
        ...

    :param args: paths of the ``.lst`` files to read, in order
    :return: list of image ids (the part of each line before the first
        ``"."``), without duplicates, in first-seen order.  Blank lines are
        skipped.
    :raises AssertionError: if one of the files does not exist
    """
    # dict keys keep insertion order and give O(1) duplicate detection,
    # unlike a membership test against a growing list
    seen = {}
    for fname in args:
        assert os.path.exists(fname), "can't find file " + fname
        with open(fname, "r") as source:
            for line in source:  # bef50424c62d12c5.jpg, ..
                image_id = line.strip().split(".")[0]  # bef50424c62d12c5
                if image_id:  # a blank line would otherwise give an "" id
                    seen.setdefault(image_id, None)
    return list(seen)
# header line of metadata/image_ids.csv
_IMAGE_IDS_HEADER = (
    "ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,"
    "Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation\n"
)


def _fix_label(name):
    """Replace underscores by spaces in a label name ("cell_phone" -> "cell phone")."""
    # WARNING: MPEG/VCM input files include corrupt label names, i.e. "cell_phone"
    # instead of "cell phone" as understood by COCO-trained detectors.
    # In the original MPEG/VCM pipeline, these labels are corrected "on-the-fly"
    return name.replace("_", " ")


def _iter_csv_rows(path):
    """Yield the stripped, comma-separated fields of each data row of ``path``.

    The first line (header) is discarded and blank lines are skipped.
    """
    with open(path, "r") as source:
        source.readline()  # read the header line away
        for line in source:
            if not line.strip():
                continue
            yield [field.strip() for field in line.split(",")]


def _image_ids_row(image_id):
    """Return one image_ids.csv line for ``image_id`` filled with placeholders."""
    # all info except the id is lost / not provided by the MPEG/VCM files
    return (
        ",".join(
            (
                image_id,  # ImageID
                "nada",  # Subset
                "nada",  # OriginalURL
                "nada",  # OriginalLandingURL
                "nada",  # License
                "nada",  # AuthorProfileURL
                "John Doe",  # Author
                "nada",  # Title
                "nada",  # OriginalSize
                "nada",  # OriginalMD5
                "nada",  # Thumbnail300KURL
                "0.0",  # Rotation
            )
        )
        + "\n"
    )


def MPEGVCMToOpenImageV6(  # noqa: C901
    validation_csv_file: str = None,  # detection_validation_labels_5k.csv # OR # segmentation_validation_labels_5k.csv # image-level labels
    list_file: str = None,  # detection_validation_input_5k.lst
    bbox_csv_file: str = None,  # detection_validation_5k_bbox.csv # OR # segmentation_validation_bbox_5k.csv # OPTIONAL
    segmentation_csv_file: str = None,  # segmentation_validation_masks_5k.csv # OPTIONAL # TODO
    output_directory: str = None,
    data_dir: str = None,
    mask_dir: str = None,
    link=True,
    verbose=False,
    append_mask_dir=None,
):
    """From MPEG/VCM input file format to proper OpenImageV6 format

    :param validation_csv_file: MPEG/VCM image-level labels (typically ``detection_validation_labels_5k.csv`` or ``segmentation_validation_labels_5k.csv``).  Mandatory.
    :param list_file: MPEG/VCM image list (typically ``detection_validation_input_5k.lst`` or ``segmentation_validation_input_5k.lst``).  Optional: if omitted, the ``.jpg`` / ``.png`` files found in ``data_dir`` are listed instead.
    :param bbox_csv_file: MPEG/VCM detection input file (typically ``detection_validation_5k_bbox.csv`` or ``segmentation_validation_bbox_5k.csv``).  Optional.
    :param segmentation_csv_file: MPEG/VCM segmentation input file (typically ``segmentation_validation_masks_5k.csv``).  Optional; requires ``mask_dir``.
    :param output_directory: Path where the OpenImageV6 formatted files are dumped.  Mandatory.
    :param data_dir: Source directory where the image jpg files are. Use the standard OpenImageV6 directory.  Mandatory.
    :param mask_dir: Source directory where the mask png files are. Use the standard OpenImageV6 directory.  Only used when ``segmentation_csv_file`` is given.
    :param link: True (default): create a softlink from source data_dir to target data_dir (and from mask_dir to the target mask dir). False: copy all images to target.
    :param verbose: True: print progress messages.  Default False.
    :param append_mask_dir: Optional subdirectory name.  If given, ``labels/masks`` is created as a real directory and ``mask_dir`` is linked / copied to ``labels/masks/<append_mask_dir>`` instead of to ``labels/masks``.
    :raises AssertionError: if a mandatory argument is missing or a given input path does not exist

    More details on the conversion follow

    ``bbox_csv_file``: A filename (``detection_validation_5k_bbox.csv``) with the MPEG/VCM format that looks like this:

    ::

        ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
        bef50424c62d12c5,airplane,0.15641026,0.8282050999999999,0.16284987,0.82188296,0
        c540d9c96b6a79a2,person,0.4421875,0.5796875,0.67083335,0.84791666,0
        ...

    --> Converted to proper OpenImageV6 format:

    ::

        ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
        ...

    ``segmentation_csv_file``: A filename (``segmentation_validation_masks_5k.csv``) with the MPEG/VCM format that looks like this:

    ::

        ImageID,LabelName,ImageWidth,ImageHeight,XMin,YMin,XMax,YMax,IsGroupOf,Mask,MaskPath
        001464cfae2a30b8,sandwich,1024,683,0.261062,0.245575,0.681416,0.573009,0,eNqtlNlSwzAMR..GtiA5L,001464cfae2a30b8_m0cdn1_5fa59bf3.png
        ...

    We're using mask bitmaps from the original OpenImageV6 image set, i.e. we're omitting that "Mask" column that seems to be a byte blob encoded in some way

    --> Converted to proper OpenImageV6 format:

    ::

        MaskPath,ImageID,LabelName,BoxID,BoxXMin,BoxXMax,BoxYMin,BoxYMax,PredictedIoU,Clicks
        114d6b81e7b1fa08_m01bl7v_b62eb236.png,114d6b81e7b1fa08,/m/01bl7v,b62eb236,0.036101,0.332130,0.099278,0.888087,0.00000
        ...

    ``validation_csv_file`` = ``detection_validation_labels_5k.csv`` looks like this:

    ::

        ImageID,LabelName,Confidence
        0001eeaf4aed83f9,airplane,1
        000a1249af2bc5f0,person,1
        001083f05db4352b,car,1
        00146ba1e50ed8d8,person,1
        ...

    --> Converted to proper OpenImageV6 format (into ``classifications.csv``):

    ::

        ImageID,Source,LabelName,Confidence
        0001eeaf4aed83f9,verification,/m/0cmf2,1
        0004886b7d043cfd,verification,/m/01g317,0
        0004886b7d043cfd,verification,/m/04hgtk,0
        0004886b7d043cfd,verification,/m/09j2d,0
        ...

    ``output_directory``: Path to where the OpenImageV6 formatted files are dumped. Files under that path are:

    ::

        .
        ├── data                        --> softlink to original images
        ├── labels
        │   ├── detections.csv          (converted from 'detection_validation_5k_bbox.csv' / 'segmentation_validation_bbox_5k.csv')  # bbox_csv_file
        │   ├── classifications.csv     (converted from 'detection_validation_labels_5k.csv' / 'segmentation_validation_labels_5k.csv')  # validation_csv_file # image-level labels
        │   ├── segmentations.csv       (converted from 'segmentation_validation_masks_5k.csv')
        │   └── masks/                  --> softlink to original mask png files
        └── metadata
            └── classes.csv             take all possible classes from classifications.csv

    In particular, ``detections.csv`` has this format:

    ::

        ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
        0001eeaf4aed83f9,source,tag,1,0.022673031,0.9642005,0.07103825,0.80054647,0,0,0,0,0
        ...
    """
    assert (
        validation_csv_file is not None
    ), "you _must_ provide at least MPEG/VCM-formatted image-level labels csv file, aka 'detection_validation_labels_5k.csv'"
    assert output_directory is not None, "please provide output root directory"
    assert data_dir is not None, "please provide data_dir where the images are located"

    # input path etc. check
    assert os.path.exists(validation_csv_file), (
        "file " + validation_csv_file + " does not exist"
    )
    if bbox_csv_file is not None:
        assert os.path.exists(bbox_csv_file), (
            "file " + bbox_csv_file + " does not exist"
        )
    if segmentation_csv_file is not None:
        assert os.path.exists(segmentation_csv_file), (
            "file " + segmentation_csv_file + " does not exist"
        )
        # masks are only needed (and only checked) for segmentation
        assert (
            mask_dir is not None
        ), "please provide mask_dir where the mask images are located (typically at labels/masks of your main OpenImageV6 dir)"
        assert os.path.exists(mask_dir), "directory " + mask_dir + " does not exist"
    assert os.path.exists(data_dir), "directory " + data_dir + " does not exist"
    if list_file is not None:
        assert os.path.exists(list_file), "file " + list_file + " does not exist"

    # target directories & files
    if os.path.exists(output_directory):
        print("WARNING: directory " + output_directory + " already exists")

    # metadata/
    metadata_dir = os.path.join(output_directory, "metadata")
    classes_csv = os.path.join(metadata_dir, "classes.csv")
    segmentation_classes_csv = os.path.join(metadata_dir, "segmentation_classes.csv")
    attributes_csv = os.path.join(metadata_dir, "attributes.csv")
    image_ids_csv = os.path.join(metadata_dir, "image_ids.csv")

    # labels/
    labels_dir = os.path.join(output_directory, "labels")
    detections_csv = os.path.join(labels_dir, "detections.csv")
    segmentations_csv = os.path.join(labels_dir, "segmentations.csv")
    classifications_csv = os.path.join(labels_dir, "classifications.csv")

    target_data_dir = os.path.join(output_directory, "data")
    target_mask_dir = os.path.join(output_directory, "labels", "masks")
    if append_mask_dir is not None:
        # For custom formats, user might provide a segmask dir without
        # the 0/, 1/, etc. subdirectories.
        # So let's create labels/masks and link from labels/masks/0 -> segmask dir
        os.makedirs(target_mask_dir, exist_ok=True)  # labels/masks
        # new link: labels/masks/0
        target_mask_dir = os.path.join(
            output_directory, "labels", "masks", append_mask_dir
        )

    if verbose:
        print("creating dirs")
    for d in [labels_dir, metadata_dir]:
        pathlib.Path(d).mkdir(parents=True, exist_ok=True)

    # all ready to go
    # get all existing labels
    if verbose:
        print("reading classes from", validation_csv_file)
    # can we safely assume that all used class labels are in the validation
    # aka image-level annotation file?
    # dict.fromkeys: unique class names in first-seen order
    classes = list(
        dict.fromkeys(
            _fix_label(row[1]) for row in _iter_csv_rows(validation_csv_file)
        )
    )
    if verbose:
        print("got classes", classes)

    # write classes.csv
    if verbose:
        print("writing", classes_csv)
    with open(classes_csv, "w") as f:
        f.writelines(class_ + "," + class_ + "\n" for class_ in classes)

    if segmentation_csv_file is not None:
        if verbose:
            print("writing", segmentation_classes_csv)
        with open(segmentation_classes_csv, "w") as f:
            f.writelines(class_ + "\n" for class_ in classes)

    # placeholder attributes file
    with open(attributes_csv, "w") as f:
        f.write("nada,nada\n")

    # collect the image ids for image_ids.csv
    if list_file is not None:
        with open(list_file, "r") as source:
            # bef50424c62d12c5.jpg -> bef50424c62d12c5
            image_ids = [
                line.strip().split(".")[0] for line in source if line.strip()
            ]
    else:  # no list file provided! but we still need image_ids.csv
        image_ids = []
        # sorted: glob order is arbitrary, keep the output deterministic
        for img_file_path in sorted(glob.glob(os.path.join(data_dir, "*"))):
            p = Path(img_file_path)  # /path/to/some.jpg
            if p.suffix not in (".png", ".jpg"):
                print("WARNING: omitting file", img_file_path)
                continue
            image_ids.append(p.stem)  # some
    with open(image_ids_csv, "w") as target:
        target.write(_IMAGE_IDS_HEADER)
        target.writelines(_image_ids_row(image_id) for image_id in image_ids)

    # image-level labels: ImageID,LabelName,Confidence
    if verbose:
        print("reading", validation_csv_file, "and writing", classifications_csv)
    with open(classifications_csv, "w") as target:
        target.write("ImageID,Source,LabelName,Confidence\n")
        for image_id, label, confidence in _iter_csv_rows(validation_csv_file):
            target.write(
                ",".join((image_id, "mpeg_vcm", _fix_label(label), confidence))
                + "\n"
            )

    if bbox_csv_file is not None:
        if verbose:
            print("reading", bbox_csv_file, "and writing", detections_csv)
        # MPEG/VCM format:
        # ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
        # bef50424c62d12c5,airplane,0.15641026,0.8282050999999999,0.16284987,0.82188296,0
        with open(detections_csv, "w") as target:
            # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside
            # 0001eeaf4aed83f9,xclick,/m/0cmf2,1,0.022673031,0.9642005,0.07103825,0.80054647,0,0,0,0,0
            target.write(
                "ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside\n"
            )
            for (
                image_id,
                label,
                x_min,
                x_max,
                y_min,
                y_max,
                is_group_of,
            ) in _iter_csv_rows(bbox_csv_file):
                target.write(
                    ",".join(
                        (
                            image_id,
                            "mpeg_vcm",  # Source
                            _fix_label(label),
                            "1",  # Confidence
                            x_min,
                            x_max,
                            y_min,
                            y_max,
                            "0",  # IsOccluded: pity this information is lost..
                            "0",  # IsTruncated
                            is_group_of,  # taken from the input, don't change this!
                            "0",  # IsDepiction
                            "0",  # IsInside
                        )
                    )
                    + "\n"
                )

    if segmentation_csv_file is not None:
        if verbose:
            print("reading", segmentation_csv_file, "and writing", segmentations_csv)
        # ImageID,LabelName,ImageWidth,ImageHeight,XMin,YMin,XMax,YMax,IsGroupOf,Mask,MaskPath
        with open(segmentations_csv, "w") as target:
            target.write(
                "MaskPath,ImageID,LabelName,BoxID,BoxXMin,BoxXMax,BoxYMin,BoxYMax,PredictedIoU,Clicks\n"
            )
            for (
                image_id,
                label,
                _image_width,
                _image_height,
                x_min,
                y_min,
                x_max,
                y_max,
                _is_group_of,
                _mask,  # encoded blob, not used: we take the original png masks
                mask_path,
            ) in _iter_csv_rows(segmentation_csv_file):
                # BoxID: 001464cfae2a30b8_m0cdn1_5fa59bf3.png == ImageID_xxx_BoxID.png
                box_id = mask_path.split("_")[-1].split(".")[0]  # 5fa59bf3
                target.write(
                    ",".join(
                        (
                            mask_path,
                            image_id,
                            _fix_label(label),
                            box_id,
                            x_min,  # BoxXMin  (I guess..?)
                            x_max,  # BoxXMax
                            y_min,  # BoxYMin
                            y_max,  # BoxYMax
                            "0.0",  # PredictedIoU: lost information..
                            "0.0",  # Clicks: lost information..
                        )
                    )
                    + "\n"
                )

    if link:
        # NOTE(review): os.symlink stores the source path verbatim, so a
        # relative data_dir / mask_dir is resolved relative to the directory
        # holding the link, not the cwd -- confirm callers pass absolute paths
        if verbose:
            print("linking image dir", data_dir, "to", target_data_dir)
        if os.path.islink(target_data_dir):
            print("WARNING: link", target_data_dir, "exist already. Will remove")
            os.remove(target_data_dir)
        os.symlink(data_dir, target_data_dir)
        if segmentation_csv_file is not None:
            if os.path.islink(target_mask_dir):
                print("WARNING: link", target_mask_dir, "exist already. Will remove")
                os.remove(target_mask_dir)
            os.symlink(mask_dir, target_mask_dir)
    else:
        if verbose:
            print(
                "copying image dir",
                data_dir,
                "to",
                target_data_dir,
                "this might take a while..",
            )
        shutil.copytree(data_dir, target_data_dir)
        if segmentation_csv_file is not None:
            if verbose:
                print(
                    "copying segmentation mask image dir",
                    mask_dir,
                    "to",
                    target_mask_dir,
                    "this might take a while..",
                )
            shutil.copytree(mask_dir, target_mask_dir)
    print("DONE!")