Source code for compressai_vision.pipelines.fo_vcm.fo.predict

# Copyright (c) 2022-2024 InterDigital Communications, Inc
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
#   contributors may be used to endorse or promote products derived from this
#   software without specific prior written permission.

# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import traceback

import cv2
from detectron2.data import MetadataCatalog
from fiftyone import ProgressBar
from fiftyone.core.dataset import Dataset

from compressai_vision.pipelines.fo_vcm.conversion.detectron2 import (
    detectron251,
    findLabels,
    findVideoLabels,
)
from compressai_vision.pipelines.fo_vcm.pipeline.base import EncoderDecoder


def annexPredictions(  # noqa: C901
    predictors: list = None,
    fo_dataset: Dataset = None,
    gt_field: str = "detections",
    predictor_fields: list = None,
    encoder_decoder=None,  # compressai_vision.evaluation.pipeline.base.EncoderDecoder
    use_pb: bool = False,  # progressbar; captures stdout
    use_print: int = 1,  # print progress at every n:th line; good for batch jobs
):
    """Run detector and EncoderDecoder instance on a dataset. Append detector
    results and bits-per-pixel to each sample.

    :param predictors: A list of Detectron2 predictors. It can be a
        single-element list for a single task, or have multiple elements for a
        multi-task scenario
    :param fo_dataset: Fiftyone dataset
    :param gt_field: Which dataset member to use for ground truths.
        Default: "detections"
    :param predictor_fields: Which dataset member(s) to use for saving the
        Detectron2 results. Default: "detectron-predictions". It can also be a
        list when evaluating multiple vision tasks.
    :param encoder_decoder: (optional) a
        ``compressai_vision.evaluation.pipeline.EncoderDecoder`` subclass
        instance to apply on the image before detection
    :param use_pb: Show progressbar or not. Nice for interactive runs, not so
        much for batch jobs. Default: False.
    :param use_print: Print progress at every n:th step. Set to 0 for no
        printing. Default: 1.
    """
    predictor_fields = (
        [
            "detectron-predictions",
        ]
        if predictor_fields is None
        else predictor_fields
    )

    assert predictors is not None, "provide Detectron2 predictor"
    assert fo_dataset is not None, "provide fiftyone dataset"
    if encoder_decoder is not None:
        assert issubclass(
            encoder_decoder.__class__, EncoderDecoder
        ), "encoder_decoder instance needs to be a subclass of EncoderDecoder"

    # all predictors must share the same training metadata (category ids)
    model_meta = None
    for predictor in predictors:
        curr_meta = MetadataCatalog.get(predictor.cfg.DATASETS.TRAIN[0])
        if model_meta is not None:
            assert model_meta == curr_meta
        model_meta = curr_meta

    """we don't need this!
    d2_dataset = FO2DetectronDataset(
        fo_dataset=fo_dataset,
        detection_field=detection_field,
        model_catids=model_meta.thing_classes,
    )
    """
    try:
        _ = findLabels(fo_dataset, detection_field=gt_field)
    except ValueError:
        print(
            "your ground truths are empty: samples have no member '",
            gt_field,
            "' will set allowed_labels to empty list",
        )
        # allowed_labels = []

    # use open image ids if available
    if fo_dataset.get_field("open_images_id"):
        id_field_name = "open_images_id"
    else:
        id_field_name = "id"

    npix_sum = 0
    nbits_sum = 0
    cc = 0
    # with ProgressBar(fo_dataset) as pb:  # captures stdout
    if use_pb:
        pb = ProgressBar(fo_dataset)
    for sample in fo_dataset:
        cc += 1
        path = sample.filepath
        im = cv2.imread(path)
        if im is None:
            print("FATAL: could not read the image file '" + path + "'")
            return -1
        # tag = path.split(os.path.sep)[-1].split(".")[0]  # i.e.: /path/to/some.jpg --> some.jpg --> some
        # if open_images_id is available, use it; otherwise use the normal id
        tag = sample[id_field_name]
        if encoder_decoder is not None:
            # before using a detector, crunch through
            # encoder/decoder
            try:
                nbits, im_ = encoder_decoder.BGR(
                    im, tag=tag
                )  # include a tag for cases where EncoderDecoder uses caching
            except Exception as e:
                print("EncoderDecoder failed with '" + str(e) + "'")
                print("Traceback:")
                traceback.print_exc()
                return -1
            if nbits < 0:
                # there's something wrong with the encoder/decoder process,
                # say, corrupt data from the VTMEncode bitstream etc.
                print("EncoderDecoder returned error: will try using it once again")
                nbits, im_ = encoder_decoder.BGR(im, tag=tag)
                if nbits < 0:
                    print("EncoderDecoder returned error - again! Will abort calculation")
                    return -1
            # NOTE: use transformed image im_
            npix_sum += im_.shape[0] * im_.shape[1]
            nbits_sum += nbits
        else:
            im_ = im

        for e, predictor in enumerate(predictors):
            res = predictor(im_)
            field = predictor_fields[e]
            predictions = detectron251(
                res,
                model_catids=model_meta.thing_classes,
                # allowed_labels=allowed_labels  # not needed, really
            )  # --> fiftyone Detections object
            """# could save nbits into each sample:
            if encoder_decoder is not None:
                predictions.nbits = nbits
            """
            sample[field] = predictions
        sample.save()
        if use_pb:
            pb.update()
        # print(">>>", cc%use_print)
        if use_print > 0 and ((cc % use_print) == 0):
            print("sample: ", cc, "/", len(fo_dataset))
    if use_pb:
        pb.close()

    # calculate bpp as defined by the VCM working group:
    bpp = None
    if encoder_decoder:
        if npix_sum < 1:
            print("error: number of pixels sum < 1")
            return -1
        if nbits_sum < 1:
            print("error: number of bits sum < 1")
            return -1
        bpp = nbits_sum / npix_sum
    return bpp
def annexVideoPredictions(  # noqa: C901
    predictors: list = None,
    fo_dataset: Dataset = None,  # video dataset
    gt_field: str = "detections",
    predictor_fields: list = None,
    encoder_decoder=None,  # compressai_vision.evaluation.pipeline.base.EncoderDecoder
    use_pb: bool = False,  # progressbar; captures stdout
    use_print: int = 1,  # print progress at every n:th line; good for batch jobs
):
    """Run detector and EncoderDecoder instance on a video dataset. Append
    detector results and bits-per-pixel to each sample.

    Dataset.Sample.Frames

    :param predictors: A list of Detectron2 predictors. It can be a
        single-element list for a single task, or have multiple elements for a
        multi-task scenario
    :param fo_dataset: A fiftyone video dataset
    :param gt_field: Which dataset member to use for ground truths.
        Default: "detections"
    :param predictor_fields: Which dataset member(s) to use for saving the
        Detectron2 results. Default: "detectron-predictions". It can also be a
        list when evaluating multiple vision tasks.
    :param encoder_decoder: (optional) a
        ``compressai_vision.evaluation.pipeline.EncoderDecoder`` subclass
        instance to apply on the image before detection
    :param use_pb: Show progressbar or not. Nice for interactive runs, not so
        much for batch jobs. Default: False.
    :param use_print: Print progress at every n:th step. Set to 0 for no
        printing. Default: 1.

    Video datasets look like this:

    ::

        Name:        sfu-hw-objects-v1
        Media type:  video
        Num samples: 1
        Persistent:  True
        Tags:        []
        Sample fields:
            id:         fiftyone.core.fields.ObjectIdField
            filepath:   fiftyone.core.fields.StringField
            tags:       fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
            metadata:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.VideoMetadata)
            media_type: fiftyone.core.fields.StringField
            class_tag:  fiftyone.core.fields.StringField
            name_tag:   fiftyone.core.fields.StringField
        Frame fields:
            id:           fiftyone.core.fields.ObjectIdField
            frame_number: fiftyone.core.fields.FrameNumberField
            detections:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)

    Difference between image & video datasets:

    Image dataset:

    ::

        Dataset
            first()
            __index__()
            iterator --> returns Sample objects
                fields: id, filepath, ground-truths, detections, etc.

    Video dataset:

    ::

        Dataset
            first()
            __index__()
            iterator --> returns Sample objects
                fields: id, filepath
                frames: Frames object
                    --> __index__ iterator --> returns Frame objects
                        fields: id, ground-truths, detections, etc.
    """
    predictor_fields = (
        [
            "detectron-predictions",
        ]
        if predictor_fields is None
        else predictor_fields
    )

    assert predictors is not None, "provide Detectron2 predictor"
    assert fo_dataset is not None, "provide fiftyone dataset"
    if encoder_decoder is not None:
        assert issubclass(
            encoder_decoder.__class__, EncoderDecoder
        ), "encoder_decoder instance needs to be a subclass of EncoderDecoder"

    # all predictors must share the same training metadata (category ids)
    model_meta = None
    for predictor in predictors:
        curr_meta = MetadataCatalog.get(predictor.cfg.DATASETS.TRAIN[0])
        if model_meta is not None:
            assert model_meta == curr_meta
        model_meta = curr_meta

    try:
        _ = findVideoLabels(fo_dataset, detection_field=gt_field)
    except ValueError:
        print(
            "your ground truths are empty: samples have no member '",
            gt_field,
            "' will set allowed_labels to empty list",
        )
        # allowed_labels = []

    # use custom id field if available
    if fo_dataset.get_field("custom_id"):
        id_field_name = "custom_id"
    else:
        id_field_name = "id"

    # LOOP OVER SAMPLES / START
    for sample in fo_dataset:
        print("USING VIDEO", sample.filepath)
        if use_pb:
            pb = ProgressBar(len(sample.frames))
        # read video!
        vid = cv2.VideoCapture(sample.filepath)
        npix_sum = 0
        nbits_sum = 0
        cc = 0
        # ITERATE OVER FRAMES / START
        for (
            n_frame
        ) in sample.frames:  # the iterator spits out frame numbers, nothing else
            cc += 1
            frame = sample.frames[n_frame]  # Frame object
            if frame.id is None:
                # ghost frames are created if you do sample.frames[num] with non-existent frame numbers (!)
                # https://github.com/voxel51/fiftyone/issues/2238
                print(
                    "void frame in fiftyone video dataset at frame number",
                    n_frame,
                    "will skip",
                )
                continue
            n_frame_file = (
                int(vid.get(cv2.CAP_PROP_POS_FRAMES)) + 1
            )  # next frame number from next call to vid.read()
            # print(frame.frame_number, n_frame_file)
            if frame.frame_number != n_frame_file:
                print("seeking to", frame.frame_number)
                ok = vid.set(cv2.CAP_PROP_POS_FRAMES, frame.frame_number - 1)
                if not ok:
                    vid.release()
                    print("could not seek to", frame.frame_number - 1)
                    return -1
            ok, arr = vid.read()
            if not ok:
                print("read failed at", frame.frame_number, "will try again")
                print("seeking to", frame.frame_number)
                ok = vid.set(cv2.CAP_PROP_POS_FRAMES, frame.frame_number - 1)
                if not ok:
                    vid.release()
                    print("could not seek to", frame.frame_number)
                    return -1
                ok, arr = vid.read()
                if not ok:
                    vid.release()
                    print("could not read video at frame", frame.frame_number)
                    return -1
            im = arr
            if im is None:
                print("FATAL: error reading video: got None array")
                vid.release()
                return -1
            # a unique tag for this frame image: the video sample tag plus the frame number
            tag = sample[id_field_name] + "_" + str(frame.frame_number)
            if encoder_decoder is not None:
                # before using a detector, crunch through
                # encoder/decoder
                try:
                    nbits, im_ = encoder_decoder.BGR(
                        im, tag=tag
                    )  # include a tag for cases where EncoderDecoder uses caching
                except Exception as e:
                    print("EncoderDecoder failed with '" + str(e) + "'")
                    print("Traceback:")
                    traceback.print_exc()
                    vid.release()
                    return -1
                if nbits < 0:
                    # there's something wrong with the encoder/decoder process,
                    # say, corrupt data from the VTMEncode bitstream etc.
                    print("EncoderDecoder returned error: will try using it once again")
                    nbits, im_ = encoder_decoder.BGR(im, tag=tag)
                    if nbits < 0:
                        print(
                            "EncoderDecoder returned error - again! Will abort calculation"
                        )
                        vid.release()
                        return -1
                # NOTE: use transformed image im_
                npix_sum += im_.shape[0] * im_.shape[1]
                nbits_sum += nbits
            else:
                im_ = im

            for e, predictor in enumerate(predictors):
                res = predictor(im_)
                field = predictor_fields[e]
                predictions = detectron251(
                    res,
                    model_catids=model_meta.thing_classes,
                    # allowed_labels=allowed_labels  # not needed, really
                )  # --> fiftyone Detections object
                """# could save nbits into each sample:
                if encoder_decoder is not None:
                    predictions.nbits = nbits
                """
                frame[field] = predictions
            frame.save()
            if use_pb:
                pb.update()
            # print(">>>", cc%use_print)
            if use_print > 0 and ((cc % use_print) == 0):
                print("frame: ", cc, "/", len(sample.frames), "of", sample.filepath)
        # ITERATE OVER FRAMES / STOP
        vid.release()
        if use_pb:
            pb.close()
        if encoder_decoder:  # alert user if EncoderDecoder class was requested
            if npix_sum < 1:
                print("error: number of pixels sum < 1 for video", sample.filepath)
            if nbits_sum < 1:
                print("error: number of bits sum < 1 for video", sample.filepath)
            sample["nbits_sum"] = nbits_sum
            sample["npix_sum"] = npix_sum
            sample.save()
    # LOOP OVER SAMPLES / STOP

    # calculate final bpp
    # this can also be calculated separately later, since we save the numbers
    # into the sample
    bpp = None
    if encoder_decoder:
        npix_grand_sum = 0
        nbits_grand_sum = 0
        cc = 0
        for sample in fo_dataset:
            cc += 1
            nbits_sum = sample["nbits_sum"]
            npix_sum = sample["npix_sum"]
            if npix_sum < 1 or nbits_sum < 1:
                print("WARNING: For bpp calculation, skipping video", sample.filepath)
                continue
            npix_grand_sum += npix_sum
            nbits_grand_sum += nbits_sum
        bpp = nbits_grand_sum / npix_grand_sum
    return bpp