# Source code for compressai_vision.pipelines.fo_vcm.pipeline.vtm

# Copyright (c) 2022-2024 InterDigital Communications, Inc
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
#   contributors may be used to endorse or promote products derived from this
#   software without specific prior written permission.

# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import glob
import logging
import os
import shlex
import shutil
import subprocess
from uuid import uuid4 as uuid

from compressai_vision.pipelines.fo_vcm.constant import inv_vf_per_scale, vf_per_scale
from compressai_vision.pipelines.fo_vcm.ffmpeg import FFMpeg
from compressai_vision.pipelines.fo_vcm.tools import dumpImageArray, test_command

from .base import EncoderDecoder


def removeFileIf(path) -> bool:
    """Remove the file at ``path`` if it exists.

    :param path: path of the file to delete

    Returns True if a file was removed, False if there was no file at
    ``path``.  Any other error raised by ``os.remove`` (e.g. permission
    problems, or ``path`` being a directory) is propagated to the caller.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        # nothing to remove: not an error for the callers' cleanup logic
        return False
    return True
class VTMEncoderDecoder(EncoderDecoder):
    """EncoderDecoder class for VTM encoder

    :param encoderApp: VTM encoder command
    :param decoderApp: VTM decoder command
    :param ffmpeg: ffmpeg command used for padding/scaling
    :param vtm_cfg: path of encoder cfg file
    :param qp: the default quantization parameter of the instance.  Integer
        from 0 to 63.  Default=47.
    :param scale: enable the VCM working group defined padding/scaling pre &
        post-processings steps.  Possible values: 100 (default), 75, 50, 25.
        Special value: None = ffmpeg scaling.  100 equals to a simple padding
        operation
    :param save: save intermediate steps into member ``saved`` (for
        debugging).  Default: False.
    :param base_path: existing directory under which a unique temporary
        working folder (``vtm_<uuid>``) is created when no cache is defined.
        Default: ``/dev/shm``.
    :param cache: (optional) define a directory where all encoded bitstreams
        are cached.  NOTE: If scale is defined, "scale/qp/" is appended to the
        cache path.  If no scale is defined, the appended path is "0/qp/"
    :param dump: debugging option: dump input, intermediate and output images
        to disk in local directory
    :param skip: if bitstream is found in cache, then do absolutely nothing.
        Good for restarting the bitstream generation.  default: False.  When
        enabled, method BGR returns (0, None) if the bitstream is in the cache
        and (-1, None) if it is not.  NOTE: do not use if you want to verify
        the bitstream files.
    :param keep: do not remove intermediate files nor the temporary working
        folder.  default: False.
    :param warn: warn always when a bitstream is generated.  default: False.

    This class tries always to use the cached bitstreams if they are available
    (for this you need to define a cache directory, see above).  If the
    bitstream is available in cache, it will be used and the encoding step is
    skipped.  Otherwise encoder is started to produce bitstream.

    Example:

    ::

        import cv2, os, logging
        from compressai_vision.evaluation.pipeline import VTMEncoderDecoder
        from compressai_vision.pipelines.fo_vcm.tools import getDataFile

        path="/path/to/VVCSoftware_VTM/bin"
        encoderApp=os.path.join(path, "EncoderAppStatic")
        decoderApp=os.path.join(path, "DecoderAppStatic")

        # enable debugging log to see explicitly all the steps
        loglev=logging.DEBUG
        quickLog("VTMEncoderDecoder", loglev)

        encdec=VTMEncoderDecoder(encoderApp=encoderApp, decoderApp=decoderApp, ffmpeg="ffmpeg", vtm_cfg=getDataFile("encoder_intra_vtm_1.cfg"), qp=47)
        nbits, img_hat = encdec.BGR(cv2.imread("fname.png"))

    You can enable caching and avoid re-encoding of images:

    ::

        encdec=VTMEncoderDecoder(encoderApp=encoderApp, decoderApp=decoderApp, ffmpeg="ffmpeg", vtm_cfg=getDataFile("encoder_intra_vtm_1.cfg"), qp=47, cache="/tmp/kokkelis")
        nbits, img_hat = encdec.BGR(cv2.imread("fname.png"), tag="a_unique_tag")

    Cache can be inspected with:

    ::

        encdec.dump()

    """

    def __init__(
        self,
        encoderApp=None,
        decoderApp=None,
        ffmpeg="ffmpeg",
        vtm_cfg=None,
        qp=47,
        scale=100,
        save=False,
        base_path="/dev/shm",
        cache=None,
        dump=False,
        skip=False,
        keep=False,
        warn=False,
    ):
        self.logger = logging.getLogger(self.__class__.__name__)
        assert encoderApp is not None, "please give encoder command"
        assert decoderApp is not None, "please give decoder command"
        assert vtm_cfg is not None, "please give VTM config file"
        self.scale = scale
        if self.scale is not None:
            assert self.scale in vf_per_scale.keys(), "wrong scaling factor"
        self.vtm_cfg = vtm_cfg
        self.qp = qp
        self.save = save
        self.base_path = base_path
        self.caching = False
        # NOTE: the flag must not be stored as "self.dump": an instance
        # attribute of that name would shadow the public dump() method
        self.dump_images = dump
        self.skip = skip
        self.keep = keep
        self.warn = warn
        self.save_folder = "vtm_encoder_decoder"
        if self.dump_images:
            self.logger.warning(
                "Will save intermediate images to local folder %s", self.save_folder
            )
            os.makedirs(self.save_folder, exist_ok=True)
        if cache is not None:
            if not os.path.isdir(cache):
                self.logger.info("creating %s", cache)
                # exist_ok: another worker may create the directory between
                # the check above and this call
                os.makedirs(cache, exist_ok=True)
            # let's make the life easier for the user
            # for caching, they won't remember to include the quality parameter
            # value into the path anyway (so that files corresponding to
            # different qps don't get mixed up), so we'll do it here:
            if scale is None:
                self.folder = os.path.join(cache, "0", str(self.qp))
            else:
                self.folder = os.path.join(cache, str(self.scale), str(self.qp))
            self.caching = True
        else:
            self.caching = False
            uid = str(uuid())  # safer than id(self)
            self.folder = os.path.join(self.base_path, "vtm_" + uid)

        # test commands
        self.encoderApp = test_command(encoderApp)
        self.decoderApp = test_command(decoderApp)
        try:
            self.ffmpeg_comm = test_command(ffmpeg)
        except FileNotFoundError as err:
            # AssertionError is kept as the exception type for existing callers
            raise AssertionError("cant find ffmpeg") from err

        assert os.path.isfile(vtm_cfg), "can't find " + vtm_cfg
        assert os.path.isdir(base_path), "can't find " + base_path

        # wrapper object used for all ffmpeg operations; the plain command
        # stays available in self.ffmpeg_comm
        self.ffmpeg = FFMpeg(self.ffmpeg_comm, self.logger)

        try:
            os.makedirs(self.folder, exist_ok=False)
        except FileExistsError:
            assert os.path.isdir(self.folder)
            self.logger.warning("folder %s exists already", self.folder)

        self.reset()

    def __str__(self):
        # self.ffmpeg is the FFMpeg wrapper object: print the command instead
        lines = [
            f"encoderApp: {self.encoderApp}",
            f"decoderApp: {self.decoderApp}",
            f"ffmpeg : {self.ffmpeg_comm}",
            f"qp : {self.qp}",
            f"path : {self.folder}",
        ]
        if self.caching:
            lines.append("CACHING ENABLED")
        return "\n".join(lines) + "\n"

    def dump(self):
        """Dumps files cached on disk by the VTMEncoderDecoder"""
        print("contents of", self.folder)
        for fname in glob.glob(os.path.join(self.folder, "*")):
            print(" ", fname)

    def getCacheDir(self):
        """Returns directory where temporary and cached files are saved"""
        return self.folder

    def __del__(self):
        if not hasattr(self, "caching"):
            return  # means ctor crashed before any state was set
        if getattr(self, "keep", False) or self.caching:
            # user wants to keep the files / cached bitstreams are never removed
            return
        folder = getattr(self, "folder", None)
        if folder is None or not os.path.isdir(folder):
            # ctor crashed before the working folder was defined or created
            return
        if len(glob.glob(os.path.join(folder, "*"))) > 5:
            # add some security here if user fat-fingers self.base_path --> self.folder
            self.logger.critical(
                "there are multiple files in %s : please remove manually", folder
            )
            return
        self.logger.debug("removing %s", folder)
        # a finalizer must not raise: ignore removal errors
        shutil.rmtree(folder, ignore_errors=True)

    def reset(self):
        """Reset encoder/decoder internal state: clears the saved intermediate
        images and the processed image counter."""
        super().reset()
        self.saved = {}
        self.imcount = 0

    def _run_command(self, args) -> tuple:
        """Run an external command given as an argument list.

        Returns a tuple (ok, stderr_text) where ok is True if the process
        exited with return code 0.
        """
        self.logger.debug(" ".join(args))
        proc = subprocess.run(
            args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # errors="replace": undecodable tool output must not crash error reporting
        return proc.returncode == 0, proc.stderr.decode("utf-8", errors="replace")

    def __VTMEncode__(
        self,
        inp_yuv_path=None,
        out_yuv_path=None,
        bin_path=None,
        width=None,
        height=None,
    ) -> bool:
        """Run the VTM encoder on a raw yuv file.

        :param inp_yuv_path: input yuv file (read by the encoder)
        :param out_yuv_path: reconstructed yuv file written by the encoder (not used)
        :param bin_path: bitstream file written by the encoder
        :param width: width of the input image
        :param height: height of the input image

        Returns True if the encoder exited with return code 0, False otherwise
        (the encoder's stderr is logged).
        """
        assert inp_yuv_path is not None
        assert out_yuv_path is not None
        assert bin_path is not None
        assert width is not None
        assert height is not None
        # the argument list is built directly (instead of formatting a string
        # and splitting it) so that paths with whitespace stay intact; only
        # the user-given application command is split
        args = [
            *shlex.split(str(self.encoderApp)),
            "-c",
            str(self.vtm_cfg),
            "-i",
            str(inp_yuv_path),  # IN
            "-b",
            str(bin_path),  # OUT
            "-o",
            str(out_yuv_path),  # OUT # NOT USED
            "-fr",
            "1",
            "-f",
            "1",
            "-wdt",
            str(width),
            "-hgt",
            str(height),
            "-q",
            str(self.qp),
            "--ConformanceWindowMode=1",
            "--InternalBitDepth=10",
        ]
        ok, stderr_text = self._run_command(args)
        if not ok:
            # failure is reported through the return value, not an exception
            self.logger.critical("VTM encode failed with %s", stderr_text)
            self.logger.critical("\nYOU PROBABLY SHOULD ENABLE FFMPEG SCALING\n")
        return ok

    def __VTMDecode__(self, bin_path=None, rec_yuv_path=None) -> bool:
        """Run the VTM decoder on a bitstream file.

        :param bin_path: bitstream file (read by the decoder)
        :param rec_yuv_path: reconstructed yuv file written by the decoder

        Returns True if the decoder exited with return code 0, False otherwise
        (the decoder's stderr is logged).
        """
        assert bin_path is not None
        assert rec_yuv_path is not None
        args = [
            *shlex.split(str(self.decoderApp)),
            "-b",
            str(bin_path),  # IN
            "-o",
            str(rec_yuv_path),  # OUT
        ]
        ok, stderr_text = self._run_command(args)
        if not ok:
            self.logger.critical("VTM decode failed with %s", stderr_text)
        return ok

    def _create_bitstream(
        self, padded, tag, fname_yuv, fname_yuv_out, fname_bin
    ) -> bool:
        """Convert ``padded`` (RGB image) to yuv with ffmpeg and encode it with
        VTM into ``fname_bin``.  Returns True on success, False otherwise."""
        self.logger.debug("Creating file %s with ffmpeg", fname_yuv)
        # 2. MPEG-VCM: ffmpeg -i {input_tmp_path} -f rawvideo -pix_fmt yuv420p -dst_range 1 {yuv_image_path}
        yuv_bytes = self.ffmpeg.ff_RGB24ToRAW(padded, "yuv420p")
        if yuv_bytes is None:
            self.logger.critical(
                "ffmpeg to yuv conversion failed: will skip image %s", tag
            )
            return False
        self.logger.debug(
            "writing %s output from ffmpeg to disk (for VTMEncode to read it)",
            fname_yuv,
        )
        with open(fname_yuv, "wb") as f:
            f.write(yuv_bytes)

        # 3. MPEG-VCM: {VTM_encoder_path} -c {VTM_AI_cfg} -i {yuv_image_path} -b {bin_image_path}
        # -o {temp_yuv_path} -fr 1 -f 1 -wdt {padded_wdt} -hgt {padded_hgt} -q {qp} --ConformanceWindowMode=1 --InternalBitDepth=10
        log = self.logger.warning if self.warn else self.logger.debug
        log("creating bitstream %s with VTMEncode from scratch", fname_bin)
        ok = self.__VTMEncode__(
            inp_yuv_path=fname_yuv,
            out_yuv_path=fname_yuv_out,
            bin_path=fname_bin,
            width=padded.shape[1],
            height=padded.shape[0],
        )
        # cleanup
        if not self.keep:
            self.logger.debug("removing %s from ffmpeg", fname_yuv)
            removeFileIf(fname_yuv)
            self.logger.debug("removing %s from VTMEncode", fname_yuv_out)
            removeFileIf(fname_yuv_out)
        if (not ok) or (not os.path.isfile(fname_bin)):
            self.logger.critical("VTMEncode failed: will skip image %s", tag)
            # a failed encode may leave a partial bitstream behind: remove it
            # so that it is never picked up later as a valid cached file
            removeFileIf(fname_bin)
            return False
        return True

    def BGR(self, bgr_image, tag=None) -> tuple:  # noqa: C901
        """
        :param bgr_image: numpy BGR image (y,x,3)
        :param tag: a string that can be used to identify & cache images
            (optional).  Necessary if you're using caching

        Returns BGR image that has gone through VTM encoding and decoding
        process and all other operations as defined by MPEG/VCM.

        Returns a tuple of (nbits, transformed_bgr_image).  If any step of the
        pipeline fails, returns (-1, None).  If ``skip`` is enabled, nothing is
        encoded or decoded: returns (0, None) if the bitstream is found in the
        cache and (-1, None) if it is not.

        This method is somewhat complex: in addition to perform the necessary
        image transformation, it also handles caching of bitstreams,
        inspection if bitstreams exist, etc.  Error conditions from ffmpeg
        and/or from VTMEncoder/Decoder must be taken correctly into account.

        VCM working group ops:

        ::

            padded_hgt = math.ceil(height/2)*2
            padded_wdt = math.ceil(width/2)*2
            1. ffmpeg vf -i {input_tmp_path} -o {input_padded_tmp_path}
               vf depends on the scale:
                for 100%: -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" # NOTE: simply padding
                for 75%: -vf "scale=ceil(iw*3/8)*2:ceil(ih*3/8)*2"
                for 50%: -vf "scale=ceil(iw/4)*2:ceil(ih/4)*2"
                for 25%: -vf "scale=ceil(iw/8)*2:ceil(ih/8)*2"
            2. ffmpeg -i {input_padded_tmp_path} -f rawvideo -pix_fmt yuv420p -dst_range 1 {yuv_image_path}
            3. {VTM_encoder_path} -c {VTM_AI_cfg} -i {yuv_image_path} -b {bin_image_path} -o {temp_yuv_path} -fr 1 -f 1 -wdt {padded_wdt} -hgt {padded_hgt} -q {qp} --ConformanceWindowMode=1 --InternalBitDepth=10
            4. {VTM_decoder_path} -b {bin_image_path} -o {rec_yuv_path}
            5. ffmpeg -y -f rawvideo -pix_fmt yuv420p10le -s {padded_wdt}x{padded_hgt} -src_range 1 -i {rec_yuv_path} -frames 1 -pix_fmt rgb24 {rec_png_path}
            6. ffmpeg -y -i {rec_png_path} -vf "crop={width}:{height}" {rec_image_path} # NOTE: This can be done only if scale=100%, i.e. to remove padding

        """
        # explicit tags provided by the user are used to identify the cached
        # bitstreams (instead of e.g. a hash of the image)
        if self.caching:
            assert tag is not None, "caching requested, but got no tag"
            fname_bin = os.path.join(self.folder, "bin_" + tag)  # bin produced by VTM
        else:
            # if no caching, we have a unique directory where all this stuff
            # goes, so no need to separate the files with uuids
            tag = ""
            fname_bin = os.path.join(self.folder, "bin")  # bin produced by VTM

        if self.skip:
            # quick check only: see if the bitstream exists, do nothing else
            assert self.caching, "skip requires caching enabled"
            if os.path.isfile(fname_bin) and (os.path.getsize(fname_bin) > 5):
                self.logger.debug(
                    "Found file %s from cache & skip enabled: returning 0, None",
                    fname_bin,
                )
                return 0, None
            self.logger.debug(
                "Couldn't find file %s from cache (or its zero-length) & skip enabled: returning -1, None",
                fname_bin,
            )
            return -1, None

        uid = tag  # the tag is supposedly unique, so use that to mark all files
        # yuv produced by ffmpeg
        fname_yuv = os.path.join(self.folder, "tmp_%s.yuv" % (uid))
        # yuv produced by VTMEncode.. not used
        fname_yuv_out = os.path.join(self.folder, "nada_%s.yuv" % (uid))
        # yuv produced by VTMDecode
        fname_rec = os.path.join(self.folder, "rec_%s.yuv" % (uid))

        rgb_image = bgr_image[:, :, [2, 1, 0]]  # BGR --> RGB

        # apply ffmpeg commands as defined in MPEG/VCM group docs
        # each submethod should cite the correct command
        do_scaling = self.scale is not None

        # rgb_image      original img
        # padded         scaled image (1)
        # padded_hat     encoded & decoded with VTM
        # rgb_image_hat  scaling removed (1)
        if self.dump_images:
            dumpImageArray(rgb_image, self.save_folder, "original_" + uid + ".png")

        if do_scaling:
            # 1. MPEG-VCM: ffmpeg -i {input_jpg_path} -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" {input_tmp_path}
            vf = vf_per_scale[self.scale]
            padded = self.ffmpeg.ff_op(rgb_image, vf)
            if padded is None:
                self.logger.critical(
                    "ffmpeg scale operation failed: will skip image %s", tag
                )
                return -1, None
        else:
            padded = rgb_image

        if self.dump_images:
            dumpImageArray(padded, self.save_folder, "padded_" + uid + ".png")

        if (not self.caching) or (not os.path.isfile(fname_bin)):
            # steps 2 & 3: no usable bitstream available, so create it
            if not self._create_bitstream(
                padded, tag, fname_yuv, fname_yuv_out, fname_bin
            ):
                return -1, None
        else:
            self.logger.debug("Using existing file %s from cache", fname_bin)

        # calculate nbits
        self.logger.debug("reading %s from VTMEncode", fname_bin)
        n_bytes = os.path.getsize(fname_bin)
        if n_bytes < 1:
            self.logger.critical(
                "Empty output from VTMEncode: will skip image %s & remove the bitstream file",
                tag,
            )
            removeFileIf(fname_bin)
            return -1, None
        nbits = n_bytes * 8  # total number of bits (not bits per pixel)

        # 4. MPEG-VCM: {VTM_decoder_path} -b {bin_image_path} -o {rec_yuv_path}
        ok = self.__VTMDecode__(bin_path=fname_bin, rec_yuv_path=fname_rec)
        if (not ok) or (not os.path.isfile(fname_rec)):
            self.logger.critical(
                "VTMDecode failed: will skip image %s & remove the bitstream file", tag
            )
            removeFileIf(fname_rec)
            removeFileIf(fname_bin)
            return -1, None

        self.logger.debug("reading %s from VTMDecode", fname_rec)
        with open(fname_rec, "rb") as f:
            yuv_bytes_hat = f.read()

        if len(yuv_bytes_hat) < 1:
            self.logger.critical(
                "Empty output from VTMDecode: will skip image %s & remove the bitstream file",
                tag,
            )
            removeFileIf(fname_rec)
            removeFileIf(fname_bin)
            return -1, None

        if not self.keep:
            self.logger.debug("removing %s from VTMDecode", fname_rec)
            removeFileIf(fname_rec)  # cleanup

        if not self.caching and not self.keep:
            self.logger.debug("removing %s from VTMEncode", fname_bin)
            removeFileIf(fname_bin)

        # 5. MPEG-VCM: ffmpeg -y -f rawvideo -pix_fmt yuv420p10le -s {padded_wdt}x{padded_hgt} -src_range 1 -i {rec_yuv_path} -frames 1 -pix_fmt rgb24 {rec_png_path}
        form = "yuv420p10le"
        padded_hat = self.ffmpeg.ff_RAWToRGB24(
            yuv_bytes_hat, form=form, width=padded.shape[1], height=padded.shape[0]
        )
        if padded_hat is None:
            self.logger.critical(
                "ffmpeg raw->rgb24 operation failed: will skip image %s & remove bitstream file (if cached)",
                tag,
            )
            removeFileIf(fname_bin)
            return -1, None

        if self.dump_images:
            dumpImageArray(padded_hat, self.save_folder, "padded_hat_" + uid + ".png")

        if do_scaling:
            # was scaled, so need to backscale
            # NOTE: this can only be done to the 100% "scaling" which is
            # nothing else than just cropping, so we "backcrop" & remove the
            # added borders
            # 6. MPEG-VCM: ffmpeg -y -i {rec_png_path} -vf "crop={width}:{height}" {rec_image_path}
            vf = inv_vf_per_scale[self.scale]
            rgb_image_hat = self.ffmpeg.ff_op(
                padded_hat,
                vf.format(width=rgb_image.shape[1], height=rgb_image.shape[0]),
            )
            if rgb_image_hat is None:
                self.logger.critical(
                    "ffmpeg crop operation failed: will skip image %s & remove bitstream file (if cached)",
                    tag,
                )
                removeFileIf(fname_bin)
                return -1, None
        else:
            rgb_image_hat = padded_hat

        if self.dump_images:
            dumpImageArray(
                rgb_image_hat, self.save_folder, "rgb_image_hat_" + uid + ".png"
            )

        if self.save:
            self.saved = {
                "rgb_image": rgb_image,
                "padded": padded,
                "padded_hat": padded_hat,
                "rgb_image_hat": rgb_image_hat,
            }
        else:
            self.saved = {}

        bgr_image_hat = rgb_image_hat[:, :, [2, 1, 0]]  # RGB --> BGR
        self.logger.debug(
            "input & output sizes: %s %s. nbits = %s",
            bgr_image.shape,
            bgr_image_hat.shape,
            nbits,
        )
        self.imcount += 1
        return nbits, bgr_image_hat