compressai_vision.codecs#

class compressai_vision.codecs.Bypass(**kwargs)[source]#

Does no encoding/decoding whatsoever. Use for debugging.

decode(input: Dict, codec_output_dir: str = '', file_prefix: str = '', org_img_size: Dict | None = None, remote_inference=False)[source]#
encode(input: Dict, codec_output_dir: str = '', bitstream_name: str = '', file_prefix: str = '', remote_inference=False) Dict[source]#

Bypass encoder. Returns the input and calculates its raw size.

property eval_encode_type#
property qp_value#
training: bool#
class compressai_vision.codecs.HM(vision_model: BaseWrapper, dataset: Dict, **kwargs)[source]#

Encoder / Decoder class for HEVC - HM reference software

get_encode_cmd(inp_yuv_path: Path, qp: int, bitstream_path: Path, width: int, height: int, nb_frames: int = 1, parallel_encoding: bool = False, hash_check: int = 0, chroma_format: str = '400', input_bitdepth: int = 10, output_bitdepth: int = 0) List[Any][source]#

Generates the command to encode a video using the specified parameters.

Parameters:
  • inp_yuv_path (Path) – The path to the input YUV file.
  • qp (int) – The quantization parameter.
  • bitstream_path (Path) – The path to the output bitstream file.
  • width (int) – The width of the video.
  • height (int) – The height of the video.
  • nb_frames (int, optional) – The number of frames in the video. Defaults to 1.
  • parallel_encoding (bool, optional) – Whether to enable parallel encoding. Defaults to False.
  • hash_check (int, optional) – The hash check value. Defaults to 0.
  • chroma_format (str, optional) – The chroma format of the video. Defaults to “400”.
  • input_bitdepth (int, optional) – The bitdepth of the input video. Defaults to 10.
  • output_bitdepth (int, optional) – The bitdepth of the output video. Defaults to 0.

Returns:

The command line to encode the video.

Return type:

List[Any]

training: bool#
class compressai_vision.codecs.SIC_SFU2022(device: str, **kwargs)[source]#
decode(bitstream_path: Path | None = None, codec_output_dir: str = '', file_prefix: str = '') bool[source]#
encode(x: Dict, codec_output_dir, bitstream_name, file_prefix: str = '')[source]#
property eval_encode_type#
static get_padded_input_size(fSize, p)[source]#
static load_pretrained(model, filename)[source]#
property qp_value#
reset()[source]#
static update_model(model, loaded_state)[source]#
class compressai_vision.codecs.VTM(vision_model: BaseWrapper, dataset: Dict, **kwargs)[source]#

Encoder/Decoder class for VVC - VTM reference software

close_bitstream_file()[source]#
convert_input_to_yuv(input: Dict, file_prefix: str)[source]#

Converts the input image or video to YUV format using ffmpeg, or uses an existing YUV file if available.

input (Dict) – A dictionary containing information about the input. It should have the following keys:

  • file_names (List[str]): A list of file names for the input. If it contains more than one file, it is considered a video.

  • last_frame (int): The last frame number of the video.

  • frame_skip (int): The number of frames to skip in the video.

  • org_input_size (Dict[str, int]): A dictionary containing the width and height of the input.

Parameters:

file_prefix (str) – The prefix for the output file name.

Returns:

A tuple containing the following:
  • yuv_in_path (str): The path to the converted YUV input file.

  • nb_frames (int): The number of frames in the input.

  • frame_width (int): The width of the frames in the input.

  • frame_height (int): The height of the frames in the input.

  • file_prefix (str): The updated file prefix.

Return type:

Tuple[str, int, int, int, str]

Raises:

AssertionError – If the number of images in the input folder does not match the expected number of frames.

convert_yuv_to_pngs(output_file_prefix: str, dec_path: str, yuv_dec_path: Path, org_img_size: Dict | None = None, vcm_mode: bool = False)[source]#

Converts a YUV file to a series of PNG images using ffmpeg.

Parameters:
  • output_file_prefix (str) – The prefix of the output file name.
  • dec_path (str) – The path to the directory where the PNG images will be saved.
  • yuv_dec_path (Path) – The path to the input YUV file.
  • org_img_size (Dict, optional) – The original image size. Defaults to None.

Returns:

None

Raises:

AssertionError – If the video format is not YUV420.

decode(bitstream_path: Path | None = None, codec_output_dir: str = '', file_prefix: str = '', org_img_size: Dict | None = None, remote_inference=False, vcm_mode=False) Dict[source]#

Decodes the bitstream and returns the output features.

Parameters:
  • bitstream_path (Path) – The path to the bitstream file.

  • codec_output_dir (str) – The directory to store codec output.

  • file_prefix (str) – The prefix for the output files.

  • org_img_size (Dict) – The original image size.

  • remote_inference (bool) – Specifies if the remote inference pipeline is used.

Returns:

The dictionary of output features.

Return type:

Dict

encode(x: Dict, codec_output_dir, bitstream_name, file_prefix: str = '', remote_inference=False) Dict[source]#

Encodes the input data.

Parameters:
  • x (Dict) – The input data to be encoded.
  • codec_output_dir (str) – The directory where the output bitstream will be saved.
  • bitstream_name (str) – The name of the output bitstream.
  • file_prefix (str, optional) – The prefix to be added to the output file name. Defaults to “”.
  • remote_inference (bool, optional) – Indicates if the encoding is done remotely. Defaults to False.

Returns:

A dictionary containing the bytes per frame and the path to the output bitstream.

Return type:

dict

property eval_encode_type#
get_check_list_of_paths()[source]#
get_decode_cmd(yuv_dec_path: Path, bitstream_path: Path, output_bitdepth: int = 10) List[Any][source]#

Get command line for decoding a video bitstream with an external VTM decoder.

Parameters:
  • yuv_dec_path (Path) – The path to the output YUV file.
  • bitstream_path (Path) – The path to the video bitstream file.
  • output_bitdepth (int, optional) – The bitdepth of the output YUV file. Defaults to 10.

Returns:

command line arguments for decoding the video bitstream.

Return type:

List[Any]

get_encode_cmd(inp_yuv_path: Path, qp: int, bitstream_path: Path, width: int, height: int, nb_frames: int = 1, parallel_encoding: bool = False, hash_check: int = 0, chroma_format: str = '400', input_bitdepth: int = 10, output_bitdepth: int = 0) List[Any][source]#

Generates the command to encode a video file using VTM software.

Parameters:
  • inp_yuv_path (Path) – The path to the input YUV file.
  • qp (int) – The quantization parameter.
  • bitstream_path (Path) – The path to the output bitstream file.
  • width (int) – The width of the video.
  • height (int) – The height of the video.
  • nb_frames (int, optional) – The number of frames in the video. Defaults to 1.
  • parallel_encoding (bool, optional) – Whether to perform parallel encoding. Defaults to False.
  • hash_check (int, optional) – The hash check value. Defaults to 0.
  • chroma_format (str, optional) – The chroma format of the video. Defaults to “400”.
  • input_bitdepth (int, optional) – The bit depth of the input video. Defaults to 10.
  • output_bitdepth (int, optional) – The bit depth of the output video. Defaults to 0.

Returns:

the command line as a list.

Return type:

List[Any]

get_io_buffer_contents()[source]#
get_parcat_cmd(bitstream_path: Path) Tuple[List[Any], List[Path]][source]#

Returns a list of commands and bitstream lists needed to concatenate bitstream files.

Parameters:
  • bitstream_path (Path) – The path to the bitstream file.

Returns:

the command to concatenate the bitstream files in the folder.

Return type:

Tuple[List[Any], List[Path]]

open_bitstream_file(path, mode='rb')[source]#
property qp_value#
reset()[source]#
training: bool#
class compressai_vision.codecs.VVENC(vision_model: BaseWrapper, dataset_name: str = '', **kwargs)[source]#

Encoder / Decoder class for VVC - vvenc/vvdec software

get_encode_cmd(inp_yuv_path: Path, qp: int, bitstream_path: Path, width: int, height: int, nb_frames: int = 1) List[Any][source]#

Generate a command to encode a YUV video file using VVENC.

Parameters:
  • inp_yuv_path (Path) – The path to the input YUV video file.
  • qp (int) – The quantization parameter for the encoding process.
  • bitstream_path (Path) – The path to save the encoded bitstream.
  • width (int) – The width of the video frame.
  • height (int) – The height of the video frame.
  • nb_frames (int, optional) – The number of frames to encode (default is 1).

Returns:

A list of strings representing the encoding command.

Return type:

List[Any]

training: bool#
class compressai_vision.codecs.x264(vision_model: BaseWrapper, dataset: Dict, **kwargs)[source]#

Encoder/Decoder class for x264 - ffmpeg

decode(bitstream_path: Path | None = None, codec_output_dir: str = '', file_prefix: str = '', org_img_size: Dict | None = None, remote_inference=False) bool[source]#

Decodes a bitstream into video frames and extracts features from the decoded frames.

Parameters:
  • bitstream_path (Path) – The path to the input bitstream file.
  • codec_output_dir (str) – The directory where the codec output will be stored.
  • file_prefix (str) – The prefix to be used for the output files.

Returns:

dictionary of output features.

Return type:

Dict

encode(x: Dict, codec_output_dir, bitstream_name, file_prefix: str = '', remote_inference=False) bool[source]#

Encodes the input feature tensors and returns the encoded bitstream.

Parameters:
  • x (Dict) – The input data dictionary.
  • codec_output_dir (str) – The directory for codec output.
  • bitstream_name (str) – The name of the bitstream.
  • file_prefix (str, optional) – The prefix for the file. Defaults to “”.
  • remote_inference (bool) – Flag for remote inference.

Returns:

Number of bytes per frame and the bitstream path.

Return type:

Dict

property eval_encode_type#
get_decode_cmd(bitstream_path: Path, yuv_dec_path: Path) List[Any][source]#

Get the ffmpeg decode command (x264 lib) for the given bitstream path and YUV decode path.

Parameters:
  • bitstream_path (Path) – The path to the bitstream file.
  • yuv_dec_path (Path) – The path to save the YUV decoded file.

Returns:

The list containing the ffmpeg command for decoding.

Return type:

List[Any]

get_encode_cmd(inp_yuv_path: Path, qp: int, bitstream_path: Path, width: int, height: int, frmRate: int = 1) List[Any][source]#

Generates the ffmpeg command (x264 lib) for encoding the input YUV video to H.264 format with the specified parameters.

Parameters:
  • inp_yuv_path (Path) – The input YUV video file path.
  • qp (int) – The quantization parameter for the video encoding.
  • bitstream_path (Path) – The output bitstream file path.
  • width (int) – The width of the video frame.
  • height (int) – The height of the video frame.
  • frmRate (int, optional) – The frame rate of the video. Defaults to 1.

Returns:

The generated ffmpeg command for encoding the video.

Return type:

List[Any]

property qp_value#
training: bool#
class compressai_vision.codecs.x265(vision_model: BaseWrapper, dataset: Dict, **kwargs)[source]#

Encoder / Decoder class for x265 - ffmpeg

get_encode_cmd(inp_yuv_path: Path, qp: int, bitstream_path: Path, width: int, height: int, frmRate: int = 1) List[Any][source]#

Generates the ffmpeg command (x265 lib) for encoding the input YUV video to H.265 format with the specified parameters.

Parameters:
  • inp_yuv_path (Path) – The input YUV video file path.
  • qp (int) – The quantization parameter for the video encoding.
  • bitstream_path (Path) – The output bitstream file path.
  • width (int) – The width of the video frame.
  • height (int) – The height of the video frame.
  • frmRate (int, optional) – The frame rate of the video. Defaults to 1.

Returns:

The generated ffmpeg command for encoding the video.

Return type:

List[Any]

training: bool#