# Copyright (c) 2021-2024, InterDigital Communications, Inc
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import torch.nn as nn
from compressai.layers import (
AttentionBlock,
ResidualBlock,
ResidualBlockUpsample,
ResidualBlockWithStride,
conv3x3,
subpel_conv3x3,
)
from compressai.registry import register_model
from .google import JointAutoregressiveHierarchicalPriors
# (stray "[docs]" source-link marker from the rendered documentation removed)
@register_model("cheng2020-anchor")
class Cheng2020Anchor(JointAutoregressiveHierarchicalPriors):
    """Anchor variant of the model from `"Learned Image Compression with
    Discretized Gaussian Mixture Likelihoods and Attention Modules"
    <https://arxiv.org/abs/2001.01568>`_, by Zhengxue Cheng, Heming Sun, Masaru
    Takeuchi, Jiro Katto.

    The transforms are assembled from residual blocks with small convolutions
    (3x3 and 1x1); up-sampling is done with sub-pixel convolutions.

    Args:
        N (int): Number of channels
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(self, N=192, **kwargs):
        # The parent is always constructed with M equal to N.
        super().__init__(N=N, M=N, **kwargs)

        # Channel widths used along the hyper synthesis path (h_s).
        hyper_mid = N * 3 // 2
        hyper_out = N * 2

        # Analysis transform: three strided residual stages followed by a
        # final stride-2 3x3 convolution.
        analysis_layers = [
            ResidualBlockWithStride(3, N, stride=2),
            ResidualBlock(N, N),
            ResidualBlockWithStride(N, N, stride=2),
            ResidualBlock(N, N),
            ResidualBlockWithStride(N, N, stride=2),
            ResidualBlock(N, N),
            conv3x3(N, N, stride=2),
        ]
        self.g_a = nn.Sequential(*analysis_layers)

        # Hyper analysis transform: 3x3 convolutions separated by LeakyReLU,
        # two of them with stride 2.
        hyper_analysis_layers = [
            conv3x3(N, N),
            nn.LeakyReLU(inplace=True),
            conv3x3(N, N),
            nn.LeakyReLU(inplace=True),
            conv3x3(N, N, stride=2),
            nn.LeakyReLU(inplace=True),
            conv3x3(N, N),
            nn.LeakyReLU(inplace=True),
            conv3x3(N, N, stride=2),
        ]
        self.h_a = nn.Sequential(*hyper_analysis_layers)

        # Hyper synthesis transform: widens N -> 3N/2 -> 2N channels, with
        # two sub-pixel convolutions along the way.
        hyper_synthesis_layers = [
            conv3x3(N, N),
            nn.LeakyReLU(inplace=True),
            subpel_conv3x3(N, N, 2),
            nn.LeakyReLU(inplace=True),
            conv3x3(N, hyper_mid),
            nn.LeakyReLU(inplace=True),
            subpel_conv3x3(hyper_mid, hyper_mid, 2),
            nn.LeakyReLU(inplace=True),
            conv3x3(hyper_mid, hyper_out),
        ]
        self.h_s = nn.Sequential(*hyper_synthesis_layers)

        # Synthesis transform: residual blocks alternating with up-sampling
        # residual blocks, ending in a sub-pixel convolution to 3 channels.
        synthesis_layers = [
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            ResidualBlock(N, N),
            subpel_conv3x3(N, 3, 2),
        ]
        self.g_s = nn.Sequential(*synthesis_layers)

    @classmethod
    def from_state_dict(cls, state_dict):
        """Create a model sized to match `state_dict` and load it.

        The channel count is read from the first dimension of the
        ``"g_a.0.conv1.weight"`` entry.

        Args:
            state_dict: Mapping of parameter names to tensors; must contain
                ``"g_a.0.conv1.weight"``.

        Returns:
            A new instance of ``cls`` with `state_dict` loaded.
        """
        first_conv_weight = state_dict["g_a.0.conv1.weight"]
        num_channels = first_conv_weight.size(0)
        model = cls(num_channels)
        model.load_state_dict(state_dict)
        return model
# (stray "[docs]" source-link marker from the rendered documentation removed)
@register_model("cheng2020-attn")
class Cheng2020Attention(Cheng2020Anchor):
    """Self-attention variant of the model from `"Learned Image Compression
    with Discretized Gaussian Mixture Likelihoods and Attention Modules"
    <https://arxiv.org/abs/2001.01568>`_, by Zhengxue Cheng, Heming Sun, Masaru
    Takeuchi, Jiro Katto.

    Combines self-attention with residual blocks with small convolutions
    (3x3 and 1x1); up-sampling is done with sub-pixel convolutions.

    Args:
        N (int): Number of channels
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(self, N=192, **kwargs):
        # The parent constructor runs first; only g_a and g_s are then
        # replaced below with versions that include attention blocks.
        super().__init__(N=N, **kwargs)

        # Analysis transform with attention after the second strided stage
        # and after the final stride-2 convolution.
        analysis_layers = [
            ResidualBlockWithStride(3, N, stride=2),
            ResidualBlock(N, N),
            ResidualBlockWithStride(N, N, stride=2),
            AttentionBlock(N),
            ResidualBlock(N, N),
            ResidualBlockWithStride(N, N, stride=2),
            ResidualBlock(N, N),
            conv3x3(N, N, stride=2),
            AttentionBlock(N),
        ]
        self.g_a = nn.Sequential(*analysis_layers)

        # Synthesis transform with attention at the input and after the
        # second up-sampling residual block.
        synthesis_layers = [
            AttentionBlock(N),
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            AttentionBlock(N),
            ResidualBlock(N, N),
            ResidualBlockUpsample(N, N, 2),
            ResidualBlock(N, N),
            subpel_conv3x3(N, 3, 2),
        ]
        self.g_s = nn.Sequential(*synthesis_layers)