Source code for compressai.models.waseda

# Copyright (c) 2021-2024, InterDigital Communications, Inc
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
#   contributors may be used to endorse or promote products derived from this
#   software without specific prior written permission.

# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch.nn as nn

from compressai.layers import (
    AttentionBlock,
    ResidualBlock,
    ResidualBlockUpsample,
    ResidualBlockWithStride,
    conv3x3,
    subpel_conv3x3,
)
from compressai.registry import register_model

from .google import JointAutoregressiveHierarchicalPriors


[docs] @register_model("cheng2020-anchor") class Cheng2020Anchor(JointAutoregressiveHierarchicalPriors): """Anchor model variant from `"Learned Image Compression with Discretized Gaussian Mixture Likelihoods and Attention Modules" <https://arxiv.org/abs/2001.01568>`_, by Zhengxue Cheng, Heming Sun, Masaru Takeuchi, Jiro Katto. Uses residual blocks with small convolutions (3x3 and 1x1), and sub-pixel convolutions for up-sampling. Args: N (int): Number of channels """ def __init__(self, N=192, **kwargs): super().__init__(N=N, M=N, **kwargs) self.g_a = nn.Sequential( ResidualBlockWithStride(3, N, stride=2), ResidualBlock(N, N), ResidualBlockWithStride(N, N, stride=2), ResidualBlock(N, N), ResidualBlockWithStride(N, N, stride=2), ResidualBlock(N, N), conv3x3(N, N, stride=2), ) self.h_a = nn.Sequential( conv3x3(N, N), nn.LeakyReLU(inplace=True), conv3x3(N, N), nn.LeakyReLU(inplace=True), conv3x3(N, N, stride=2), nn.LeakyReLU(inplace=True), conv3x3(N, N), nn.LeakyReLU(inplace=True), conv3x3(N, N, stride=2), ) self.h_s = nn.Sequential( conv3x3(N, N), nn.LeakyReLU(inplace=True), subpel_conv3x3(N, N, 2), nn.LeakyReLU(inplace=True), conv3x3(N, N * 3 // 2), nn.LeakyReLU(inplace=True), subpel_conv3x3(N * 3 // 2, N * 3 // 2, 2), nn.LeakyReLU(inplace=True), conv3x3(N * 3 // 2, N * 2), ) self.g_s = nn.Sequential( ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), ResidualBlock(N, N), subpel_conv3x3(N, 3, 2), ) @classmethod def from_state_dict(cls, state_dict): """Return a new model instance from `state_dict`.""" N = state_dict["g_a.0.conv1.weight"].size(0) net = cls(N) net.load_state_dict(state_dict) return net
[docs] @register_model("cheng2020-attn") class Cheng2020Attention(Cheng2020Anchor): """Self-attention model variant from `"Learned Image Compression with Discretized Gaussian Mixture Likelihoods and Attention Modules" <https://arxiv.org/abs/2001.01568>`_, by Zhengxue Cheng, Heming Sun, Masaru Takeuchi, Jiro Katto. Uses self-attention, residual blocks with small convolutions (3x3 and 1x1), and sub-pixel convolutions for up-sampling. Args: N (int): Number of channels """ def __init__(self, N=192, **kwargs): super().__init__(N=N, **kwargs) self.g_a = nn.Sequential( ResidualBlockWithStride(3, N, stride=2), ResidualBlock(N, N), ResidualBlockWithStride(N, N, stride=2), AttentionBlock(N), ResidualBlock(N, N), ResidualBlockWithStride(N, N, stride=2), ResidualBlock(N, N), conv3x3(N, N, stride=2), AttentionBlock(N), ) self.g_s = nn.Sequential( AttentionBlock(N), ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), AttentionBlock(N), ResidualBlock(N, N), ResidualBlockUpsample(N, N, 2), ResidualBlock(N, N), subpel_conv3x3(N, 3, 2), )