Source code for compressai.datasets.pointcloud.semantic_kitti

# Copyright (c) 2021-2024, InterDigital Communications, Inc
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted (subject to the limitations in the disclaimer
# below) provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of InterDigital Communications, Inc nor the names of its
#   contributors may be used to endorse or promote products derived from this
#   software without specific prior written permission.

# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import shutil

from pathlib import Path

import numpy as np

from compressai.datasets.cache import CacheDataset
from compressai.datasets.utils import download_url, hash_file
from compressai.registry import register_dataset


@register_dataset("SemanticKittiDataset")
class SemanticKittiDataset(CacheDataset):
    """SemanticKITTI dataset.

    The KITTI dataset, introduced by [Geiger2012]_, contains 3D point cloud
    sequences (i.e. video) of LiDAR sensor data from the perspective of a
    driving vehicle.

    The SemanticKITTI dataset, introduced by [Behley2019]_ and [Behley2021]_,
    provides semantic annotation of all 22 sequences from the odometry task
    [Odometry_KITTI]_ of KITTI. See the [ProjectPage_SemanticKITTI]_ project
    page for a visualization.

    Note that the test set is unlabelled, and must be evaluated on the
    server, as mentioned at [ProjectPageTasks_SemanticKITTI]_.

    The ``semantic_index`` is a number between 0 and 33 (inclusive), which
    can be used as the semantic label for each point.

    See also: [PapersWithCode_SemanticKITTI]_.

    References:

        .. [Geiger2012] `"Are we ready for Autonomous Driving? The KITTI
            Vision Benchmark Suite,"
            <https://www.cvlibs.net/publications/Geiger2012CVPR.pdf>`_,
            by Andreas Geiger, Philip Lenz, and Raquel Urtasun, CVPR 2012.

        .. [Behley2019] `"SemanticKITTI: A Dataset for Semantic Scene
            Understanding of LiDAR Sequences,"
            <https://arxiv.org/abs/1904.01416>`_, by Jens Behley,
            Martin Garbade, Andres Milioto, Jan Quenzel, Sven Behnke,
            Cyrill Stachniss, and Juergen Gall, ICCV 2019.

        .. [Behley2021] `"Towards 3D LiDAR-based semantic scene understanding
            of 3D point cloud sequences: The SemanticKITTI Dataset,"
            <https://journals.sagepub.com/doi/10.1177/02783649211006735>`_,
            by Jens Behley, Martin Garbade, Andres Milioto, Jan Quenzel,
            Sven Behnke, Jürgen Gall, and Cyrill Stachniss, IJRR 2021.

        .. [ProjectPage_SemanticKITTI] `Project page (SemanticKITTI)
            <http://www.semantic-kitti.org/>`_

        .. [ProjectPageTasks_SemanticKITTI] `Project page: Tasks
            (SemanticKITTI) <http://www.semantic-kitti.org/tasks.html>`_

        .. [Odometry_KITTI] `"Visual Odometry / SLAM Evaluation 2012"
            <https://www.cvlibs.net/datasets/kitti/eval_odometry.php>`_

        .. [PapersWithCode_SemanticKITTI] `PapersWithCode: SemanticKITTI
            <https://paperswithcode.com/dataset/semantickitti>`_
    """

    URLS = [
        "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_calib.zip",
        "https://s3.eu-central-1.amazonaws.com/avg-kitti/data_odometry_velodyne.zip",
        "http://www.semantic-kitti.org/assets/data_odometry_labels.zip",
        "http://www.semantic-kitti.org/assets/data_odometry_voxels_all.zip",
        "http://www.semantic-kitti.org/assets/data_odometry_voxels.zip",
    ]

    HASHES = [
        "fa45d2bbff828776e6df689b161415fb7cd719345454b6d3567c2ff81fa4d075",  # data_odometry_calib.zip
        "062a45667bec6874ac27f733bd6809919f077265e7ac0bb25ac885798fa85ab5",  # data_odometry_velodyne.zip
        "408ec524636a393bae0288a0b2f48bf5418a1af988e82dee8496f89ddb7e6dda",  # data_odometry_labels.zip
        "10f333faa63426a519a573fbf0b4e3b56513511af30583473fa6a5782e037f3a",  # data_odometry_voxels_all.zip
        "d92c253e88e5e30c0a0b88f028510760e1db83b7e262d75c5931bf9b8d6dd51b",  # data_odometry_voxels.zip
    ]

    # Suggested splits:
    SEQUENCES = {
        "train": (0, 1, 2, 3, 4, 5, 6, 7, 9, 10),
        "valid": (8,),
        "infer": (8,),
        "test": (11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),  # Unlabelled.
    }

    # fmt: off
    NUM_SAMPLES_PER_SEQUENCE = [
        4541, 1101, 4661, 801, 271, 2761, 1101, 1101, 4071, 1591, 1201,
        921, 1061, 3281, 631, 1901, 1731, 491, 1801, 4981, 831, 2721,
    ]
    # fmt: on

    RAW_SEMANTIC_INDEX_TO_LABEL = {
        0: "unlabeled",
        1: "outlier",
        10: "car",
        11: "bicycle",
        13: "bus",
        15: "motorcycle",
        16: "on-rails",
        18: "truck",
        20: "other-vehicle",
        30: "person",
        31: "bicyclist",
        32: "motorcyclist",
        40: "road",
        44: "parking",
        48: "sidewalk",
        49: "other-ground",
        50: "building",
        51: "fence",
        52: "other-structure",
        60: "lane-marking",
        70: "vegetation",
        71: "trunk",
        72: "terrain",
        80: "pole",
        81: "traffic-sign",
        99: "other-object",
        252: "moving-car",
        253: "moving-bicyclist",
        254: "moving-person",
        255: "moving-motorcyclist",
        256: "moving-on-rails",
        257: "moving-bus",
        258: "moving-truck",
        259: "moving-other-vehicle",
    }

    # Remap the sparse raw label ids to contiguous indices 0..33
    # (e.g. raw 10 "car" -> 2, raw 40 "road" -> 12).
    RAW_SEMANTIC_INDEX_TO_SEMANTIC_INDEX = {
        idx: i for i, idx in enumerate(RAW_SEMANTIC_INDEX_TO_LABEL)
    }

    def __init__(
        self,
        root=None,
        cache_root=None,
        split="train",
        split_name=None,
        sequences=SEQUENCES["train"],
        pre_transform=None,
        transform=None,
        download=True,
    ):
        if cache_root is None:
            assert root is not None
            cache_root = f"{str(root).rstrip('/')}_cache"

        self.root = Path(root) if root else None
        self.cache_root = Path(cache_root)
        self.split = split
        self.split_name = split if split_name is None else split_name
        self.sequences = sequences

        if download and self.root:
            self.download()

        super().__init__(
            cache_root=self.cache_root / self.split_name,
            pre_transform=pre_transform,
            transform=transform,
        )

        self._ensure_cache()

    def download(self, force=False):
        if not force and self.root.exists():
            return
        tmpdir = self.root.parent / "tmp"
        os.makedirs(tmpdir, exist_ok=True)
        for expected_hash, url in zip(self.HASHES, self.URLS):
            filepath = download_url(
                url, tmpdir, check_certificate=False, overwrite=force
            )
            shutil.unpack_archive(filepath, tmpdir)
            assert expected_hash == hash_file(filepath, method="sha256")
        shutil.move(tmpdir / "dataset", self.root)

    def _get_items(self):
        return sorted(
            x
            for i in self.sequences
            for x in self.root.glob(f"**/{i:02}/velodyne/*.bin")
        )

    def _load_item(self, path):
        path_prefix, sequence_index, file_index = self._parse_path(path)
        assert str(path) == f"{path_prefix}{sequence_index}/velodyne/{file_index}.bin"

        # Each point is stored as four float32 values: x, y, z, remission.
        point_data = np.fromfile(path, dtype=np.float32).reshape(-1, 4)

        # Each label record is two little-endian uint16 values per point:
        # [semantic label, instance id].
        label_data = (
            np.fromfile(
                f"{path_prefix}{sequence_index}/labels/{file_index}.label",
                dtype="<u2",
            )
            .reshape(-1, 2)
            .astype(np.int16)
        )

        return {
            "file_index": np.array([file_index], dtype=np.int32),
            "sequence_index": np.array([sequence_index], dtype=np.int32),
            "raw_semantic_index": label_data[:, 0],
            "semantic_index": np_remap(
                label_data[:, 0], self.RAW_SEMANTIC_INDEX_TO_SEMANTIC_INDEX
            ),
            "instance_index": label_data[:, 1],
            "pos": point_data[:, :3],
            "remission": point_data[:, 3, None],
        }

    def _parse_path(self, path):
        pattern = (
            r"^(?P<path_prefix>.*?/?)"
            r"(?P<sequence_index>\d+)/"
            r"velodyne/"
            r"(?P<file_index>\d{6})\.\w+$"
        )
        match = re.match(pattern, str(path))
        if match is None:
            raise ValueError(f"Could not parse path: {path}")
        path_prefix = match.group("path_prefix")
        sequence_index = match.group("sequence_index")
        file_index = match.group("file_index")
        return path_prefix, sequence_index, file_index
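
# Expected on-disk layout, sketched from the glob pattern in `_get_items` and
# the label path built in `_load_item`. Only the "velodyne"/"labels" directory
# names and the zero-padded indices are actually enforced by the code; the
# intermediate "sequences" directory is the usual SemanticKITTI convention:
#
#   <root>/sequences/08/velodyne/000000.bin     # N x 4 float32: x, y, z, remission
#   <root>/sequences/08/labels/000000.label     # N x 2 uint16: semantic, instance
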
def np_remap(arr, d):
    # Remap every value of `arr` through the dict `d`, vectorized via np.unique.
    values, inverse = np.unique(arr, return_inverse=True)
    values = np.array([d[x] for x in values], dtype=arr.dtype)
    return values[inverse].reshape(arr.shape)
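
# A minimal usage sketch, not part of the original module. The dataset root in
# the commented-out construction is a hypothetical path and assumes the
# SemanticKITTI archives have already been downloaded and extracted there.
if __name__ == "__main__":
    # Runnable check of the remapping helper: sparse raw ids become contiguous
    # semantic indices (10 "car" -> 2, 40 "road" -> 12, 252 "moving-car" -> 26).
    raw = np.array([10, 40, 252], dtype=np.int16)
    print(np_remap(raw, SemanticKittiDataset.RAW_SEMANTIC_INDEX_TO_SEMANTIC_INDEX))

    # Hypothetical construction for the suggested validation split (sequence 08):
    # dataset = SemanticKittiDataset(
    #     root="/data/semantic_kitti/dataset",  # hypothetical path
    #     split="valid",
    #     sequences=SemanticKittiDataset.SEQUENCES["valid"],
    #     download=False,  # assumes the data is already in place
    # )
    # sample = dataset[0]
    # print(sample["pos"].shape, sample["semantic_index"].shape)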