diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..feae5c1921703133caa7d3cb82a79e2be6e85ab6 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__ \ No newline at end of file diff --git a/python/README.md b/python/README.md index d4b687fcab3830561bc24744f4101b851445d4a3..a2918492f1622494b35cf1c965cb306efa0b2524 100644 --- a/python/README.md +++ b/python/README.md @@ -1,5 +1,6 @@ Python support for `.ftl` files. At the moment, only reading RGB channels -(left/right) supported. +(left/right) is supported. Non-parallel decoding of 8 streams has a frame rate +of ~15 fps on i7-9700K. Required **Python** modules: @@ -10,3 +11,79 @@ Required **Python** modules: Required libraries * libde265 (available on most Linux distributions) + +## Example + +The example reads from `input.ftl` and writes to `output.ftl`. Calibration and +pose are copied directly (the same method can be used for other channels as well). +The program copies left and right frames of source 0 to a new file (and re-encodes +them as JPG) when both frames are available. 
+ +```python +import ftl +from ftl import types + +reader = ftl.FTLStreamReader("./input.ftl") +writer = ftl.FTLStreamWriter("./output") + +source_id = 0 +fps = 25 +frame_t = int(1000.0/fps) +timestamp_out = 0 +timestamp_in = 0 + +im_left = None +im_right = None + +while reader.read(): + channel = reader.get_channel_type() + timestamp = reader.get_timestamp() + frame = reader.get_frame() + + if reader.get_source_id() != source_id: + # not interested in this source, skip + continue + + if channel in (types.Channel.Calibration, types.Channel.Configuration): + # copy calibration and pose (but replace timestamp with new value) + + sp, p = reader.get_raw() + sp = sp._replace(timestamp=timestamp_out) + writer.add_raw(sp, p) + continue + + if channel not in (types.Channel.Left, types.Channel.Right): + # only interested in left and right frames + continue + + if frame is None: + # no frame if decoding failed + continue + + if timestamp_in != timestamp: + # new timestamp, process available frames + + if not (im_left is None or im_right is None): + # save frames only if both of them were found for this timestamp + + # Note: In this example the channel is re-encoded. If the channel content + # is not modified, lossy channels should be passed directly + # (using add_raw() in the same way as for calibration/pose) instead of + # re-encoding them. + + writer.add_frame(timestamp_out, 0, types.Channel.Left, 2, + types.codec_t.JPG, im_left) + writer.add_frame(timestamp_out, 0, types.Channel.Right, 2, + types.codec_t.JPG, im_right) + + + timestamp_out += frame_t + timestamp_in = timestamp + im_left, im_right = None, None + + if channel is types.Channel.Left: + im_left = frame + else: + im_right = frame + +``` diff --git a/python/ftl/__init__.py b/python/ftl/__init__.py index 963e374bd350bcaabcd832778d6311f19a39e72f..38e017612ea53ec9e65417aa9a56729c8f10baf0 100644 --- a/python/ftl/__init__.py +++ b/python/ftl/__init__.py @@ -1 +1,4 @@ -from . 
ftlstream import FTLStream \ No newline at end of file +from . ftlstream import FTLStreamReader, FTLStreamWriter +from . misc import disparity_to_depth + +from . import ftltypes as types diff --git a/python/ftl/ftlstream.py b/python/ftl/ftlstream.py index 7ab436e0063b9e30e505398f359efd688aad1f68..6031e757d39b915f76db997952f17a3d77ffebd0 100644 --- a/python/ftl/ftlstream.py +++ b/python/ftl/ftlstream.py @@ -2,9 +2,17 @@ import msgpack import numpy as np +import sys +import struct +from warnings import warn from enum import IntEnum from collections import namedtuple -from . libde265 import Decoder + +from . misc import is_iframe +from . import ftltypes as ftl +from . import libde265 + +_calib_fmt = "@ddddIIdddd" try: import cv2 as cv @@ -13,10 +21,13 @@ try: return cv.cvtColor(img, cv.COLOR_YCrCb2RGB) except ImportError: + warn("OpenCV not available. OpenCV required for full functionality.") + def _ycrcb2rgb(img): ''' YCrCb to RGB, based on OpenCV documentation definition. - Note: It seems this implementation is not perfectly equivalent to OpenCV's + Note: It seems this implementation is not perfectly equivalent to + OpenCV's ''' rgb = np.zeros(img.shape, np.float) @@ -32,116 +43,126 @@ except ImportError: return rgb.round().astype(np.uint8) -# FTL definitions - -_packet = namedtuple("Packet", ["codec", "definition", "block_total", "block_number", "flags", "data"]) -_stream_packet = namedtuple("StreamPacket", ["timestamp", "streamID", "chanel_count", "channel"]) - -_definition_t = { - 0 : (), - 1 : (), - 2 : (1080, 1920), - 3 : (720, 1280), - 4 : (), - 5 : (), - 6 : (), - 7 : (), - 8 : () -} - -class NALType(IntEnum): - CODED_SLICE_TRAIL_N = 0 - CODED_SLICE_TRAIL_R = 1 - - CODED_SLICE_TSA_N = 2 - CODED_SLICE_TSA_R = 3 - - CODED_SLICE_STSA_N = 4 - CODED_SLICE_STSA_R = 5 - - CODED_SLICE_RADL_N = 6 - CODED_SLICE_RADL_R = 7 - - CODED_SLICE_RASL_N = 8 - CODED_SLICE_RASL_R = 9 - - RESERVED_VCL_N10 = 10 - RESERVED_VCL_R11 = 11 - RESERVED_VCL_N12 = 12 - RESERVED_VCL_R13 = 13 
- RESERVED_VCL_N14 = 14 - RESERVED_VCL_R15 = 15 - - CODED_SLICE_BLA_W_LP = 16 - CODED_SLICE_BLA_W_RADL = 17 - CODED_SLICE_BLA_N_LP = 18 - CODED_SLICE_IDR_W_RADL = 19 - CODED_SLICE_IDR_N_LP = 20 - CODED_SLICE_CRA = 21 - RESERVED_IRAP_VCL22 = 22 - RESERVED_IRAP_VCL23 = 23 - - RESERVED_VCL24 = 24 - RESERVED_VCL25 = 25 - RESERVED_VCL26 = 26 - RESERVED_VCL27 = 27 - RESERVED_VCL28 = 28 - RESERVED_VCL29 = 29 - RESERVED_VCL30 = 30 - RESERVED_VCL31 = 31 - - VPS = 32 - SPS = 33 - PPS = 34 - ACCESS_UNIT_DELIMITER = 35 - EOS = 36 - EOB = 37 - FILLER_DATA = 38 - PREFIX_SEI = 39 - SUFFIX_SEI = 40 - - RESERVED_NVCL41 = 41 - RESERVED_NVCL42 = 42 - RESERVED_NVCL43 = 43 - RESERVED_NVCL44 = 44 - RESERVED_NVCL45 = 45 - RESERVED_NVCL46 = 46 - RESERVED_NVCL47 = 47 - UNSPECIFIED_48 = 48 - UNSPECIFIED_49 = 49 - UNSPECIFIED_50 = 50 - UNSPECIFIED_51 = 51 - UNSPECIFIED_52 = 52 - UNSPECIFIED_53 = 53 - UNSPECIFIED_54 = 54 - UNSPECIFIED_55 = 55 - UNSPECIFIED_56 = 56 - UNSPECIFIED_57 = 57 - UNSPECIFIED_58 = 58 - UNSPECIFIED_59 = 59 - UNSPECIFIED_60 = 60 - UNSPECIFIED_61 = 61 - UNSPECIFIED_62 = 62 - UNSPECIFIED_63 = 63 - INVALID = 64 - -def get_NAL_type(data): - if not isinstance(data, bytes): - raise ValueError("expected bytes") - - return NALType((data[4] >> 1) & 0x3f) +class FTLStreamWriter: + def __init__(self, file): + self._file = open(file, "wb") + self._file.write(bytes(ord(c) for c in "FTLF")) # magic + self._file.write(bytes([2])) # version + self._file.write(bytes([0]*64)) # reserved + + self._packer = msgpack.Packer(strict_types=False, use_bin_type=True) + + def __del__(self): + self.close() + + def close(self): + self._file.close() + + def add_raw(self, sp, p): + if len(sp) != len(ftl.StreamPacket._fields) or len(p) != len(ftl.Packet._fields): + raise ValueError("invalid input") + + self._file.write(self._packer.pack((sp, p))) + self._file.flush() + + def add_frame(self, timestamp, source, channel, channel_count, codec, data, + definition=None, flags=0, encode=True): + ''' Write 
frame to file. If encode is False (data already encoded), + definition needs to be specified. + ''' + + if source < 0: + raise ValueError("invalid source id") + + if channel not in ftl.Channel: + raise ValueError("invalid channel") + + if codec not in ftl.codec_t: + raise ValueError("invalid codec") + + if encode: + if definition is None: + definition = ftl.get_definition(data.shape) + + if definition is None: + raise ValueError("unsupported resolution") + + if definition != ftl.get_definition(data.shape): + # todo: could replace definition or scale + raise ValueError("definition does not match frame resolution") + + if codec == ftl.codec_t.PNG: + if ftl.is_float_channel(channel): + # scaling always same (???) + data = data.astype(np.float) / 1000.0 + + params = [cv.IMWRITE_PNG_COMPRESSION, 9] + retval, data = cv.imencode(".png", data, params) + + if not retval: + raise Exception("encoding error (PNG)") + + elif codec == ftl.codec_t.JPG: + params = [] + retval, data = cv.imencode(".jpg", data, params) -class FTLStream: + if not retval: + raise Exception("encoding error (JPG)") + + else: + raise ValueError("unsupported codec") + + data = data.tobytes() + + if definition is None: + raise ValueError("definition required") + + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + sp = ftl.StreamPacket(int(timestamp), int(source), + int(channel_count), int(channel)) + p = ftl.Packet(int(codec), int(definition), 1, 0, int(flags), data) + + self.add_raw(sp, p) + + def add_pose(self, timestamp, source, data): + if data.shape != (4, 4): + raise ValueError("invalid pose") + + data.astype(np.float64).tobytes(order='F') + raise NotImplementedError("todo") + + def add_calibration(self, timestamp, source, data): + struct.pack(_calib_fmt, *data) + raise NotImplementedError("todo") + +class FTLStreamReader: + ''' FTL file reader. 
''' + def __init__(self, file): self._file = open(file, "br") - self._decoders = {} - self._frames = {} + self._version = 0 + + self._decoders_hevc = {} + self._seen_iframe = set() + + self._frame = None + # calibration and pose are cached + self._calibration = {} + self._pose = {} + try: magic = self._file.read(5) - if magic[:4] != bytearray(ord(c) for c in "FTLF"): + self._version = int(magic[4]) + if magic[:4] != bytes(ord(c) for c in "FTLF"): raise Exception("wrong magic") + if self._version >= 2: + # first 64 bytes reserved + self._file.read(8*8) + self._unpacker = msgpack.Unpacker(self._file, raw=True, use_list=False) except Exception as ex: @@ -155,92 +176,181 @@ class FTLStream: def _read_next(self): v1, v2 = self._unpacker.unpack() - return _stream_packet._make(v1), _packet._make(v2) + return ftl.StreamPacket._make(v1), ftl.Packet._make(v2) def _update_calib(self, sp, p): - ''' Update calibration ''' - pass - + ''' Update calibration. ''' + calibration = struct.unpack(_calib_fmt, p.data[:(4*8+2*4+4*8)]) + self._calibration[sp.streamID] = ftl.Camera._make(calibration) + def _update_pose(self, sp, p): ''' Update pose ''' - pass + pose = np.asarray(struct.unpack("@16d", p.data[:(16*8)]), + dtype=np.float64) + pose = pose.reshape((4, 4), order='F') # Eigen + self._pose[sp.streamID] = pose - def _decode_frame_hevc(self, sp, p): + def _process_json(self, sp, p): + raise NotImplementedError("json decoding not implemented") + + def _decode_hevc(self, sp, p): ''' Decode HEVC frame ''' k = (sp.streamID, sp.channel) - if k not in self._decoders: - self._decoders[k] = Decoder(_definition_t[p.definition]) + if k not in self._decoders_hevc: + self._decoders_hevc[k] = libde265.Decoder(ftl.definition_t[p.definition]) - decoder = self._decoders[k] + decoder = self._decoders_hevc[k] + + if k not in self._seen_iframe: + if not is_iframe(p.data): + # can't decode before first I-frame has been received + warn("received P-frame before I-frame") + return + + 
self._seen_iframe.add(k) decoder.push_data(p.data) - decoder.decode() + decoder.push_end_of_frame() + + while decoder.get_number_of_input_bytes_pending() > 0: + decoder.decode() img = decoder.get_next_picture() + if img is None: + # if this happens, does get_next_picture() in loop help? + warn("frame expected, no image from decoded") - if img is not None: - self._frames[k] = _ycrcb2rgb(img) - - def _flush_decoders(self): - for decoder in self._decoders.values(): - decoder.flush_data() - + if ftl.is_float_channel(self._sp.channel): + raise NotImplementedError("non-color channel decoding not available") + + else: + self._frame = _ycrcb2rgb(img) + + def _decode_opencv(self, sp, p): + try: + cv + except NameError: + raise Exception("OpenCV required for OpenCV (png/jpeg) decoding") + + self._frame = cv.imdecode(np.frombuffer(p.data, dtype=np.uint8), + cv.IMREAD_UNCHANGED) + + if ftl.is_float_channel(self._sp.channel): + self._frame = self._frame.astype(np.float) / 1000.0 + + def seek(self, ts): + ''' Read until timestamp reached ''' + if self.get_timestamp() >= ts: + raise Exception("trying to seek to earlier timestamp") + + while self.read(): + if self.get_timestamp() >= ts: + break + def read(self): ''' Reads data for until the next timestamp. Returns False if there is no more data to read, otherwise returns True. 
+ + todo: make decoding optional ''' - if self._packets_read == 0: + self._frame = None + + try: self._sp, self._p = self._read_next() self._packets_read += 1 - - self._frames = {} - ts = self._sp.timestamp - ex = None - - while self._sp.timestamp == ts: - try: - if self._p.codec == 100: # JSON - NotImplementedError("json decoding not implemented") + except msgpack.OutOfData: + return False - elif self._p.codec == 101: # CALIBRATION - self._update_calib(self._sp, self._p) + if self._p.block_total != 1 or self._p.block_number != 0: + raise Exception("Unsupported block format (todo)") - elif self._p.codec == 102: # POSE - self._update_pose(self._sp, self._p) + if self._p.codec == ftl.codec_t.JSON: + self._process_json(self._sp, self._p) - elif self._p.codec == 3: # HEVC - self._decode_frame_hevc(self._sp, self._p) + elif self._p.codec == ftl.codec_t.CALIBRATION: + self._update_calib(self._sp, self._p) + + elif self._p.codec == ftl.codec_t.POSE: + self._update_pose(self._sp, self._p) + + elif self._p.codec == ftl.codec_t.HEVC: + self._decode_hevc(self._sp, self._p) + + elif self._p.codec == ftl.codec_t.PNG: + self._decode_opencv(self._sp, self._p) + + elif self._p.codec == ftl.codec_t.JPG: + self._decode_opencv(self._sp, self._p) + + else: + raise Exception("unkowno codec %i" % self._p.codec) - else: - raise ValueError("unkowno codec %i" % p.codec) - - except Exception as e: - # TODO: Multiple exceptions possible. Re-design read()? 
- ex = e - - try: - self._sp, self._p = self._read_next() - self._packets_read += 1 - - except msgpack.OutOfData: - return False - - if ex is not None: - raise ex - return True + + def get_packet_count(self): + return self._packets_read + + def get_raw(self): + ''' Returns previously received StreamPacket and Packet ''' + return self._sp, self._p + + def get_channel_type(self): + return ftl.Channel(self._sp.channel) + + def get_source_id(self): + return self._sp.streamID + + def get_timestamp(self): + return self._sp.timestamp + + def get_frame(self): + ''' Return decoded frame from previous packet. Returns None if previous + packet did not contain a (valid) frame. ''' + return self._frame + + def get_pose(self, source): + try: + return self._pose[source] + except KeyError: + raise ValueError("source id %i not found" % source) + + def get_camera_matrix(self, source): + ''' Camera intrinsic parameters ''' + + calib = self.get_calibration(source) + K = np.identity(3, dtype=np.float64) + K[0,0] = calib.fx + K[1,1] = calib.fy + K[0,2] = calib.cx + K[1,2] = calib.cy + return K + + def get_calibration(self, source): + try: + return self._calibration[source] + except KeyError: + raise ValueError("source id %i not found" % source) - def get_frames(self): - ''' Returns all frames ''' - return self._frames - - def get_frame(self, source, channel): - k = (source, channel) - if k in self._frames: - return self._frames[k] - else: - return None + def get_Q(self, source): + ''' Disparity to depth matrix in OpenCV format ''' + + calib = self.get_calibration(source) + Q = np.identity(4, dtype=np.float64) + Q[0,3] = calib.cx + Q[1,3] = calib.cy + Q[2,2] = 0.0 + Q[2,3] = calib.fx + Q[3,2] = -1 / calib.baseline + Q[3,3] = calib.doff + return Q + + def get_sources(self): + ''' Get list of sources ''' + return list(self._calibration.keys()) + + def get_version(self): + return self._version diff --git a/python/ftl/ftltypes.py b/python/ftl/ftltypes.py new file mode 100644 index 
0000000000000000000000000000000000000000..17980b808fbadd68c1adff9f17a037a92878c27e --- /dev/null +++ b/python/ftl/ftltypes.py @@ -0,0 +1,88 @@ + +from collections import namedtuple +from enum import IntEnum + +# components/rgbd-sources/include/ftl/rgbd/camera.hpp +Camera = namedtuple("Camera", ["fx", "fy", "cx", "cy", "width", "height", + "min_depth", "max_depth", "baseline", "doff"]) + +# components/codecs/include/ftl/codecs/packet.hpp +Packet = namedtuple("Packet", ["codec", "definition", "block_total", + "block_number", "flags", "data"]) + +StreamPacket = namedtuple("StreamPacket", ["timestamp", "streamID", + "channel_count", "channel"]) + +# components/codecs/include/ftl/codecs/channels.hpp +class Channel(IntEnum): + None_ = -1 + Colour = 0 + Left = 0 + Depth = 1 + Right = 2 + Colour2 = 2 + Disparity = 3 + Depth2 = 3 + Deviation = 4 + Normals = 5 + Points = 6 + Confidence = 7 + Contribution = 7 + EnergyVector = 8 + Flow = 9 + Energy = 10 + Mask = 11 + Density = 12 + LeftGray = 13 + RightGray = 14 + Overlay1 = 15 + + AudioLeft = 32 + AudioRight = 33 + + Configuration = 64 + Calibration = 65 + Pose = 66 + Data = 67 + +_float_channels = [ + Channel.Depth, + Channel.Confidence, + Channel.Density, + Channel.Energy +] + +def is_float_channel(channel): + return channel in _float_channels + +# components/codecs/include/ftl/codecs/bitrates.hpp +class codec_t(IntEnum): + JPG = 0 + PNG = 1 + H264 = 2 + HEVC = 3 + WAV = 4 + JSON = 100 + CALIBRATION = 101 + POSE = 102 + RAW = 103 + +definition_t = { + 0 : (7680, 4320), + 1 : (2160, 3840), + 2 : (1080, 1920), + 3 : (720, 1280), + 4 : (576, 1024), + 5 : (480, 854), + 6 : (360, 640), + 7 : (0, 0), + 8 : (2056, 1852) +} + +def get_definition(shape): + for k, v in definition_t.items(): + if shape[:2] == v: + return k + + return 7 # (None) + diff --git a/python/ftl/libde265.py b/python/ftl/libde265.py index 74a1dcedbb74e3a2b2cbf26f27d3437dae04ff77..20f2f250c0c7344bc81682e15ab6bf08965318b2 100644 --- a/python/ftl/libde265.py +++ 
b/python/ftl/libde265.py @@ -20,14 +20,28 @@ except ImportError: # order: 0 nn, 1 bilinear, 3 bicubic return (resize_skimage(img, size, order=3, mode="constant", cval=0) * 255).astype(np.uint8) +from warnings import warn + import ctypes from enum import IntEnum import numpy as np +import os + +''' +# default number of worker threads for decoder: half of os.cpu_count() + +_threads = os.cpu_count() // 2 +if _threads is None: + _threads = 1 +''' + +_threads = 1 + # error codes copied from header (de265.h) -class libde265error(IntEnum): +class _libde265error(IntEnum): DE265_OK = 0 DE265_ERROR_NO_SUCH_FILE=1 DE265_ERROR_COEFFICIENT_OUT_OF_IMAGE_BOUNDS=4 @@ -74,10 +88,20 @@ class libde265error(IntEnum): DE265_WARNING_SPS_MISSING_CANNOT_DECODE_SEI=1025 DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA=1026 +class de265_chroma(IntEnum): + de265_chroma_mono = 0 + de265_chroma_420 = 1 + de265_chroma_422 = 2 + de265_chroma_444 = 3 + libde265 = ctypes.cdll.LoadLibrary("libde265.so.0") libde265.de265_get_error_text.argtypes = [ctypes.c_void_p] libde265.de265_get_error_text.restype = ctypes.c_char_p + +libde265.de265_get_warning.argtypes = [ctypes.c_void_p] +libde265.de265_get_warning.restype = ctypes.c_int + libde265.de265_get_version_number_major.restype = ctypes.c_uint32 libde265.de265_get_version_number_minor.restype = ctypes.c_uint32 @@ -96,6 +120,7 @@ libde265.de265_push_NAL.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_i libde265.de265_push_data.restype = ctypes.c_int libde265.de265_push_end_of_frame.argtypes = [ctypes.c_void_p] +libde265.de265_push_end_of_frame.restype = None libde265.de265_flush_data.argtypes = [ctypes.c_void_p] libde265.de265_flush_data.restype = ctypes.c_int @@ -106,6 +131,15 @@ libde265.de265_decode.restype = ctypes.c_int libde265.de265_get_next_picture.argtypes = [ctypes.c_void_p] libde265.de265_get_next_picture.restype = ctypes.c_void_p +libde265.de265_peek_next_picture.argtypes = [ctypes.c_void_p] 
+libde265.de265_peek_next_picture.restype = ctypes.c_void_p + +libde265.de265_release_next_picture.argtypes = [ctypes.c_void_p] +libde265.de265_release_next_picture.restype = None + +libde265.de265_get_chroma_format.argtypes = [ctypes.c_void_p] +libde265.de265_get_chroma_format.restype = ctypes.c_int + libde265.de265_get_image_width.argtypes = [ctypes.c_void_p, ctypes.c_int] libde265.de265_get_image_width.restype = ctypes.c_int @@ -118,21 +152,85 @@ libde265.de265_get_bits_per_pixel.restype = ctypes.c_int libde265.de265_get_image_plane.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.POINTER(ctypes.c_int)] libde265.de265_get_image_plane.restype = ctypes.POINTER(ctypes.c_char) +libde265.de265_get_number_of_input_bytes_pending.argtypes = [ctypes.c_void_p] +libde265.de265_get_number_of_input_bytes_pending.restype = ctypes.c_int + +class libde265Error(Exception): + def __init__(self, code): + super(libde265Error, self).__init__( + libde265.de265_get_error_text(code).decode("ascii")) + +class WaitingForInput(libde265Error): + pass + class Decoder: - def __init__(self, size, threads=1): + def __init__(self, size, threads=_threads): self._size = size self._more = ctypes.c_int() self._out_stride = ctypes.c_int() self._ctx = libde265.de265_new_decoder() + self._supress_warnings = False + err = libde265.de265_start_worker_threads(self._ctx, threads) + if err: - raise Exception(self.get_error_str(err)) + raise libde265Error(err) def __del__(self): libde265.de265_free_decoder(self._ctx) + + def _copy_image(self, de265_image): + res = np.zeros((self._size[0], self._size[1], 3), dtype=np.uint8) + + # libde265: always 420 (???) 
+ # chroma_format = libde265.de265_get_chroma_format(de265_image) + + for c in range(0, 3): + size = (libde265.de265_get_image_height(de265_image, c), + libde265.de265_get_image_width(de265_image, c)) + + bpp = libde265.de265_get_bits_per_pixel(de265_image, c) + if bpp != 8: + raise NotImplementedError("unsupported bits per pixel %i" % bpp) + + img_ptr = libde265.de265_get_image_plane(de265_image, c, self._out_stride) + + # for frombuffer() no copy assumed + ch = np.frombuffer(img_ptr[:size[0] * size[1]], dtype=np.uint8) + ch.shape = size + + res[:,:,c] = _resize(ch, self._size) + + return res + + def _warning(self): + if self._supress_warnings: + return + + code = libde265.de265_get_warning(self._ctx) + + if code != _libde265error.DE265_OK: + msg = libde265.de265_get_error_text(code).decode("ascii") + warn(msg) + + def decode(self): + err = libde265.de265_decode(self._ctx, self._more) + + if err: + if err == _libde265error.DE265_ERROR_WAITING_FOR_INPUT_DATA: + raise WaitingForInput(err) + + raise libde265Error(err) + + self._warning() + + return self._more.value != 0 - def get_error_str(self, code): - return libde265.de265_get_error_text(code).decode("ascii") + def flush_data(self): + err = libde265.de265_flush_data(self._ctx) + + if err: + raise libde265Error(err) def push_data(self, data): if not isinstance(data, bytes): @@ -141,13 +239,13 @@ class Decoder: err = libde265.de265_push_data(self._ctx, data, len(data), None, None) if err: - raise Exception(self.get_error_str(err)) - + raise libde265Error(err) + def push_end_of_frame(self): err = libde265.de265_push_end_of_frame(self._ctx) if err: - raise Exception(self.get_error_str(err)) + raise libde265Error(err) def push_NAL(self, data): if not isinstance(data, bytes): @@ -156,48 +254,36 @@ class Decoder: err = libde265.de265_push_NAL(self._ctx, data, len(data), None, None) if err: - raise Exception(self.get_error_str(err)) - - def decode(self): - err = libde265.de265_decode(self._ctx, self._more) - - if err and 
err != libde265error.DE265_ERROR_WAITING_FOR_INPUT_DATA: - raise Exception(self.get_error_str(err)) - - return self._more.value != 0 - - def flush_data(self): - err = libde265.de265_flush_data(self._ctx) - - if err: - raise Exception(self.get_error_str(err)) - + raise libde265Error(err) + def get_next_picture(self): ''' Returns next decoded frame. Image in YCbCr format. If no frame available returns None. ''' - img = libde265.de265_get_next_picture(self._ctx) - - if not img: + + de265_image = libde265.de265_get_next_picture(self._ctx) + + if not de265_image: return None - res = np.zeros((self._size[0], self._size[1], 3), dtype=np.uint8) + res = self._copy_image(de265_image) + + libde265.de265_release_next_picture(self._ctx) + + return res + + def get_number_of_input_bytes_pending(self): + return libde265.de265_get_number_of_input_bytes_pending(self._ctx) + + def peek_next_picture(self): + de265_image = libde265.de265_peek_next_picture(self._ctx) + + if not de265_image: + return None - for c in range(0, 3): - size = (libde265.de265_get_image_height(img, c), - libde265.de265_get_image_width(img, c)) - - bpp = libde265.de265_get_bits_per_pixel(img, c) + res = self._copy_image(de265_image) - if bpp != 8: - raise NotImplementedError("unsupported bits per pixel %i" % bpp) - - img_ptr = libde265.de265_get_image_plane(img, c, self._out_stride) - - ch = np.frombuffer(img_ptr[:size[0] * size[1]], dtype=np.uint8) - ch.shape = size - - res[:,:,c] = _resize(ch, self._size) + libde265.de265_release_next_picture(self._ctx) - return res \ No newline at end of file + return res diff --git a/python/ftl/misc.py b/python/ftl/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..5494382ca1376c8bf96a9d30d7d863d8bfc9eff9 --- /dev/null +++ b/python/ftl/misc.py @@ -0,0 +1,97 @@ + +def disparity_to_depth(disparity, camera, max_depth=10.0, invalid_value=0.0): + ''' Calculate depth map from disparity map. 
Depth values smaller than 0.0 + and larger than max_depth are set to invalid_value. + ''' + depth = (camera.fx * camera.baseline) / (disparity - camera.doff) + depth[depth < 0] = invalid_value + depth[depth > max_depth] = invalid_value + return depth + +from enum import IntEnum + +# components/codecs/include/ftl/codecs/hevc.hpp +class NALType(IntEnum): + CODED_SLICE_TRAIL_N = 0 + CODED_SLICE_TRAIL_R = 1 + + CODED_SLICE_TSA_N = 2 + CODED_SLICE_TSA_R = 3 + + CODED_SLICE_STSA_N = 4 + CODED_SLICE_STSA_R = 5 + + CODED_SLICE_RADL_N = 6 + CODED_SLICE_RADL_R = 7 + + CODED_SLICE_RASL_N = 8 + CODED_SLICE_RASL_R = 9 + + RESERVED_VCL_N10 = 10 + RESERVED_VCL_R11 = 11 + RESERVED_VCL_N12 = 12 + RESERVED_VCL_R13 = 13 + RESERVED_VCL_N14 = 14 + RESERVED_VCL_R15 = 15 + + CODED_SLICE_BLA_W_LP = 16 + CODED_SLICE_BLA_W_RADL = 17 + CODED_SLICE_BLA_N_LP = 18 + CODED_SLICE_IDR_W_RADL = 19 + CODED_SLICE_IDR_N_LP = 20 + CODED_SLICE_CRA = 21 + RESERVED_IRAP_VCL22 = 22 + RESERVED_IRAP_VCL23 = 23 + + RESERVED_VCL24 = 24 + RESERVED_VCL25 = 25 + RESERVED_VCL26 = 26 + RESERVED_VCL27 = 27 + RESERVED_VCL28 = 28 + RESERVED_VCL29 = 29 + RESERVED_VCL30 = 30 + RESERVED_VCL31 = 31 + + VPS = 32 + SPS = 33 + PPS = 34 + ACCESS_UNIT_DELIMITER = 35 + EOS = 36 + EOB = 37 + FILLER_DATA = 38 + PREFIX_SEI = 39 + SUFFIX_SEI = 40 + + RESERVED_NVCL41 = 41 + RESERVED_NVCL42 = 42 + RESERVED_NVCL43 = 43 + RESERVED_NVCL44 = 44 + RESERVED_NVCL45 = 45 + RESERVED_NVCL46 = 46 + RESERVED_NVCL47 = 47 + UNSPECIFIED_48 = 48 + UNSPECIFIED_49 = 49 + UNSPECIFIED_50 = 50 + UNSPECIFIED_51 = 51 + UNSPECIFIED_52 = 52 + UNSPECIFIED_53 = 53 + UNSPECIFIED_54 = 54 + UNSPECIFIED_55 = 55 + UNSPECIFIED_56 = 56 + UNSPECIFIED_57 = 57 + UNSPECIFIED_58 = 58 + UNSPECIFIED_59 = 59 + UNSPECIFIED_60 = 60 + UNSPECIFIED_61 = 61 + UNSPECIFIED_62 = 62 + UNSPECIFIED_63 = 63 + INVALID = 64 + +def get_NAL_type(data): + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + return NALType((data[4] >> 1) & 0x3f) + +def is_iframe(data): + 
return get_NAL_type(data) == NALType.VPS