diff --git a/Doxyfile b/Doxyfile index 42eb33cc5ec62e06256976f4fccad1db9c757fae..43c790ff3d6381d66ee53b60af111a6ff8935724 100644 --- a/Doxyfile +++ b/Doxyfile @@ -963,7 +963,7 @@ INPUT_FILTER = # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. -FILTER_PATTERNS = +FILTER_PATTERNS = *.py=/usr/bin/doxypy # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for diff --git a/python/ftl/__init__.py b/python/ftl/__init__.py index 38e017612ea53ec9e65417aa9a56729c8f10baf0..9f7ea874efc077e2442a4386ce94088e90aecdf7 100644 --- a/python/ftl/__init__.py +++ b/python/ftl/__init__.py @@ -1,4 +1,4 @@ -from . ftlstream import FTLStreamReader, FTLStreamWriter -from . misc import disparity_to_depth +from . ftlstreamreader import FTLStreamReader +#from . ftlstreamwriter import FTLStreamWriter -from . import ftltypes as types +from . import ftltype as types diff --git a/python/ftl/codecs.py b/python/ftl/codecs.py new file mode 100644 index 0000000000000000000000000000000000000000..bf4e21e6860e2bcfc4d39ff041cd53f5f17d0a4c --- /dev/null +++ b/python/ftl/codecs.py @@ -0,0 +1,413 @@ +import numpy as np +import msgpack + +import struct +from warnings import warn + +from . import ftltype +from . import libde265 +from . misc import Calibration + +from enum import IntEnum + +_has_opencv = False +try: + import cv2 as cv + _has_opencv = True +except ImportError: + warn("OpenCV not available. OpenCV required for full functionality.") + +def _int_to_float(im): + return im.astype(float) / np.iinfo(im.dtype).max + +if _has_opencv: + def _ycrcb2rgb(img): + return _int_to_float(cv.cvtColor(img, cv.COLOR_YCrCb2RGB)) + +else: + def _ycrcb2rgb(img): + """ YCrCb to RGB, based on OpenCV documentation definition. + + Note: It seems this implementation is not perfectly equivalent to + OpenCV's (results not exactly same, why?) + """ + + rgb = np.zeros(img.shape, np.float) + + Y = img[:,:,0].astype(np.float) + Cr = img[:,:,1].astype(np.float) + Cb = img[:,:,2].astype(np.float) + delta = 128.0 + + rgb[:,:,0] = Y + 1.403 * (Cr - delta) + rgb[:,:,1] = Y - 0.714 * (Cr - delta) - 0.344 * (Cb - delta) + rgb[:,:,2] = Y + 1.773 * (Cb - delta) + + return rgb / 255 + +def _ycbcr2rgb(img): + rgb = np.zeros(img.shape, np.float) + + Y = img[:,:,0].astype(np.float) + Cr = img[:,:,2].astype(np.float) + Cb = img[:,:,1].astype(np.float) + delta = 128.0 + + rgb[:,:,0] = Y + 1.403 * (Cr - delta) + rgb[:,:,1] = Y - 0.714 * (Cr - delta) - 0.344 * (Cb - delta) + rgb[:,:,2] = Y + 1.773 * (Cb - delta) + + return rgb / 255 + +################################################################################ +# Decoding +################################################################################ + +class FTLDecoder: + def decode(self, packet): + raise NotImplementedError() + +################################################################################ +# OpenCV (optional) +################################################################################ + +def decode_codec_opencv(packet): + if packet.block_total != 1 or packet.block_number != 0: + raise Exception("Unsupported block format (todo)") + + return _int_to_float(cv.imdecode(np.frombuffer(packet.data, dtype=np.uint8), + cv.IMREAD_UNCHANGED)) + +def decode_codec_opencv_float(packet): + if packet.block_total != 1 or packet.block_number != 0: + raise Exception("Unsupported block format (todo)") + + return cv.imdecode(np.frombuffer(packet.data, dtype=np.uint8), + cv.IMREAD_UNCHANGED).astype(np.float) / 1000.0 + +################################################################################ +# HEVC +################################################################################ + +# components/codecs/include/ftl/codecs/hevc.hpp + +class _NALType(IntEnum): + CODED_SLICE_TRAIL_N = 0 + CODED_SLICE_TRAIL_R = 1 + + CODED_SLICE_TSA_N = 2 + CODED_SLICE_TSA_R = 3 + + CODED_SLICE_STSA_N = 4 + CODED_SLICE_STSA_R = 5 + + CODED_SLICE_RADL_N = 6 + CODED_SLICE_RADL_R = 7 + + CODED_SLICE_RASL_N = 8 + CODED_SLICE_RASL_R = 9 + + RESERVED_VCL_N10 = 10 + RESERVED_VCL_R11 = 11 + RESERVED_VCL_N12 = 12 + RESERVED_VCL_R13 = 13 + RESERVED_VCL_N14 = 14 + RESERVED_VCL_R15 = 15 + + CODED_SLICE_BLA_W_LP = 16 + CODED_SLICE_BLA_W_RADL = 17 + CODED_SLICE_BLA_N_LP = 18 + CODED_SLICE_IDR_W_RADL = 19 + CODED_SLICE_IDR_N_LP = 20 + CODED_SLICE_CRA = 21 + RESERVED_IRAP_VCL22 = 22 + RESERVED_IRAP_VCL23 = 23 + + RESERVED_VCL24 = 24 + RESERVED_VCL25 = 25 + RESERVED_VCL26 = 26 + RESERVED_VCL27 = 27 + RESERVED_VCL28 = 28 + RESERVED_VCL29 = 29 + RESERVED_VCL30 = 30 + RESERVED_VCL31 = 31 + + VPS = 32 + SPS = 33 + PPS = 34 + ACCESS_UNIT_DELIMITER = 35 + EOS = 36 + EOB = 37 + FILLER_DATA = 38 + PREFIX_SEI = 39 + SUFFIX_SEI = 40 + + RESERVED_NVCL41 = 41 + RESERVED_NVCL42 = 42 + RESERVED_NVCL43 = 43 + RESERVED_NVCL44 = 44 + RESERVED_NVCL45 = 45 + RESERVED_NVCL46 = 46 + RESERVED_NVCL47 = 47 + UNSPECIFIED_48 = 48 + UNSPECIFIED_49 = 49 + UNSPECIFIED_50 = 50 + UNSPECIFIED_51 = 51 + UNSPECIFIED_52 = 52 + UNSPECIFIED_53 = 53 + UNSPECIFIED_54 = 54 + UNSPECIFIED_55 = 55 + UNSPECIFIED_56 = 56 + UNSPECIFIED_57 = 57 + UNSPECIFIED_58 = 58 + UNSPECIFIED_59 = 59 + UNSPECIFIED_60 = 60 + UNSPECIFIED_61 = 61 + UNSPECIFIED_62 = 62 + UNSPECIFIED_63 = 63 + INVALID = 64 + +def _get_NAL_type(data): + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + return _NALType((data[4] >> 1) & 0x3f) + +def _is_iframe(data): + return _get_NAL_type(data) == _NALType.VPS + +class FTLDecoder_HEVC: + def __init__(self): + self._decoder = libde265.Decoder() + self._seen_iframe = False + + def decode(self, packet): + if not self._seen_iframe: + if not _is_iframe(packet.data): + # can't decode before first I-frame has been received + warn("received P-frame before I-frame") + return + + self._decoder.push_data(packet.data) + self._decoder.push_end_of_frame() + + while self._decoder.get_number_of_input_bytes_pending() > 0: + self._decoder.decode() + + img = self._decoder.get_next_picture() + if img is None: + # if this happens, does get_next_picture() in loop help? + warn("frame expected, no image received from decoder") + + return img + +class FTLDecoder_HEVC_Float(FTLDecoder_HEVC): + @staticmethod + def _decode_format_nvpipe(img): + high = img[:,(img.shape[1]//2):,0].astype(np.uint32) << 8 + low = img[:,:(img.shape[1]//2),0].astype(np.uint32) + return (high|low).astype(np.float)/1000.0 + + @staticmethod + def _decode_format_mappeddepth(img): + # hardcoded constants maxdepth and P + maxdepth = 16 + P = (2.0 * 256.0) / 16384.0 + + # use only 8 bits of 10 + img = (img >> 2).astype(np.float) / 255 + + L = img[:,:,0] + Ha = img[:,:,1] + Hb = img[:,:,2] + + m = np.floor(4.0 * (L/P) - 0.5).astype(np.int) % 4 + L0 = L - ((L-(P / 8.0)) % P) + (P / 4.0) * m.astype(np.float) - (P/8.0) + + s = np.zeros(img.shape[:2], dtype=np.float) + np.copyto(s, (P/2.0) * Ha, where=m == 0) + np.copyto(s, (P/2.0) * Hb, where=m == 1) + np.copyto(s, (P/2.0) * (1.0 - Ha), where=m == 2) + np.copyto(s, (P/2.0) * (1.0 - Hb), where=m == 3) + + return (L0 + s) * maxdepth + + def decode(self, packet): + img = super().decode(packet) + if img is None: + return None + + if (packet.flags & ftltype.PacketFlags.MappedDepth): + return self._decode_format_mappeddepth(img) + + else: + return self._decode_format_nvpipe(img) + +class FTLDecoder_HEVC_YCrCb(FTLDecoder_HEVC): + def decode(self, packet): + img = super().decode(packet) + if img is None: + return None + + return _ycrcb2rgb(img) + +class FTLDecoder_HEVC_YCbCr(FTLDecoder_HEVC): + def decode(self, packet): + img = super().decode(packet) + if img is None: + return None + + return _ycbcr2rgb(img) + +################################################################################ +# Other (msgpack/calibration/pose) +################################################################################ + +def decode_codec_calibration(packet): + calibration = struct.unpack("@ddddIIdddd", packet.data[:(4*8+2*4+4*8)]) + return Calibration(ftltype.Camera._make(calibration), 0, 0) + +def decode_codec_msgpack_calibration(packet): + calib, channel, capabilities = msgpack.unpackb(packet.data) + return Calibration(ftltype.Camera._make(calib), channel, capabilities) + +def decode_codec_msgpack_pose(packet): + raw = msgpack.unpackb(packet.data) + # TODO: msgpack returns 128 (4*4*sizeof(double)) floating point values + return raw + +def decode_codec_pose(packet): + pose = np.asarray(struct.unpack("@16d", packet.data[:(16*8)]), + dtype=np.float64) + + return pose.reshape((4, 4), order="F") # Eigen + +################################################################################ + +def create_decoder(codec, channel, version=3): + """ @brief Create decoder for given channel, codec and ftlf version. + @param codec Codec id + @param channel Channel id + @param version FTL file version + @returns callable which takes packet as argument + """ + + if codec == ftltype.codec_t.HEVC: + if ftltype.is_float_channel(channel): + return FTLDecoder_HEVC_Float().decode + else: + if version < 3: + return FTLDecoder_HEVC_YCrCb().decode + else: + return FTLDecoder_HEVC_YCbCr().decode + + elif codec == ftltype.codec_t.PNG: + if not _has_opencv: + raise Exception("OpenCV required for OpenCV (png/jpeg) decoding") + + if ftltype.is_float_channel(channel): + return decode_codec_opencv_float + else: + return decode_codec_opencv + + elif codec == ftltype.codec_t.JPG: + if not _has_opencv: + raise Exception("OpenCV required for OpenCV (png/jpeg) decoding") + + return decode_codec_opencv + + elif codec == ftltype.codec_t.MSGPACK: + if channel == ftltype.Channel.Calibration: + return decode_codec_msgpack_calibration + elif channel == ftltype.Channel.Pose: + return decode_codec_msgpack_pose + else: + return lambda packet: msgpack.unpackb(packet.data) + + elif codec == ftltype.codec_t.CALIBRATION: + return decode_codec_calibration + + elif codec == ftltype.codec_t.POSE: + return decode_codec_pose + + else: + raise ValueError("Unknown codec %i" % codec) + +################################################################################ +# ENCODING +################################################################################ + +def create_packet(codec, definition, flags, data): + return ftltype.Packet._make((codec, definition, 1, 0, flags, data)) + +# TODO exception types? + +def encode_codec_opencv_jpg(data, **kwargs): + params = [] + retval, encoded = cv.imencode(".jpg", data, params) + if retval: + return create_packet(ftltype.codec_t.JPG, + ftltype.get_definition(data), + 0, + encoded) + else: + # todo + raise Exception("encoding error") + +def encode_codec_opencv_png(data, **kwargs): + params = [cv.IMWRITE_PNG_COMPRESSION, 9] + retval, encoded = cv.imencode(".png", data, params) + if retval: + return create_packet(ftltype.codec_t.PNG, + ftltype.get_definition(data), + 0, + encoded) + else: + # todo + raise Exception("encoding error") + +def encode_codec_opencv_png_float(data, compression=9): + data = (data * 1000).astype(np.uint16) + params = [cv.IMWRITE_PNG_COMPRESSION, compression] + retval, encoded = cv.imencode(".png", data, params) + if retval: + return create_packet(ftltype.codec_t.PNG, + ftltype.get_definition(data), + 0, + encoded) + else: + # todo + raise Exception("encoding error") + +def create_encoder(codec, channel, **options): + """ @brief Create encoder + @param codec codec id + @param channel channel id + @param **options options passed to codec constructor + @returns callable which takes unencoded data and optional parameters + """ + + if codec == ftltype.codec_t.JPG: + if not ftltype.is_float_channel(channel): + return encode_codec_opencv_jpg + else: + raise Exception("JPG not supported for float channels") + + elif codec == ftltype.codec_t.PNG: + if ftltype.is_float_channel(channel): + return encode_codec_opencv_png_float + else: + return encode_codec_opencv_png + + elif codec == ftltype.codec_t.MSGPACK: + if channel == ftltype.Channel.Pose: + raise NotImplementedError("todo") + + elif channel == ftltype.Channel.Calibration: + raise NotImplementedError("todo") + + else: + raise Exception("msgpack only available for pose/calibration") + + else: + raise Exception("unsupported/unknown codec") diff --git a/python/ftl/ftlstream.py b/python/ftl/ftlstream.py deleted file mode 100644 index 2af8e71644d39e4c6145708668602f8cffb084d1..0000000000000000000000000000000000000000 --- a/python/ftl/ftlstream.py +++ /dev/null @@ -1,421 +0,0 @@ -import msgpack - -import numpy as np - -import sys -import struct -from warnings import warn -from enum import IntEnum -from collections import namedtuple - -from . misc import is_iframe -from . import ftltypes as ftl -from . import libde265 - -try: - import cv2 as cv - - def _ycrcb2rgb(img): - return cv.cvtColor(img, cv.COLOR_YCrCb2RGB) - -except ImportError: - warn("OpenCV not available. OpenCV required for full functionality.") - - def _ycrcb2rgb(img): - ''' YCrCb to RGB, based on OpenCV documentation definition. - - Note: It seems this implementation is not perfectly equivalent to - OpenCV's (results not exactly same, why?) - ''' - - rgb = np.zeros(img.shape, np.float) - - Y = img[:,:,0].astype(np.float) - Cr = img[:,:,1].astype(np.float) - Cb = img[:,:,2].astype(np.float) - delta = 128.0 - - rgb[:,:,0] = Y + 1.403 * (Cr - delta) - rgb[:,:,1] = Y - 0.714 * (Cr - delta) - 0.344 * (Cb - delta) - rgb[:,:,2] = Y + 1.773 * (Cb - delta) - - return rgb.round().astype(np.uint8) - -def _ycbcr2rgb(img): - rgb = np.zeros(img.shape, np.float) - - Y = img[:,:,0].astype(np.float) - Cr = img[:,:,2].astype(np.float) - Cb = img[:,:,1].astype(np.float) - delta = 128.0 - - rgb[:,:,0] = Y + 1.403 * (Cr - delta) - rgb[:,:,1] = Y - 0.714 * (Cr - delta) - 0.344 * (Cb - delta) - rgb[:,:,2] = Y + 1.773 * (Cb - delta) - - return rgb.round().astype(np.uint8) - -class FTLStreamWriter: - def __init__(self, file, version=2): - self._file = open(file, "wb") - self._file.write(bytes(ord(c) for c in "FTLF")) # magic - self._file.write(bytes([version])) # version - self._file.write(bytes([0]*64)) # reserved - - self._packer = msgpack.Packer(strict_types=False, use_bin_type=True) - - def __del__(self): - self.close() - - def close(self): - self._file.close() - - def add_raw(self, sp, p): - if len(sp) != len(ftl.StreamPacket._fields) or len(p) != len(ftl.Packet._fields): - raise ValueError("invalid input") - - self._file.write(self._packer.pack((sp, p))) - self._file.flush() - - def add_frame(self, timestamp, source, channel, channel_count, codec, data, - definition=None, flags=0, encode=True): - ''' Write frame to file. If encode is False (data already encoded), - definition needs to be specified. - ''' - - if source < 0: - raise ValueError("invalid source id") - - if channel not in ftl.Channel: - raise ValueError("invalid channel") - - if codec not in ftl.codec_t: - raise ValueError("invalid codec") - - if encode: - if definition is None: - definition = ftl.get_definition(data.shape) - - if definition is None: - raise ValueError("unsupported resolution") - - if definition != ftl.get_definition(data.shape): - # todo: could replace definition or scale - raise ValueError("definition does not match frame resolution") - - if codec == ftl.codec_t.PNG: - if ftl.is_float_channel(channel): - # scaling always same (???) - data = (data * 1000).astype(np.uint16) - - params = [cv.IMWRITE_PNG_COMPRESSION, 9] - retval, data = cv.imencode(".png", data, params) - - if not retval: - raise Exception("encoding error (PNG)") - - elif codec == ftl.codec_t.JPG: - params = [] - retval, data = cv.imencode(".jpg", data, params) - - if not retval: - raise Exception("encoding error (JPG)") - - else: - raise ValueError("unsupported codec") - - data = data.tobytes() - - if definition is None: - raise ValueError("definition required") - - if not isinstance(data, bytes): - raise ValueError("expected bytes") - - sp = ftl.StreamPacket(int(timestamp), int(source), - int(channel_count), int(channel)) - p = ftl.Packet(int(codec), int(definition), 1, 0, int(flags), data) - - self.add_raw(sp, p) - - def add_pose(self, timestamp, source, data): - if data.shape != (4, 4): - raise ValueError("invalid pose") - - data.astype(np.float64).tobytes(order='F') - raise NotImplementedError("todo") - - def add_calibration(self, timestamp, source, data): - # todo: Use msgpack format instead (ftlf v3+) - struct.pack("@ddddIIdddd", *data) - raise NotImplementedError("todo") - -class FTLStreamReader: - ''' FTL file reader. ''' - - def __init__(self, file): - self._file = open(file, "br") - self._version = 0 - - self._decoders_hevc = {} - self._seen_iframe = set() - - self._frame = None - - # calibration and pose are cached - self._calibration = {} - self._pose = {} - - try: - magic = self._file.read(5) - self._version = int(magic[4]) - if magic[:4] != bytes(ord(c) for c in "FTLF"): - raise Exception("wrong magic") - - if self._version >= 2: - # first 64 bytes reserved - self._file.read(8*8) - - self._unpacker = msgpack.Unpacker(self._file, raw=True, use_list=False) - - except Exception as ex: - self._file.close() - raise ex - - self._packets_read = 0 - - def __del__(self): - self._file.close() - - def _read_next(self): - v1, v2 = self._unpacker.unpack() - return ftl.StreamPacket._make(v1), ftl.Packet._make(v2) - - def _update_calib(self, sp, p): - ''' Update calibration. ''' - - if p.codec == ftl.codec_t.MSGPACK: - # TODO: channel and capabilities should be saved as well - calib, channel, capabilities = msgpack.unpackb(p.data) - self._calibration[sp.streamID] = ftl.Camera._make(calib) - - elif p.codec == ftl.codec_t.CALIBRATION: - calibration = struct.unpack("@ddddIIdddd", p.data[:(4*8+2*4+4*8)]) - self._calibration[sp.streamID] = ftl.Camera._make(calibration) - - else: - raise Exception("Unknown codec %i for calibration" % p.codec) - - def _update_pose(self, sp, p): - ''' Update pose ''' - pose = np.asarray(struct.unpack("@16d", p.data[:(16*8)]), - dtype=np.float64) - pose = pose.reshape((4, 4), order='F') # Eigen - self._pose[sp.streamID] = pose - - def _process_json(self, sp, p): - raise NotImplementedError("json decoding not implemented") - - def _decode_hevc(self, sp, p): - ''' Decode HEVC frame ''' - - k = (sp.streamID, sp.channel) - - if k not in self._decoders_hevc: - self._decoders_hevc[k] = libde265.Decoder() - - decoder = self._decoders_hevc[k] - - if k not in self._seen_iframe: - if not is_iframe(p.data): - # can't decode before first I-frame has been received - warn("received P-frame before I-frame") - return - - self._seen_iframe.add(k) - - decoder.push_data(p.data) - decoder.push_end_of_frame() - - while decoder.get_number_of_input_bytes_pending() > 0: - decoder.decode() - - img = decoder.get_next_picture() - if img is None: - # if this happens, does get_next_picture() in loop help? - warn("frame expected, no image received from decoder") - - if ftl.is_float_channel(self._sp.channel): - if (p.flags & ftl.PacketFlags.MappedDepth): - # New format - - # hardcoded constants maxdepth and P - maxdepth = 16 - P = (2.0 * 256.0) / 16384.0 - - # use only 8 bits of 10 - img = (img >> 2).astype(np.float) / 255 - - L = img[:,:,0] - Ha = img[:,:,1] - Hb = img[:,:,2] - - m = np.floor(4.0 * (L/P) - 0.5).astype(np.int) % 4 - L0 = L - ((L-(P / 8.0)) % P) + (P / 4.0) * m.astype(np.float) - (P/8.0) - - s = np.zeros(img.shape[:2], dtype=np.float) - np.copyto(s, (P/2.0) * Ha, where=m == 0) - np.copyto(s, (P/2.0) * Hb, where=m == 1) - np.copyto(s, (P/2.0) * (1.0 - Ha), where=m == 2) - np.copyto(s, (P/2.0) * (1.0 - Hb), where=m == 3) - - img = (L0+s) * maxdepth - - else: - # NvPipe format - high = img[:,(img.shape[1]//2):,0].astype(np.uint32) << 8 - low = img[:,:(img.shape[1]//2),0].astype(np.uint32) - img = (high|low).astype(np.float)/1000.0 - ''' - try: - img[img < self._calibration[sp.streamID].min_depth] = 0.0 - img[img > self._calibration[sp.streamID].max_depth] = 0.0 - except KeyError: - warn("no calibration for received frame") - ''' - self._frame = img - - else: - if self._version < 3: - self._frame = _ycrcb2rgb(img) - else: - self._frame = _ycbcr2rgb(img) - - def _decode_opencv(self, sp, p): - try: - cv - except NameError: - raise Exception("OpenCV required for OpenCV (png/jpeg) decoding") - - self._frame = cv.imdecode(np.frombuffer(p.data, dtype=np.uint8), - cv.IMREAD_UNCHANGED) - - if ftl.is_float_channel(self._sp.channel): - self._frame = self._frame.astype(np.float) / 1000.0 - - def seek(self, ts): - ''' Read until timestamp reached ''' - if self.get_timestamp() >= ts: - raise Exception("trying to seek to earlier timestamp") - - while self.read(): - if self.get_timestamp() >= ts: - break - - def read(self): - ''' - Reads data for until the next timestamp. Returns False if there is no - more data to read, otherwise returns True. - - todo: make (frame) decoding optional - ''' - self._frame = None - - try: - self._sp, self._p = self._read_next() - self._packets_read += 1 - - except msgpack.OutOfData: - return False - - if self._p.block_total != 1 or self._p.block_number != 0: - raise Exception("Unsupported block format (todo)") - - # calibration/pose cached - # todo: should be done by user instead? - - if self._sp.channel == ftl.Channel.Calibration: - self._update_calib(self._sp, self._p) - - elif self._sp.channel == ftl.Channel.Pose: - self._update_pose(self._sp, self._p) - - # decode if codec supported - if self._p.codec == ftl.codec_t.HEVC: - self._decode_hevc(self._sp, self._p) - - elif self._p.codec == ftl.codec_t.PNG: - self._decode_opencv(self._sp, self._p) - - elif self._p.codec == ftl.codec_t.JPG: - self._decode_opencv(self._sp, self._p) - - else: - # todo (unsupported codec) - pass - - return True - - def get_packet_count(self): - return self._packets_read - - def get_raw(self): - ''' Returns previously received StreamPacket and Packet ''' - return self._sp, self._p - - def get_channel_type(self): - return ftl.Channel(self._sp.channel) - - def get_source_id(self): - return self._sp.streamID - - def get_timestamp(self): - return self._sp.timestamp - - def get_frame(self): - ''' Return decoded frame from previous packet. Returns None if previous - packet did not contain a (valid) frame. ''' - return self._frame - - def get_pose(self, source): - try: - return self._pose[source] - except KeyError: - raise ValueError("source id %i not found" % source) - - def get_camera_matrix(self, source): - ''' Camera intrinsic parameters ''' - - calib = self.get_calibration(source) - K = np.identity(3, dtype=np.float64) - K[0,0] = calib.fx - K[1,1] = calib.fy - K[0,2] = calib.cx - K[1,2] = calib.cy - return K - - def get_calibration(self, source): - try: - return self._calibration[source] - except KeyError: - raise ValueError("source id %i not found" % source) - - def get_Q(self, source): - ''' Disparity to depth matrix (OpenCV) ''' - - calib = self.get_calibration(source) - Q = np.identity(4, dtype=np.float64) - Q[0,3] = calib.cx - Q[1,3] = calib.cy - Q[2,2] = 0.0 - Q[2,3] = calib.fx - Q[3,2] = -1 / calib.baseline - Q[3,3] = calib.doff - return Q - - def get_sources(self): - ''' Get list of sources ''' - return list(self._calibration.keys()) - - def get_version(self): - return self._version diff --git a/python/ftl/ftlstreamreader.py b/python/ftl/ftlstreamreader.py new file mode 100644 index 0000000000000000000000000000000000000000..1a33d16f383a9ae53cf55345edcffa6fe41c8b55 --- /dev/null +++ b/python/ftl/ftlstreamreader.py @@ -0,0 +1,109 @@ +import msgpack +import numpy as np + +from warnings import warn + +from . import ftltype +from . codecs import create_decoder + +class FTLStreamReader: + """ FTL file reader. """ + + def __init__(self, file): + self._file = open(file, "br") + self._version = 0 + + self._decoders = {} + self._enabled_sources = [] + self._available_sources = [] + + self._data = None + + try: + magic = self._file.read(5) + self._version = int(magic[4]) + if magic[:4] != bytes(ord(c) for c in "FTLF"): + raise Exception("wrong magic") + + if self._version >= 2: + # first 64 bytes reserved + self._file.read(8*8) + + self._unpacker = msgpack.Unpacker(self._file, raw=True, use_list=False) + + except Exception as ex: + self._file.close() + raise ex + + self._packets_read = 0 + + def __del__(self): + self._file.close() + + def _read_next(self): + v1, v2 = self._unpacker.unpack() + return ftltype.StreamPacket._make(v1), ftltype.Packet._make(v2) + + def seek(self, ts): + """ Read until timestamp reached """ + if self.get_timestamp() >= ts: + raise Exception("trying to seek to earlier timestamp") + + while self.read(): + if self.get_timestamp() >= ts: + break + + def read(self): + """ + Reads data for until the next timestamp. Returns False if there is no + more data to read, otherwise returns True. + """ + self._data = None + + try: + self._sp, self._p = self._read_next() + self._packets_read += 1 + + except msgpack.OutOfData: + return False + + if self._sp.streamID not in self._available_sources: + self._available_sources.append(self._sp.streamID) + + if self._enabled_sources and self._sp.streamID not in self._enabled_sources: + return True + + k = (self._sp.streamID, self._sp.channel) + if k not in self._decoders: + self._decoders[k] = create_decoder(self._p.codec, self._sp.channel, self._version) + + self._data = self._decoders[k](self._p) + + return True + + def get_raw(self): + """ Returns previously read StreamPacket and Packet """ + return self._sp, self._p + + def get_channel(self): + return ftltype.Channel(self._sp.channel) + + def get_source_id(self): + return self._sp.streamID + + def get_timestamp(self): + return self._sp.timestamp + + def get_data(self): + """ Returns decoded data """ + return self._data + + def get_sources(self): + """ Return list of sources. Can change as stream is read. """ + return list(self._available_sources) + + def get_version(self): + return self._version + + def get_packet_count(self): + return self._packets_read diff --git a/python/ftl/ftlstreamwriter.py b/python/ftl/ftlstreamwriter.py new file mode 100644 index 0000000000000000000000000000000000000000..3dfc8903b4d5a17e7d7aeb53f491e23e4c33196a --- /dev/null +++ b/python/ftl/ftlstreamwriter.py @@ -0,0 +1,75 @@ +import msgpack +import struct + +from . import ftltype + +from codecs import create_encoder + +class FTLStreamWriter: + def __init__(self, file, version=3): + self._file = open(file, "wb") + self._file.write(bytes(ord(c) for c in "FTLF")) # magic + self._file.write(bytes([version])) # version + self._file.write(bytes([0]*64)) # reserved + self._packer = msgpack.Packer(strict_types=False, use_bin_type=True) + + self._encoders = {} + self._channel_count = 0 + + def __del__(self): + self.close() + + def close(self): + self._file.close() + + def add_raw(self, sp, p): + if len(sp) != len(ftltype.StreamPacket._fields): + raise ValueError("invalid StreamPacket") + + if len(p) != len(ftltype.Packet._fields): + raise ValueError("invalid Packet") + + self._file.write(self._packer.pack((sp, p))) + self._file.flush() + + def create_encoder(self, source, codec, channel, **kwargs): + if channel not in ftltype.Channel: + raise ValueError("unknown channel") + + if not isinstance(source, int): + raise ValueError("source id must be int") + + if source < 0: + raise ValueError("source id must be positive") + + encoder = create_encoder(codec, channel, **kwargs) + self._encoders[(int(source), int(channel))] = encoder + self._channel_count += 1 + + def encode(self, source, timestamp, channel, data): + if not isinstance(source, int): + raise ValueError("source id must be int") + + if source < 0: + raise ValueError("source id must be positive") + + if timestamp < 0: + raise ValueError("timestamp must be positive") + + if channel not in ftltype.Channel: + raise ValueError("unknown channel") + + try: + p = self._encoders[(int(source), int(channel))](data) + except KeyError: + raise Exception("no encoder found, create_encoder() has to be" + + "called for every source and channel") + except Exception as ex: + raise Exception("Encoding error:" + str(ex)) + + sp = ftltype.StreamPacket._make((timestamp, + int(source), + int(channel), + self._channel_count)) + + self.add_raw(sp, p) diff --git a/python/ftl/ftltypes.py b/python/ftl/ftltype.py similarity index 99% rename from python/ftl/ftltypes.py rename to python/ftl/ftltype.py index 6c22004140b4e71e6a0ffbf4bc900e09b8a0ad28..4120fdb18ca2c9fd6c7f98da4a315a0cebb45edf 100644 --- a/python/ftl/ftltypes.py +++ b/python/ftl/ftltype.py @@ -1,4 +1,3 @@ - from collections import namedtuple from enum import IntEnum @@ -89,5 +88,5 @@ def get_definition(shape): for k, v in definition_t.items(): if shape[:2] == v: return k - + return 7 # (None) diff --git a/python/ftl/libde265.py b/python/ftl/libde265.py index 20ef8ca38c9b29f5f3408385f13a98af72081c7f..830a54b2c2a6db5267e541b3a2beaaa0a674f8fe 100644 --- a/python/ftl/libde265.py +++ b/python/ftl/libde265.py @@ -1,4 +1,4 @@ -'''! +""" Python wrapper for libde265. Only decoding is implemented. Requirements: @@ -6,7 +6,7 @@ Requirements: * numpy * opencv (recommended) or skimage -''' +""" try: import cv2 as cv @@ -33,13 +33,12 @@ import numpy as np import os -''' -# default number of worker threads for decoder: half of os.cpu_count() -_threads = os.cpu_count() // 2 -if _threads is None: - _threads = 1 -''' +# default number of worker threads for decoder: half of os.cpu_count() +# +#_threads = os.cpu_count() // 2 +#if _threads is None: +# _threads = 1 _threads = 1 @@ -174,6 +173,8 @@ class WaitingForInput(libde265Error): pass class Decoder: + """ Python interface to libde256 decoder API. + """ def __init__(self, threads=_threads): self._more = ctypes.c_int() self._out_stride = ctypes.c_int() @@ -186,7 +187,8 @@ class Decoder: raise libde265Error(err) def __del__(self): - libde265.de265_free_decoder(self._ctx) + if self._ctx: + libde265.de265_free_decoder(self._ctx) def _copy_image(self, de265_image): size = (libde265.de265_get_image_height(de265_image, 0), @@ -198,7 +200,7 @@ class Decoder: chroma_format = libde265.de265_get_chroma_format(de265_image) if chroma_format != de265_chroma.de265_chroma_420: raise NotImplementedError("Unsupported chroma format %s" % str(chroma_format)) - + for c in range(0, 3): size_channel = (libde265.de265_get_image_height(de265_image, c), libde265.de265_get_image_width(de265_image, c)) @@ -251,6 +253,10 @@ class Decoder: raise libde265Error(err) def push_data(self, data): + """ Push data to decoder + + @param data input bytes + """ if not isinstance(data, bytes): raise ValueError("expected bytes") @@ -276,8 +282,9 @@ class Decoder: def get_next_picture(self): ''' - Returns next decoded frame. Image in YCbCr format. If no frame available - returns None. + Get decoded frame. + + @returns image in YCbCr format or None if no frame available ''' de265_image = libde265.de265_get_next_picture(self._ctx) diff --git a/python/ftl/misc.py b/python/ftl/misc.py index c47a47cf2c3ca2680b4031a56c42eec83fd99a7d..2a6c940cc198e51588c60c66fbbb975219a1f11c 100644 --- a/python/ftl/misc.py +++ b/python/ftl/misc.py @@ -1,104 +1,193 @@ +import numpy as np + +def get_camera_matrix(calib): + K = np.identity(3, dtype=np.float64) + K[0,0] = calib.fx + K[1,1] = calib.fy + K[0,2] = calib.cx + K[1,2] = calib.cy + return K + +def get_Q(calib): + """ Disparity to depth matrix. Explained in "Learning OpenCV: Computer + Vision with the OpenCV Library" (2008) p. 435. + """ + Q = np.identity(4, dtype=np.float64) + Q[0,3] = calib.cx + Q[1,3] = calib.cy + Q[2,2] = 0.0 + Q[2,3] = calib.fx + Q[3,2] = -1 / calib.baseline + Q[3,3] = calib.doff + return Q def disparity_to_depth(disparity, camera, max_depth=10.0, invalid_value=0.0): - ''' Calculate depth map from disparity map. Depth values smaller than 0.0 + """ Calculate depth map from disparity map. Depth values smaller than 0.0 and larger than max_depth are set to invalid_value. - ''' + """ + depth = (camera.fx * camera.baseline) / (disparity - camera.doff) depth[depth < 0] = invalid_value depth[depth > max_depth] = invalid_value return depth def depth_to_disparity(depth, camera, invalid_value=0.0): + """ Calculate disparity from depth image. Inverse of disparity_to_depth(). + """ invalid = depth == 0.0 depth[invalid] = 1.0 disparity = ((camera.fx * camera.baseline) / depth) + camera.doff - disparity[invalid] = invalid_value + disparity[invalid] = invalid_value return disparity -from enum import IntEnum - -# components/codecs/include/ftl/codecs/hevc.hpp -class NALType(IntEnum): - CODED_SLICE_TRAIL_N = 0 - CODED_SLICE_TRAIL_R = 1 - - CODED_SLICE_TSA_N = 2 - CODED_SLICE_TSA_R = 3 - - CODED_SLICE_STSA_N = 4 - CODED_SLICE_STSA_R = 5 - - CODED_SLICE_RADL_N = 6 - CODED_SLICE_RADL_R = 7 - - CODED_SLICE_RASL_N = 8 - CODED_SLICE_RASL_R = 9 - - RESERVED_VCL_N10 = 10 - RESERVED_VCL_R11 = 11 - RESERVED_VCL_N12 = 12 - RESERVED_VCL_R13 = 13 - RESERVED_VCL_N14 = 14 - RESERVED_VCL_R15 = 15 - - CODED_SLICE_BLA_W_LP = 16 - CODED_SLICE_BLA_W_RADL = 17 - CODED_SLICE_BLA_N_LP = 18 - CODED_SLICE_IDR_W_RADL = 19 - CODED_SLICE_IDR_N_LP = 20 - CODED_SLICE_CRA = 21 - RESERVED_IRAP_VCL22 = 22 - RESERVED_IRAP_VCL23 = 23 - - RESERVED_VCL24 = 24 - RESERVED_VCL25 = 25 - RESERVED_VCL26 = 26 - RESERVED_VCL27 = 27 - RESERVED_VCL28 = 28 - RESERVED_VCL29 = 29 - RESERVED_VCL30 = 30 - RESERVED_VCL31 = 31 - - VPS = 32 - SPS = 33 - PPS = 34 - ACCESS_UNIT_DELIMITER = 35 - EOS = 36 - EOB = 37 - FILLER_DATA = 38 - PREFIX_SEI = 39 - SUFFIX_SEI = 40 - - RESERVED_NVCL41 = 41 - RESERVED_NVCL42 = 42 - RESERVED_NVCL43 = 43 - RESERVED_NVCL44 = 44 - RESERVED_NVCL45 = 45 - RESERVED_NVCL46 = 46 - RESERVED_NVCL47 = 47 - UNSPECIFIED_48 = 48 - UNSPECIFIED_49 = 49 - UNSPECIFIED_50 = 50 - UNSPECIFIED_51 = 51 - UNSPECIFIED_52 = 52 - UNSPECIFIED_53 = 53 - UNSPECIFIED_54 = 54 - UNSPECIFIED_55 = 55 - UNSPECIFIED_56 = 56 - UNSPECIFIED_57 = 57 - UNSPECIFIED_58 = 58 - UNSPECIFIED_59 = 59 - UNSPECIFIED_60 = 60 - UNSPECIFIED_61 = 61 - UNSPECIFIED_62 = 62 - UNSPECIFIED_63 = 63 - INVALID = 64 - -def get_NAL_type(data): - if not isinstance(data, bytes): - raise ValueError("expected bytes") - - return NALType((data[4] >> 1) & 0x3f) - -def is_iframe(data): - return get_NAL_type(data) == NALType.VPS +class Calibration: + def __init__(self, calib, channel, capabilities): + self._calib = calib + self._capabilities = capabilities + + def matrix(self): + return get_camera_matrix(self._calib) + + def Q(self): + return get_Q(self._calib) + + def params(self): + return self._calib + +def point3d(calib, u, v, d): + """ Calculate point 3D coordinates + @param calib calibration + @param u image x coordinate + @param v image y coordinate + @param d depth value for pixel (u, v) + """ + + return np.array([(u+calib.cx)*d/calib.fx, (v+calib.cy)*d / calib.fy, d], dtype=np.float) + +def depth_image_to_3D(depth, calib): + """ Calculate 3D points from depth image and calibration paramters + @param depth depth image + @param calib calibration paramters + @returns 3D points in (h,w,3) array + """ + + bad = (depth == 0.0) | (depth < calib.min_depth) | (depth > calib.max_depth) + + xs = np.zeros(depth.shape, dtype=np.float) + xs[:,:] = (np.arange(0, xs.shape[1], dtype=np.float) + calib.cx) / calib.fx + xs = xs * depth + xs[bad] = 0.0 + + ys = np.zeros(depth.shape, dtype=np.float) + (ys.T)[:,:] = (np.arange(0, ys.shape[0], dtype=np.float) + calib.cy) / calib.fy + ys = ys * depth + ys[bad] = 0.0 + + points = np.zeros((*depth.shape, 3), dtype=np.float) + points[:,:,0] = xs + points[:,:,1] = ys + points[:,:,2] = np.where(bad, 0.0, depth) + + return points + +def center_points_nonzero(points): + if points.shape[-1] != 3: + raise ValueError("last axis dimension must be 3") + + npoints = np.product(points.shape[:-1]) + rows = points.reshape((npoints, 3)) + + rows[rows.nonzero()[0],:] -= [rows[rows.nonzero()[0],i].mean() for i in range(0, 3)] + return points + +def write_xyz(fname, points, color=None): + """ Write XYZ file (for MeshLab etc). + + Points at origin (0, 0, 0) are not included. + + @param fname output file name + @param points points in (n,3) or (h,w,3) array, where last dimension + contains 3D points in X, Y, Z order + @param color RGB color image (optional) + """ + + if points.shape[-1] != 3: + raise ValueError("last axis dimension must be 3") + + if len(points.shape) == 3: + npoints = points.shape[0] * points.shape[1] + rows = points.reshape((npoints, 3)) + + elif len(points.shape) == 2: + rows = points + + else: + raise ValueError("points must be in (n,3) or (h,w,3) array") + + nonzero = rows.nonzero()[0] + + if color is not None: + if color.shape[-1] != 3: + raise ValueError("color must be rgb") + + with_color = np.zeros((rows.shape[0], 6), dtype=np.float) + with_color[:,0:3] = rows + with_color[:,3:6] = color.reshape((color.shape[0] * color.shape[1], 3)) + rows = with_color + + rows_nonzero = rows[nonzero,:] + + np.savetxt(fname, rows_nonzero, fmt=("%.9f "*rows_nonzero.shape[1])) + +def write_ply(fname, points, color=None): + """ Save points in PLY file as vertices. + + Points at origin (0, 0, 0) are not included. + + @param fname file name + @param points points, last dimension 3 for point coordinates (x, y, z) + @param color RGB color for points (optional) + """ + if points.shape[-1] != 3: + raise ValueError("last axis dimension for points must be 3 (x, y, z)") + + if color is not None: + if color.shape[-1] != 3: + raise ValueError("last axis for color must be 3 (r, g, b)") + + if np.product(points.shape[:-1]) != np.product(color.shape[:-1]): + raise ValueError("color must have same dimensions as points") + + npoints_all = np.product(points.shape[:-1]) + points_ = points.reshape((npoints_all, 3)) + + nonzero = points_.nonzero()[0] + points_ = points_[nonzero,:] + + if color is not None: + color_ = color.reshape((npoints_all, 3))[nonzero,:] + + npoints = points_.shape[0] + + with open(fname, "w") as f: + f.write("ply\n") + f.write("format ascii 1.0\n") + f.write("element vertex %i\n" % npoints) + f.write("property float x\n") + f.write("property float y\n") + f.write("property float z\n") + + if color is not None: + f.write("property uchar red\n") + f.write("property uchar green\n") + f.write("property uchar blue\n") + + f.write("end_header\n") + + if color is not None: + for p, c in zip(points_, color_): + f.write("%f %f %f %i %i %i\n" % (*p, *(c * 255))) + + else: + for p in points_: + f.write("%f %f %f\n" % p)