Support for HEVC depth decoding (NVPipe format)

1d7c155c · Sebastian Hahta · 031427a3 · 1d7c155c · 1d7c155c
Commit 1d7c155c authored 5 years ago by Sebastian Hahta
--- a/python/ftl/ftlstream.py
+++ b/python/ftl/ftlstream.py
@@ -14,20 +14,20 @@ from . import libde265

 try:
    import cv2 as cv
-    
+
    def _ycrcb2rgb(img):
        return cv.cvtColor(img, cv.COLOR_YCrCb2RGB)
-    
+
 except ImportError:
    warn("OpenCV not available. OpenCV required for full functionality.")

    def _ycrcb2rgb(img):
        ''' YCrCb to RGB, based on OpenCV documentation definition.
-        
+
        Note: It seems this implementation is not perfectly equivalent to
        OpenCV's (results not exactly same, why?)
        '''
-        
+
        rgb = np.zeros(img.shape, np.float)

        Y = img[:,:,0].astype(np.float)
@@ -73,7 +73,7 @@ class FTLStreamWriter:
    def add_raw(self, sp, p):
        if len(sp) != len(ftl.StreamPacket._fields) or len(p) != len(ftl.Packet._fields):
           raise ValueError("invalid input")
-        
+
        self._file.write(self._packer.pack((sp, p)))
        self._file.flush()

@@ -82,13 +82,13 @@ class FTLStreamWriter:
        ''' Write frame to file. If encode is False (data already encoded),
        definition needs to be specified.
        '''
-        
+
        if source < 0:
            raise ValueError("invalid source id")

        if channel not in ftl.Channel:
            raise ValueError("invalid channel")
-        
+
        if codec not in ftl.codec_t:
            raise ValueError("invalid codec")

@@ -98,7 +98,7 @@ class FTLStreamWriter:

            if definition is None:
                raise ValueError("unsupported resolution")
-            
+
            if definition != ftl.get_definition(data.shape):
                # todo: could replace definition or scale
                raise ValueError("definition does not match frame resolution")
@@ -107,25 +107,25 @@ class FTLStreamWriter:
                if ftl.is_float_channel(channel):
                    # scaling always same (???)
                    data = (data * 1000).astype(np.uint16)
-                
+
                params = [cv.IMWRITE_PNG_COMPRESSION, 9]
                retval, data = cv.imencode(".png", data, params)
-                
+
                if not retval:
                    raise Exception("encoding error (PNG)")
-            
+
            elif codec == ftl.codec_t.JPG:
                params = []
                retval, data = cv.imencode(".jpg", data, params)

                if not retval:
                    raise Exception("encoding error (JPG)")
-            
+
            else:
                raise ValueError("unsupported codec")

            data = data.tobytes()
-        
+
        if definition is None:
            raise ValueError("definition required")

@@ -152,7 +152,7 @@ class FTLStreamWriter:

 class FTLStreamReader:
    ''' FTL file reader. '''
-    
+
    def __init__(self, file):
        self._file = open(file, "br")
        self._version = 0
@@ -161,7 +161,7 @@ class FTLStreamReader:
        self._seen_iframe = set()

        self._frame = None
-        
+
        # calibration and pose are cached
        self._calibration = {}
        self._pose = {}
@@ -171,38 +171,38 @@ class FTLStreamReader:
            self._version = int(magic[4])
            if magic[:4] != bytes(ord(c) for c in "FTLF"):
                raise Exception("wrong magic")
-            
+
            if self._version >= 2:
                # first 64 bytes reserved
                self._file.read(8*8)

            self._unpacker = msgpack.Unpacker(self._file, raw=True, use_list=False)
-            
+
        except Exception as ex:
            self._file.close()
            raise ex
-            
+
        self._packets_read = 0
-    
+
    def __del__(self):
        self._file.close()
-    
+
    def _read_next(self):
        v1, v2 = self._unpacker.unpack()
        return ftl.StreamPacket._make(v1), ftl.Packet._make(v2)
-    
+
    def _update_calib(self, sp, p):
        ''' Update calibration. '''
-        
+
        if p.codec == ftl.codec_t.MSGPACK:
            # TODO: channel and capabilities should be saved as well
            calib, channel, capabilities = msgpack.unpackb(p.data)
            self._calibration[sp.streamID] = ftl.Camera._make(calib)
-        
+
        elif p.codec == ftl.codec_t.CALIBRATION:
            calibration = struct.unpack("@ddddIIdddd", p.data[:(4*8+2*4+4*8)])
            self._calibration[sp.streamID] = ftl.Camera._make(calibration)
-        
+
        else:
            raise Exception("Unknown codec %i for calibration" % p.codec)

@@ -212,18 +212,18 @@ class FTLStreamReader:
                          dtype=np.float64)
        pose = pose.reshape((4, 4), order='F') # Eigen
        self._pose[sp.streamID] = pose
-    
+
    def _process_json(self, sp, p):
        raise NotImplementedError("json decoding not implemented")

    def _decode_hevc(self, sp, p):
        ''' Decode HEVC frame '''
-        
+
        k = (sp.streamID, sp.channel)
-        
+
        if k not in self._decoders_hevc:
-            self._decoders_hevc[k] = libde265.Decoder(ftl.definition_t[p.definition])
-        
+            self._decoders_hevc[k] = libde265.Decoder()
+
        decoder = self._decoders_hevc[k]

        if k not in self._seen_iframe:
@@ -231,28 +231,38 @@ class FTLStreamReader:
                # can't decode before first I-frame has been received
                warn("received P-frame before I-frame")
                return
-            
+
            self._seen_iframe.add(k)
-        
+
        decoder.push_data(p.data)
        decoder.push_end_of_frame()
-        
+
        while decoder.get_number_of_input_bytes_pending() > 0:
            decoder.decode()
-        
+
        img = decoder.get_next_picture()
        if img is None:
            # if this happens, does get_next_picture() in loop help?
-            warn("frame expected, no image from decoded")
-        
+            warn("frame expected, no image received from decoder")
+
        if ftl.is_float_channel(self._sp.channel):
-            # todo: old hevc format twice the width, upper/lower in left/right
-            #       half of the image
+            # TODO: only supports 8 bits per pixel format and 16 bits
+            #       ()"old format")
            #
-            #       new format, see depth_convert.cu:66
-            
-            raise NotImplementedError("non-color channel decoding not available")
-        
+            # NVPipe: (2 * width), high bits in left, low in right
+
+            high = img[:,(img.shape[1]//2):,0].astype(np.uint32) << 8
+            low = img[:,:(img.shape[1]//2),0].astype(np.uint32)
+            img = (high|low).astype(np.float)/1000.0
+
+            try:
+                img[img < self._calibration[sp.streamID].min_depth] = 0.0
+                img[img > self._calibration[sp.streamID].max_depth] = 0.0
+            except KeyError:
+                warn("no calibration for received frame")
+
+            self._frame = img
+
        else:
            if self._version < 3:
                self._frame = _ycrcb2rgb(img)
@@ -275,7 +285,7 @@ class FTLStreamReader:
        ''' Read until timestamp reached '''
        if self.get_timestamp() >= ts:
            raise Exception("trying to seek to earlier timestamp")
-        
+
        while self.read():
            if self.get_timestamp() >= ts:
                break
@@ -288,20 +298,20 @@ class FTLStreamReader:
        todo: make (frame) decoding optional
        '''
        self._frame = None
-        
+
        try:
            self._sp, self._p = self._read_next()
            self._packets_read += 1
-        
+
        except msgpack.OutOfData:
            return False
-        
+
        if self._p.block_total != 1 or self._p.block_number != 0:
            raise Exception("Unsupported block format (todo)")

        # calibration/pose cached
        # todo: should be done by user instead?
-        
+
        if self._sp.channel == ftl.Channel.Calibration:
            self._update_calib(self._sp, self._p)

@@ -367,7 +377,7 @@ class FTLStreamReader:
            return self._calibration[source]
        except KeyError:
            raise ValueError("source id %i not found" % source)
-    
+
    def get_Q(self, source):
        ''' Disparity to depth matrix (OpenCV) '''


--- a/python/ftl/libde265.py
+++ b/python/ftl/libde265.py
@@ -12,7 +12,7 @@ try:
    import cv2 as cv
    def _resize(img, size):
        return cv.resize(img, dsize=tuple(reversed(size)), interpolation=cv.INTER_CUBIC)
-    
+
 except ImportError:
    from skimage.transform import resize as resize_skimage
    def _resize(img, size):
@@ -27,7 +27,7 @@ from enum import IntEnum

 import numpy as np

-import os 
+import os

 '''
 # default number of worker threads for decoder: half of os.cpu_count()
@@ -164,9 +164,7 @@ class WaitingForInput(libde265Error):
    pass

 class Decoder:
-    def __init__(self, size, threads=_threads):
-        # todo: remove _size
-        self._size = size
+    def __init__(self, threads=_threads):
        self._more = ctypes.c_int()
        self._out_stride = ctypes.c_int()
        self._ctx = libde265.de265_new_decoder()
@@ -176,87 +174,92 @@ class Decoder:

        if err:
            raise libde265Error(err)
-        
+
    def __del__(self):
        libde265.de265_free_decoder(self._ctx)

    def _copy_image(self, de265_image):
-        res = np.zeros((self._size[0], self._size[1], 3), dtype=np.uint8)
+        size = (libde265.de265_get_image_height(de265_image, 0),
+                libde265.de265_get_image_width(de265_image, 0))
+
+        res = np.zeros((*size, 3), dtype=np.uint8)

        # libde265: always 420 (???)
        # chroma_format = libde265.de265_get_chroma_format(de265_image)

        for c in range(0, 3):
-            size = (libde265.de265_get_image_height(de265_image, c),
-                    libde265.de265_get_image_width(de265_image, c))
-            
-            # todo: 10 bits per pixel for new depth format
+            size_channel = (libde265.de265_get_image_height(de265_image, c),
+                            libde265.de265_get_image_width(de265_image, c))
+
+            if size_channel[0] > size[0] or size_channel[1] > size[1]:
+                # Is this possible?
+                print(size, size_channel)
+                raise Exception("Channel larger than first channel")
+
            bpp = libde265.de265_get_bits_per_pixel(de265_image, c)
            if bpp != 8:
-                raise NotImplementedError("unsupported bits per pixel %i" % bpp)
+                raise NotImplementedError("%i-bit format not implemented (TODO)" % bpp)

            img_ptr = libde265.de265_get_image_plane(de265_image, c, self._out_stride)
-            
-            # for frombuffer() no copy assumed
-            ch = np.frombuffer(img_ptr[:size[0] * size[1]], dtype=np.uint8)
-            ch.shape = size
-            
-            # todo: old depth format twice the width
-            #       resize all channels to match Y channel and remove self._size
-            res[:,:,c] = _resize(ch, self._size)
-        
+
+			# libde: how is 10 bits per pixel returned
+            ch = np.frombuffer(img_ptr[:size_channel[0] * size_channel[1]], dtype=np.uint8)
+            ch.shape = size_channel
+
+            res[:,:,c] = _resize(ch, size)
+
        return res

    def _warning(self):
        if self._supress_warnings:
            return
-        
+
        code = libde265.de265_get_warning(self._ctx)
-        
+
        if code != _libde265error.DE265_OK:
            msg = libde265.de265_get_error_text(code).decode("ascii")
            warn(msg)

    def decode(self):
        err = libde265.de265_decode(self._ctx, self._more)
-        
+
        if err:
            if err == _libde265error.DE265_ERROR_WAITING_FOR_INPUT_DATA:
                raise WaitingForInput(err)

            raise libde265Error(err)
-        
+
        self._warning()

        return self._more.value != 0
-    
+
    def flush_data(self):
        err = libde265.de265_flush_data(self._ctx)
-        
+
        if err:
            raise libde265Error(err)
-    
+
    def push_data(self, data):
        if not isinstance(data, bytes):
            raise ValueError("expected bytes")
-        
+
        err = libde265.de265_push_data(self._ctx, data, len(data), None, None)
-        
+
        if err:
            raise libde265Error(err)
-    
+
    def push_end_of_frame(self):
        err = libde265.de265_push_end_of_frame(self._ctx)
-        
+
        if err:
            raise libde265Error(err)
-            
+
    def push_NAL(self, data):
        if not isinstance(data, bytes):
            raise ValueError("expected bytes")
-        
+
        err = libde265.de265_push_NAL(self._ctx, data, len(data), None, None)
-        
+
        if err:
            raise libde265Error(err)

@@ -270,7 +273,7 @@ class Decoder:

        if not de265_image:
            return None
-        
+
        res = self._copy_image(de265_image)

        libde265.de265_release_next_picture(self._ctx)
@@ -285,7 +288,7 @@ class Decoder:

        if not de265_image:
            return None
-        
+
        res = self._copy_image(de265_image)

        libde265.de265_release_next_picture(self._ctx)