diff --git a/CMakeLists.txt b/CMakeLists.txt index 95a37c796246d51cc7b97f8e17534ae88fbeb627..8ac2afe78e6312c51652530cdfb0720908ce53b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ enable_testing() option(WITH_NVPIPE "Use NvPipe for compression if available" ON) option(WITH_OPTFLOW "Use NVIDIA Optical Flow if available" OFF) option(WITH_OPENVR "Build with OpenVR support" OFF) +option(WITH_OPUS "Use Opus audio compression" ON) option(WITH_FIXSTARS "Use Fixstars libSGM" ON) option(WITH_CERES "Use Ceres solver" ON) option(USE_CPPCHECK "Apply cppcheck during build" ON) @@ -113,6 +114,37 @@ else() add_library(openvr INTERFACE) endif() +# ============== Opus ========================================================== + +if (WITH_OPUS) + find_library( OPUS_LIBRARY NAMES opus PATHS ${OPUS_DIR} PATH_SUFFIXES lib) + find_path( OPUS_INCLUDE NAMES opus/opus.h) + if (OPUS_LIBRARY AND OPUS_INCLUDE) + set(HAVE_OPUS TRUE) + add_library(Opus UNKNOWN IMPORTED) + #set_property(TARGET nanogui PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${NANOGUI_EXTRA_INCS}) + set_property(TARGET Opus PROPERTY IMPORTED_LOCATION ${OPUS_LIBRARY}) + message(STATUS "Found Opus: ${OPUS_LIBRARY}") + + if(WIN32) + # Find include + find_path(OPUS_INCLUDE_DIRS + NAMES opus/opus.h + #PATHS "C:/Program Files/OpenVRSDK" "C:/Program Files (x86)/OpenVRSDK" ${OPENVR_DIR} + PATH_SUFFIXES include + ) + set_property(TARGET Opus PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${OPUS_INCLUDE_DIRS}) + endif() + else() + message(STATUS "No Opus, audio compression disabled") + set(OPUS_LIBRARY "") + add_library(Opus INTERFACE) + endif() +else() + set(OPUS_LIBRARY "") + add_library(Opus INTERFACE) +endif() + # ============================================================================== add_subdirectory(lib/libstereo) diff --git a/applications/gui/src/src_window.cpp b/applications/gui/src/src_window.cpp index fc9f92b640085b4e01cb3003177c2a4a86781736..6406d914aec8ad63cc2b27974b24ad3f5e277987 100644 --- a/applications/gui/src/src_window.cpp +++ b/applications/gui/src/src_window.cpp @@ -160,6 +160,8 @@ SourceWindow::SourceWindow(ftl::gui::Screen *screen) int64_t renddelay = (c) ? 
c->getFrameTimeMS() : 0; speaker_->setDelay(fs.timestamp - framesets_[0]->timestamp + renddelay); // Add Xms for local render time speaker_->queue(fs.timestamp, fs.frames[0]); + + //LOG(INFO) << "Audio delay = " << (fs.timestamp - framesets_[0]->timestamp + renddelay); return true; }); diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp index 615e8f05a4302f3571e04096a06151fadd5c692e..489a384b276835afdaf6f027a944f680dd91362f 100644 --- a/applications/vision/src/main.cpp +++ b/applications/vision/src/main.cpp @@ -110,7 +110,23 @@ static void run(ftl::Configurable *root) { auto paths = root->get<vector<string>>("paths"); string file = ""; - if (paths && (*paths).size() > 0) file = (*paths)[(*paths).size()-1]; + //if (paths && (*paths).size() > 0) file = (*paths)[(*paths).size()-1]; + + if (paths) for (auto &x : *paths) { + //LOG(INFO) << "PATH - " << x; + if (x != "") { + ftl::URI uri(x); + if (uri.isValid()) { + switch (uri.getScheme()) { + case ftl::URI::SCHEME_WS : + case ftl::URI::SCHEME_TCP : net->connect(x)->waitConnection(); break; + case ftl::URI::SCHEME_DEVICE : + case ftl::URI::SCHEME_FILE : file = x; break; + default: break; + } + } + } + } Source *source = nullptr; source = ftl::create<Source>(root, "source", net); diff --git a/components/audio/CMakeLists.txt b/components/audio/CMakeLists.txt index 767184b2b91ab77c8a5fcb6e4ea842e586ac24f8..bd5548ba6e884f4b656c998b2c81ba400c5bb2f7 100644 --- a/components/audio/CMakeLists.txt +++ b/components/audio/CMakeLists.txt @@ -3,6 +3,8 @@ set(AUDIOSRC src/frame.cpp src/portaudio.cpp src/speaker.cpp + src/software_encoder.cpp + src/software_decoder.cpp ) add_library(ftlaudio ${AUDIOSRC}) @@ -12,7 +14,7 @@ target_include_directories(ftlaudio PUBLIC $<INSTALL_INTERFACE:include> PRIVATE src) -target_link_libraries(ftlaudio ftlcommon Eigen3::Eigen ftlstreams ftldata portaudio) +target_link_libraries(ftlaudio ftlcommon Eigen3::Eigen ftlstreams ftldata portaudio Opus) #add_subdirectory(test) diff --git a/components/audio/include/ftl/audio/buffer.hpp b/components/audio/include/ftl/audio/buffer.hpp index 87a80285e59a14d5c7567bb9272daa521c84dca8..2367629912ab432e0750a5df6074079b6150ef26 100644 --- a/components/audio/include/ftl/audio/buffer.hpp +++ b/components/audio/include/ftl/audio/buffer.hpp @@ -170,10 +170,10 @@ void FixedBuffer<T,CHAN,FRAME,SIZE>::read(std::vector<T> &out, int count) { // ==== Common forms =========================================================== template <int SIZE> -using StereoBuffer16 = ftl::audio::FixedBuffer<short,2,256,SIZE>; +using StereoBuffer16 = ftl::audio::FixedBuffer<short,2,960,SIZE>; template <int SIZE> -using MonoBuffer16 = ftl::audio::FixedBuffer<short,1,256,SIZE>; +using MonoBuffer16 = ftl::audio::FixedBuffer<short,1,960,SIZE>; } } diff --git a/components/audio/include/ftl/audio/decoder.hpp b/components/audio/include/ftl/audio/decoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c13860a8bbe3fe62aed2c783fdccf89118c0c0c3 --- /dev/null +++ b/components/audio/include/ftl/audio/decoder.hpp @@ -0,0 +1,24 @@ +#ifndef _FTL_AUDIO_DECODER_HPP_ +#define _FTL_AUDIO_DECODER_HPP_ + +#include <vector> +#include <ftl/codecs/packet.hpp> +#include <ftl/codecs/codecs.hpp> + +namespace ftl { +namespace audio { + +class Decoder { + public: + Decoder() { }; + virtual ~Decoder() { }; + + virtual bool decode(const ftl::codecs::Packet &pkt, std::vector<short> &out)=0; + + virtual bool accepts(const ftl::codecs::Packet &)=0; +}; + +} +} + +#endif \ No newline at end of file diff --git
a/components/audio/include/ftl/audio/encoder.hpp b/components/audio/include/ftl/audio/encoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2e17f4497fc03804b48a6b6f8f59afc12e82a148 --- /dev/null +++ b/components/audio/include/ftl/audio/encoder.hpp @@ -0,0 +1,26 @@ +#ifndef _FTL_AUDIO_ENCODER_HPP_ +#define _FTL_AUDIO_ENCODER_HPP_ + +#include <vector> +#include <ftl/codecs/packet.hpp> +#include <ftl/codecs/codecs.hpp> + +namespace ftl { +namespace audio { + +class Encoder { + public: + Encoder() {}; + virtual ~Encoder() {}; + + virtual bool encode(const std::vector<short> &in, ftl::codecs::Packet &pkt)=0; + + virtual void reset() {} + + virtual bool supports(ftl::codecs::codec_t codec)=0; +}; + +} +} + +#endif diff --git a/components/audio/include/ftl/audio/software_decoder.hpp b/components/audio/include/ftl/audio/software_decoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c0ae871a69e7acfb336598a26e93ad0990b40060 --- /dev/null +++ b/components/audio/include/ftl/audio/software_decoder.hpp @@ -0,0 +1,33 @@ +#ifndef _FTL_AUDIO_SOFTWARE_DECODER_HPP_ +#define _FTL_AUDIO_SOFTWARE_DECODER_HPP_ + +#include <ftl/audio/decoder.hpp> + +struct OpusMSDecoder; + +namespace ftl { +namespace audio { + +class SoftwareDecoder : public ftl::audio::Decoder { + public: + SoftwareDecoder(); + ~SoftwareDecoder(); + + bool decode(const ftl::codecs::Packet &pkt, std::vector<short> &out) override; + + bool accepts(const ftl::codecs::Packet &) override; + + private: + OpusMSDecoder *opus_decoder_; + bool cur_stereo_; + ftl::codecs::definition_t cur_definition_; + + bool _decodeOpus(const ftl::codecs::Packet &pkt, std::vector<short> &out); + bool _decodeRaw(const ftl::codecs::Packet &pkt, std::vector<short> &out); + bool _createOpus(const ftl::codecs::Packet &pkt); +}; + +} +} + +#endif diff --git a/components/audio/include/ftl/audio/software_encoder.hpp b/components/audio/include/ftl/audio/software_encoder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4f11843c1f1da142999c1c1ab30dd9c426230fe8 --- /dev/null +++ b/components/audio/include/ftl/audio/software_encoder.hpp @@ -0,0 +1,36 @@ +#ifndef _FTL_AUDIO_SOFTWARE_ENCODER_HPP_ +#define _FTL_AUDIO_SOFTWARE_ENCODER_HPP_ + +#include <ftl/audio/encoder.hpp> + +struct OpusMSEncoder; + +namespace ftl { +namespace audio { + +class SoftwareEncoder : public ftl::audio::Encoder { + public: + SoftwareEncoder(); + ~SoftwareEncoder(); + + bool encode(const std::vector<short> &in, ftl::codecs::Packet &pkt) override; + + void reset() override; + + bool supports(ftl::codecs::codec_t codec) override; + + private: + OpusMSEncoder *opus_encoder_; + bool cur_stereo_; + ftl::codecs::definition_t cur_definition_; + uint8_t cur_bitrate_; + + bool _encodeRaw(const std::vector<short> &in, ftl::codecs::Packet &pkt); + bool _encodeOpus(const std::vector<short> &in, ftl::codecs::Packet &pkt); + bool _createOpus(ftl::codecs::Packet &pkt); +}; + +} +} + +#endif diff --git a/components/audio/include/ftl/audio/source.hpp b/components/audio/include/ftl/audio/source.hpp index 797aee1e9ac2288a93f60bd674e6003f84e0495c..ff0663ddce1eab5e77fc29517318f81b16ba414e 100644 --- a/components/audio/include/ftl/audio/source.hpp +++ b/components/audio/include/ftl/audio/source.hpp @@ -13,7 +13,7 @@ namespace ftl { namespace audio { -static constexpr int kFrameSize = 256; +static constexpr int kFrameSize = 960; typedef ftl::data::Generator<ftl::audio::FrameSet> Generator; diff --git a/components/audio/src/software_decoder.cpp 
b/components/audio/src/software_decoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c5516fa26ae2f71e5ab3a1f9c6e3257cd276e5a9 --- /dev/null +++ b/components/audio/src/software_decoder.cpp @@ -0,0 +1,117 @@ +#include <ftl/audio/software_decoder.hpp> +#include <ftl/config.h> + +#ifdef HAVE_OPUS +#include <opus/opus_multistream.h> +#else +struct OpusMSDecoder {}; +#endif + +#define LOGURU_REPLACE_GLOG 1 +#include <loguru.hpp> + +#define FRAME_SIZE 960 + +using ftl::audio::SoftwareDecoder; +using ftl::codecs::codec_t; + +SoftwareDecoder::SoftwareDecoder() : opus_decoder_(nullptr) { + +} + +SoftwareDecoder::~SoftwareDecoder() { + +} + +bool SoftwareDecoder::_createOpus(const ftl::codecs::Packet &pkt) { + #ifdef HAVE_OPUS + bool stereo = pkt.flags & ftl::codecs::kFlagStereo; + if (pkt.definition == cur_definition_ && stereo == cur_stereo_ && opus_decoder_) return true; + + cur_definition_ = pkt.definition; + cur_stereo_ = stereo; + + if (opus_decoder_) { + opus_multistream_decoder_destroy(opus_decoder_); + opus_decoder_ = nullptr; + } + + int sample_rate; + switch (pkt.definition) { + case ftl::codecs::definition_t::hz48000 : sample_rate = 48000; break; + case ftl::codecs::definition_t::hz44100 : sample_rate = 44100; break; + default: return false; + } + + int errcode = 0; + int channels = (stereo) ? 2 : 1; + const unsigned char mapping[2] = {0,1}; + opus_decoder_ = opus_multistream_decoder_create(sample_rate, channels, 1, channels-1, mapping, &errcode); + + if (errcode < 0) return false; + + LOG(INFO) << "Created OPUS decoder: " << sample_rate << ", " << channels; + #endif + return true; +} + +bool SoftwareDecoder::decode(const ftl::codecs::Packet &pkt, std::vector<short> &out) { + switch (pkt.codec) { + case codec_t::OPUS : return _decodeOpus(pkt, out); + case codec_t::RAW : return _decodeRaw(pkt, out); + default: return false; + } +} + +bool SoftwareDecoder::_decodeOpus(const ftl::codecs::Packet &pkt, std::vector<short> &out) { + #ifdef HAVE_OPUS + if (!_createOpus(pkt)) return false; + + int channels = (cur_stereo_) ? 
2 : 1; + + out.resize(10*FRAME_SIZE*channels); + + const unsigned char *inptr = pkt.data.data(); + short *outptr = out.data(); + int count = 0; + int frames = 0; + + for (size_t i=0; i<pkt.data.size(); ) { + const short *len = (const short*)inptr; + inptr += 2; + i += (*len)+2; + int samples = opus_multistream_decode(opus_decoder_, inptr, *len, outptr, FRAME_SIZE, 0); + + if (samples != FRAME_SIZE) { + LOG(ERROR) << "Failed to Opus decode: " << samples; + //return false; + break; + } + + inptr += *len; + outptr += FRAME_SIZE*channels; + count += samples; + ++frames; + } + + out.resize(count*channels); + //LOG(INFO) << "Received " << frames << " Opus frames"; + return true; + + #else + LOG(WARNING) << "No Opus decoder installed"; + return false; + #endif +} + +bool SoftwareDecoder::_decodeRaw(const ftl::codecs::Packet &pkt, std::vector<short> &out) { + size_t size = pkt.data.size()/sizeof(short); + out.resize(size); + auto *ptr = (short*)pkt.data.data(); + for (size_t i=0; i<size; i++) out.data()[i] = ptr[i]; + return true; +} + +bool SoftwareDecoder::accepts(const ftl::codecs::Packet &) { + return false; +} diff --git a/components/audio/src/software_encoder.cpp b/components/audio/src/software_encoder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a135e821afc46e3e2c13382fdcf6ad827510f7c --- /dev/null +++ b/components/audio/src/software_encoder.cpp @@ -0,0 +1,136 @@ +#include <ftl/audio/software_encoder.hpp> +#include <ftl/config.h> + +#ifdef HAVE_OPUS +#include <opus/opus_multistream.h> +#else +struct OpusMSEncoder {}; +#endif + +#define LOGURU_REPLACE_GLOG 1 +#include <loguru.hpp> + +using ftl::audio::SoftwareEncoder; +using ftl::codecs::codec_t; + +#define FRAME_SIZE 960 +#define MAX_PACKET_SIZE (3*2*FRAME_SIZE) + +SoftwareEncoder::SoftwareEncoder() : ftl::audio::Encoder(), opus_encoder_(nullptr), cur_bitrate_(0) { + +} + +SoftwareEncoder::~SoftwareEncoder() { + +} + +bool SoftwareEncoder::encode(const std::vector<short> &in, ftl::codecs::Packet &pkt) { + auto codec = (pkt.codec == codec_t::Any) ? codec_t::OPUS : pkt.codec; + + // Force RAW if no opus + #ifndef HAVE_OPUS + codec = codec_t::RAW; + #endif + + pkt.codec = codec; + + switch (codec) { + case codec_t::OPUS : return _encodeOpus(in, pkt); + case codec_t::RAW : return _encodeRaw(in, pkt); + default: return false; + } +} + +bool SoftwareEncoder::_createOpus(ftl::codecs::Packet &pkt) { + #ifdef HAVE_OPUS + bool stereo = pkt.flags & ftl::codecs::kFlagStereo; + if (pkt.definition == cur_definition_ && stereo == cur_stereo_ && opus_encoder_) return true; + + cur_definition_ = pkt.definition; + cur_stereo_ = stereo; + + if (opus_encoder_) { + opus_multistream_encoder_destroy(opus_encoder_); + opus_encoder_ = nullptr; + } + + int sample_rate; + switch (pkt.definition) { + case ftl::codecs::definition_t::hz48000 : sample_rate = 48000; break; + case ftl::codecs::definition_t::hz44100 : sample_rate = 44100; break; + default: return false; + } + + int errcode = 0; + int channels = (stereo) ? 
2 : 1; + const unsigned char mapping[2] = {0,1}; + opus_encoder_ = opus_multistream_encoder_create(sample_rate, channels, 1, channels-1, mapping, OPUS_APPLICATION_VOIP, &errcode); + + if (errcode < 0) return false; + LOG(INFO) << "Created OPUS encoder"; + #endif + + return true; +} + +bool SoftwareEncoder::_encodeOpus(const std::vector<short> &in, ftl::codecs::Packet &pkt) { + #ifdef HAVE_OPUS + static const float MAX_BITRATE = 128000.0f; + static const float MIN_BITRATE = 24000.0f; + + if (!_createOpus(pkt)) return false; + + if (pkt.bitrate != cur_bitrate_) { + int bitrate = (MAX_BITRATE-MIN_BITRATE) * (float(pkt.bitrate)/255.0f) + MIN_BITRATE; + if (!cur_stereo_) bitrate /= 2; + int errcode = opus_multistream_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)); + if (errcode < 0) return false; + LOG(INFO) << "OPUS encoder: bitrate = " << bitrate; + cur_bitrate_ = pkt.bitrate; + } + + int channels = (cur_stereo_) ? 2 : 1; + + pkt.data.resize(MAX_PACKET_SIZE); + int count = 0; + int frames = 0; + + unsigned char *outptr = pkt.data.data(); + + for (unsigned int i=0; i<in.size(); i+=channels*FRAME_SIZE) { + short *len = (short*)outptr; + outptr += 2; + int nbBytes = opus_multistream_encode(opus_encoder_, &in.data()[i], FRAME_SIZE, outptr, MAX_PACKET_SIZE); + if (nbBytes <= 0) return false; + + //if (nbBytes > 32000) LOG(WARNING) << "Packet exceeds size limit"; + + *len = nbBytes; + + count += nbBytes+2; + outptr += nbBytes; + ++frames; + } + + pkt.data.resize(count); + //LOG(INFO) << "Opus Encode = " << pkt.data.size() << ", " << frames; + return true; + + #else + return false; + #endif +} + +bool SoftwareEncoder::_encodeRaw(const std::vector<short> &in, ftl::codecs::Packet &pkt) { + const unsigned char *ptr = (unsigned char*)in.data(); + pkt.data = std::move(std::vector<unsigned char>(ptr, ptr+in.size()*sizeof(short))); + return true; +} + +void SoftwareEncoder::reset() { + +} + +bool SoftwareEncoder::supports(ftl::codecs::codec_t codec) { + return false; +} diff --git a/components/audio/src/source.cpp b/components/audio/src/source.cpp index 2c976b20721b29dd35bd6a486e7ab55b06ba6569..aa3258b9d0e4f77a32f30688b9bd6b8793aa7409 100644 --- a/components/audio/src/source.cpp +++ b/components/audio/src/source.cpp @@ -138,7 +138,7 @@ Source::Source(nlohmann::json &config) : ftl::Configurable(config), buffer_(null ftl::audio::AudioSettings settings; settings.channels = channels; settings.sample_rate = 48000; - settings.frame_size = 256; + settings.frame_size = 960; state_.setLeft(settings); timer_hp_ = ftl::timer::add(ftl::timer::kTimerHighPrecision, [this](int64_t ts) { diff --git a/components/audio/src/speaker.cpp b/components/audio/src/speaker.cpp index 82addb4ed5558a941964ca6315dad2a470d817ad..9f3d6ecd80e5bef59a58a01acaef589847051be8 100644 --- a/components/audio/src/speaker.cpp +++ b/components/audio/src/speaker.cpp @@ -101,7 +101,7 @@ void Speaker::_open(int fsize, int sample, int channels) { NULL, &outputParameters, sample, // Sample rate - 256, // Size of single frame + 960, // Size of single frame paNoFlag, (channels == 1) ? pa_speaker_callback<ftl::audio::MonoBuffer16<2000>> : pa_speaker_callback<ftl::audio::StereoBuffer16<2000>>, this->buffer_ @@ -133,7 +133,7 @@ void Speaker::queue(int64_t ts, ftl::audio::Frame &frame) { auto &audio = frame.get<ftl::audio::Audio>((frame.hasChannel(Channel::AudioStereo)) ? 
Channel::AudioStereo : Channel::AudioMono); if (!buffer_) { - _open(256, frame.getSettings().sample_rate, frame.getSettings().channels); + _open(960, frame.getSettings().sample_rate, frame.getSettings().channels); } if (!buffer_) return; diff --git a/components/codecs/include/ftl/codecs/codecs.hpp b/components/codecs/include/ftl/codecs/codecs.hpp index 8c3a006ed2afe62a7178f5c7a5eee23889edd29e..8dd2b18a075e5e1ffa76ac78ff0063bb0cb8f4d7 100644 --- a/components/codecs/include/ftl/codecs/codecs.hpp +++ b/components/codecs/include/ftl/codecs/codecs.hpp @@ -33,8 +33,8 @@ enum struct codec_t : uint8_t { H264_LOSSLESS, HEVC_LOSSLESS, - // TODO: Add audio codecs - WAV, + WAV=32, + OPUS, JSON = 100, // A JSON string CALIBRATION, // Camera parameters object diff --git a/components/common/cpp/include/ftl/config.h.in b/components/common/cpp/include/ftl/config.h.in index ecf9e79f798b333a91f68b814583f5edf46615fc..bb58bc207c8dc4fe872447594837e71e344273ac 100644 --- a/components/common/cpp/include/ftl/config.h.in +++ b/components/common/cpp/include/ftl/config.h.in @@ -27,6 +27,7 @@ #cmakedefine HAVE_NVPIPE #cmakedefine HAVE_PORTAUDIO #cmakedefine HAVE_X11 +#cmakedefine HAVE_OPUS #cmakedefine ENABLE_PROFILER diff --git a/components/streams/include/ftl/streams/receiver.hpp b/components/streams/include/ftl/streams/receiver.hpp index 5ca29947ae54d8a89cf16e04f1b05c8bf07d3158..4edfba5b2aa1ae8a7ecfca423095261a39687f60 100644 --- a/components/streams/include/ftl/streams/receiver.hpp +++ b/components/streams/include/ftl/streams/receiver.hpp @@ -6,6 +6,7 @@ #include <ftl/audio/frameset.hpp> #include <ftl/streams/stream.hpp> #include <ftl/codecs/decoder.hpp> +#include <ftl/audio/decoder.hpp> namespace ftl { namespace stream { @@ -75,6 +76,7 @@ class Receiver : public ftl::Configurable, public ftl::rgbd::Generator { ftl::audio::Frame frame; MUTEX mutex; ftl::codecs::Channels<0> completed; + ftl::audio::Decoder *decoder; }; std::vector<InternalVideoStates*> video_frames_[ftl::stream::kMaxStreams]; @@ -86,6 +88,7 @@ class Receiver : public ftl::Configurable, public ftl::rgbd::Generator { void _processAudio(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt); void _processVideo(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt); void _createDecoder(InternalVideoStates &frame, int chan, const ftl::codecs::Packet &pkt); + ftl::audio::Decoder *_createAudioDecoder(InternalAudioStates &frame, const ftl::codecs::Packet &pkt); InternalVideoStates &_getVideoFrame(const ftl::codecs::StreamPacket &spkt, int ix=0); InternalAudioStates &_getAudioFrame(const ftl::codecs::StreamPacket &spkt, int ix=0); }; diff --git a/components/streams/include/ftl/streams/sender.hpp b/components/streams/include/ftl/streams/sender.hpp index 677990711d02afae03aeae1b2e9aac86c73091cc..eaad7ee8c3541253160912618859c7332f66b975 100644 --- a/components/streams/include/ftl/streams/sender.hpp +++ b/components/streams/include/ftl/streams/sender.hpp @@ -6,6 +6,7 @@ #include <ftl/audio/frameset.hpp> #include <ftl/streams/stream.hpp> #include <ftl/codecs/encoder.hpp> +#include <ftl/audio/encoder.hpp> #include <unordered_map> @@ -54,7 +55,12 @@ class Sender : public ftl::Configurable { cudaStream_t stream; }; + struct AudioState { + ftl::audio::Encoder *encoder; + }; + std::unordered_map<int, EncodingState> state_; + std::unordered_map<int, AudioState> audio_state_; //ftl::codecs::Encoder *_getEncoder(int fsid, int fid, ftl::codecs::Channel c); void _encodeChannel(ftl::rgbd::FrameSet &fs, ftl::codecs::Channel c, bool reset); @@ 
-62,6 +68,7 @@ class Sender : public ftl::Configurable { EncodingState &_getTile(int fsid, ftl::codecs::Channel c); cv::Rect _generateROI(const ftl::rgbd::FrameSet &fs, ftl::codecs::Channel c, int offset, bool stereo); float _selectFloatMax(ftl::codecs::Channel c); + ftl::audio::Encoder *_getAudioEncoder(int fsid, int sid, ftl::codecs::Channel c, ftl::codecs::Packet &pkt); }; } diff --git a/components/streams/src/receiver.cpp b/components/streams/src/receiver.cpp index 6a274d56c3efc52007b996b20d0f8bdf7b6b67ef..5493bedbffdd62efebb02b0dc369b1070a97e7d2 100644 --- a/components/streams/src/receiver.cpp +++ b/components/streams/src/receiver.cpp @@ -1,6 +1,7 @@ #include <ftl/streams/receiver.hpp> #include <ftl/codecs/depth_convert_cuda.hpp> #include <ftl/profiler.hpp> +#include <ftl/audio/software_decoder.hpp> #include <opencv2/cudaimgproc.hpp> @@ -150,6 +151,11 @@ void Receiver::_processData(const StreamPacket &spkt, const Packet &pkt) { } } +ftl::audio::Decoder *Receiver::_createAudioDecoder(InternalAudioStates &frame, const ftl::codecs::Packet &pkt) { + if (!frame.decoder) frame.decoder = new ftl::audio::SoftwareDecoder(); + return frame.decoder; +} + void Receiver::_processAudio(const StreamPacket &spkt, const Packet &pkt) { // Audio Data InternalAudioStates &frame = _getAudioFrame(spkt); @@ -157,15 +163,25 @@ void Receiver::_processAudio(const StreamPacket &spkt, const Packet &pkt) { frame.frame.reset(); frame.timestamp = spkt.timestamp; auto &audio = frame.frame.create<ftl::audio::Audio>(spkt.channel); - size_t size = pkt.data.size()/sizeof(short); - audio.data().resize(size); - auto *ptr = (short*)pkt.data.data(); - for (size_t i=0; i<size; i++) audio.data()[i] = ptr[i]; + //size_t size = pkt.data.size()/sizeof(short); + //audio.data().resize(size); + //auto *ptr = (short*)pkt.data.data(); + //for (size_t i=0; i<size; i++) audio.data()[i] = ptr[i]; + + ftl::audio::Decoder *dec = _createAudioDecoder(frame, pkt); + if (!dec) { + LOG(ERROR) << "Could not get an audio decoder"; + return; + } + if (!dec->decode(pkt, audio.data())) { + LOG(ERROR) << "Audio decode failed"; + return; + } // Generate settings from packet data ftl::audio::AudioSettings settings; settings.channels = (spkt.channel == Channel::AudioStereo) ?
2 : 1; - settings.frame_size = 256; + settings.frame_size = 960; switch (pkt.definition) { case definition_t::hz48000 : settings.sample_rate = 48000; break; diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp index 87bc83019c214618d26d743bf4bc705e5fe0ab95..34cbf255ffc8d5cd36430e38d076c9aa802d83c9 100644 --- a/components/streams/src/sender.cpp +++ b/components/streams/src/sender.cpp @@ -1,6 +1,7 @@ #include <ftl/streams/sender.hpp> #include <ftl/codecs/depth_convert_cuda.hpp> #include <ftl/profiler.hpp> +#include <ftl/audio/software_encoder.hpp> #include <opencv2/cudaimgproc.hpp> @@ -59,6 +60,20 @@ void Sender::onRequest(const ftl::stream::StreamCallback &cb) { reqcb_ = cb; } +ftl::audio::Encoder *Sender::_getAudioEncoder(int fsid, int sid, ftl::codecs::Channel c, ftl::codecs::Packet &pkt) { + int id = (fsid << 8) + sid; + auto i = audio_state_.find(id); + if (i == audio_state_.end()) { + audio_state_[id] = {nullptr}; + } + + auto &state = audio_state_[id]; + if (state.encoder == nullptr) { + state.encoder = new ftl::audio::SoftwareEncoder(); + } + return state.encoder; +} + void Sender::post(const ftl::audio::FrameSet &fs) { if (!stream_) return; @@ -82,7 +97,7 @@ void Sender::post(const ftl::audio::FrameSet &fs) { spkt.channel = (fs.frames[i].hasChannel(Channel::AudioStereo)) ? Channel::AudioStereo : Channel::AudioMono; ftl::codecs::Packet pkt; - pkt.codec = ftl::codecs::codec_t::RAW; + pkt.codec = ftl::codecs::codec_t::OPUS; pkt.definition = ftl::codecs::definition_t::Any; switch (settings.sample_rate) { @@ -92,11 +107,22 @@ void Sender::post(const ftl::audio::FrameSet &fs) { } pkt.frame_count = 1; - pkt.flags = 0; - pkt.bitrate = 0; + pkt.flags = (fs.frames[i].hasChannel(Channel::AudioStereo)) ? ftl::codecs::kFlagStereo : 0; + pkt.bitrate = 180; - const unsigned char *ptr = (unsigned char*)data.data().data(); - pkt.data = std::move(std::vector<unsigned char>(ptr, ptr+data.size())); // TODO: Reduce copy... + // Find encoder here ... 
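A note on the payload that `enc->encode()` fills in below: judging from `SoftwareEncoder::_encodeOpus` in this patch and the matching `reformAudio()` in the web client later in the diff, `pkt.data` carries a simple concatenation of Opus frames, each prefixed by a 16-bit length. A minimal parsing sketch of that framing, assuming a little-endian host (the encoder stores the length through a plain `short*` cast and the JavaScript decoder reads it little-endian); `OpusFrameView` and `splitOpusFrames` are illustrative names, not part of the patch:

```cpp
// Sketch only: walk the length-prefixed Opus frames in a packet payload.
// Layout per frame: [uint16 length][length bytes of Opus data], repeated.
#include <cstdint>
#include <cstring>
#include <vector>

struct OpusFrameView {   // hypothetical helper, not in the codebase
	const uint8_t *data;
	uint16_t size;
};

std::vector<OpusFrameView> splitOpusFrames(const std::vector<uint8_t> &payload) {
	std::vector<OpusFrameView> frames;
	size_t offset = 0;
	while (offset + 2 <= payload.size()) {
		uint16_t len;
		std::memcpy(&len, payload.data() + offset, 2);  // same endianness assumption as reformAudio()
		offset += 2;
		if (offset + len > payload.size()) break;       // truncated frame: stop parsing
		frames.push_back({payload.data() + offset, len});
		offset += len;
	}
	return frames;
}
```

This mirrors the loop in `SoftwareDecoder::_decodeOpus` above, which walks the same length prefixes and hands each frame to `opus_multistream_decode`.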
+ ftl::audio::Encoder *enc = _getAudioEncoder(fs.id, i, spkt.channel, pkt); + + // Do encoding into pkt.data + if (!enc) { + LOG(ERROR) << "Could not find audio encoder"; + return; + } + + if (!enc->encode(data.data(), pkt)) { + LOG(ERROR) << "Could not encode audio"; + return; + } stream_->post(spkt, pkt); diff --git a/web-service/public/js/bundle.js b/web-service/public/js/bundle.js index 05c1d80c722491f4a2fe66708e5d096fcd76b8ca..074e04643695e136ff0ecb3e2ccc955de9f43529 100644 --- a/web-service/public/js/bundle.js +++ b/web-service/public/js/bundle.js @@ -70845,7 +70845,8 @@ function FTLMSE(video) { }); // TODO: Generate - this.mime = 'video/mp4; codecs="avc1.640028"'; + //this.mime = 'video/mp4; codecs="avc1.640028, opus"'; + this.mime = null; this.mediaSource = new MediaSource(); //this.element.play(); @@ -70863,11 +70864,11 @@ function FTLMSE(video) { }); this.mediaSource.addEventListener('sourceopen', (e) => { - console.log("Source Open"); + console.log("Source Open", e); URL.revokeObjectURL(this.video.src); console.log(this.mediaSource.readyState); this.sourceBuffer = e.target.addSourceBuffer(this.mime); - this.sourceBuffer.mode = 'sequence'; + //this.sourceBuffer.mode = 'sequence'; this.active = true; this.sourceBuffer.addEventListener('error', (e) => { @@ -70891,13 +70892,35 @@ function FTLMSE(video) { }); this.queue = []; - this.video.src = URL.createObjectURL(this.mediaSource); + //this.video.src = URL.createObjectURL(this.mediaSource); + + this.has_audio = false; + this.first_ts = 0; } ee(FTLMSE.prototype); FTLMSE.prototype.push = function(spkt, pkt) { - this.remux.push(spkt,pkt); + if (this.first_ts == 0) this.first_ts = spkt[0]; + + // Skip first 200ms, use to analyse the stream contents + if (spkt[0] < this.first_ts + 200) { + if (spkt[3] == 32 || spkt[3] == 33) this.has_audio = true; + } else { + if (!this.mime) { + if (this.has_audio) { + console.log("Create video with audio"); + this.mime = 'video/mp4; codecs="avc1.640028, opus"'; + this.remux.has_audio = true; + } else { + console.log("Create video without audio"); + this.mime = 'video/mp4; codecs="avc1.640028"'; + this.remux.has_audio = false; + } + this.video.src = URL.createObjectURL(this.mediaSource); + } + this.remux.push(spkt,pkt); + } } FTLMSE.prototype.select = function(frameset, source, channel) { @@ -70909,7 +70932,7 @@ module.exports = FTLMSE; },{"./ftlremux":109,"event-emitter":26}],109:[function(require,module,exports){ var ee = require('event-emitter'); const MUXJS = require('mux.js'); -const MP4 = MUXJS.mp4.generator; +const MP4 = require('./lib/mp4-generator'); const H264Stream = MUXJS.codecs.h264.H264Stream; const VIDEO_PROPERTIES = [ @@ -70945,6 +70968,56 @@ function concatNals(sample) { sample.data = data; } +function concatAudioSamples(samples) { + let totallen = 0; + for (let i=0; i<samples.length; ++i) { + totallen += samples[i].size; + } + + let result = new Uint8Array(totallen); + let offset = 0; + for (let i=0; i<samples.length; ++i) { + result.set(samples[i].data, offset); + offset += samples[i].size; + } + return MP4.mdat(result); +} + +function reformAudio(data) { + let offset = 0; + let results = []; + + while (offset < data.length) { + let l = data[offset] + (data[offset+1] << 8); //view.getInt16(offset); + offset += 2; + //console.log("Opus frame code = ", data[offset] & 0x03, l); + //let p; + let p = data.subarray(offset, offset+l); + /*let ll = l-1; // Remove config byte + if (ll <= 251) { + p = new Uint8Array(l+1); + p[0] = data[offset]; + p[1] = ll & 0xff; + 
p.set(data.subarray(offset+1, offset+l), 2); + } else { + //let p = data.subarray(offset, offset+l); + p = new Uint8Array(l+2); + p[0] = data[offset]; + let l2 = (ll-252) >> 2; + let l1 = 252 + ((ll-252) - (l2 << 2)); + p[1] = l1; + p[3] = l2; + console.log("Opus size", l1 + 4*l2, ll, l1, l2); + p.set(data.subarray(offset+1, offset+l), 3); + }*/ + //let mdat = MP4.mdat(p); + results.push({size: p.byteLength, duration: 1800, data: p}); + offset += l; + } + + return results; +} + var createDefaultSample = function() { return { units: [], @@ -70988,6 +71061,25 @@ function FTLRemux() { duration: 0 }; + this.audiotrack = { + timelineStartInfo: { + baseMediaDecodeTime: 0 + }, + baseMediaDecodeTime: 1800, + id: 1, + codec: 'opus', + type: 'audio', + samples: [{ + size: 0, + duration: 1800 //960 + }], + duration: 0, + insamplerate: 48000, + channelcount: 2, + width: 0, + height: 0 + }; + this.h264 = new H264Stream(); this.h264.on('data', (nalUnit) => { @@ -71007,9 +71099,13 @@ function FTLRemux() { } if (!this.init_seg && this.track.sps && this.track.pps) { - this.init_seg = true; console.log("Init", this.track); - this.emit('data', MP4.initSegment([this.track])); + if (this.has_audio) { + this.emit('data', MP4.initSegment([this.track, this.audiotrack])); + } else { + this.emit('data', MP4.initSegment([this.track])); + } + this.init_seg = true; } let keyFrame = nalUnit.nalUnitType == 'slice_layer_without_partitioning_rbsp_idr'; @@ -71025,12 +71121,14 @@ function FTLRemux() { } }); - this.mime = 'video/mp4; codecs="avc1.640028"'; this.sequenceNo = 0; + this.audioSequenceNo = 0; this.seen_keyframe = false; this.ts = 0; this.dts = 0; this.init_seg = false; + this.init_audio = false; + this.has_audio = false; }; ee(FTLRemux.prototype); @@ -71040,7 +71138,22 @@ FTLRemux.prototype.push = function(spkt, pkt) { return; } - if(pkt[0] === 2){ // H264 packet. + if (pkt[0] === 33) { // Opus audio + if (this.has_audio && this.init_seg) { + // Split into individual packets and create moof+mdat + let samples = reformAudio(pkt[5]); + this.audiotrack.samples = samples; + + // TODO: Can this audio track be combined into same fragment as video frame? + let moof = MP4.moof(this.audioSequenceNo++, [this.audiotrack]); + let mdat = concatAudioSamples(samples); + let result = new Uint8Array(moof.byteLength + mdat.byteLength); + result.set(moof); + result.set(mdat, moof.byteLength); + this.emit('data', result); + this.audiotrack.baseMediaDecodeTime += 1800*samples.length; // 1800 = 20ms*90 or frame size 960@48000hz in 90000 ticks/s + } + } else if(pkt[0] === 2){ // H264 packet. if (spkt[1] == this.frameset && spkt[2] == this.source && spkt[3] == this.channel) { if (!this.seen_keyframe) { @@ -71107,7 +71220,7 @@ FTLRemux.prototype.reset = function() { module.exports = FTLRemux; -},{"event-emitter":26,"mux.js":50}],110:[function(require,module,exports){ +},{"./lib/mp4-generator":111,"event-emitter":26,"mux.js":50}],110:[function(require,module,exports){ (function (Buffer){ const Peer = require('../../server/src/peer') const msgpack = require('msgpack5')(); @@ -71455,7 +71568,9 @@ function FTLStream(peer, uri, element) { return; } - if(pckg[0] === 2){ // H264 packet. + if (pckg[0] == 33) { + this.mse.push(streampckg, pckg); + } else if(pckg[0] === 2){ // H264 packet. 
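(Aside on the constants checked in the branch above: `pckg[0] == 33`, and `spkt[3] == 32 || spkt[3] == 33` in `ftlmse.js`, follow directly from the `codec_t` change in `components/codecs/include/ftl/codecs/codecs.hpp`, where pinning `WAV=32` makes `OPUS` enumerate to 33. A sketch of the relevant values; the `H264 = 2` entry is inferred from the `pckg[0] === 2` check rather than shown in this diff:)

```cpp
#include <cstdint>

// Illustrative subset of ftl::codecs::codec_t as it stands after this patch.
enum struct codec_t : uint8_t {
	H264 = 2,   // assumed: matches the JS check `pckg[0] === 2` (H264 packet)
	WAV = 32,   // pinned explicitly by this patch
	OPUS = 33,  // next enumerator after WAV = 32
};
```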
let id = "id-"+streampckg[1]+"-"+streampckg[2]+"-"+streampckg[3]; if (this.current == id) { @@ -71652,7 +71767,858 @@ saveConfigs = async () => { const content = await rawResp.json(); } }).call(this,require("buffer").Buffer) -},{"../../server/src/peer":111,"./ftlmse":108,"buffer":9,"msgpack5":32,"rematrix":89,"three":92}],111:[function(require,module,exports){ +},{"../../server/src/peer":112,"./ftlmse":108,"buffer":9,"msgpack5":32,"rematrix":89,"three":92}],111:[function(require,module,exports){ +/** + * mux.js + * + * Copyright (c) Brightcove + * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE + * + * Functions that generate fragmented MP4s suitable for use with Media + * Source Extensions. + * + * Modified by Nicolas Pope to include support for Opus audio tracks + */ +'use strict'; + +var UINT32_MAX = Math.pow(2, 32) - 1; + +var box, dinf, osse, esds, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, + trak, tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, traf, trex, + trun, types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, + AUDIO_HDLR, HDLR_TYPES, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS; + +// pre-calculate constants +(function() { + var i; + types = { + avc1: [], // codingname + avcC: [], + btrt: [], + dinf: [], + dref: [], + esds: [], + ftyp: [], + hdlr: [], + mdat: [], + mdhd: [], + mdia: [], + mfhd: [], + minf: [], + moof: [], + moov: [], + mp4a: [], // codingname + mvex: [], + mvhd: [], + pasp: [], + sdtp: [], + smhd: [], + stbl: [], + stco: [], + stsc: [], + stsd: [], + stsz: [], + stts: [], + styp: [], + tfdt: [], + tfhd: [], + traf: [], + trak: [], + trun: [], + trex: [], + tkhd: [], + vmhd: [], + Opus: [], + dOps: [] + }; + + // In environments where Uint8Array is undefined (e.g., IE8), skip set up so that we + // don't throw an error + if (typeof Uint8Array === 'undefined') { + return; + } + + for (i in types) { + if (types.hasOwnProperty(i)) { + types[i] = [ + i.charCodeAt(0), + i.charCodeAt(1), + i.charCodeAt(2), + i.charCodeAt(3) + ]; + } + } + + MAJOR_BRAND = new Uint8Array([ + 'i'.charCodeAt(0), + 's'.charCodeAt(0), + 'o'.charCodeAt(0), + 'm'.charCodeAt(0) + ]); + AVC1_BRAND = new Uint8Array([ + 'a'.charCodeAt(0), + 'v'.charCodeAt(0), + 'c'.charCodeAt(0), + '1'.charCodeAt(0) + ]); + MINOR_VERSION = new Uint8Array([0, 0, 0, 1]); + VIDEO_HDLR = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x76, 0x69, 0x64, 0x65, // handler_type: 'vide' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x56, 0x69, 0x64, 0x65, + 0x6f, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 'VideoHandler' + ]); + AUDIO_HDLR = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x73, 0x6f, 0x75, 0x6e, // handler_type: 'soun' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x53, 0x6f, 0x75, 0x6e, + 0x64, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 'SoundHandler' + ]); + HDLR_TYPES = { + video: VIDEO_HDLR, + audio: AUDIO_HDLR + }; + DREF = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, // entry_count + 0x00, 0x00, 0x00, 0x0c, // entry_size + 0x75, 0x72, 0x6c, 0x20, // 'url' type + 0x00, // version 0 + 0x00, 0x00, 0x01 // entry_flags + ]); + SMHD = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, // balance, 0 means centered + 
0x00, 0x00 // reserved + ]); + STCO = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00 // entry_count + ]); + STSC = STCO; + STSZ = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // sample_size + 0x00, 0x00, 0x00, 0x00 // sample_count + ]); + STTS = STCO; + VMHD = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x01, // flags + 0x00, 0x00, // graphicsmode + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00 // opcolor + ]); +}()); + +box = function(type) { + var + payload = [], + size = 0, + i, + result, + view; + + for (i = 1; i < arguments.length; i++) { + payload.push(arguments[i]); + } + + i = payload.length; + + // calculate the total size we need to allocate + while (i--) { + size += payload[i].byteLength; + } + result = new Uint8Array(size + 8); + view = new DataView(result.buffer, result.byteOffset, result.byteLength); + view.setUint32(0, result.byteLength); + result.set(type, 4); + + // copy the payload into the result + for (i = 0, size = 8; i < payload.length; i++) { + result.set(payload[i], size); + size += payload[i].byteLength; + } + return result; +}; + +dinf = function() { + return box(types.dinf, box(types.dref, DREF)); +}; + +// Opus (Nick) +osse = function(track) { + let preskip = 3840; + return box(types.dOps, new Uint8Array([ + 0x00, // version + track.channelcount, // Output channel count + (preskip & 0xff00) >> 8, (preskip & 0xff), // Preskip + //0x00, 0x00, 0x00, 0x00, // Input sample rate + 0x00, 0x00, // Upper sample rate bytes + (track.insamplerate & 0xff00) >> 8, + (track.insamplerate & 0xff), + //0x00, 0x00, // samplerate, 16.16 + 0x00, 0x00, // Output gain + 0x00 //ChannelMappingFamily + ])); + }; + +esds = function(track) { + return box(types.esds, new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + + // ES_Descriptor + 0x03, // tag, ES_DescrTag + 0x19, // length + 0x00, 0x00, // ES_ID + 0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority + + // DecoderConfigDescriptor + 0x04, // tag, DecoderConfigDescrTag + 0x11, // length + 0x40, // object type + 0x15, // streamType + 0x00, 0x06, 0x00, // bufferSizeDB + 0x00, 0x00, 0xda, 0xc0, // maxBitrate + 0x00, 0x00, 0xda, 0xc0, // avgBitrate + + // DecoderSpecificInfo + 0x05, // tag, DecoderSpecificInfoTag + 0x02, // length + // ISO/IEC 14496-3, AudioSpecificConfig + // for samplingFrequencyIndex see ISO/IEC 13818-7:2006, 8.1.3.2.2, Table 35 + (track.audioobjecttype << 3) | (track.samplingfrequencyindex >>> 1), + (track.samplingfrequencyindex << 7) | (track.channelcount << 3), + 0x06, 0x01, 0x02 // GASpecificConfig + ])); +}; + +ftyp = function() { + return box(types.ftyp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND, AVC1_BRAND); +}; + +hdlr = function(type) { + return box(types.hdlr, HDLR_TYPES[type]); +}; +mdat = function(data) { + return box(types.mdat, data); +}; +mdhd = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x02, // creation_time + 0x00, 0x00, 0x00, 0x03, // modification_time + 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second + + (track.duration >>> 24) & 0xFF, + (track.duration >>> 16) & 0xFF, + (track.duration >>> 8) & 0xFF, + track.duration & 0xFF, // duration + 0x55, 0xc4, // 'und' language (undetermined) + 0x00, 0x00 + ]); + + // Use the sample rate from the track metadata, when it is + // defined. The sample rate can be parsed out of an ADTS header, for + // instance. 
+ if (track.samplerate) { + result[12] = (track.samplerate >>> 24) & 0xFF; + result[13] = (track.samplerate >>> 16) & 0xFF; + result[14] = (track.samplerate >>> 8) & 0xFF; + result[15] = (track.samplerate) & 0xFF; + } + + return box(types.mdhd, result); +}; +mdia = function(track) { + return box(types.mdia, mdhd(track), hdlr(track.type), minf(track)); +}; +mfhd = function(sequenceNumber) { + return box(types.mfhd, new Uint8Array([ + 0x00, + 0x00, 0x00, 0x00, // flags + (sequenceNumber & 0xFF000000) >> 24, + (sequenceNumber & 0xFF0000) >> 16, + (sequenceNumber & 0xFF00) >> 8, + sequenceNumber & 0xFF // sequence_number + ])); +}; +minf = function(track) { + return box(types.minf, + track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD), + dinf(), + stbl(track)); +}; +moof = function(sequenceNumber, tracks) { + var + trackFragments = [], + i = tracks.length; + // build traf boxes for each track fragment + while (i--) { + trackFragments[i] = traf(tracks[i]); + } + return box.apply(null, [ + types.moof, + mfhd(sequenceNumber) + ].concat(trackFragments)); +}; +/** + * Returns a movie box. + * @param tracks {array} the tracks associated with this movie + * @see ISO/IEC 14496-12:2012(E), section 8.2.1 + */ +moov = function(tracks) { + var + i = tracks.length, + boxes = []; + + while (i--) { + boxes[i] = trak(tracks[i]); + } + + return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex(tracks))); +}; +mvex = function(tracks) { + var + i = tracks.length, + boxes = []; + + while (i--) { + boxes[i] = trex(tracks[i]); + } + return box.apply(null, [types.mvex].concat(boxes)); +}; +mvhd = function(duration) { + var + bytes = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, // creation_time + 0x00, 0x00, 0x00, 0x02, // modification_time + 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second + (duration & 0xFF000000) >> 24, + (duration & 0xFF0000) >> 16, + (duration & 0xFF00) >> 8, + duration & 0xFF, // duration + 0x00, 0x01, 0x00, 0x00, // 1.0 rate + 0x01, 0x00, // 1.0 volume + 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, // transformation: unity matrix + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0xff, 0xff, 0xff, 0xff // next_track_ID + ]); + return box(types.mvhd, bytes); +}; + +sdtp = function(track) { + var + samples = track.samples || [], + bytes = new Uint8Array(4 + samples.length), + flags, + i; + + // leave the full box header (4 bytes) all zero + + // write the sample table + for (i = 0; i < samples.length; i++) { + flags = samples[i].flags; + + bytes[i + 4] = (flags.dependsOn << 4) | + (flags.isDependedOn << 2) | + (flags.hasRedundancy); + } + + return box(types.sdtp, + bytes); +}; + +stbl = function(track) { + return box(types.stbl, + stsd(track), + box(types.stts, STTS), + box(types.stsc, STSC), + box(types.stsz, STSZ), + box(types.stco, STCO)); +}; + +(function() { + var videoSample, audioSample; + + stsd = function(track) { + + return box(types.stsd, new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01 + ]), track.type === 'video' ? 
videoSample(track) : audioSample(track)); + }; + + videoSample = function(track) { + var + sps = track.sps || [], + pps = track.pps || [], + sequenceParameterSets = [], + pictureParameterSets = [], + i, + avc1Box; + + // assemble the SPSs + for (i = 0; i < sps.length; i++) { + sequenceParameterSets.push((sps[i].byteLength & 0xFF00) >>> 8); + sequenceParameterSets.push((sps[i].byteLength & 0xFF)); // sequenceParameterSetLength + sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(sps[i])); // SPS + } + + // assemble the PPSs + for (i = 0; i < pps.length; i++) { + pictureParameterSets.push((pps[i].byteLength & 0xFF00) >>> 8); + pictureParameterSets.push((pps[i].byteLength & 0xFF)); + pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(pps[i])); + } + + avc1Box = [ + types.avc1, new Uint8Array([ + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // pre_defined + (track.width & 0xff00) >> 8, + track.width & 0xff, // width + (track.height & 0xff00) >> 8, + track.height & 0xff, // height + 0x00, 0x48, 0x00, 0x00, // horizresolution + 0x00, 0x48, 0x00, 0x00, // vertresolution + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // frame_count + 0x13, + 0x76, 0x69, 0x64, 0x65, + 0x6f, 0x6a, 0x73, 0x2d, + 0x63, 0x6f, 0x6e, 0x74, + 0x72, 0x69, 0x62, 0x2d, + 0x68, 0x6c, 0x73, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // compressorname + 0x00, 0x18, // depth = 24 + 0x11, 0x11 // pre_defined = -1 + ]), + box(types.avcC, new Uint8Array([ + 0x01, // configurationVersion + track.profileIdc, // AVCProfileIndication + track.profileCompatibility, // profile_compatibility + track.levelIdc, // AVCLevelIndication + 0xff // lengthSizeMinusOne, hard-coded to 4 bytes + ].concat( + [sps.length], // numOfSequenceParameterSets + sequenceParameterSets, // "SPS" + [pps.length], // numOfPictureParameterSets + pictureParameterSets // "PPS" + ))), + box(types.btrt, new Uint8Array([ + 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB + 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate + 0x00, 0x2d, 0xc6, 0xc0 // avgBitrate + ])) + ]; + + if (track.sarRatio) { + var + hSpacing = track.sarRatio[0], + vSpacing = track.sarRatio[1]; + + avc1Box.push( + box(types.pasp, new Uint8Array([ + (hSpacing & 0xFF000000) >> 24, + (hSpacing & 0xFF0000) >> 16, + (hSpacing & 0xFF00) >> 8, + hSpacing & 0xFF, + (vSpacing & 0xFF000000) >> 24, + (vSpacing & 0xFF0000) >> 16, + (vSpacing & 0xFF00) >> 8, + vSpacing & 0xFF + ])) + ); + } + + return box.apply(null, avc1Box); + }; + + audioSample = function(track) { + console.log("AUDIO", track); + if (track.codec == "opus") { + let samplesize = 16; + let samplerate = 48000; + return box(types.Opus, new Uint8Array([ + + // SampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + + // AudioSampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + (track.channelcount & 0xff00) >> 8, + (track.channelcount & 0xff), // channelcount + + (samplesize & 0xff00) >> 8, + (samplesize & 0xff), // samplesize + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + + (samplerate & 0xff00) >> 8, + (samplerate & 0xff), + 0x00, 0x00 // samplerate, 16.16 + + // OpusSpecificSampleEntry + ]), osse(track)); + } else { + return box(types.mp4a, new Uint8Array([ + + // 
SampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + + // AudioSampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + (track.channelcount & 0xff00) >> 8, + (track.channelcount & 0xff), // channelcount + + (track.samplesize & 0xff00) >> 8, + (track.samplesize & 0xff), // samplesize + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + + (track.samplerate & 0xff00) >> 8, + (track.samplerate & 0xff), + 0x00, 0x00 // samplerate, 16.16 + + // MP4AudioSampleEntry, ISO/IEC 14496-14 + ]), esds(track)); + }; + } +}()); + +tkhd = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x07, // flags + 0x00, 0x00, 0x00, 0x00, // creation_time + 0x00, 0x00, 0x00, 0x00, // modification_time + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + track.id & 0xFF, // track_ID + 0x00, 0x00, 0x00, 0x00, // reserved + (track.duration & 0xFF000000) >> 24, + (track.duration & 0xFF0000) >> 16, + (track.duration & 0xFF00) >> 8, + track.duration & 0xFF, // duration + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, // layer + 0x00, 0x00, // alternate_group + 0x01, 0x00, // non-audio track volume + 0x00, 0x00, // reserved + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, // transformation: unity matrix + (track.width & 0xFF00) >> 8, + track.width & 0xFF, + 0x00, 0x00, // width + (track.height & 0xFF00) >> 8, + track.height & 0xFF, + 0x00, 0x00 // height + ]); + + return box(types.tkhd, result); +}; + +/** + * Generate a track fragment (traf) box. A traf box collects metadata + * about tracks in a movie fragment (moof) box. 
+ */ +traf = function(track) { + var trackFragmentHeader, trackFragmentDecodeTime, trackFragmentRun, + sampleDependencyTable, dataOffset, + upperWordBaseMediaDecodeTime, lowerWordBaseMediaDecodeTime; + + trackFragmentHeader = box(types.tfhd, new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x3a, // flags + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + (track.id & 0xFF), // track_ID + 0x00, 0x00, 0x00, 0x01, // sample_description_index + 0x00, 0x00, 0x00, 0x00, // default_sample_duration + 0x00, 0x00, 0x00, 0x00, // default_sample_size + 0x00, 0x00, 0x00, 0x00 // default_sample_flags + ])); + + upperWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime / (UINT32_MAX + 1)); + lowerWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime % (UINT32_MAX + 1)); + + trackFragmentDecodeTime = box(types.tfdt, new Uint8Array([ + 0x01, // version 1 + 0x00, 0x00, 0x00, // flags + // baseMediaDecodeTime + (upperWordBaseMediaDecodeTime >>> 24) & 0xFF, + (upperWordBaseMediaDecodeTime >>> 16) & 0xFF, + (upperWordBaseMediaDecodeTime >>> 8) & 0xFF, + upperWordBaseMediaDecodeTime & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 24) & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 16) & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 8) & 0xFF, + lowerWordBaseMediaDecodeTime & 0xFF + ])); + + // the data offset specifies the number of bytes from the start of + // the containing moof to the first payload byte of the associated + // mdat + dataOffset = (32 + // tfhd + 20 + // tfdt + 8 + // traf header + 16 + // mfhd + 8 + // moof header + 8); // mdat header + + // audio tracks require less metadata + if (track.type === 'audio') { + trackFragmentRun = trun(track, dataOffset); + return box(types.traf, + trackFragmentHeader, + trackFragmentDecodeTime, + trackFragmentRun); + } + + // video tracks should contain an independent and disposable samples + // box (sdtp) + // generate one and adjust offsets to match + sampleDependencyTable = sdtp(track); + trackFragmentRun = trun(track, + sampleDependencyTable.length + dataOffset); + return box(types.traf, + trackFragmentHeader, + trackFragmentDecodeTime, + trackFragmentRun, + sampleDependencyTable); +}; + +/** + * Generate a track box. + * @param track {object} a track definition + * @return {Uint8Array} the track box + */ +trak = function(track) { + track.duration = track.duration || 0xffffffff; + return box(types.trak, + tkhd(track), + mdia(track)); +}; + +trex = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + (track.id & 0xFF), // track_ID + 0x00, 0x00, 0x00, 0x01, // default_sample_description_index + 0x00, 0x00, 0x00, 0x00, // default_sample_duration + 0x00, 0x00, 0x00, 0x00, // default_sample_size + 0x00, 0x01, 0x00, 0x01 // default_sample_flags + ]); + // the last two bytes of default_sample_flags is the sample + // degradation priority, a hint about the importance of this sample + // relative to others. Lower the degradation priority for all sample + // types other than video. + if (track.type !== 'video') { + result[result.length - 1] = 0x00; + } + + return box(types.trex, result); +}; + +(function() { + var audioTrun, videoTrun, trunHeader; + + // This method assumes all samples are uniform. That is, if a + // duration is present for the first sample, it will be present for + // all subsequent samples. 
+ // see ISO/IEC 14496-12:2012, Section 8.8.8.1 + trunHeader = function(samples, offset) { + var durationPresent = 0, sizePresent = 0, + flagsPresent = 0, compositionTimeOffset = 0; + + // trun flag constants + if (samples.length) { + if (samples[0].duration !== undefined) { + durationPresent = 0x1; + } + if (samples[0].size !== undefined) { + sizePresent = 0x2; + } + if (samples[0].flags !== undefined) { + flagsPresent = 0x4; + } + if (samples[0].compositionTimeOffset !== undefined) { + compositionTimeOffset = 0x8; + } + } + + return [ + 0x00, // version 0 + 0x00, + durationPresent | sizePresent | flagsPresent | compositionTimeOffset, + 0x01, // flags + (samples.length & 0xFF000000) >>> 24, + (samples.length & 0xFF0000) >>> 16, + (samples.length & 0xFF00) >>> 8, + samples.length & 0xFF, // sample_count + (offset & 0xFF000000) >>> 24, + (offset & 0xFF0000) >>> 16, + (offset & 0xFF00) >>> 8, + offset & 0xFF // data_offset + ]; + }; + + videoTrun = function(track, offset) { + var bytesOffest, bytes, header, samples, sample, i; + + samples = track.samples || []; + offset += 8 + 12 + (16 * samples.length); + header = trunHeader(samples, offset); + bytes = new Uint8Array(header.length + samples.length * 16); + bytes.set(header); + bytesOffest = header.length; + + for (i = 0; i < samples.length; i++) { + sample = samples[i]; + + bytes[bytesOffest++] = (sample.duration & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.duration & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.duration & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.duration & 0xFF; // sample_duration + bytes[bytesOffest++] = (sample.size & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.size & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.size & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.size & 0xFF; // sample_size + bytes[bytesOffest++] = (sample.flags.isLeading << 2) | sample.flags.dependsOn; + bytes[bytesOffest++] = (sample.flags.isDependedOn << 6) | + (sample.flags.hasRedundancy << 4) | + (sample.flags.paddingValue << 1) | + sample.flags.isNonSyncSample; + bytes[bytesOffest++] = sample.flags.degradationPriority & 0xF0 << 8; + bytes[bytesOffest++] = sample.flags.degradationPriority & 0x0F; // sample_flags + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.compositionTimeOffset & 0xFF; // sample_composition_time_offset + } + return box(types.trun, bytes); + }; + + audioTrun = function(track, offset) { + var bytes, bytesOffest, header, samples, sample, i; + + samples = track.samples || []; + offset += 8 + 12 + (8 * samples.length); + + header = trunHeader(samples, offset); + bytes = new Uint8Array(header.length + samples.length * 8); + bytes.set(header); + bytesOffest = header.length; + + for (i = 0; i < samples.length; i++) { + sample = samples[i]; + bytes[bytesOffest++] = (sample.duration & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.duration & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.duration & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.duration & 0xFF; // sample_duration + bytes[bytesOffest++] = (sample.size & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.size & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.size & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.size & 0xFF; // sample_size + } + + return box(types.trun, bytes); + }; + + trun = function(track, offset) { 
+ if (track.type === 'audio') { + return audioTrun(track, offset); + } + + return videoTrun(track, offset); + }; +}()); + +module.exports = { + ftyp: ftyp, + mdat: mdat, + moof: moof, + moov: moov, + initSegment: function(tracks) { + var + fileType = ftyp(), + movie = moov(tracks), + result; + + result = new Uint8Array(fileType.byteLength + movie.byteLength); + result.set(fileType); + result.set(movie, fileType.byteLength); + return result; + } +}; +},{}],112:[function(require,module,exports){ (function (Buffer){ const msgpack = require('msgpack5')() , encode = msgpack.encode @@ -71941,7 +72907,7 @@ Peer.prototype.getUuid = function() { module.exports = Peer; }).call(this,require("buffer").Buffer) -},{"./utils/uuidParser":112,"buffer":9,"msgpack5":32,"uuid":103}],112:[function(require,module,exports){ +},{"./utils/uuidParser":113,"buffer":9,"msgpack5":32,"uuid":103}],113:[function(require,module,exports){ // Maps for number <-> hex string conversion var _byteToHex = []; var _hexToByte = {}; diff --git a/web-service/public/js/ftlmse.js b/web-service/public/js/ftlmse.js index 75309c12da346a61815bb8b3e236077d456e13c5..82028c451af115165e971562a67c9982c9103977 100644 --- a/web-service/public/js/ftlmse.js +++ b/web-service/public/js/ftlmse.js @@ -23,7 +23,8 @@ function FTLMSE(video) { }); // TODO: Generate - this.mime = 'video/mp4; codecs="avc1.640028"'; + //this.mime = 'video/mp4; codecs="avc1.640028, opus"'; + this.mime = null; this.mediaSource = new MediaSource(); //this.element.play(); @@ -41,11 +42,11 @@ function FTLMSE(video) { }); this.mediaSource.addEventListener('sourceopen', (e) => { - console.log("Source Open"); + console.log("Source Open", e); URL.revokeObjectURL(this.video.src); console.log(this.mediaSource.readyState); this.sourceBuffer = e.target.addSourceBuffer(this.mime); - this.sourceBuffer.mode = 'sequence'; + //this.sourceBuffer.mode = 'sequence'; this.active = true; this.sourceBuffer.addEventListener('error', (e) => { @@ -69,13 +70,35 @@ function FTLMSE(video) { }); this.queue = []; - this.video.src = URL.createObjectURL(this.mediaSource); + //this.video.src = URL.createObjectURL(this.mediaSource); + + this.has_audio = false; + this.first_ts = 0; } ee(FTLMSE.prototype); FTLMSE.prototype.push = function(spkt, pkt) { - this.remux.push(spkt,pkt); + if (this.first_ts == 0) this.first_ts = spkt[0]; + + // Skip first 200ms, use to analyse the stream contents + if (spkt[0] < this.first_ts + 200) { + if (spkt[3] == 32 || spkt[3] == 33) this.has_audio = true; + } else { + if (!this.mime) { + if (this.has_audio) { + console.log("Create video with audio"); + this.mime = 'video/mp4; codecs="avc1.640028, opus"'; + this.remux.has_audio = true; + } else { + console.log("Create video without audio"); + this.mime = 'video/mp4; codecs="avc1.640028"'; + this.remux.has_audio = false; + } + this.video.src = URL.createObjectURL(this.mediaSource); + } + this.remux.push(spkt,pkt); + } } FTLMSE.prototype.select = function(frameset, source, channel) { diff --git a/web-service/public/js/ftlremux.js b/web-service/public/js/ftlremux.js index 4c5c722ca7519d40af06b4802ee5fe4f253f2b7f..637364518d88fd8af5152ba41f7b59f79f06a48a 100644 --- a/web-service/public/js/ftlremux.js +++ b/web-service/public/js/ftlremux.js @@ -1,6 +1,6 @@ var ee = require('event-emitter'); const MUXJS = require('mux.js'); -const MP4 = MUXJS.mp4.generator; +const MP4 = require('./lib/mp4-generator'); const H264Stream = MUXJS.codecs.h264.H264Stream; const VIDEO_PROPERTIES = [ @@ -36,6 +36,56 @@ function concatNals(sample) { sample.data 
= data; } +function concatAudioSamples(samples) { + let totallen = 0; + for (let i=0; i<samples.length; ++i) { + totallen += samples[i].size; + } + + let result = new Uint8Array(totallen); + let offset = 0; + for (let i=0; i<samples.length; ++i) { + result.set(samples[i].data, offset); + offset += samples[i].size; + } + return MP4.mdat(result); +} + +function reformAudio(data) { + let offset = 0; + let results = []; + + while (offset < data.length) { + let l = data[offset] + (data[offset+1] << 8); //view.getInt16(offset); + offset += 2; + //console.log("Opus frame code = ", data[offset] & 0x03, l); + //let p; + let p = data.subarray(offset, offset+l); + /*let ll = l-1; // Remove config byte + if (ll <= 251) { + p = new Uint8Array(l+1); + p[0] = data[offset]; + p[1] = ll & 0xff; + p.set(data.subarray(offset+1, offset+l), 2); + } else { + //let p = data.subarray(offset, offset+l); + p = new Uint8Array(l+2); + p[0] = data[offset]; + let l2 = (ll-252) >> 2; + let l1 = 252 + ((ll-252) - (l2 << 2)); + p[1] = l1; + p[3] = l2; + console.log("Opus size", l1 + 4*l2, ll, l1, l2); + p.set(data.subarray(offset+1, offset+l), 3); + }*/ + //let mdat = MP4.mdat(p); + results.push({size: p.byteLength, duration: 1800, data: p}); + offset += l; + } + + return results; +} + var createDefaultSample = function() { return { units: [], @@ -79,6 +129,25 @@ function FTLRemux() { duration: 0 }; + this.audiotrack = { + timelineStartInfo: { + baseMediaDecodeTime: 0 + }, + baseMediaDecodeTime: 1800, + id: 1, + codec: 'opus', + type: 'audio', + samples: [{ + size: 0, + duration: 1800 //960 + }], + duration: 0, + insamplerate: 48000, + channelcount: 2, + width: 0, + height: 0 + }; + this.h264 = new H264Stream(); this.h264.on('data', (nalUnit) => { @@ -98,9 +167,13 @@ function FTLRemux() { } if (!this.init_seg && this.track.sps && this.track.pps) { - this.init_seg = true; console.log("Init", this.track); - this.emit('data', MP4.initSegment([this.track])); + if (this.has_audio) { + this.emit('data', MP4.initSegment([this.track, this.audiotrack])); + } else { + this.emit('data', MP4.initSegment([this.track])); + } + this.init_seg = true; } let keyFrame = nalUnit.nalUnitType == 'slice_layer_without_partitioning_rbsp_idr'; @@ -116,12 +189,14 @@ function FTLRemux() { } }); - this.mime = 'video/mp4; codecs="avc1.640028"'; this.sequenceNo = 0; + this.audioSequenceNo = 0; this.seen_keyframe = false; this.ts = 0; this.dts = 0; this.init_seg = false; + this.init_audio = false; + this.has_audio = false; }; ee(FTLRemux.prototype); @@ -131,7 +206,22 @@ FTLRemux.prototype.push = function(spkt, pkt) { return; } - if(pkt[0] === 2){ // H264 packet. + if (pkt[0] === 33) { // Opus audio + if (this.has_audio && this.init_seg) { + // Split into individual packets and create moof+mdat + let samples = reformAudio(pkt[5]); + this.audiotrack.samples = samples; + + // TODO: Can this audio track be combined into same fragment as video frame? + let moof = MP4.moof(this.audioSequenceNo++, [this.audiotrack]); + let mdat = concatAudioSamples(samples); + let result = new Uint8Array(moof.byteLength + mdat.byteLength); + result.set(moof); + result.set(mdat, moof.byteLength); + this.emit('data', result); + this.audiotrack.baseMediaDecodeTime += 1800*samples.length; // 1800 = 20ms*90 or frame size 960@48000hz in 90000 ticks/s + } + } else if(pkt[0] === 2){ // H264 packet. 
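+		// The Opus branch above advances baseMediaDecodeTime by 1800 ticks per
+		// sample: the movie timescale is 90000 ticks/s and each Opus frame holds
+		// 960 samples at 48000 Hz, i.e. 20 ms, and 0.020 s * 90000 = 1800 ticks.
+		// The H264 branch below forwards only packets matching the selected
+		// frameset (spkt[1]), source (spkt[2]) and channel (spkt[3]).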
if (spkt[1] == this.frameset && spkt[2] == this.source && spkt[3] == this.channel) { if (!this.seen_keyframe) { diff --git a/web-service/public/js/index.js b/web-service/public/js/index.js index 4f3fd8149cc48c9900e5d92434a34bd96bcd162f..7100c66a88de886e6d0b2a5a08eafdb8c8411f28 100644 --- a/web-service/public/js/index.js +++ b/web-service/public/js/index.js @@ -344,7 +344,9 @@ function FTLStream(peer, uri, element) { return; } - if(pckg[0] === 2){ // H264 packet. + if (pckg[0] == 33) { + this.mse.push(streampckg, pckg); + } else if(pckg[0] === 2){ // H264 packet. let id = "id-"+streampckg[1]+"-"+streampckg[2]+"-"+streampckg[3]; if (this.current == id) { diff --git a/web-service/public/js/lib/mp4-generator.js b/web-service/public/js/lib/mp4-generator.js new file mode 100644 index 0000000000000000000000000000000000000000..5b6ad7c97dfa38d2d5e452b21bdbdaf11afb7603 --- /dev/null +++ b/web-service/public/js/lib/mp4-generator.js @@ -0,0 +1,850 @@ +/** + * mux.js + * + * Copyright (c) Brightcove + * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE + * + * Functions that generate fragmented MP4s suitable for use with Media + * Source Extensions. + * + * Modified by Nicolas Pope to include support for Opus audio tracks + */ +'use strict'; + +var UINT32_MAX = Math.pow(2, 32) - 1; + +var box, dinf, osse, esds, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, + trak, tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, traf, trex, + trun, types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, + AUDIO_HDLR, HDLR_TYPES, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS; + +// pre-calculate constants +(function() { + var i; + types = { + avc1: [], // codingname + avcC: [], + btrt: [], + dinf: [], + dref: [], + esds: [], + ftyp: [], + hdlr: [], + mdat: [], + mdhd: [], + mdia: [], + mfhd: [], + minf: [], + moof: [], + moov: [], + mp4a: [], // codingname + mvex: [], + mvhd: [], + pasp: [], + sdtp: [], + smhd: [], + stbl: [], + stco: [], + stsc: [], + stsd: [], + stsz: [], + stts: [], + styp: [], + tfdt: [], + tfhd: [], + traf: [], + trak: [], + trun: [], + trex: [], + tkhd: [], + vmhd: [], + Opus: [], + dOps: [] + }; + + // In environments where Uint8Array is undefined (e.g., IE8), skip set up so that we + // don't throw an error + if (typeof Uint8Array === 'undefined') { + return; + } + + for (i in types) { + if (types.hasOwnProperty(i)) { + types[i] = [ + i.charCodeAt(0), + i.charCodeAt(1), + i.charCodeAt(2), + i.charCodeAt(3) + ]; + } + } + + MAJOR_BRAND = new Uint8Array([ + 'i'.charCodeAt(0), + 's'.charCodeAt(0), + 'o'.charCodeAt(0), + 'm'.charCodeAt(0) + ]); + AVC1_BRAND = new Uint8Array([ + 'a'.charCodeAt(0), + 'v'.charCodeAt(0), + 'c'.charCodeAt(0), + '1'.charCodeAt(0) + ]); + MINOR_VERSION = new Uint8Array([0, 0, 0, 1]); + VIDEO_HDLR = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x76, 0x69, 0x64, 0x65, // handler_type: 'vide' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x56, 0x69, 0x64, 0x65, + 0x6f, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 'VideoHandler' + ]); + AUDIO_HDLR = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0x73, 0x6f, 0x75, 0x6e, // handler_type: 'soun' + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x53, 0x6f, 0x75, 0x6e, + 0x64, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x72, 0x00 // name: 
'SoundHandler' + ]); + HDLR_TYPES = { + video: VIDEO_HDLR, + audio: AUDIO_HDLR + }; + DREF = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, // entry_count + 0x00, 0x00, 0x00, 0x0c, // entry_size + 0x75, 0x72, 0x6c, 0x20, // 'url' type + 0x00, // version 0 + 0x00, 0x00, 0x01 // entry_flags + ]); + SMHD = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, // balance, 0 means centered + 0x00, 0x00 // reserved + ]); + STCO = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00 // entry_count + ]); + STSC = STCO; + STSZ = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x00, // sample_size + 0x00, 0x00, 0x00, 0x00 // sample_count + ]); + STTS = STCO; + VMHD = new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x01, // flags + 0x00, 0x00, // graphicsmode + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00 // opcolor + ]); +}()); + +box = function(type) { + var + payload = [], + size = 0, + i, + result, + view; + + for (i = 1; i < arguments.length; i++) { + payload.push(arguments[i]); + } + + i = payload.length; + + // calculate the total size we need to allocate + while (i--) { + size += payload[i].byteLength; + } + result = new Uint8Array(size + 8); + view = new DataView(result.buffer, result.byteOffset, result.byteLength); + view.setUint32(0, result.byteLength); + result.set(type, 4); + + // copy the payload into the result + for (i = 0, size = 8; i < payload.length; i++) { + result.set(payload[i], size); + size += payload[i].byteLength; + } + return result; +}; + +dinf = function() { + return box(types.dinf, box(types.dref, DREF)); +}; + +// Opus (Nick) +osse = function(track) { + let preskip = 3840; + return box(types.dOps, new Uint8Array([ + 0x00, // version + track.channelcount, // Output channel count + (preskip & 0xff00) >> 8, (preskip & 0xff), // Preskip + //0x00, 0x00, 0x00, 0x00, // Input sample rate + 0x00, 0x00, // Upper sample rate bytes + (track.insamplerate & 0xff00) >> 8, + (track.insamplerate & 0xff), + //0x00, 0x00, // samplerate, 16.16 + 0x00, 0x00, // Output gain + 0x00 //ChannelMappingFamily + ])); + }; + +esds = function(track) { + return box(types.esds, new Uint8Array([ + 0x00, // version + 0x00, 0x00, 0x00, // flags + + // ES_Descriptor + 0x03, // tag, ES_DescrTag + 0x19, // length + 0x00, 0x00, // ES_ID + 0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority + + // DecoderConfigDescriptor + 0x04, // tag, DecoderConfigDescrTag + 0x11, // length + 0x40, // object type + 0x15, // streamType + 0x00, 0x06, 0x00, // bufferSizeDB + 0x00, 0x00, 0xda, 0xc0, // maxBitrate + 0x00, 0x00, 0xda, 0xc0, // avgBitrate + + // DecoderSpecificInfo + 0x05, // tag, DecoderSpecificInfoTag + 0x02, // length + // ISO/IEC 14496-3, AudioSpecificConfig + // for samplingFrequencyIndex see ISO/IEC 13818-7:2006, 8.1.3.2.2, Table 35 + (track.audioobjecttype << 3) | (track.samplingfrequencyindex >>> 1), + (track.samplingfrequencyindex << 7) | (track.channelcount << 3), + 0x06, 0x01, 0x02 // GASpecificConfig + ])); +}; + +ftyp = function() { + return box(types.ftyp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND, AVC1_BRAND); +}; + +hdlr = function(type) { + return box(types.hdlr, HDLR_TYPES[type]); +}; +mdat = function(data) { + return box(types.mdat, data); +}; +mdhd = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x02, // creation_time + 0x00, 0x00, 0x00, 0x03, // modification_time + 0x00, 
0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second + + (track.duration >>> 24) & 0xFF, + (track.duration >>> 16) & 0xFF, + (track.duration >>> 8) & 0xFF, + track.duration & 0xFF, // duration + 0x55, 0xc4, // 'und' language (undetermined) + 0x00, 0x00 + ]); + + // Use the sample rate from the track metadata, when it is + // defined. The sample rate can be parsed out of an ADTS header, for + // instance. + if (track.samplerate) { + result[12] = (track.samplerate >>> 24) & 0xFF; + result[13] = (track.samplerate >>> 16) & 0xFF; + result[14] = (track.samplerate >>> 8) & 0xFF; + result[15] = (track.samplerate) & 0xFF; + } + + return box(types.mdhd, result); +}; +mdia = function(track) { + return box(types.mdia, mdhd(track), hdlr(track.type), minf(track)); +}; +mfhd = function(sequenceNumber) { + return box(types.mfhd, new Uint8Array([ + 0x00, + 0x00, 0x00, 0x00, // flags + (sequenceNumber & 0xFF000000) >> 24, + (sequenceNumber & 0xFF0000) >> 16, + (sequenceNumber & 0xFF00) >> 8, + sequenceNumber & 0xFF // sequence_number + ])); +}; +minf = function(track) { + return box(types.minf, + track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD), + dinf(), + stbl(track)); +}; +moof = function(sequenceNumber, tracks) { + var + trackFragments = [], + i = tracks.length; + // build traf boxes for each track fragment + while (i--) { + trackFragments[i] = traf(tracks[i]); + } + return box.apply(null, [ + types.moof, + mfhd(sequenceNumber) + ].concat(trackFragments)); +}; +/** + * Returns a movie box. + * @param tracks {array} the tracks associated with this movie + * @see ISO/IEC 14496-12:2012(E), section 8.2.1 + */ +moov = function(tracks) { + var + i = tracks.length, + boxes = []; + + while (i--) { + boxes[i] = trak(tracks[i]); + } + + return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex(tracks))); +}; +mvex = function(tracks) { + var + i = tracks.length, + boxes = []; + + while (i--) { + boxes[i] = trex(tracks[i]); + } + return box.apply(null, [types.mvex].concat(boxes)); +}; +mvhd = function(duration) { + var + bytes = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01, // creation_time + 0x00, 0x00, 0x00, 0x02, // modification_time + 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second + (duration & 0xFF000000) >> 24, + (duration & 0xFF0000) >> 16, + (duration & 0xFF00) >> 8, + duration & 0xFF, // duration + 0x00, 0x01, 0x00, 0x00, // 1.0 rate + 0x01, 0x00, // 1.0 volume + 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, // transformation: unity matrix + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // pre_defined + 0xff, 0xff, 0xff, 0xff // next_track_ID + ]); + return box(types.mvhd, bytes); +}; + +sdtp = function(track) { + var + samples = track.samples || [], + bytes = new Uint8Array(4 + samples.length), + flags, + i; + + // leave the full box header (4 bytes) all zero + + // write the sample table + for (i = 0; i < samples.length; i++) { + flags = samples[i].flags; + + bytes[i + 4] = (flags.dependsOn << 4) | + (flags.isDependedOn << 2) | + (flags.hasRedundancy); + } + + return box(types.sdtp, + bytes); +}; + +stbl 
= function(track) { + return box(types.stbl, + stsd(track), + box(types.stts, STTS), + box(types.stsc, STSC), + box(types.stsz, STSZ), + box(types.stco, STCO)); +}; + +(function() { + var videoSample, audioSample; + + stsd = function(track) { + + return box(types.stsd, new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + 0x00, 0x00, 0x00, 0x01 + ]), track.type === 'video' ? videoSample(track) : audioSample(track)); + }; + + videoSample = function(track) { + var + sps = track.sps || [], + pps = track.pps || [], + sequenceParameterSets = [], + pictureParameterSets = [], + i, + avc1Box; + + // assemble the SPSs + for (i = 0; i < sps.length; i++) { + sequenceParameterSets.push((sps[i].byteLength & 0xFF00) >>> 8); + sequenceParameterSets.push((sps[i].byteLength & 0xFF)); // sequenceParameterSetLength + sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(sps[i])); // SPS + } + + // assemble the PPSs + for (i = 0; i < pps.length; i++) { + pictureParameterSets.push((pps[i].byteLength & 0xFF00) >>> 8); + pictureParameterSets.push((pps[i].byteLength & 0xFF)); + pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(pps[i])); + } + + avc1Box = [ + types.avc1, new Uint8Array([ + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // pre_defined + (track.width & 0xff00) >> 8, + track.width & 0xff, // width + (track.height & 0xff00) >> 8, + track.height & 0xff, // height + 0x00, 0x48, 0x00, 0x00, // horizresolution + 0x00, 0x48, 0x00, 0x00, // vertresolution + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // frame_count + 0x13, + 0x76, 0x69, 0x64, 0x65, + 0x6f, 0x6a, 0x73, 0x2d, + 0x63, 0x6f, 0x6e, 0x74, + 0x72, 0x69, 0x62, 0x2d, + 0x68, 0x6c, 0x73, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // compressorname + 0x00, 0x18, // depth = 24 + 0x11, 0x11 // pre_defined = -1 + ]), + box(types.avcC, new Uint8Array([ + 0x01, // configurationVersion + track.profileIdc, // AVCProfileIndication + track.profileCompatibility, // profile_compatibility + track.levelIdc, // AVCLevelIndication + 0xff // lengthSizeMinusOne, hard-coded to 4 bytes + ].concat( + [sps.length], // numOfSequenceParameterSets + sequenceParameterSets, // "SPS" + [pps.length], // numOfPictureParameterSets + pictureParameterSets // "PPS" + ))), + box(types.btrt, new Uint8Array([ + 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB + 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate + 0x00, 0x2d, 0xc6, 0xc0 // avgBitrate + ])) + ]; + + if (track.sarRatio) { + var + hSpacing = track.sarRatio[0], + vSpacing = track.sarRatio[1]; + + avc1Box.push( + box(types.pasp, new Uint8Array([ + (hSpacing & 0xFF000000) >> 24, + (hSpacing & 0xFF0000) >> 16, + (hSpacing & 0xFF00) >> 8, + hSpacing & 0xFF, + (vSpacing & 0xFF000000) >> 24, + (vSpacing & 0xFF0000) >> 16, + (vSpacing & 0xFF00) >> 8, + vSpacing & 0xFF + ])) + ); + } + + return box.apply(null, avc1Box); + }; + + audioSample = function(track) { + console.log("AUDIO", track); + if (track.codec == "opus") { + let samplesize = 16; + let samplerate = 48000; + return box(types.Opus, new Uint8Array([ + + // SampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + + // AudioSampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + (track.channelcount & 
0xff00) >> 8, + (track.channelcount & 0xff), // channelcount + + (samplesize & 0xff00) >> 8, + (samplesize & 0xff), // samplesize + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + + (samplerate & 0xff00) >> 8, + (samplerate & 0xff), + 0x00, 0x00 // samplerate, 16.16 + + // OpusSpecificSampleEntry + ]), osse(track)); + } else { + return box(types.mp4a, new Uint8Array([ + + // SampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + + // AudioSampleEntry, ISO/IEC 14496-12 + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, // reserved + (track.channelcount & 0xff00) >> 8, + (track.channelcount & 0xff), // channelcount + + (track.samplesize & 0xff00) >> 8, + (track.samplesize & 0xff), // samplesize + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + + (track.samplerate & 0xff00) >> 8, + (track.samplerate & 0xff), + 0x00, 0x00 // samplerate, 16.16 + + // MP4AudioSampleEntry, ISO/IEC 14496-14 + ]), esds(track)); + }; + } +}()); + +tkhd = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x07, // flags + 0x00, 0x00, 0x00, 0x00, // creation_time + 0x00, 0x00, 0x00, 0x00, // modification_time + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + track.id & 0xFF, // track_ID + 0x00, 0x00, 0x00, 0x00, // reserved + (track.duration & 0xFF000000) >> 24, + (track.duration & 0xFF0000) >> 16, + (track.duration & 0xFF00) >> 8, + track.duration & 0xFF, // duration + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x00, // layer + 0x00, 0x00, // alternate_group + 0x01, 0x00, // non-audio track volume + 0x00, 0x00, // reserved + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, // transformation: unity matrix + (track.width & 0xFF00) >> 8, + track.width & 0xFF, + 0x00, 0x00, // width + (track.height & 0xFF00) >> 8, + track.height & 0xFF, + 0x00, 0x00 // height + ]); + + return box(types.tkhd, result); +}; + +/** + * Generate a track fragment (traf) box. A traf box collects metadata + * about tracks in a movie fragment (moof) box. 
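+ * Fragments emitted here are always a moof immediately followed by its
+ * mdat, so the trun data_offset must count every byte from the start of
+ * the moof to the first payload byte of the mdat. The fixed preamble
+ * totalled into dataOffset below is 32 (tfhd) + 20 (tfdt) + 8 (traf
+ * header) + 16 (mfhd) + 8 (moof header) + 8 (mdat header) = 92 bytes,
+ * plus the trun itself and, for video tracks, the sdtp box.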
+ */ +traf = function(track) { + var trackFragmentHeader, trackFragmentDecodeTime, trackFragmentRun, + sampleDependencyTable, dataOffset, + upperWordBaseMediaDecodeTime, lowerWordBaseMediaDecodeTime; + + trackFragmentHeader = box(types.tfhd, new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x3a, // flags + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + (track.id & 0xFF), // track_ID + 0x00, 0x00, 0x00, 0x01, // sample_description_index + 0x00, 0x00, 0x00, 0x00, // default_sample_duration + 0x00, 0x00, 0x00, 0x00, // default_sample_size + 0x00, 0x00, 0x00, 0x00 // default_sample_flags + ])); + + upperWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime / (UINT32_MAX + 1)); + lowerWordBaseMediaDecodeTime = Math.floor(track.baseMediaDecodeTime % (UINT32_MAX + 1)); + + trackFragmentDecodeTime = box(types.tfdt, new Uint8Array([ + 0x01, // version 1 + 0x00, 0x00, 0x00, // flags + // baseMediaDecodeTime + (upperWordBaseMediaDecodeTime >>> 24) & 0xFF, + (upperWordBaseMediaDecodeTime >>> 16) & 0xFF, + (upperWordBaseMediaDecodeTime >>> 8) & 0xFF, + upperWordBaseMediaDecodeTime & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 24) & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 16) & 0xFF, + (lowerWordBaseMediaDecodeTime >>> 8) & 0xFF, + lowerWordBaseMediaDecodeTime & 0xFF + ])); + + // the data offset specifies the number of bytes from the start of + // the containing moof to the first payload byte of the associated + // mdat + dataOffset = (32 + // tfhd + 20 + // tfdt + 8 + // traf header + 16 + // mfhd + 8 + // moof header + 8); // mdat header + + // audio tracks require less metadata + if (track.type === 'audio') { + trackFragmentRun = trun(track, dataOffset); + return box(types.traf, + trackFragmentHeader, + trackFragmentDecodeTime, + trackFragmentRun); + } + + // video tracks should contain an independent and disposable samples + // box (sdtp) + // generate one and adjust offsets to match + sampleDependencyTable = sdtp(track); + trackFragmentRun = trun(track, + sampleDependencyTable.length + dataOffset); + return box(types.traf, + trackFragmentHeader, + trackFragmentDecodeTime, + trackFragmentRun, + sampleDependencyTable); +}; + +/** + * Generate a track box. + * @param track {object} a track definition + * @return {Uint8Array} the track box + */ +trak = function(track) { + track.duration = track.duration || 0xffffffff; + return box(types.trak, + tkhd(track), + mdia(track)); +}; + +trex = function(track) { + var result = new Uint8Array([ + 0x00, // version 0 + 0x00, 0x00, 0x00, // flags + (track.id & 0xFF000000) >> 24, + (track.id & 0xFF0000) >> 16, + (track.id & 0xFF00) >> 8, + (track.id & 0xFF), // track_ID + 0x00, 0x00, 0x00, 0x01, // default_sample_description_index + 0x00, 0x00, 0x00, 0x00, // default_sample_duration + 0x00, 0x00, 0x00, 0x00, // default_sample_size + 0x00, 0x01, 0x00, 0x01 // default_sample_flags + ]); + // the last two bytes of default_sample_flags is the sample + // degradation priority, a hint about the importance of this sample + // relative to others. Lower the degradation priority for all sample + // types other than video. + if (track.type !== 'video') { + result[result.length - 1] = 0x00; + } + + return box(types.trex, result); +}; + +(function() { + var audioTrun, videoTrun, trunHeader; + + // This method assumes all samples are uniform. That is, if a + // duration is present for the first sample, it will be present for + // all subsequent samples. 
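+  // The first sample is probed for each optional field and the matching
+  // trun flag is set when that field is present: 0x1 duration, 0x2 size,
+  // 0x4 sample flags, 0x8 composition time offset.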
+ // see ISO/IEC 14496-12:2012, Section 8.8.8.1 + trunHeader = function(samples, offset) { + var durationPresent = 0, sizePresent = 0, + flagsPresent = 0, compositionTimeOffset = 0; + + // trun flag constants + if (samples.length) { + if (samples[0].duration !== undefined) { + durationPresent = 0x1; + } + if (samples[0].size !== undefined) { + sizePresent = 0x2; + } + if (samples[0].flags !== undefined) { + flagsPresent = 0x4; + } + if (samples[0].compositionTimeOffset !== undefined) { + compositionTimeOffset = 0x8; + } + } + + return [ + 0x00, // version 0 + 0x00, + durationPresent | sizePresent | flagsPresent | compositionTimeOffset, + 0x01, // flags + (samples.length & 0xFF000000) >>> 24, + (samples.length & 0xFF0000) >>> 16, + (samples.length & 0xFF00) >>> 8, + samples.length & 0xFF, // sample_count + (offset & 0xFF000000) >>> 24, + (offset & 0xFF0000) >>> 16, + (offset & 0xFF00) >>> 8, + offset & 0xFF // data_offset + ]; + }; + + videoTrun = function(track, offset) { + var bytesOffest, bytes, header, samples, sample, i; + + samples = track.samples || []; + offset += 8 + 12 + (16 * samples.length); + header = trunHeader(samples, offset); + bytes = new Uint8Array(header.length + samples.length * 16); + bytes.set(header); + bytesOffest = header.length; + + for (i = 0; i < samples.length; i++) { + sample = samples[i]; + + bytes[bytesOffest++] = (sample.duration & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.duration & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.duration & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.duration & 0xFF; // sample_duration + bytes[bytesOffest++] = (sample.size & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.size & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.size & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.size & 0xFF; // sample_size + bytes[bytesOffest++] = (sample.flags.isLeading << 2) | sample.flags.dependsOn; + bytes[bytesOffest++] = (sample.flags.isDependedOn << 6) | + (sample.flags.hasRedundancy << 4) | + (sample.flags.paddingValue << 1) | + sample.flags.isNonSyncSample; + bytes[bytesOffest++] = sample.flags.degradationPriority & 0xF0 << 8; + bytes[bytesOffest++] = sample.flags.degradationPriority & 0x0F; // sample_flags + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.compositionTimeOffset & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.compositionTimeOffset & 0xFF; // sample_composition_time_offset + } + return box(types.trun, bytes); + }; + + audioTrun = function(track, offset) { + var bytes, bytesOffest, header, samples, sample, i; + + samples = track.samples || []; + offset += 8 + 12 + (8 * samples.length); + + header = trunHeader(samples, offset); + bytes = new Uint8Array(header.length + samples.length * 8); + bytes.set(header); + bytesOffest = header.length; + + for (i = 0; i < samples.length; i++) { + sample = samples[i]; + bytes[bytesOffest++] = (sample.duration & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.duration & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.duration & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.duration & 0xFF; // sample_duration + bytes[bytesOffest++] = (sample.size & 0xFF000000) >>> 24; + bytes[bytesOffest++] = (sample.size & 0xFF0000) >>> 16; + bytes[bytesOffest++] = (sample.size & 0xFF00) >>> 8; + bytes[bytesOffest++] = sample.size & 0xFF; // sample_size + } + + return box(types.trun, bytes); + }; + + trun = function(track, offset) { 
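+    // Audio samples are written as 8 bytes each (duration + size) while
+    // video samples need 16 bytes each (duration + size + flags +
+    // composition time offset), so the two variants advance the trun
+    // data offset by different amounts.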
+    if (track.type === 'audio') {
+      return audioTrun(track, offset);
+    }
+
+    return videoTrun(track, offset);
+  };
+}());
+
+module.exports = {
+  ftyp: ftyp,
+  mdat: mdat,
+  moof: moof,
+  moov: moov,
+  initSegment: function(tracks) {
+    var
+      fileType = ftyp(),
+      movie = moov(tracks),
+      result;
+
+    result = new Uint8Array(fileType.byteLength + movie.byteLength);
+    result.set(fileType);
+    result.set(movie, fileType.byteLength);
+    return result;
+  }
+};
\ No newline at end of file
diff --git a/web-service/server/src/index.js b/web-service/server/src/index.js
index 9e417752ba5e795775c0c3c683f77acaa2d3a8d4..e65d0277e694c83784abf3f117ab6302367f824a 100644
--- a/web-service/server/src/index.js
+++ b/web-service/server/src/index.js
@@ -467,8 +467,9 @@ app.ws('/', (ws, req) => {
  * @param {uri} uri
  */
 function stringSplitter(uri) {
-	const url = new Url(uri)
-	return url.origin;
+	//const url = new Url(uri)
+	//return url.origin;
+	return uri;
 }
 
 console.log("Listening on port 8080");
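Taken together, FTLRemux drives this generator by emitting one init segment and then one moof+mdat pair per incoming packet. Below is a minimal sketch of that flow for an audio-only stream; opusFrames is a hypothetical stand-in for the {size, duration, data} records produced by reformAudio(), and the video track is omitted for brevity:

    const MP4 = require('./lib/mp4-generator');

    // Track object mirroring this.audiotrack in ftlremux.js.
    const audiotrack = {
      timelineStartInfo: { baseMediaDecodeTime: 0 },
      baseMediaDecodeTime: 1800,
      id: 1,
      codec: 'opus',
      type: 'audio',
      samples: [],
      duration: 0,
      insamplerate: 48000,
      channelcount: 2,
      width: 0,
      height: 0
    };

    // Sent once, before any media fragments.
    const initSeg = MP4.initSegment([audiotrack]);

    // Called per packet with opusFrames = reformAudio(pkt[5]).
    function audioFragment(opusFrames, sequenceNo) {
      audiotrack.samples = opusFrames;
      const moof = MP4.moof(sequenceNo, [audiotrack]);

      // Concatenate the raw Opus frames into a single mdat payload,
      // as concatAudioSamples() does.
      let total = 0;
      for (const s of opusFrames) total += s.size;
      const payload = new Uint8Array(total);
      let offset = 0;
      for (const s of opusFrames) {
        payload.set(s.data, offset);
        offset += s.size;
      }
      const mdat = MP4.mdat(payload);

      // moof and mdat back to back, exactly as FTLRemux.prototype.push does.
      const out = new Uint8Array(moof.byteLength + mdat.byteLength);
      out.set(moof);
      out.set(mdat, moof.byteLength);

      // 20 ms per frame at the 90 kHz movie timescale.
      audiotrack.baseMediaDecodeTime += 1800 * opusFrames.length;
      return out;
    }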