Source: lib/cea/mp4_cea_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.cea.Mp4CeaParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.cea.ICeaParser');
  9. goog.require('shaka.cea.SeiProcessor');
  10. goog.require('shaka.util.DataViewReader');
  11. goog.require('shaka.util.Error');
  12. goog.require('shaka.util.Mp4Parser');
  13. goog.require('shaka.util.Mp4BoxParsers');
  14. /**
  15. * MPEG4 stream parser used for extracting 708 closed captions data.
  16. * @implements {shaka.cea.ICeaParser}
  17. */
  shaka.cea.Mp4CeaParser = class {
    /** */
    constructor() {
      /**
       * SEI data processor.
       * @private
       * @const {!shaka.cea.SeiProcessor}
       */
      this.seiProcessor_ = new shaka.cea.SeiProcessor();

      /**
       * Map of track id to corresponding timescale.
       * @private {!Map<number, number>}
       */
      this.trackIdToTimescale_ = new Map();

      /**
       * Default sample duration, as specified by the TREX box.
       * @private {!number}
       */
      this.defaultSampleDuration_ = 0;

      /**
       * Default sample size, as specified by the TREX box.
       * @private {!number}
       */
      this.defaultSampleSize_ = 0;
    }

    /**
     * Parses the init segment. Gets Default Sample Duration and Size from the
     * TREX box, and constructs a map of Track IDs to timescales. Each TRAK box
     * contains a track header (TKHD) containing track ID, and a media header
     * box (MDHD) containing the timescale for the track.
     *
     * Track IDs and timescales are collected in the order their boxes are
     * visited and then paired by position, so the n-th TKHD is matched with
     * the n-th MDHD.
     *
     * Throws a CRITICAL / TEXT / INVALID_MP4_CEA shaka.util.Error when no
     * track was found or when the number of track IDs differs from the number
     * of timescales.
     * @override
     */
    init(initSegment) {
      const Mp4Parser = shaka.util.Mp4Parser;
      const trackIds = [];
      const timescales = [];

      new Mp4Parser()
          .box('moov', Mp4Parser.children)
          .box('mvex', Mp4Parser.children)
          .fullBox('trex', (box) => {
            const parsedTREXBox = shaka.util.Mp4BoxParsers.parseTREX(
                box.reader);

            this.defaultSampleDuration_ = parsedTREXBox.defaultSampleDuration;
            this.defaultSampleSize_ = parsedTREXBox.defaultSampleSize;
          })
          .box('trak', Mp4Parser.children)
          .fullBox('tkhd', (box) => {
            goog.asserts.assert(
                box.version != null,
                'TKHD is a full box and should have a valid version.');
            const parsedTKHDBox = shaka.util.Mp4BoxParsers.parseTKHD(
                box.reader, box.version);
            trackIds.push(parsedTKHDBox.trackId);
          })
          .box('mdia', Mp4Parser.children)
          .fullBox('mdhd', (box) => {
            goog.asserts.assert(
                box.version != null,
                'MDHD is a full box and should have a valid version.');
            const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD(
                box.reader, box.version);
            timescales.push(parsedMDHDBox.timescale);
          })
          .parse(initSegment, /* partialOkay= */ true);

      // At least one track should exist, and each track should have a
      // corresponding Id in TKHD box, and timescale in its MDHD box
      if (!trackIds.length || !timescales.length ||
          trackIds.length !== timescales.length) {
        throw new shaka.util.Error(
            shaka.util.Error.Severity.CRITICAL,
            shaka.util.Error.Category.TEXT,
            shaka.util.Error.Code.INVALID_MP4_CEA);
      }

      // Populate the map from track Id to timescale
      trackIds.forEach((trackId, idx) => {
        this.trackIdToTimescale_.set(trackId, timescales[idx]);
      });
    }

    /**
     * Parses each video segment. In fragmented MP4s, MOOF and MDAT come in
     * pairs. The following logic gets the necessary info from MOOFs to parse
     * MDATs (base media decode time, sample sizes/offsets/durations, etc),
     * and then parses the MDAT boxes for CEA-708 packets using this
     * information.
     *
     * The collected caption packets are returned as an array; each entry
     * holds the packet produced by the SEI processor and its pts.
     *
     * Throws a CRITICAL / TEXT / INVALID_MP4_CEA shaka.util.Error when an MDAT
     * box is reached before any TFDT box has supplied a base media decode
     * time.
     * @override
     */
    parse(mediaSegment) {
      const Mp4Parser = shaka.util.Mp4Parser;

      /** @type {!Array<!shaka.cea.ICeaParser.CaptionPacket>} **/
      const captionPackets = [];

      // Fields that are found in MOOF boxes. They are shared by all of the
      // box callbacks below; the MDAT callback consumes whatever the TRUN,
      // TFHD and TFDT callbacks have stored so far.
      let defaultSampleDuration = this.defaultSampleDuration_;
      let defaultSampleSize = this.defaultSampleSize_;
      let sampleData = [];
      let baseMediaDecodeTime = null;
      let timescale = shaka.cea.ICeaParser.DEFAULT_TIMESCALE_VALUE;

      new Mp4Parser()
          .box('moof', Mp4Parser.children)
          .box('traf', Mp4Parser.children)
          .fullBox('trun', (box) => {
            goog.asserts.assert(
                box.version != null && box.flags != null,
                'TRUN is a full box and should have a valid version & flags.');

            const parsedTRUN = shaka.util.Mp4BoxParsers.parseTRUN(
                box.reader, box.version, box.flags);

            sampleData = parsedTRUN.sampleData;
          })
          .fullBox('tfhd', (box) => {
            goog.asserts.assert(
                box.flags != null,
                'TFHD is a full box and should have valid flags.');

            const parsedTFHD = shaka.util.Mp4BoxParsers.parseTFHD(
                box.reader, box.flags);

            // If specified, defaultSampleDuration and defaultSampleSize
            // override the ones specified in the TREX box. A missing or zero
            // value falls back to the TREX default.
            defaultSampleDuration = parsedTFHD.defaultSampleDuration ||
                this.defaultSampleDuration_;

            defaultSampleSize = parsedTFHD.defaultSampleSize ||
                this.defaultSampleSize_;

            const trackId = parsedTFHD.trackId;

            // Get the timescale from the track Id. Unknown track ids keep the
            // timescale that is currently in effect.
            if (this.trackIdToTimescale_.has(trackId)) {
              timescale = this.trackIdToTimescale_.get(trackId);
            }
          })
          .fullBox('tfdt', (box) => {
            goog.asserts.assert(
                box.version != null,
                'TFDT is a full box and should have a valid version.');

            const parsedTFDT = shaka.util.Mp4BoxParsers.parseTFDT(
                box.reader, box.version);

            baseMediaDecodeTime = parsedTFDT.baseMediaDecodeTime;
          })
          .box('mdat', (box) => {
            if (baseMediaDecodeTime === null) {
              // This field should have been populated by
              // the Base Media Decode time in the TFDT box
              throw new shaka.util.Error(
                  shaka.util.Error.Severity.CRITICAL,
                  shaka.util.Error.Category.TEXT,
                  shaka.util.Error.Code.INVALID_MP4_CEA);
            }
            this.parseMdat_(box.reader, baseMediaDecodeTime, timescale,
                defaultSampleDuration, defaultSampleSize, sampleData,
                captionPackets);
          })
          .parse(mediaSegment, /* partialOkay= */ false);

      return captionPackets;
    }

    /**
     * Parse MDAT box.
     *
     * Walks the box payload as a sequence of NAL units, each preceded by a
     * 4-byte length. The payload of every SEI NAL unit is handed to the SEI
     * processor and each resulting packet is appended to captionPackets with
     * pts = (current sample time + composition time offset) / timescale.
     * All other NAL units are skipped.
     *
     * Any error raised by the reader while fetching a NAL unit (its length,
     * its header byte or its payload) ends the walk quietly; packets that
     * were already collected are kept. Errors raised by the SEI processor are
     * not intercepted.
     *
     * @param {!shaka.util.DataViewReader} reader Reader over the MDAT payload.
     * @param {!number} time Decode time of the first sample, in timescale
     *   units.
     * @param {!number} timescale Divisor used to convert time values to pts.
     * @param {!number} defaultSampleDuration Duration used for samples whose
     *   own duration is missing or zero.
     * @param {!number} defaultSampleSize Size used for samples whose own size
     *   is missing or zero.
     * @param {!Array<shaka.util.ParsedTRUNSample>} sampleData Per-sample
     *   entries taken from the TRUN box.
     * @param {!Array<!shaka.cea.ICeaParser.CaptionPacket>} captionPackets
     *   Output array; found packets are appended in place.
     * @private
     */
    parseMdat_(reader, time, timescale, defaultSampleDuration,
        defaultSampleSize, sampleData, captionPackets) {
      let sampleIndex = 0;

      // The fields in each ParsedTRUNSample contained in the sampleData
      // array are nullable. In the case of sample data and sample duration,
      // we use the defaults provided by the TREX/TFHD boxes. For sample
      // composition time offset, we default to 0.
      let sampleSize = defaultSampleSize;

      if (sampleData.length) {
        sampleSize = sampleData[0].sampleSize || defaultSampleSize;
      }

      while (reader.hasMoreData()) {
        let naluSize = 0;
        /** @type {?Uint8Array} */
        let seiPayload = null;

        try {
          naluSize = reader.readUint32();
          // The low five bits of the first NALU byte are the NAL unit type.
          const naluType = reader.readUint8() & 0x1F;

          // One byte of the NALU (the header) has already been consumed.
          if (naluType == shaka.cea.ICeaParser.NALU_TYPE_SEI) {
            seiPayload = reader.readBytes(naluSize - 1);
          } else {
            reader.skip(naluSize - 1);
          }
        } catch (e) {
          // It is necessary to ignore this error because it can break the
          // start of playback even if the user does not want to see the
          // subtitles. This applies equally to a truncated NALU header, a
          // truncated SEI payload and a truncated non-SEI NALU.
          break;
        }

        if (seiPayload) {
          let timeOffset = 0;

          if (sampleIndex < sampleData.length) {
            timeOffset =
                sampleData[sampleIndex].sampleCompositionTimeOffset || 0;
          }

          const pts = (time + timeOffset) / timescale;
          for (const packet of this.seiProcessor_.process(seiPayload)) {
            captionPackets.push({
              packet,
              pts,
            });
          }
        }

        // Account for the NALU plus its 4-byte length prefix.
        sampleSize -= (naluSize + 4);

        // The current sample has been fully consumed: advance the clock by
        // its duration and load the size of the next sample.
        if (sampleSize === 0) {
          if (sampleIndex < sampleData.length) {
            time += sampleData[sampleIndex].sampleDuration ||
                defaultSampleDuration;
          } else {
            time += defaultSampleDuration;
          }

          sampleIndex++;

          if (sampleIndex < sampleData.length) {
            sampleSize =
                sampleData[sampleIndex].sampleSize || defaultSampleSize;
          } else {
            sampleSize = defaultSampleSize;
          }
        }
      }
    }
  };