Home Assistant Unofficial Reference 2024.12.1
fmp4utils.py
Go to the documentation of this file.
1 """Utilities to help convert mp4s to fmp4s."""
2 
3 from __future__ import annotations
4 
5 from collections.abc import Generator
6 from typing import TYPE_CHECKING
7 
8 from homeassistant.exceptions import HomeAssistantError
9 
10 from .core import Orientation
11 
12 if TYPE_CHECKING:
13  from io import BufferedIOBase
14 
15 
def find_box(
    mp4_bytes: bytes, target_type: bytes, box_start: int = 0
) -> Generator[int]:
    """Yield the offset of each box of the given type at one nesting level.

    With the default ``box_start`` of 0, the boxes laid out back to back from
    the start of ``mp4_bytes`` are scanned. Any other value is treated as the
    offset of a parent box: its 32-bit big-endian size gives the end of the
    scan, and scanning starts right after its 8 byte header. A parent located
    at offset 0 therefore cannot be searched for children.

    Each yielded value is the offset of the matching box's size field.

    The scan stops when fewer than 8 bytes remain, or when a box reports a
    size smaller than its own 8 byte header (0, 1, a malformed value, or a
    header truncated by the end of the data). Such a size gives no safe
    distance to step forward; without this guard a size of 0 would repeat
    the same offset forever.
    """
    if box_start == 0:
        index = 0
        box_end = len(mp4_bytes)
    else:
        box_end = box_start + int.from_bytes(
            mp4_bytes[box_start : box_start + 4], byteorder="big"
        )
        index = box_start + 8
    # A box header occupies 8 bytes: 4 byte size followed by 4 byte type.
    while index <= box_end - 8:
        box_header = mp4_bytes[index : index + 8]
        if box_header[4:8] == target_type:
            yield index
        box_size = int.from_bytes(box_header[0:4], byteorder="big")
        if box_size < 8:
            # Cannot step over this box; a match (if any) was already yielded.
            break
        index += box_size
35 
36 
def get_codec_string(mp4_bytes: bytes) -> str:
    """Get RFC 6381 codec string.

    For every trak box inside moov, the path mdia -> minf -> stbl -> stsd is
    followed and one codec entry is built from the stsd box. The entries are
    returned joined by commas, in track order.

    The entry starts with the four characters at bytes 20-24 of the stsd box.
    H.264, H.265 and mp4a entries get extra detail appended when the expected
    configuration data is found at the fixed offsets used below; any other
    codec is reported by its four character code alone.

    Raises StopIteration (from ``next``) when moov, or any box on the path to
    stsd within a track, is not found.
    """
    codecs = []

    # Find moov
    moov_location = next(find_box(mp4_bytes, b"moov"))

    # Find tracks
    for trak_location in find_box(mp4_bytes, b"trak", moov_location):
        # Drill down to media info
        mdia_location = next(find_box(mp4_bytes, b"mdia", trak_location))
        minf_location = next(find_box(mp4_bytes, b"minf", mdia_location))
        stbl_location = next(find_box(mp4_bytes, b"stbl", minf_location))
        stsd_location = next(find_box(mp4_bytes, b"stsd", stbl_location))

        # Get stsd box
        stsd_length = int.from_bytes(
            mp4_bytes[stsd_location : stsd_location + 4], byteorder="big"
        )
        stsd_box = mp4_bytes[stsd_location : stsd_location + stsd_length]

        # Base Codec
        codec = stsd_box[20:24].decode("utf-8")

        # Handle H264
        if (
            codec in ("avc1", "avc2", "avc3", "avc4")
            and stsd_length > 110
            and stsd_box[106:110] == b"avcC"
        ):
            profile = stsd_box[111:112].hex()
            compatibility = stsd_box[112:113].hex()
            # Cap level at 4.1 for compatibility with some Google Cast devices.
            # The level must always be two hex digits, so values below 16
            # (e.g. level 1.3 = 13 -> "0d") need zero padding.
            level = f"{min(stsd_box[113], 41):02x}"
            codec += "." + profile + compatibility + level

        # Handle H265
        elif (
            codec in ("hev1", "hvc1")
            and stsd_length > 110
            and stsd_box[106:110] == b"hvcC"
        ):
            # This byte packs profile space (top 2 bits), tier flag (bit 5)
            # and profile idc (low 5 bits).
            tmp_byte = int.from_bytes(stsd_box[111:112], byteorder="big")

            # Profile Space
            codec += "."
            profile_space_map = {0: "", 1: "A", 2: "B", 3: "C"}
            profile_space = tmp_byte >> 6
            codec += profile_space_map[profile_space]
            general_profile_idc = tmp_byte & 31
            codec += str(general_profile_idc)

            # Compatibility: the 32 flag bits are emitted in reverse bit order
            codec += "."
            general_profile_compatibility = int.from_bytes(
                stsd_box[112:116], byteorder="big"
            )
            reverse = 0
            for _ in range(32):
                reverse = (reverse << 1) | (general_profile_compatibility & 1)
                general_profile_compatibility >>= 1
            codec += hex(reverse)[2:]

            # Tier Flag, immediately followed by the level as a decimal number
            codec += ".H" if tmp_byte & 32 else ".L"
            codec += str(int.from_bytes(stsd_box[122:123], byteorder="big"))

            # Constraint String: walk the six constraint bytes from last to
            # first so that trailing zero bytes are dropped while zero bytes
            # in front of a non-zero byte are kept.
            has_byte = False
            constraint_string = ""
            for i in range(121, 115, -1):
                gci = int.from_bytes(stsd_box[i : i + 1], byteorder="big")
                if gci or has_byte:
                    constraint_string = "." + hex(gci)[2:] + constraint_string
                    has_byte = True
            codec += constraint_string

        # Handle Audio
        elif codec == "mp4a":
            # Parse ES Descriptors. Only descriptors whose tag byte is
            # followed by the three bytes 0x80 0x80 0x80 are recognised.
            oti_loc = stsd_box.find(b"\x04\x80\x80\x80")
            if oti_loc > 0:
                oti = stsd_box[oti_loc + 5 : oti_loc + 6].hex()
                codec += f".{oti}"

            dsi_loc = stsd_box.find(b"\x05\x80\x80\x80")
            if dsi_loc > 0:
                dsi_length = int.from_bytes(
                    stsd_box[dsi_loc + 4 : dsi_loc + 5], byteorder="big"
                )
                dsi_data = stsd_box[dsi_loc + 5 : dsi_loc + 5 + dsi_length]
                dsi0 = int.from_bytes(dsi_data[0:1], byteorder="big")
                # The value is the top 5 bits of the first byte ...
                dsi = (dsi0 & 248) >> 3
                if dsi == 31 and len(dsi_data) >= 2:
                    # ... unless those are all ones, in which case the next
                    # 6 bits are read and offset by 32.
                    dsi1 = int.from_bytes(dsi_data[1:2], byteorder="big")
                    dsi = 32 + ((dsi0 & 7) << 3) + ((dsi1 & 224) >> 5)
                codec += f".{dsi}"

        codecs.append(codec)

    return ",".join(codecs)
147 
148 
def find_moov(mp4_io: BufferedIOBase) -> int:
    """Return the offset of the moov box in a seekable mp4 stream.

    Boxes are walked from offset 0, using each 4 byte big-endian size field
    to jump to the next header. HomeAssistantError is raised when a full
    8 byte header cannot be read or a size field of zero is encountered
    before moov is seen.
    """
    offset = 0
    # The loop can only be left through the return or the raise below,
    # which ruff cannot see.
    while True:  # noqa: RET503
        mp4_io.seek(offset)
        header = mp4_io.read(8)
        size_field = header[:4]
        if len(header) != 8 or size_field == b"\x00\x00\x00\x00":
            raise HomeAssistantError("moov atom not found")
        if header[4:8] == b"moov":
            return offset
        offset += int.from_bytes(size_field, byteorder="big")
161 
162 
def read_init(bytes_io: BufferedIOBase) -> bytes:
    """Return the bytes from the start of the stream through the end of moov.

    The moov box is located with find_moov, its 4 byte big-endian size is
    read, and the stream is then read from offset 0 up to moov's last byte.
    """
    moov_start = find_moov(bytes_io)
    bytes_io.seek(moov_start)
    moov_size = int.from_bytes(bytes_io.read(4), byteorder="big")
    init_length = moov_start + moov_size
    bytes_io.seek(0)
    return bytes_io.read(init_length)
170 
171 
# Four byte matrix cells; ONE32 and NEGONE32 carry +1 / -1 in their upper
# two bytes.
ZERO32 = b"\x00\x00\x00\x00"
ONE32 = b"\x00\x01\x00\x00"
NEGONE32 = b"\xff\xff\x00\x00"
# Third matrix row, identical for every transform.
XYW_ROW = b"".join((ZERO32, ZERO32, b"\x40\x00\x00\x00"))
# First two matrix rows (three cells each) of every supported transform.
ROTATE_RIGHT = b"".join((ZERO32, ONE32, ZERO32, NEGONE32, ZERO32, ZERO32))
ROTATE_LEFT = b"".join((ZERO32, NEGONE32, ZERO32, ONE32, ZERO32, ZERO32))
ROTATE_180 = b"".join((NEGONE32, ZERO32, ZERO32, ZERO32, NEGONE32, ZERO32))
MIRROR = b"".join((NEGONE32, ZERO32, ZERO32, ZERO32, ONE32, ZERO32))
FLIP = b"".join((ONE32, ZERO32, ZERO32, ZERO32, NEGONE32, ZERO32))
# The two below do not seem to get applied properly
ROTATE_LEFT_FLIP = b"".join((ZERO32, NEGONE32, ZERO32, NEGONE32, ZERO32, ZERO32))
ROTATE_RIGHT_FLIP = b"".join((ZERO32, ONE32, ZERO32, ONE32, ZERO32, ZERO32))

# Indexed by EXIF orientation tag value. Only indexes 2 through 8 are used;
# the two empty entries at the front just keep the indexes aligned.
TRANSFORM_MATRIX_TOP = (
    b"",  # 0
    b"",  # 1
    MIRROR,  # 2
    ROTATE_180,  # 3
    FLIP,  # 4
    ROTATE_LEFT_FLIP,  # 5
    ROTATE_LEFT,  # 6
    ROTATE_RIGHT_FLIP,  # 7
    ROTATE_RIGHT,  # 8
)
199 
200 
def transform_init(init: bytes, orientation: Orientation) -> bytes:
    """Return the init bytes with the first track's matrix replaced.

    For Orientation.NO_TRANSFORM the input is returned untouched. Otherwise
    the first tkhd box of the first trak box inside moov is located, and the
    36 bytes that end 8 bytes before the end of that tkhd box are replaced
    by the rows selected from TRANSFORM_MATRIX_TOP followed by XYW_ROW.

    Raises StopIteration (from ``next``) if moov, trak or tkhd is not found.
    """
    if orientation == Orientation.NO_TRANSFORM:
        return init
    # Walk moov -> trak -> tkhd, taking the first match at each level
    moov_offset = next(find_box(init, b"moov"))
    trak_offset = next(find_box(init, b"trak", moov_offset))
    tkhd_offset = next(find_box(init, b"tkhd", trak_offset))
    tkhd_size = int.from_bytes(
        init[tkhd_offset : tkhd_offset + 4], byteorder="big"
    )
    tkhd_end = tkhd_offset + tkhd_size
    new_matrix = TRANSFORM_MATRIX_TOP[orientation] + XYW_ROW
    # Everything before the matrix and the last 8 bytes of tkhd onwards are
    # carried over unchanged.
    return init[: tkhd_end - 44] + new_matrix + init[tkhd_end - 8 :]
int find_moov(BufferedIOBase mp4_io)
Definition: fmp4utils.py:149
Generator[int] find_box(bytes mp4_bytes, bytes target_type, int box_start=0)
Definition: fmp4utils.py:18
str get_codec_string(bytes mp4_bytes)
Definition: fmp4utils.py:37
bytes transform_init(bytes init, Orientation orientation)
Definition: fmp4utils.py:201
bytes read_init(BufferedIOBase bytes_io)
Definition: fmp4utils.py:163