Skip to content

Commit

Permalink
Only decode text direction entities in Sub files (cont.)
Browse files Browse the repository at this point in the history
This was already done for HLS, but was overlooked for DASH and direct URLs.
  • Loading branch information
rlaphoenix committed Feb 29, 2024
1 parent 4073cef commit 97efb59
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
8 changes: 6 additions & 2 deletions devine/commands/dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -906,10 +906,14 @@ def cleanup():
track.OnDecrypted(drm)
progress(downloaded="Decrypted", completed=100)

if isinstance(track, Subtitle):
if isinstance(track, Subtitle) and \
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
track_data = track.path.read_bytes()
track_data = try_ensure_utf8(track_data)
track_data = html.unescape(track_data.decode("utf8")).encode("utf8")
track_data = track_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
track.path.write_bytes(track_data)

progress(downloaded="Downloaded")
Expand Down
5 changes: 4 additions & 1 deletion devine/core/manifests/dash.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,10 @@ def download_track(
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
):
segment_data = try_ensure_utf8(segment_data)
segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
segment_data = segment_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
f.write(segment_data)
f.flush()
segment_file.unlink()
Expand Down
1 change: 0 additions & 1 deletion devine/core/manifests/hls.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,6 @@ def merge_discontinuity(include_this_segment: bool, include_map_data: bool = Tru
if isinstance(track, Subtitle):
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
# decode text direction entities or SubtitleEdit's /ReverseRtlStartEnd won't work
segment_data = segment_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
Expand Down

0 comments on commit 97efb59

Please sign in to comment.