Skip to content

Commit

Permalink
Rename fix_webvtt_separator to space_webvtt_headers
Browse files (browse the repository at this point in the history)
  • Loading branch information
rlaphoenix committed May 6, 2024
1 parent bbc64b9 commit 8bea858
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions devine/core/tracks/subtitle.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -309,7 +309,7 @@ def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
caption_lists[language] = caption_list
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
elif codec == Subtitle.Codec.WebVTT:
text = Subtitle.fix_webvtt_separator(data)
text = Subtitle.space_webvtt_headers(data)
caption_set = pycaption.WebVTTReader().read(text)
else:
raise ValueError(f"Unknown Subtitle format \"{codec}\"...")
Expand All @@ -327,10 +327,13 @@ def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
return caption_set

@staticmethod
def fix_webvtt_separator(data: Union[str, bytes]):
def space_webvtt_headers(data: Union[str, bytes]):
"""
Space out the WEBVTT Headers from Captions.
Segmented VTT when merged may have the WEBVTT headers part of the next caption
if they are not separated far enough from the previous caption, hence the \n\n
as they were not separated far enough from the previous caption and ended up
being considered as caption text rather than the header for the next segment.
"""
if isinstance(data, bytes):
data = data.decode("utf8")
Expand Down Expand Up @@ -600,7 +603,7 @@ def fix_webvtt_timestamp(self) -> None:
else:
return

text = Subtitle.fix_webvtt_separator(self.path.read_text("utf8"))
text = Subtitle.space_webvtt_headers(self.path.read_text("utf8"))
fixed = fix_webvtt_timestamp(
text, segment_duration=segment_duration, timescale=timescale
)
Expand Down

0 comments on commit 8bea858

Please sign in to comment.