Skip to content

Commit

Permalink
Merge pull request #44 from sayunkim/fix-div-p-issue
Browse files (browse the repository at this point in the history)
Fix an issue where EPUBs that use "div" tags instead of "p" tags for paragraphs were not parsed properly.
  • Loading branch information
aedocw authored Dec 2, 2024
2 parents 7133f67 + 5eca12e commit e8d1b81
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
13 changes: 10 additions & 3 deletions epub2tts_edge/epub2tts_edge.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def chap2text_epub(chap):
a.extract()

chapter_paragraphs = soup.find_all("p")
if len(chapter_paragraphs) == 0:
print(f"Could not find any paragraph tags <p> in \"{chapter_title_text}\". Trying with <div>.")
chapter_paragraphs = soup.find_all("div")

for p in chapter_paragraphs:
paragraph_text = "".join(p.strings).strip()
paragraphs.append(paragraph_text)
Expand Down Expand Up @@ -237,11 +241,14 @@ def read_book(book_contents, speaker, paragraphpause, sentencepause):
for i, chapter in enumerate(book_contents, start=1):
files = []
partname = f"part{i}.flac"
print(f"\n\n")

if os.path.isfile(partname):
print(f"{partname} exists, skipping to next chapter")
segments.append(partname)
else:
print(f"Chapter: {chapter['title']}\n")
print(f"Section name: \"{chapter['title']}\"")
if chapter["title"] == "":
chapter["title"] = "blank"
if chapter["title"] != "Title":
Expand All @@ -250,7 +257,7 @@ def read_book(book_contents, speaker, paragraphpause, sentencepause):
)
append_silence("sntnc0.mp3", 1200)
for pindex, paragraph in enumerate(
tqdm(chapter["paragraphs"], desc=f"Processing chapter {i}",unit='pg')
tqdm(chapter["paragraphs"], desc=f"Generating audio files: ",unit='pg')
):
ptemp = f"pgraphs{pindex}.flac"
if os.path.isfile(ptemp):
Expand Down Expand Up @@ -312,8 +319,8 @@ def get_duration(file_path):
def make_m4b(files, sourcefile, speaker):
filelist = "filelist.txt"
basefile = sourcefile.replace(".txt", "")
outputm4a = f"{basefile}-{speaker}.m4a"
outputm4b = f"{basefile}-{speaker}.m4b"
outputm4a = f"{basefile} ({speaker}).m4a"
outputm4b = f"{basefile} ({speaker}).m4b"
with open(filelist, "w") as f:
for filename in files:
filename = filename.replace("'", "'\\''")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
author_email="[email protected]",
url="https://github.com/aedocw/epub2tts-edge",
license="GPL 3.0",
version="1.2.6",
version="1.2.7",
packages=find_packages(),
install_requires=requirements,
entry_points={
Expand Down

0 comments on commit e8d1b81

Please sign in to comment.