streamlit_app.py
import streamlit as st
from PyPDF2 import PdfReader
import openai

# HTML template with basic styling
html_template = """
<html>
  <head>
    <style>
      body {{
        font-family: Arial, sans-serif;
        margin: 20px;
      }}
      h1, h2, h3 {{
        color: #333;
      }}
    </style>
  </head>
  <body>
    <h1>Professional Resume</h1>
    {resume_content}
  </body>
</html>
"""


# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    pdf_reader = PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no extractable text
        text += page.extract_text() or ""
    # Ensure the extracted text is not empty or malformed
    if not text.strip():
        return None
    return text


# Function to split text into chunks that fit within OpenAI token limits
# (the word count is used as a rough proxy for the token count)
def split_text_to_fit_limit(text, max_tokens=1500):
    chunks = []
    words = text.split()
    current_chunk = []
    for word in words:
        if len(current_chunk) + 1 <= max_tokens:
            current_chunk.append(word)
        else:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


# Function to generate an HTML resume using OpenAI
# (uses the legacy, pre-1.0 openai.Completion interface)
def generate_html_resume(pdf_text, api_key):
    openai.api_key = api_key
    chunks = split_text_to_fit_limit(pdf_text, max_tokens=2000)  # Split into chunks to avoid token limits
    resume_parts = []
    for chunk in chunks:
        prompt = f"Generate a professional HTML resume from the following extracted text:\n\n{chunk}"
        try:
            response = openai.Completion.create(
                engine="text-davinci-003",
                prompt=prompt,
                max_tokens=2000
            )
            resume_parts.append(response['choices'][0]['text'])
        except Exception as e:
            return f"Error: {str(e)}"
    # Combine all parts and return the final HTML content
    return html_template.format(resume_content="".join(resume_parts))
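

# The request above relies on the legacy (pre-1.0) openai.Completion API and the
# retired "text-davinci-003" model. As a hedged sketch only: with the openai>=1.0
# client, the same per-chunk request could look roughly like the helper below
# (the model name "gpt-3.5-turbo" is an assumption, not something this app requires).
def generate_resume_chunk_v1(chunk, api_key, model="gpt-3.5-turbo"):
    from openai import OpenAI  # requires the openai>=1.0 package
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user",
                   "content": f"Generate a professional HTML resume from the following extracted text:\n\n{chunk}"}],
        max_tokens=2000,
    )
    return response.choices[0].message.content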


# Streamlit app
def main():
    st.title("PDF to HTML Resume Generator")

    # Input fields for the API key and the PDF upload
    api_key = st.text_input("Enter your OpenAI API key", type="password")
    pdf_file = st.file_uploader("Upload PDF File", type=["pdf"])

    # Generate the resume once both inputs are provided
    if pdf_file and api_key:
        pdf_text = extract_text_from_pdf(pdf_file)

        # If PDF text extraction fails
        if not pdf_text:
            st.error("The PDF content could not be extracted. Please check the file.")
        else:
            st.write("PDF text extracted successfully.")

            # Generate the HTML resume
            st.write("Generating the resume...")
            html_resume = generate_html_resume(pdf_text, api_key)

            # generate_html_resume returns a string starting with "Error:" on failure
            if html_resume.startswith("Error"):
                st.error(html_resume)
            else:
                # Display the generated HTML
                st.markdown(html_resume, unsafe_allow_html=True)

                # Download button for the generated HTML
                st.download_button(
                    label="Download HTML Resume",
                    data=html_resume,
                    file_name="resume.html",
                    mime="text/html"
                )


if __name__ == "__main__":
    main()
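
# To run the app locally (assuming streamlit, PyPDF2 and openai are installed):
#   streamlit run streamlit_app.py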