Add files support, scrape and refine your data
Remove Webdriver usages
Add continue messages for other providers
hlohaus committed Jan 1, 2025
1 parent 90360cc commit 7893a08
Showing 33 changed files with 1,155 additions and 559 deletions.
2 changes: 1 addition & 1 deletion etc/unittest/backend.py
@@ -5,7 +5,7 @@
 from unittest.mock import MagicMock
 from g4f.errors import MissingRequirementsError
 try:
-    from g4f.gui.server.backend import Backend_Api
+    from g4f.gui.server.backend_api import Backend_Api
     has_requirements = True
 except:
     has_requirements = False
2 changes: 1 addition & 1 deletion g4f/Provider/Blackbox.py
@@ -14,7 +14,7 @@
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from ..image import ImageResponse, to_data_uri
 from ..cookies import get_cookies_dir
-from ..web_search import get_search_message
+from ..tools.web_search import get_search_message
 from .helper import format_prompt

 from .. import debug
19 changes: 15 additions & 4 deletions g4f/Provider/Cloudflare.py
@@ -5,8 +5,10 @@

 from ..typing import AsyncResult, Messages, Cookies
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin, get_running_loop
-from ..requests import Session, StreamSession, get_args_from_nodriver, raise_for_status, merge_cookies, DEFAULT_HEADERS, has_nodriver, has_curl_cffi
-from ..errors import ResponseStatusError
+from ..requests import Session, StreamSession, get_args_from_nodriver, raise_for_status, merge_cookies
+from ..requests import DEFAULT_HEADERS, has_nodriver, has_curl_cffi
+from ..providers.response import FinishReason
+from ..errors import ResponseStatusError, ModelNotFoundError

 class Cloudflare(AsyncGeneratorProvider, ProviderModelMixin):
     label = "Cloudflare AI"
@@ -70,7 +72,10 @@ async def create_async_generator(
             cls._args = await get_args_from_nodriver(cls.url, proxy, timeout, cookies)
         else:
             cls._args = {"headers": DEFAULT_HEADERS, "cookies": {}}
-        model = cls.get_model(model)
+        try:
+            model = cls.get_model(model)
+        except ModelNotFoundError:
+            pass
         data = {
             "messages": messages,
             "lora": None,
@@ -89,6 +94,7 @@
             except ResponseStatusError:
                 cls._args = None
                 raise
+            reason = None
             async for line in response.iter_lines():
                 if line.startswith(b'data: '):
                     if line == b'data: [DONE]':
@@ -97,5 +103,10 @@
                         content = json.loads(line[6:].decode())
                         if content.get("response") and content.get("response") != '</s>':
                             yield content['response']
+                            reason = "max_tokens"
+                        elif content.get("response") == '':
+                            reason = "stop"
                     except Exception:
                         continue
+            if reason is not None:
+                yield FinishReason(reason)
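The hunk above makes the Cloudflare stream end with an explicit FinishReason marker instead of text chunks alone. A minimal consumer sketch, not part of the commit: it assumes FinishReason exposes a .reason attribute and that the model id shown is a valid Workers AI alias.

```python
# Sketch only: separate text chunks from the FinishReason marker that
# now terminates the Cloudflare stream.
import asyncio

from g4f.Provider import Cloudflare
from g4f.providers.response import FinishReason

async def demo() -> None:
    parts = []
    async for chunk in Cloudflare.create_async_generator(
        model="@cf/meta/llama-3.1-8b-instruct",  # assumed model id
        messages=[{"role": "user", "content": "Say hello"}],
    ):
        if isinstance(chunk, FinishReason):
            print(f"finish_reason: {chunk.reason}")  # "stop" or "max_tokens"
        else:
            parts.append(chunk)
    print("".join(parts))

asyncio.run(demo())
```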
34 changes: 0 additions & 34 deletions g4f/Provider/bing/create_images.py
@@ -15,7 +15,6 @@

 from ..helper import get_connector
 from ...errors import MissingRequirementsError, RateLimitError
-from ...webdriver import WebDriver, get_driver_cookies, get_browser

 BING_URL = "https://www.bing.com"
 TIMEOUT_LOGIN = 1200
@@ -31,39 +30,6 @@
     "https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg",
 ]

-def wait_for_login(driver: WebDriver, timeout: int = TIMEOUT_LOGIN) -> None:
-    """
-    Waits for the user to log in within a given timeout period.
-
-    Args:
-        driver (WebDriver): Webdriver for browser automation.
-        timeout (int): Maximum waiting time in seconds.
-
-    Raises:
-        RuntimeError: If the login process exceeds the timeout.
-    """
-    driver.get(f"{BING_URL}/")
-    start_time = time.time()
-    while not driver.get_cookie("_U"):
-        if time.time() - start_time > timeout:
-            raise RuntimeError("Timeout error")
-        time.sleep(0.5)
-
-def get_cookies_from_browser(proxy: str = None) -> dict[str, str]:
-    """
-    Retrieves cookies from the browser using webdriver.
-
-    Args:
-        proxy (str, optional): Proxy configuration.
-
-    Returns:
-        dict[str, str]: Retrieved cookies.
-    """
-    with get_browser(proxy=proxy) as driver:
-        wait_for_login(driver)
-        time.sleep(1)
-        return get_driver_cookies(driver)
-
 def create_session(cookies: Dict[str, str], proxy: str = None, connector: BaseConnector = None) -> ClientSession:
     """
     Creates a new client session with specified cookies and headers.
44 changes: 28 additions & 16 deletions g4f/Provider/needs_auth/HuggingFace.py
@@ -102,22 +102,15 @@ async def create_async_generator(
         if "config" in model_data and "model_type" in model_data["config"]:
             model_type = model_data["config"]["model_type"]
             debug.log(f"Model type: {model_type}")
-            if model_type in ("gpt2", "gpt_neo", "gemma", "gemma2"):
-                inputs = format_prompt(messages, do_continue=do_continue)
-            elif model_type in ("mistral"):
-                inputs = format_prompt_mistral(messages, do_continue)
-            elif "config" in model_data and "tokenizer_config" in model_data["config"] and "eos_token" in model_data["config"]["tokenizer_config"]:
-                eos_token = model_data["config"]["tokenizer_config"]["eos_token"]
-                if eos_token in ("<|endoftext|>", "<eos>", "</s>"):
-                    inputs = format_prompt_custom(messages, eos_token, do_continue)
-                elif eos_token == "<|im_end|>":
-                    inputs = format_prompt_qwen(messages, do_continue)
-                elif eos_token == "<|eot_id|>":
-                    inputs = format_prompt_llama(messages, do_continue)
-            else:
-                inputs = format_prompt(messages, do_continue=do_continue)
-        else:
-            inputs = format_prompt(messages, do_continue=do_continue)
+        inputs = get_inputs(messages, model_data, model_type, do_continue)
+        debug.log(f"Inputs len: {len(inputs)}")
+        if len(inputs) > 4096:
+            if len(messages) > 6:
+                messages = messages[:3] + messages[-3:]
+            else:
+                messages = [m for m in messages if m["role"] == "system"] + [messages[-1]]
+            inputs = get_inputs(messages, model_data, model_type, do_continue)
+            debug.log(f"New len: {len(inputs)}")
+        if model_type == "gpt2" and max_new_tokens >= 1024:
+            params["max_new_tokens"] = 512
         payload = {"inputs": inputs, "parameters": params, "stream": stream}
@@ -187,4 +180,23 @@ def format_prompt_custom(messages: Messages, end_token: str = "</s>", do_continue: bool = False) -> str:
     ]) + ("" if do_continue else "<|assistant|>\n")
     if do_continue:
         return prompt[:-len(end_token + "\n")]
-    return prompt
+    return prompt
+
+def get_inputs(messages: Messages, model_data: dict, model_type: str, do_continue: bool = False) -> str:
+    if model_type in ("gpt2", "gpt_neo", "gemma", "gemma2"):
+        inputs = format_prompt(messages, do_continue=do_continue)
+    elif model_type in ("mistral"):
+        inputs = format_prompt_mistral(messages, do_continue)
+    elif "config" in model_data and "tokenizer_config" in model_data["config"] and "eos_token" in model_data["config"]["tokenizer_config"]:
+        eos_token = model_data["config"]["tokenizer_config"]["eos_token"]
+        if eos_token in ("<|endoftext|>", "<eos>", "</s>"):
+            inputs = format_prompt_custom(messages, eos_token, do_continue)
+        elif eos_token == "<|im_end|>":
+            inputs = format_prompt_qwen(messages, do_continue)
+        elif eos_token == "<|eot_id|>":
+            inputs = format_prompt_llama(messages, do_continue)
+        else:
+            inputs = format_prompt(messages, do_continue=do_continue)
+    else:
+        inputs = format_prompt(messages, do_continue=do_continue)
+    return inputs
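To make the dispatch concrete, here is an illustrative call to the new get_inputs helper; the model_data dict is a hand-built stand-in for the metadata the HuggingFace model API returns, not a real response.

```python
# Hypothetical metadata: an eos_token of "<|eot_id|>" routes the
# messages through format_prompt_llama.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello"},
]
model_data = {
    "config": {
        "model_type": "llama",
        "tokenizer_config": {"eos_token": "<|eot_id|>"},
    }
}
inputs = get_inputs(messages, model_data, model_type="llama")
```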
5 changes: 3 additions & 2 deletions g4f/Provider/needs_auth/OpenaiChat.py
@@ -404,7 +404,7 @@ async def create_async_generator(
             data["conversation_id"] = conversation.conversation_id
             debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
         if action != "continue":
-            data["parent_message_id"] = conversation.parent_message_id
+            data["parent_message_id"] = getattr(conversation, "parent_message_id", conversation.message_id)
             conversation.parent_message_id = None
         messages = messages if conversation_id is None else [messages[-1]]
         data["messages"] = cls.create_messages(messages, image_requests, ["search"] if web_search else None)
@@ -604,7 +604,6 @@ async def login(
                 "api_key": cls._api_key,
                 "proof_token": RequestConfig.proof_token,
                 "cookies": RequestConfig.cookies,
-                "headers": RequestConfig.headers
             })

     @classmethod
@@ -636,6 +635,8 @@ def on_request(event: nodriver.cdp.network.RequestWillBeSent):
         page = await browser.get(cls.url)
         user_agent = await page.evaluate("window.navigator.userAgent")
         await page.select("#prompt-textarea", 240)
+        await page.evaluate("document.getElementById('prompt-textarea').innerText = 'Hello'")
+        await page.evaluate("document.querySelector('[data-testid=\"send-button\"]').click()")
         while True:
             if cls._api_key is not None:
                 break
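The two new page.evaluate calls type a throwaway "Hello" prompt and click send, so the on_request interceptor sees a real completion request and can capture the API key. A standalone sketch of the same nodriver pattern, with the selectors copied from the diff and the final sleep standing in for the key-polling loop:

```python
# Sketch under assumptions: nodriver is installed and a Chromium-based
# browser is available; the sleep replaces the cls._api_key polling.
import asyncio
import nodriver

async def trigger_first_request() -> None:
    browser = await nodriver.start()
    page = await browser.get("https://chatgpt.com/")
    await page.select("#prompt-textarea", 240)  # wait for the prompt editor
    await page.evaluate("document.getElementById('prompt-textarea').innerText = 'Hello'")
    await page.evaluate("document.querySelector('[data-testid=\"send-button\"]').click()")
    await asyncio.sleep(10)  # stand-in for polling until the key is captured

nodriver.loop().run_until_complete(trigger_first_request())
```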
3 changes: 1 addition & 2 deletions g4f/Provider/needs_auth/Poe.py
@@ -5,7 +5,6 @@
 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
 from ..helper import format_prompt
-from ...webdriver import WebDriver, WebDriverSession, element_send_text

 models = {
     "meta-llama/Llama-2-7b-chat-hf": {"name": "Llama-2-7b"},
@@ -22,7 +21,7 @@

 class Poe(AbstractProvider):
     url = "https://poe.com"
-    working = True
+    working = False
     needs_auth = True
     supports_stream = True
3 changes: 1 addition & 2 deletions g4f/Provider/needs_auth/Theb.py
@@ -5,7 +5,6 @@
 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
 from ..helper import format_prompt
-from ...webdriver import WebDriver, WebDriverSession, element_send_text

 models = {
     "theb-ai": "TheB.AI",
@@ -34,7 +33,7 @@
 class Theb(AbstractProvider):
     label = "TheB.AI"
     url = "https://beta.theb.ai"
-    working = True
+    working = False
     supports_stream = True

     models = models.keys()
4 changes: 1 addition & 3 deletions g4f/Provider/not_working/Aura.py
@@ -4,8 +4,6 @@

 from ...typing import AsyncResult, Messages
 from ..base_provider import AsyncGeneratorProvider
-from ...requests import get_args_from_browser
-from ...webdriver import WebDriver

 class Aura(AsyncGeneratorProvider):
     url = "https://openchat.team"
@@ -19,7 +17,7 @@ async def create_async_generator(
         proxy: str = None,
         temperature: float = 0.5,
         max_tokens: int = 8192,
-        webdriver: WebDriver = None,
+        webdriver = None,
         **kwargs
     ) -> AsyncResult:
         args = get_args_from_browser(cls.url, webdriver, proxy)
3 changes: 1 addition & 2 deletions g4f/Provider/not_working/MyShell.py
@@ -5,7 +5,6 @@
 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
 from ..helper import format_prompt
-from ...webdriver import WebDriver, WebDriverSession, bypass_cloudflare

 class MyShell(AbstractProvider):
     url = "https://app.myshell.ai/chat"
@@ -21,7 +20,7 @@ def create_completion(
         stream: bool,
         proxy: str = None,
         timeout: int = 120,
-        webdriver: WebDriver = None,
+        webdriver = None,
         **kwargs
     ) -> CreateResult:
         with WebDriverSession(webdriver, "", proxy=proxy) as driver:
3 changes: 1 addition & 2 deletions g4f/Provider/selenium/PerplexityAi.py
@@ -12,7 +12,6 @@
 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
 from ..helper import format_prompt
-from ...webdriver import WebDriver, WebDriverSession, element_send_text

 class PerplexityAi(AbstractProvider):
     url = "https://www.perplexity.ai"
@@ -28,7 +27,7 @@ def create_completion(
         stream: bool,
         proxy: str = None,
         timeout: int = 120,
-        webdriver: WebDriver = None,
+        webdriver = None,
         virtual_display: bool = True,
         copilot: bool = False,
         **kwargs
3 changes: 1 addition & 2 deletions g4f/Provider/selenium/Phind.py
@@ -6,7 +6,6 @@
 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
 from ..helper import format_prompt
-from ...webdriver import WebDriver, WebDriverSession

 class Phind(AbstractProvider):
     url = "https://www.phind.com"
@@ -22,7 +21,7 @@ def create_completion(
         stream: bool,
         proxy: str = None,
         timeout: int = 120,
-        webdriver: WebDriver = None,
+        webdriver = None,
         creative_mode: bool = None,
         **kwargs
     ) -> CreateResult:
3 changes: 1 addition & 2 deletions g4f/Provider/selenium/TalkAi.py
@@ -4,7 +4,6 @@

 from ...typing import CreateResult, Messages
 from ..base_provider import AbstractProvider
-from ...webdriver import WebDriver, WebDriverSession

 class TalkAi(AbstractProvider):
     url = "https://talkai.info"
@@ -19,7 +18,7 @@ def create_completion(
         messages: Messages,
         stream: bool,
         proxy: str = None,
-        webdriver: WebDriver = None,
+        webdriver = None,
         **kwargs
     ) -> CreateResult:
         with WebDriverSession(webdriver, "", virtual_display=True, proxy=proxy) as driver:
37 changes: 36 additions & 1 deletion g4f/api/__init__.py
@@ -41,11 +41,12 @@
 from g4f.cookies import read_cookie_files, get_cookies_dir
 from g4f.Provider import ProviderType, ProviderUtils, __providers__
 from g4f.gui import get_gui_app
+from g4f.tools.files import supports_filename, get_streaming
 from .stubs import (
     ChatCompletionsConfig, ImageGenerationConfig,
     ProviderResponseModel, ModelResponseModel,
     ErrorResponseModel, ProviderResponseDetailModel,
-    FileResponseModel, Annotated
+    FileResponseModel, UploadResponseModel, Annotated
 )

 logger = logging.getLogger(__name__)
@@ -424,6 +425,40 @@ def upload_cookies(files: List[UploadFile]):
             read_cookie_files()
             return response_data

+        @self.app.get("/v1/files/{bucket_id}", responses={
+            HTTP_200_OK: {"content": {
+                "text/event-stream": {"schema": {"type": "string"}},
+                "text/plain": {"schema": {"type": "string"}},
+            }},
+            HTTP_404_NOT_FOUND: {"model": ErrorResponseModel},
+        })
+        def read_files(request: Request, bucket_id: str, delete_files: bool = True, refine_chunks_with_spacy: bool = False):
+            bucket_dir = os.path.join(get_cookies_dir(), bucket_id)
+            event_stream = "text/event-stream" in request.headers.get("accept", "")
+            if not os.path.isdir(bucket_dir):
+                return ErrorResponse.from_message("Bucket dir not found", 404)
+            return StreamingResponse(get_streaming(bucket_dir, delete_files, refine_chunks_with_spacy, event_stream), media_type="text/plain")
+
+        @self.app.post("/v1/files/{bucket_id}", responses={
+            HTTP_200_OK: {"model": UploadResponseModel}
+        })
+        def upload_files(bucket_id: str, files: List[UploadFile]):
+            bucket_dir = os.path.join(get_cookies_dir(), bucket_id)
+            os.makedirs(bucket_dir, exist_ok=True)
+            filenames = []
+            for file in files:
+                try:
+                    filename = os.path.basename(file.filename)
+                    if file and supports_filename(filename):
+                        with open(os.path.join(bucket_dir, filename), 'wb') as f:
+                            shutil.copyfileobj(file.file, f)
+                        filenames.append(filename)
+                finally:
+                    file.file.close()
+            with open(os.path.join(bucket_dir, "files.txt"), 'w') as f:
+                [f.write(f"{filename}\n") for filename in filenames]
+            return {"bucket_id": bucket_id, "url": f"/v1/files/{bucket_id}", "files": filenames}
+
         @self.app.get("/v1/synthesize/{provider}", responses={
             HTTP_200_OK: {"content": {"audio/*": {}}},
             HTTP_404_NOT_FOUND: {"model": ErrorResponseModel},
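Together the two routes give each bucket a simple upload-then-stream lifecycle. A hedged usage sketch against a local server follows; the port (1337, the usual g4f API default) and the bucket id "my-bucket" are assumptions, not part of the commit.

```python
# Usage sketch: upload a text file into a bucket, then stream the
# scraped/refined content back. Requires a running g4f API server.
import requests

base = "http://localhost:1337/v1/files/my-bucket"  # assumed host/port

# Upload: unsupported file types are skipped server-side, and accepted
# names are recorded in the bucket's files.txt.
with open("notes.txt", "rb") as f:
    resp = requests.post(base, files=[("files", ("notes.txt", f, "text/plain"))])
print(resp.json())  # e.g. {"bucket_id": "my-bucket", "url": "/v1/files/my-bucket", "files": ["notes.txt"]}

# Read back as plain text. delete_files=True (the default) consumes the
# bucket, so pass false to keep the files around.
resp = requests.get(base, params={"delete_files": "false"}, stream=True)
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="")
```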
4 changes: 4 additions & 0 deletions g4f/api/stubs.py
@@ -66,6 +66,10 @@ class ModelResponseModel(BaseModel):
     created: int
     owned_by: Optional[str]

+class UploadResponseModel(BaseModel):
+    bucket_id: str
+    url: str
+
 class ErrorResponseModel(BaseModel):
     error: ErrorResponseMessageModel
     model: Optional[str] = None