From 4bc4d635bca9c1c7633ff87ff24b757c653ff60f Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Mon, 22 Apr 2024 01:27:48 +0200 Subject: Add vision models to readme --- g4f/Provider/Bing.py | 3 +- g4f/Provider/BingCreateImages.py | 1 + g4f/Provider/DeepInfra.py | 33 ++++++++---- g4f/Provider/DeepInfraImage.py | 1 + g4f/Provider/MetaAIAccount.py | 1 + g4f/Provider/ReplicateImage.py | 1 + g4f/Provider/You.py | 22 +++++--- g4f/Provider/needs_auth/Gemini.py | 87 ++++++++++++++++++++++---------- g4f/Provider/needs_auth/OpenaiAccount.py | 1 + g4f/Provider/needs_auth/OpenaiChat.py | 1 + g4f/gui/server/api.py | 14 +++-- g4f/image.py | 9 +++- 12 files changed, 122 insertions(+), 52 deletions(-) (limited to 'g4f') diff --git a/g4f/Provider/Bing.py b/g4f/Provider/Bing.py index 1fe94359..bfd74f8c 100644 --- a/g4f/Provider/Bing.py +++ b/g4f/Provider/Bing.py @@ -38,8 +38,9 @@ class Bing(AsyncGeneratorProvider, ProviderModelMixin): supports_message_history = True supports_gpt_4 = True default_model = "Balanced" + default_vision_model = "gpt-4-vision" models = [getattr(Tones, key) for key in Tones.__dict__ if not key.startswith("__")] - + @classmethod def create_async_generator( cls, diff --git a/g4f/Provider/BingCreateImages.py b/g4f/Provider/BingCreateImages.py index 60ecff07..69bf1e92 100644 --- a/g4f/Provider/BingCreateImages.py +++ b/g4f/Provider/BingCreateImages.py @@ -13,6 +13,7 @@ from .bing.create_images import create_images, create_session, get_cookies_from_ class BingCreateImages(AsyncGeneratorProvider, ProviderModelMixin): label = "Microsoft Designer" + parent = "Bing" url = "https://www.bing.com/images/create" working = True needs_auth = True diff --git a/g4f/Provider/DeepInfra.py b/g4f/Provider/DeepInfra.py index 971424b7..35ff84a1 100644 --- a/g4f/Provider/DeepInfra.py +++ b/g4f/Provider/DeepInfra.py @@ -1,17 +1,22 @@ from __future__ import annotations import requests -from ..typing import AsyncResult, Messages +from ..typing import AsyncResult, Messages, ImageType +from ..image import to_data_uri from .needs_auth.Openai import Openai class DeepInfra(Openai): label = "DeepInfra" url = "https://deepinfra.com" working = True - needs_auth = False + has_auth = True supports_stream = True supports_message_history = True - default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1' + default_model = "meta-llama/Meta-Llama-3-70b-instruct" + default_vision_model = "llava-hf/llava-1.5-7b-hf" + model_aliases = { + 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1' + } @classmethod def get_models(cls): @@ -27,19 +32,12 @@ class DeepInfra(Openai): model: str, messages: Messages, stream: bool, + image: ImageType = None, api_base: str = "https://api.deepinfra.com/v1/openai", temperature: float = 0.7, max_tokens: int = 1028, **kwargs ) -> AsyncResult: - - if not '/' in model: - models = { - 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1', - 'dbrx-instruct': 'databricks/dbrx-instruct', - } - model = models.get(model, model) - headers = { 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US', @@ -55,6 +53,19 @@ class DeepInfra(Openai): 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', } + if image is not None: + if not model: + model = cls.default_vision_model + messages[-1]["content"] = [ + { + "type": "image_url", + "image_url": {"url": to_data_uri(image)} + }, + { + "type": "text", + "text": messages[-1]["content"] + } + ] return super().create_async_generator( model, messages, stream=stream, diff --git a/g4f/Provider/DeepInfraImage.py b/g4f/Provider/DeepInfraImage.py index 8e56e513..46a5c2e2 100644 --- a/g4f/Provider/DeepInfraImage.py +++ b/g4f/Provider/DeepInfraImage.py @@ -9,6 +9,7 @@ from ..image import ImageResponse class DeepInfraImage(AsyncGeneratorProvider, ProviderModelMixin): url = "https://deepinfra.com" + parent = "DeepInfra" working = True default_model = 'stability-ai/sdxl' image_models = [default_model] diff --git a/g4f/Provider/MetaAIAccount.py b/g4f/Provider/MetaAIAccount.py index d334393d..369b3f2f 100644 --- a/g4f/Provider/MetaAIAccount.py +++ b/g4f/Provider/MetaAIAccount.py @@ -6,6 +6,7 @@ from .MetaAI import MetaAI class MetaAIAccount(MetaAI): needs_auth = True + parent = "MetaAI" image_models = ["meta"] @classmethod diff --git a/g4f/Provider/ReplicateImage.py b/g4f/Provider/ReplicateImage.py index 5d001604..cc3943d7 100644 --- a/g4f/Provider/ReplicateImage.py +++ b/g4f/Provider/ReplicateImage.py @@ -11,6 +11,7 @@ from ..errors import ResponseError class ReplicateImage(AsyncGeneratorProvider, ProviderModelMixin): url = "https://replicate.com" + parent = "Replicate" working = True default_model = 'stability-ai/sdxl' default_versions = [ diff --git a/g4f/Provider/You.py b/g4f/Provider/You.py index 61069503..a9e7834c 100644 --- a/g4f/Provider/You.py +++ b/g4f/Provider/You.py @@ -14,13 +14,16 @@ from .you.har_file import get_telemetry_ids from .. import debug class You(AsyncGeneratorProvider, ProviderModelMixin): + label = "You.com" url = "https://you.com" working = True supports_gpt_35_turbo = True supports_gpt_4 = True default_model = "gpt-3.5-turbo" + default_vision_model = "agent" + image_models = ["dall-e"] models = [ - "gpt-3.5-turbo", + default_model, "gpt-4", "gpt-4-turbo", "claude-instant", @@ -29,12 +32,12 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): "claude-3-sonnet", "gemini-pro", "zephyr", - "dall-e", + default_vision_model, + *image_models ] model_aliases = { "claude-v2": "claude-2" } - image_models = ["dall-e"] _cookies = None _cookies_used = 0 _telemetry_ids = [] @@ -52,7 +55,7 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): chat_mode: str = "default", **kwargs, ) -> AsyncResult: - if image is not None: + if image is not None or model == cls.default_vision_model: chat_mode = "agent" elif not model or model == cls.default_model: ... @@ -63,13 +66,18 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): chat_mode = "custom" model = cls.get_model(model) async with StreamSession( - proxies={"all": proxy}, + proxy=proxy, impersonate="chrome", timeout=(30, timeout) ) as session: cookies = await cls.get_cookies(session) if chat_mode != "default" else None - - upload = json.dumps([await cls.upload_file(session, cookies, to_bytes(image), image_name)]) if image else "" + upload = "" + if image is not None: + upload_file = await cls.upload_file( + session, cookies, + to_bytes(image), image_name + ) + upload = json.dumps([upload_file]) headers = { "Accept": "text/event-stream", "Referer": f"{cls.url}/search?fromSearchBar=true&tbm=youchat", diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index 3917df80..209c2e91 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -16,6 +16,7 @@ try: except ImportError: pass +from ... import debug from ...typing import Messages, Cookies, ImageType, AsyncResult from ..base_provider import AsyncGeneratorProvider from ..helper import format_prompt, get_cookies @@ -54,6 +55,55 @@ class Gemini(AsyncGeneratorProvider): needs_auth = True working = True image_models = ["gemini"] + default_vision_model = "gemini" + _cookies: Cookies = None + + @classmethod + async def nodriver_login(cls) -> Cookies: + try: + import nodriver as uc + except ImportError: + return + try: + from platformdirs import user_config_dir + user_data_dir = user_config_dir("g4f-nodriver") + except: + user_data_dir = None + if debug.logging: + print(f"Open nodriver with user_dir: {user_data_dir}") + browser = await uc.start(user_data_dir=user_data_dir) + page = await browser.get(f"{cls.url}/app") + await page.select("div.ql-editor.textarea", 240) + cookies = {} + for c in await page.browser.cookies.get_all(): + if c.domain.endswith(".google.com"): + cookies[c.name] = c.value + await page.close() + return cookies + + @classmethod + async def webdriver_login(cls, proxy: str): + driver = None + try: + driver = get_browser(proxy=proxy) + try: + driver.get(f"{cls.url}/app") + WebDriverWait(driver, 5).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + except: + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Gemini]({login_url})\n\n" + WebDriverWait(driver, 240).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + cls._cookies = get_driver_cookies(driver) + except MissingRequirementsError: + pass + finally: + if driver: + driver.close() @classmethod async def create_async_generator( @@ -73,47 +123,30 @@ class Gemini(AsyncGeneratorProvider): if cookies is None: cookies = {} cookies["__Secure-1PSID"] = api_key - cookies = cookies if cookies else get_cookies(".google.com", False, True) + cls._cookies = cookies or cls._cookies or get_cookies(".google.com", False, True) base_connector = get_connector(connector, proxy) async with ClientSession( headers=REQUEST_HEADERS, connector=base_connector ) as session: - snlm0e = await cls.fetch_snlm0e(session, cookies) if cookies else None + snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if cls._cookies else None if not snlm0e: - driver = None - try: - driver = get_browser(proxy=proxy) - try: - driver.get(f"{cls.url}/app") - WebDriverWait(driver, 5).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - except: - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [Google Gemini]({login_url})\n\n" - WebDriverWait(driver, 240).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - cookies = get_driver_cookies(driver) - except MissingRequirementsError: - pass - finally: - if driver: - driver.close() + cls._cookies = await cls.nodriver_login(); + if cls._cookies is None: + async for chunk in cls.webdriver_login(proxy): + yield chunk if not snlm0e: - if "__Secure-1PSID" not in cookies: + if "__Secure-1PSID" not in cls._cookies: raise MissingAuthError('Missing "__Secure-1PSID" cookie') - snlm0e = await cls.fetch_snlm0e(session, cookies) + snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if not snlm0e: - raise RuntimeError("Invalid auth. SNlM0e not found") + raise RuntimeError("Invalid cookies. SNlM0e not found") image_url = await cls.upload_image(base_connector, to_bytes(image), image_name) if image else None async with ClientSession( - cookies=cookies, + cookies=cls._cookies, headers=REQUEST_HEADERS, connector=base_connector, ) as client: diff --git a/g4f/Provider/needs_auth/OpenaiAccount.py b/g4f/Provider/needs_auth/OpenaiAccount.py index 6260d343..16bfff66 100644 --- a/g4f/Provider/needs_auth/OpenaiAccount.py +++ b/g4f/Provider/needs_auth/OpenaiAccount.py @@ -4,4 +4,5 @@ from .OpenaiChat import OpenaiChat class OpenaiAccount(OpenaiChat): needs_auth = True + parent = "OpenaiChat" image_models = ["dall-e"] \ No newline at end of file diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 3d6e9858..515230f0 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -44,6 +44,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): supports_message_history = True supports_system_message = True default_model = None + default_vision_model = "gpt-4-vision" models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-gizmo"] model_aliases = { "text-davinci-002-render-sha": "gpt-3.5-turbo", diff --git a/g4f/gui/server/api.py b/g4f/gui/server/api.py index 435700ea..3d9f6a1c 100644 --- a/g4f/gui/server/api.py +++ b/g4f/gui/server/api.py @@ -45,16 +45,20 @@ class Api(): @staticmethod def get_image_models() -> list[dict]: image_models = [] - for key, provider in __map__.items(): + for provider in __providers__: if hasattr(provider, "image_models"): if hasattr(provider, "get_models"): provider.get_models() + parent = provider + if hasattr(provider, "parent"): + parent = __map__[provider.parent] for model in provider.image_models: image_models.append({ - "provider": key, - "url": provider.url, - "label": provider.label if hasattr(provider, "label") else None, - "image_model": model + "provider": parent.__name__, + "url": parent.url, + "label": parent.label if hasattr(parent, "label") else None, + "image_model": model, + "vision_model": parent.default_vision_model if hasattr(parent, "default_vision_model") else None }) return image_models diff --git a/g4f/image.py b/g4f/image.py index ed8af103..270b59ad 100644 --- a/g4f/image.py +++ b/g4f/image.py @@ -86,7 +86,7 @@ def is_data_uri_an_image(data_uri: str) -> bool: if image_format not in ALLOWED_EXTENSIONS and image_format != "svg+xml": raise ValueError("Invalid image format (from mime file type).") -def is_accepted_format(binary_data: bytes) -> bool: +def is_accepted_format(binary_data: bytes) -> str: """ Checks if the given binary data represents an image with an accepted format. @@ -241,6 +241,13 @@ def to_bytes(image: ImageType) -> bytes: else: return image.read() +def to_data_uri(image: ImageType) -> str: + if not isinstance(image, str): + data = to_bytes(image) + data_base64 = base64.b64encode(data).decode() + return f"data:{is_accepted_format(data)};base64,{data_base64}" + return image + class ImageResponse: def __init__( self, -- cgit v1.2.3