From 335c971f6a9cd071d18f9fffeb76df4eda8876d5 Mon Sep 17 00:00:00 2001
From: H Lohaus
Date: Fri, 13 Dec 2024 22:20:58 +0100
Subject: Add multiple images support (#2478)

* Add multiple images support

* Add multiple images support in gui

* Support multiple images in legacy client and in the api
  Fix some model names in provider model list

* Fix unittests

* Add vision and providers docs
---
 g4f/Provider/Blackbox.py               |   8 +-
 g4f/Provider/Blackbox2.py              |  12 +--
 g4f/Provider/Copilot.py                |  29 ++++---
 g4f/Provider/DeepInfraChat.py          |  48 ++---------
 g4f/Provider/Flux.py                   |   3 +-
 g4f/Provider/PerplexityLabs.py         |   3 +-
 g4f/Provider/RubiksAI.py               |   2 +-
 g4f/Provider/needs_auth/Gemini.py      |  75 ++++++++--------
 g4f/Provider/needs_auth/GeminiPro.py   |  21 ++---
 g4f/Provider/needs_auth/HuggingFace.py |   2 +-
 g4f/Provider/needs_auth/OpenaiAPI.py   |  10 +--
 g4f/Provider/needs_auth/OpenaiChat.py  | 152 +++++++++++++++++++--------------
 g4f/__init__.py                        |  52 +++--------
 g4f/api/__init__.py                    |  15 +++-
 g4f/client/__init__.py                 |  14 +--
 g4f/gui/client/index.html              |   2 +-
 g4f/gui/client/static/js/chat.v1.js    |  46 ++++++----
 g4f/gui/server/api.py                  |   2 +-
 g4f/gui/server/backend.py              |  11 +--
 g4f/image.py                           |   3 +-
 g4f/requests/curl_cffi.py              |   1 +
 g4f/typing.py                          |   6 +-
 22 files changed, 253 insertions(+), 264 deletions(-)

(limited to 'g4f')

diff --git a/g4f/Provider/Blackbox.py b/g4f/Provider/Blackbox.py
index fec0a8a9..68ac7852 100644
--- a/g4f/Provider/Blackbox.py
+++ b/g4f/Provider/Blackbox.py
@@ -10,7 +10,7 @@ import aiohttp
 import json
 from pathlib import Path
 
-from ..typing import AsyncResult, Messages, ImageType
+from ..typing import AsyncResult, Messages, ImagesType
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from ..image import ImageResponse, to_data_uri
 from ..cookies import get_cookies_dir
@@ -197,8 +197,7 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
         prompt: str = None,
         proxy: str = None,
         web_search: bool = False,
-        image: ImageType = None,
-        image_name: str = None,
+        images: ImagesType = None,
         top_p: float = 0.9,
         temperature: float = 0.5,
         max_tokens: int = 1024,
@@ -212,13 +211,14 @@ class Blackbox(AsyncGeneratorProvider, ProviderModelMixin):
 
         messages = [{"id": message_id, "content": formatted_message, "role": "user"}]
 
-        if image is not None:
+        if images is not None:
             messages[-1]['data'] = {
                 "imagesData": [
                     {
                         "filePath": f"MultipleFiles/{image_name}",
                         "contents": to_data_uri(image)
                     }
+                    for image, image_name in images
                 ],
                 "fileText": "",
                 "title": ""
diff --git a/g4f/Provider/Blackbox2.py b/g4f/Provider/Blackbox2.py
index cae3141e..d09cbf12 100644
--- a/g4f/Provider/Blackbox2.py
+++ b/g4f/Provider/Blackbox2.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 import random
 import asyncio
 from aiohttp import ClientSession
-from typing import Union, AsyncGenerator
+from typing import AsyncGenerator
 
 from ..typing import AsyncResult, Messages
 from ..image import ImageResponse
@@ -37,12 +37,15 @@ class Blackbox2(AsyncGeneratorProvider, ProviderModelMixin):
         max_retries: int = 3,
         delay: int = 1,
         **kwargs
-    ) -> AsyncGenerator:
+    ) -> AsyncResult:
+        if not model:
+            model = cls.default_model
         if model in cls.chat_models:
             async for result in cls._generate_text(model, messages, proxy, max_retries, delay):
                 yield result
         elif model in cls.image_models:
-            async for result in cls._generate_image(model, messages, proxy):
+            prompt = messages[-1]["content"] if prompt is None else prompt
+            async for result in cls._generate_image(model, prompt, proxy):
                 yield result
         else:
             raise ValueError(f"Unsupported model: {model}")
@@ -87,14 +90,13 @@ class Blackbox2(AsyncGeneratorProvider, ProviderModelMixin):
     async def _generate_image(
         cls,
         model: str,
-        messages: Messages,
+        prompt: str,
         proxy: str = None
     ) -> AsyncGenerator:
         headers = cls._get_headers()
         api_endpoint = cls.api_endpoints[model]
 
         async with ClientSession(headers=headers) as session:
-            prompt = messages[-1]["content"]
             data = {
                 "query": prompt
             }
diff --git a/g4f/Provider/Copilot.py b/g4f/Provider/Copilot.py
index 23f175ac..cc0cb6ab 100644
--- a/g4f/Provider/Copilot.py
+++ b/g4f/Provider/Copilot.py
@@ -18,7 +18,7 @@ except ImportError:
 
 from .base_provider import AbstractProvider, ProviderModelMixin, BaseConversation
 from .helper import format_prompt
-from ..typing import CreateResult, Messages, ImageType
+from ..typing import CreateResult, Messages, ImagesType
 from ..errors import MissingRequirementsError, NoValidHarFileError
 from ..requests.raise_for_status import raise_for_status
 from ..providers.asyncio import get_running_loop
@@ -58,7 +58,7 @@ class Copilot(AbstractProvider, ProviderModelMixin):
         stream: bool = False,
         proxy: str = None,
         timeout: int = 900,
-        image: ImageType = None,
+        images: ImagesType = None,
         conversation: Conversation = None,
         return_conversation: bool = False,
         web_search: bool = True,
@@ -69,7 +69,7 @@ class Copilot(AbstractProvider, ProviderModelMixin):
 
         websocket_url = cls.websocket_url
         headers = None
-        if cls.needs_auth or image is not None:
+        if cls.needs_auth or images is not None:
             if cls._access_token is None:
                 try:
                     cls._access_token, cls._cookies = readHAR(cls.url)
@@ -112,22 +112,23 @@ class Copilot(AbstractProvider, ProviderModelMixin):
             prompt = messages[-1]["content"]
             debug.log(f"Copilot: Use conversation: {conversation_id}")
 
-        images = []
-        if image is not None:
-            data = to_bytes(image)
-            response = session.post(
-                "https://copilot.microsoft.com/c/api/attachments",
-                headers={"content-type": is_accepted_format(data)},
-                data=data
-            )
-            raise_for_status(response)
-            images.append({"type":"image", "url": response.json().get("url")})
+        uploaded_images = []
+        if images is not None:
+            for image, _ in images:
+                data = to_bytes(image)
+                response = session.post(
+                    "https://copilot.microsoft.com/c/api/attachments",
+                    headers={"content-type": is_accepted_format(data)},
+                    data=data
+                )
+                raise_for_status(response)
+                uploaded_images.append({"type":"image", "url": response.json().get("url")})
 
         wss = session.ws_connect(cls.websocket_url)
         wss.send(json.dumps({
             "event": "send",
             "conversationId": conversation_id,
-            "content": [*images, {
+            "content": [*uploaded_images, {
                 "type": "text",
                 "text": prompt,
             }],
diff --git a/g4f/Provider/DeepInfraChat.py b/g4f/Provider/DeepInfraChat.py
index 6874b023..5f3e68e5 100644
--- a/g4f/Provider/DeepInfraChat.py
+++ b/g4f/Provider/DeepInfraChat.py
@@ -1,19 +1,12 @@
 from __future__ import annotations
 
-from aiohttp import ClientSession, ClientResponseError
-import json
-from ..typing import AsyncResult, Messages, ImageType
-from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
+from ..typing import AsyncResult, Messages
+from .needs_auth import OpenaiAPI
 
-
-class DeepInfraChat(AsyncGeneratorProvider, ProviderModelMixin):
+class DeepInfraChat(OpenaiAPI):
+    label = "DeepInfra Chat"
     url = "https://deepinfra.com/chat"
-    api_endpoint = "https://api.deepinfra.com/v1/openai/chat/completions"
-
     working = True
-    supports_stream = True
-    supports_system_message = True
-    supports_message_history = True
 
     default_model = 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
     models = [
@@ -31,16 +24,17 @@ class DeepInfraChat(AsyncGeneratorProvider, ProviderModelMixin):
         "qwq-32b": "Qwen/QwQ-32B-Preview",
         "wizardlm-2-8x22b": "microsoft/WizardLM-2-8x22B",
         "qwen-2-72b": "Qwen/Qwen2.5-72B-Instruct",
-        "qwen-2.5-coder-32b": "Qwen2.5-Coder-32B-Instruct",
+        "qwen-2.5-coder-32b": "Qwen/Qwen2.5-Coder-32B-Instruct",
         "nemotron-70b": "nvidia/Llama-3.1-Nemotron-70B-Instruct",
     }
 
     @classmethod
-    async def create_async_generator(
+    def create_async_generator(
         cls,
         model: str,
         messages: Messages,
         proxy: str = None,
+        api_base: str = "https://api.deepinfra.com/v1/openai",
         **kwargs
     ) -> AsyncResult:
         headers = {
@@ -52,30 +46,4 @@ class DeepInfraChat(AsyncGeneratorProvider, ProviderModelMixin):
             'X-Deepinfra-Source': 'web-page',
             'accept': 'text/event-stream',
         }
-
-        data = {
-            'model': model,
-            'messages': messages,
-            'stream': True
-        }
-
-        async with ClientSession(headers=headers) as session:
-            async with session.post(cls.api_endpoint, json=data, proxy=proxy) as response:
-                response.raise_for_status()
-                async for line in response.content:
-                    if line:
-                        decoded_line = line.decode('utf-8').strip()
-                        if decoded_line.startswith('data:'):
-                            json_part = decoded_line[5:].strip()
-                            if json_part == '[DONE]':
-                                break
-                            try:
-                                data = json.loads(json_part)
-                                choices = data.get('choices', [])
-                                if choices:
-                                    delta = choices[0].get('delta', {})
-                                    content = delta.get('content', '')
-                                    if content:
-                                        yield content
-                            except json.JSONDecodeError:
-                                print(f"JSON decode error: {json_part}")
+        return super().create_async_generator(model, messages, proxy, api_base=api_base, headers=headers, **kwargs)
\ No newline at end of file
diff --git a/g4f/Provider/Flux.py b/g4f/Provider/Flux.py
index b211ecef..d3949153 100644
--- a/g4f/Provider/Flux.py
+++ b/g4f/Provider/Flux.py
@@ -8,14 +8,13 @@ from ..image import ImageResponse, ImagePreview
 from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
 
 class Flux(AsyncGeneratorProvider, ProviderModelMixin):
-    label = "HuggingSpace (black-forest-labs-flux-1-dev)"
+    label = "Flux (HuggingSpace)"
     url = "https://black-forest-labs-flux-1-dev.hf.space"
     api_endpoint = "/gradio_api/call/infer"
     working = True
     default_model = 'flux-dev'
     models = [default_model]
     image_models = [default_model]
-    model_aliases = {"flux-dev": "flux-1-dev"}
 
     @classmethod
     async def create_async_generator(
diff --git a/g4f/Provider/PerplexityLabs.py b/g4f/Provider/PerplexityLabs.py
index 3a6f0d39..af0b8d5b 100644
--- a/g4f/Provider/PerplexityLabs.py
+++ b/g4f/Provider/PerplexityLabs.py
@@ -21,14 +21,13 @@ class PerplexityLabs(AsyncGeneratorProvider, ProviderModelMixin):
         "llama-3.1-sonar-small-128k-chat",
         "llama-3.1-8b-instruct",
         "llama-3.1-70b-instruct",
+        "llama-3.3-70b-instruct",
         "/models/LiquidCloud",
     ]
 
     model_aliases = {
         "sonar-online": "llama-3.1-sonar-large-128k-online",
-        "sonar-online": "sonar-small-128k-online",
         "sonar-chat": "llama-3.1-sonar-large-128k-chat",
-        "sonar-chat": "llama-3.1-sonar-small-128k-chat",
         "llama-3.3-70b": "llama-3.3-70b-instruct",
         "llama-3.1-8b": "llama-3.1-8b-instruct",
         "llama-3.1-70b": "llama-3.1-70b-instruct",
diff --git a/g4f/Provider/RubiksAI.py b/g4f/Provider/RubiksAI.py
index 86d61564..43760f9d 100644
--- a/g4f/Provider/RubiksAI.py
+++ b/g4f/Provider/RubiksAI.py
@@ -23,7 +23,7 @@ class RubiksAI(AsyncGeneratorProvider, ProviderModelMixin):
     supports_message_history = True
 
     default_model = 'gpt-4o-mini'
-    models = [default_model, 'gpt-4o', 'o1-mini', 'claude-3.5-sonnet', 'grok-beta', 'gemini-1.5-pro', 'nova-pro']
+    models = [default_model, 'gpt-4o', 'o1-mini', 'claude-3.5-sonnet', 'grok-beta', 'gemini-1.5-pro', 'nova-pro', "llama-3.1-70b-versatile"]
 
     model_aliases = {
         "llama-3.1-70b": "llama-3.1-70b-versatile",
     }
diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py
index b55a604b..3734bbb8 100644
--- a/g4f/Provider/needs_auth/Gemini.py
+++ b/g4f/Provider/needs_auth/Gemini.py
@@ -5,6 +5,7 @@ import json
 import random
 import re
 import base64
+import asyncio
 
 from aiohttp import ClientSession, BaseConnector
 
@@ -15,7 +16,7 @@ except ImportError:
     has_nodriver = False
 
 from ... import debug
-from ...typing import Messages, Cookies, ImageType, AsyncResult, AsyncIterator
+from ...typing import Messages, Cookies, ImagesType, AsyncResult, AsyncIterator
 from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin, BaseConversation, SynthesizeData
 from ..helper import format_prompt, get_cookies
 from ...requests.raise_for_status import raise_for_status
@@ -97,8 +98,7 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
         proxy: str = None,
         cookies: Cookies = None,
         connector: BaseConnector = None,
-        image: ImageType = None,
-        image_name: str = None,
+        images: ImagesType = None,
         return_conversation: bool = False,
         conversation: Conversation = None,
         language: str = "en",
@@ -128,8 +128,7 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
                 raise RuntimeError("Invalid cookies. SNlM0e not found")
             yield SynthesizeData(cls.__name__, {"text": messages[-1]["content"]})
 
-        image_url = await cls.upload_image(base_connector, to_bytes(image), image_name) if image else None
-
+        images = await cls.upload_images(base_connector, images) if images else None
         async with ClientSession(
             cookies=cls._cookies,
             headers=REQUEST_HEADERS,
@@ -148,8 +147,7 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
                     prompt,
                     language=language,
                     conversation=conversation,
-                    image_url=image_url,
-                    image_name=image_name
+                    images=images
                 ))])
             }
             async with client.post(
@@ -195,7 +193,7 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
                         images = [image[0][3][3] for image in response_part[4][0][12][7][0]]
                         image_prompt = image_prompt.replace("a fake image", "")
                         yield ImageResponse(images, image_prompt, {"cookies": cls._cookies})
-                    except TypeError:
+                    except (TypeError, IndexError, KeyError):
                         pass
 
     @classmethod
@@ -235,11 +233,10 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
         prompt: str,
         language: str,
         conversation: Conversation = None,
-        image_url: str = None,
-        image_name: str = None,
+        images: list[list[str, str]] = None,
         tools: list[list[str]] = []
     ) -> list:
-        image_list = [[[image_url, 1], image_name]] if image_url else []
+        image_list = [[[image_url, 1], image_name] for image_url, image_name in images] if images else []
         return [
             [prompt, 0, None, image_list, None, None, 0],
             [language],
@@ -262,35 +259,39 @@ class Gemini(AsyncGeneratorProvider, ProviderModelMixin):
             0,
         ]
 
-    async def upload_image(connector: BaseConnector, image: bytes, image_name: str = None):
-        async with ClientSession(
-            headers=UPLOAD_IMAGE_HEADERS,
-            connector=connector
-        ) as session:
-            async with session.options(UPLOAD_IMAGE_URL) as response:
-                await raise_for_status(response)
+    async def upload_images(connector: BaseConnector, images: ImagesType) -> list:
+        async def upload_image(image: bytes, image_name: str = None):
+            async with ClientSession(
+                headers=UPLOAD_IMAGE_HEADERS,
+                connector=connector
+            ) as session:
+                image = to_bytes(image)
 
-            headers = {
-                "size": str(len(image)),
-                "x-goog-upload-command": "start"
-            }
-            data = f"File name: {image_name}" if image_name else None
-            async with session.post(
-                UPLOAD_IMAGE_URL, headers=headers, data=data
-            ) as response:
-                await raise_for_status(response)
-                upload_url = response.headers["X-Goog-Upload-Url"]
+                async with session.options(UPLOAD_IMAGE_URL) as response:
+                    await raise_for_status(response)
 
-            async with session.options(upload_url, headers=headers) as response:
-                await raise_for_status(response)
+                headers = {
+                    "size": str(len(image)),
+                    "x-goog-upload-command": "start"
+                }
+                data = f"File name: {image_name}" if image_name else None
+                async with session.post(
+                    UPLOAD_IMAGE_URL, headers=headers, data=data
+                ) as response:
+                    await raise_for_status(response)
+                    upload_url = response.headers["X-Goog-Upload-Url"]
 
-            headers["x-goog-upload-command"] = "upload, finalize"
-            headers["X-Goog-Upload-Offset"] = "0"
-            async with session.post(
-                upload_url, headers=headers, data=image
-            ) as response:
-                await raise_for_status(response)
-                return await response.text()
+                async with session.options(upload_url, headers=headers) as response:
+                    await raise_for_status(response)
+
+                headers["x-goog-upload-command"] = "upload, finalize"
+                headers["X-Goog-Upload-Offset"] = "0"
+                async with session.post(
+                    upload_url, headers=headers, data=image
+                ) as response:
+                    await raise_for_status(response)
+                    return [await response.text(), image_name]
+        return await asyncio.gather(*[upload_image(image, image_name) for image, image_name in images])
 
     @classmethod
     async def fetch_snlm0e(cls, session: ClientSession, cookies: Cookies):
diff --git a/g4f/Provider/needs_auth/GeminiPro.py b/g4f/Provider/needs_auth/GeminiPro.py
index d9204b25..36c90656 100644
--- a/g4f/Provider/needs_auth/GeminiPro.py
+++ b/g4f/Provider/needs_auth/GeminiPro.py
@@ -4,7 +4,7 @@ import base64
 import json
 from aiohttp import ClientSession, BaseConnector
 
-from ...typing import AsyncResult, Messages, ImageType
+from ...typing import AsyncResult, Messages, ImagesType
 from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
 from ...image import to_bytes, is_accepted_format
 from ...errors import MissingAuthError
@@ -36,7 +36,7 @@ class GeminiPro(AsyncGeneratorProvider, ProviderModelMixin):
         api_key: str = None,
         api_base: str = "https://generativelanguage.googleapis.com/v1beta",
         use_auth_header: bool = False,
-        image: ImageType = None,
+        images: ImagesType = None,
         connector: BaseConnector = None,
         **kwargs
     ) -> AsyncResult:
@@ -62,14 +62,15 @@ class GeminiPro(AsyncGeneratorProvider, ProviderModelMixin):
             for message in messages
             if message["role"] != "system"
         ]
-        if image is not None:
-            image = to_bytes(image)
-            contents[-1]["parts"].append({
-                "inline_data": {
-                    "mime_type": is_accepted_format(image),
-                    "data": base64.b64encode(image).decode()
-                }
-            })
+        if images is not None:
+            for image, _ in images:
+                image = to_bytes(image)
+                contents[-1]["parts"].append({
+                    "inline_data": {
+                        "mime_type": is_accepted_format(image),
+                        "data": base64.b64encode(image).decode()
+                    }
+                })
         data = {
             "contents": contents,
             "generationConfig": {
diff --git a/g4f/Provider/needs_auth/HuggingFace.py b/g4f/Provider/needs_auth/HuggingFace.py
index fd1da2a7..40db1ee7 100644
--- a/g4f/Provider/needs_auth/HuggingFace.py
+++ b/g4f/Provider/needs_auth/HuggingFace.py
@@ -18,7 +18,7 @@ class HuggingFace(AsyncGeneratorProvider, ProviderModelMixin):
     supports_message_history = True
     default_model = HuggingChat.default_model
     default_image_model = HuggingChat.default_image_model
-    models = [*HuggingChat.models, default_image_model]
+    models = HuggingChat.models
     image_models = [default_image_model]
     model_aliases = HuggingChat.model_aliases
 
diff --git a/g4f/Provider/needs_auth/OpenaiAPI.py b/g4f/Provider/needs_auth/OpenaiAPI.py
index 83268b6d..e4731ae2 100644
--- a/g4f/Provider/needs_auth/OpenaiAPI.py
+++ b/g4f/Provider/needs_auth/OpenaiAPI.py
@@ -4,7 +4,7 @@ import json
 
 from ..helper import filter_none
 from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin, FinishReason
-from ...typing import Union, Optional, AsyncResult, Messages, ImageType
+from ...typing import Union, Optional, AsyncResult, Messages, ImagesType
 from ...requests import StreamSession, raise_for_status
 from ...errors import MissingAuthError, ResponseError
 from ...image import to_data_uri
@@ -25,7 +25,7 @@ class OpenaiAPI(AsyncGeneratorProvider, ProviderModelMixin):
         messages: Messages,
         proxy: str = None,
         timeout: int = 120,
-        image: ImageType = None,
+        images: ImagesType = None,
         api_key: str = None,
         api_base: str = "https://api.openai.com/v1",
         temperature: float = None,
@@ -40,14 +40,14 @@ class OpenaiAPI(AsyncGeneratorProvider, ProviderModelMixin):
     ) -> AsyncResult:
         if cls.needs_auth and api_key is None:
             raise MissingAuthError('Add a "api_key"')
-        if image is not None:
+        if images is not None:
             if not model and hasattr(cls, "default_vision_model"):
                 model = cls.default_vision_model
             messages[-1]["content"] = [
-                {
+                *[{
                     "type": "image_url",
                     "image_url": {"url": to_data_uri(image)}
-                },
+                } for image, image_name in images],
                 {
                     "type": "text",
                     "text": messages[-1]["content"]
diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py
index 3ce5b1a1..01976198 100644
--- a/g4f/Provider/needs_auth/OpenaiChat.py
+++ b/g4f/Provider/needs_auth/OpenaiChat.py
@@ -18,7 +18,7 @@ except ImportError:
     has_nodriver = False
 
 from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
-from ...typing import AsyncResult, Messages, Cookies, ImageType, AsyncIterator
+from ...typing import AsyncResult, Messages, Cookies, ImagesType, AsyncIterator
 from ...requests.raise_for_status import raise_for_status
 from ...requests import StreamSession
 from ...requests import get_nodriver
@@ -37,14 +37,14 @@ DEFAULT_HEADERS = {
     "accept-encoding": "gzip, deflate, br, zstd",
     'accept-language': 'en-US,en;q=0.8',
     "referer": "https://chatgpt.com/",
-    "sec-ch-ua": "\"Brave\";v=\"123\", \"Not:A-Brand\";v=\"8\", \"Chromium\";v=\"123\"",
+    "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
     "sec-ch-ua-mobile": "?0",
     "sec-ch-ua-platform": "\"Windows\"",
     "sec-fetch-dest": "empty",
     "sec-fetch-mode": "cors",
     "sec-fetch-site": "same-origin",
     "sec-gpc": "1",
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
 }
 
 INIT_HEADERS = {
@@ -53,19 +53,35 @@ INIT_HEADERS = {
     'cache-control': 'no-cache',
     'pragma': 'no-cache',
     'priority': 'u=0, i',
-    'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
+    "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
     'sec-ch-ua-arch': '"arm"',
     'sec-ch-ua-bitness': '"64"',
     'sec-ch-ua-mobile': '?0',
     'sec-ch-ua-model': '""',
-    'sec-ch-ua-platform': '"macOS"',
+    "sec-ch-ua-platform": "\"Windows\"",
     'sec-ch-ua-platform-version': '"14.4.0"',
     'sec-fetch-dest': 'document',
     'sec-fetch-mode': 'navigate',
     'sec-fetch-site': 'none',
     'sec-fetch-user': '?1',
     'upgrade-insecure-requests': '1',
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+}
+
+UPLOAD_HEADERS = {
+    "accept": "application/json, text/plain, */*",
+    'accept-language': 'en-US,en;q=0.8',
+    "referer": "https://chatgpt.com/",
+    "priority": "u=1, i",
+    "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
+    "sec-ch-ua-mobile": "?0",
+    'sec-ch-ua-platform': '"macOS"',
+    "sec-fetch-dest": "empty",
+    "sec-fetch-mode": "cors",
+    "sec-fetch-site": "cross-site",
+    "x-ms-blob-type": "BlockBlob",
+    "x-ms-version": "2020-04-08",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
 }
 
 class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
@@ -78,7 +94,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
     supports_message_history = True
     supports_system_message = True
     default_model = "auto"
-    fallback_models = [default_model, "gpt-4", "gpt-4o", "gpt-4o-mini", "gpt-4o-canmore", "o1-preview", "o1-mini"]
+    fallback_models = [default_model, "gpt-4", "gpt-4o", "gpt-4o-mini", "gpt-4o-canmore", "o1", "o1-preview", "o1-mini"]
     vision_models = fallback_models
     synthesize_content_type = "audio/mpeg"
 
@@ -100,12 +116,11 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
         return cls.models
 
     @classmethod
-    async def upload_image(
+    async def upload_images(
         cls,
         session: StreamSession,
         headers: dict,
-        image: ImageType,
-        image_name: str = None
+        images: ImagesType,
     ) -> ImageRequest:
         """
         Upload an image to the service and get the download URL
@@ -113,58 +128,63 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
         Args:
             session: The StreamSession object to use for requests
            headers: The headers to include in the requests
-            image: The image to upload, either a PIL Image object or a bytes object
+            images: The images to upload, either a PIL Image object or a bytes object
 
         Returns:
             An ImageRequest object that contains the download URL, file name, and other data
         """
-        # Convert the image to a PIL Image object and get the extension
-        data_bytes = to_bytes(image)
-        image = to_image(data_bytes)
-        extension = image.format.lower()
-        data = {
-            "file_name": "" if image_name is None else image_name,
-            "file_size": len(data_bytes),
-            "use_case": "multimodal"
-        }
-        # Post the image data to the service and get the image data
-        async with session.post(f"{cls.url}/backend-api/files", json=data, headers=headers) as response:
-            cls._update_request_args(session)
-            await raise_for_status(response, "Create file failed")
-            image_data = {
-                **data,
-                **await response.json(),
-                "mime_type": is_accepted_format(data_bytes),
-                "extension": extension,
-                "height": image.height,
-                "width": image.width
-            }
-        # Put the image bytes to the upload URL and check the status
-        async with session.put(
-            image_data["upload_url"],
-            data=data_bytes,
-            headers={
-                **DEFAULT_HEADERS,
-                "Content-Type": image_data["mime_type"],
-                "x-ms-blob-type": "BlockBlob",
-                "x-ms-version": "2020-04-08",
-                "Origin": "https://chatgpt.com",
+        async def upload_image(image, image_name):
+            # Convert the image to a PIL Image object and get the extension
+            data_bytes = to_bytes(image)
+            image = to_image(data_bytes)
+            extension = image.format.lower()
+            data = {
+                "file_name": "" if image_name is None else image_name,
+                "file_size": len(data_bytes),
+                "use_case": "multimodal"
             }
-        ) as response:
-            await raise_for_status(response)
-        # Post the file ID to the service and get the download URL
-        async with session.post(
-            f"{cls.url}/backend-api/files/{image_data['file_id']}/uploaded",
-            json={},
-            headers=headers
-        ) as response:
-            cls._update_request_args(session)
-            await raise_for_status(response, "Get download url failed")
-            image_data["download_url"] = (await response.json())["download_url"]
-        return ImageRequest(image_data)
+            # Post the image data to the service and get the image data
+            async with session.post(f"{cls.url}/backend-api/files", json=data, headers=headers) as response:
+                cls._update_request_args(session)
+                await raise_for_status(response, "Create file failed")
+                image_data = {
+                    **data,
+                    **await response.json(),
+                    "mime_type": is_accepted_format(data_bytes),
+                    "extension": extension,
+                    "height": image.height,
+                    "width": image.width
+                }
+            # Put the image bytes to the upload URL and check the status
+            await asyncio.sleep(1)
+            async with session.put(
+                image_data["upload_url"],
+                data=data_bytes,
+                headers={
+                    **UPLOAD_HEADERS,
+                    "Content-Type": image_data["mime_type"],
+                    "x-ms-blob-type": "BlockBlob",
+                    "x-ms-version": "2020-04-08",
+                    "Origin": "https://chatgpt.com",
+                }
+            ) as response:
+                await raise_for_status(response)
+            # Post the file ID to the service and get the download URL
+            async with session.post(
+                f"{cls.url}/backend-api/files/{image_data['file_id']}/uploaded",
+                json={},
+                headers=headers
+            ) as response:
+                cls._update_request_args(session)
+                await raise_for_status(response, "Get download url failed")
+                image_data["download_url"] = (await response.json())["download_url"]
+            return ImageRequest(image_data)
+        if not images:
+            return
+        return [await upload_image(image, image_name) for image, image_name in images]
 
     @classmethod
-    def create_messages(cls, messages: Messages, image_request: ImageRequest = None, system_hints: list = None):
+    def create_messages(cls, messages: Messages, image_requests: ImageRequest = None, system_hints: list = None):
         """
         Create a list of messages for the user input
@@ -185,16 +205,18 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
             } for message in messages]
 
         # Check if there is an image response
-        if image_request is not None:
+        if image_requests:
             # Change content in last user message
             messages[-1]["content"] = {
                 "content_type": "multimodal_text",
-                "parts": [{
+                "parts": [*[{
                     "asset_pointer": f"file-service://{image_request.get('file_id')}",
                     "height": image_request.get("height"),
                     "size_bytes": image_request.get("file_size"),
                     "width": image_request.get("width"),
-                }, messages[-1]["content"]["parts"][0]]
+                }
+                for image_request in image_requests],
+                messages[-1]["content"]["parts"][0]]
             }
             # Add the metadata object with the attachments
             messages[-1]["metadata"] = {
@@ -205,7 +227,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
                 "name": image_request.get("file_name"),
                 "size": image_request.get("file_size"),
                 "width": image_request.get("width"),
-            }]
+            }
+            for image_request in image_requests]
         }
         return messages
 
@@ -259,8 +282,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
         conversation_id: str = None,
         conversation: Conversation = None,
         parent_id: str = None,
-        image: ImageType = None,
-        image_name: str = None,
+        images: ImagesType = None,
         return_conversation: bool = False,
         max_retries: int = 3,
         web_search: bool = False,
@@ -281,7 +303,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
             action (str): Type of action ('next', 'continue', 'variant').
             conversation_id (str): ID of the conversation.
             parent_id (str): ID of the parent message.
-            image (ImageType): Image to include in the conversation.
+            images (ImagesType): Images to include in the conversation.
             return_conversation (bool): Flag to include response fields in the output.
             **kwargs: Additional keyword arguments.
 
@@ -298,7 +320,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
             impersonate="chrome",
             timeout=timeout
         ) as session:
-            image_request = None
+            image_requests = None
             if not cls.needs_auth:
                 if cls._headers is None:
                     cls._create_request_args(cookies)
@@ -310,7 +332,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
                     cls._update_request_args(session)
                     await raise_for_status(response)
                 try:
-                    image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None
+                    image_requests = await cls.upload_images(session, cls._headers, images) if images else None
                 except Exception as e:
                     debug.log("OpenaiChat: Upload image failed")
                     debug.log(f"{e.__class__.__name__}: {e}")
@@ -384,7 +406,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin):
                     debug.log(f"OpenaiChat: Use conversation: {conversation.conversation_id}")
                 if action != "continue":
                     messages = messages if conversation_id is None else [messages[-1]]
-                    data["messages"] = cls.create_messages(messages, image_request, ["search"] if web_search else None)
+                    data["messages"] = cls.create_messages(messages, image_requests, ["search"] if web_search else None)
                 headers = {
                     **cls._headers,
                     "accept": "text/event-stream",
diff --git a/g4f/__init__.py b/g4f/__init__.py
index f59a1446..0e13c99e 100644
--- a/g4f/__init__.py
+++ b/g4f/__init__.py
@@ -2,11 +2,12 @@ from __future__ import annotations
 
 import os
 import logging
+from typing import Union, Optional
 
 from . import debug, version
 from .models import Model
 from .client import Client, AsyncClient
-from .typing import Messages, CreateResult, AsyncResult, Union
+from .typing import Messages, CreateResult, AsyncResult, ImageType
 from .errors import StreamNotSupportedError, ModelNotAllowedError
 from .cookies import get_cookies, set_cookies
 from .providers.types import ProviderType
@@ -27,32 +28,27 @@ class ChatCompletion:
                messages : Messages,
                provider : Union[ProviderType, str, None] = None,
                stream   : bool = False,
-               auth     : Union[str, None] = None,
-               ignored  : list[str] = None,
+               image    : ImageType = None,
+               image_name: Optional[str] = None,
+               ignored: list[str] = None,
                ignore_working: bool = False,
                ignore_stream: bool = False,
-               patch_provider: callable = None,
               **kwargs) -> Union[CreateResult, str]:
         model, provider = get_model_and_provider(
             model, provider, stream,
             ignored, ignore_working,
             ignore_stream or kwargs.get("ignore_stream_and_auth")
         )
-
-        if auth is not None:
-            kwargs['auth'] = auth
-
+        if image is not None:
+            kwargs["images"] = [(image, image_name)]
         if "proxy" not in kwargs:
             proxy = os.environ.get("G4F_PROXY")
             if proxy:
                 kwargs['proxy'] = proxy
 
-        if patch_provider:
-            provider = patch_provider(provider)
-
         result = provider.create_completion(model, messages, stream=stream, **kwargs)
-        return result if stream else ''.join([str(chunk) for chunk in result])
+        return result if stream else ''.join([str(chunk) for chunk in result if chunk])
 
     @staticmethod
     def create_async(model    : Union[Model, str],
@@ -61,40 +57,12 @@ class ChatCompletion:
                      stream   : bool = False,
                      ignored  : list[str] = None,
                      ignore_working: bool = False,
-                     patch_provider: callable = None,
                      **kwargs) -> Union[AsyncResult, str]:
         model, provider = get_model_and_provider(model, provider, False, ignored, ignore_working)
 
         if stream:
-            if isinstance(provider, type) and issubclass(provider, AsyncGeneratorProvider):
+            if hasattr(provider, "create_async_generator"):
                 return provider.create_async_generator(model, messages, **kwargs)
             raise StreamNotSupportedError(f'{provider.__name__} does not support "stream" argument in "create_async"')
 
-        if patch_provider:
-            provider = patch_provider(provider)
-
-        return provider.create_async(model, messages, **kwargs)
-
-class Completion:
-    @staticmethod
-    def create(model    : Union[Model, str],
-               prompt   : str,
-               provider : Union[ProviderType, None] = None,
-               stream   : bool = False,
-               ignored  : list[str] = None, **kwargs) -> Union[CreateResult, str]:
-        allowed_models = [
-            'code-davinci-002',
-            'text-ada-001',
-            'text-babbage-001',
-            'text-curie-001',
-            'text-davinci-002',
-            'text-davinci-003'
-        ]
-        if model not in allowed_models:
-            raise ModelNotAllowedError(f'Can\'t use {model} with Completion.create()')
-
-        model, provider = get_model_and_provider(model, provider, stream, ignored)
-
-        result = provider.create_completion(model, [{"role": "user", "content": prompt}], stream=stream, **kwargs)
-
-        return result if stream else ''.join(result)
+        return provider.create_async(model, messages, **kwargs)
\ No newline at end of file
diff --git a/g4f/api/__init__.py b/g4f/api/__init__.py
index b036603e..b42c5991 100644
--- a/g4f/api/__init__.py
+++ b/g4f/api/__init__.py
@@ -102,6 +102,7 @@ class ChatCompletionsConfig(BaseModel):
     stream: bool = False
     image: Optional[str] = None
     image_name: Optional[str] = None
+    images: Optional[list[tuple[str, str]]] = None
     temperature: Optional[float] = None
     max_tokens: Optional[int] = None
     stop: Union[list[str], str, None] = None
@@ -171,7 +172,7 @@ class AppConfig:
     ignored_providers: Optional[list[str]] = None
    g4f_api_key: Optional[str] = None
     ignore_cookie_files: bool = False
-    model: str = None,
+    model: str = None
     provider: str = None
     image_provider: str = None
     proxy: str = None
@@ -328,8 +329,14 @@ class Api:
             try:
                 is_data_uri_an_image(config.image)
             except ValueError as e:
-                return ErrorResponse.from_message(f"The image you send must be a data URI. Example: data:image/webp;base64,...", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
-
+                return ErrorResponse.from_message(f"The image you send must be a data URI. Example: data:image/jpeg;base64,...", status_code=HTTP_422_UNPROCESSABLE_ENTITY)
+            if config.images is not None:
+                for image in config.images:
+                    try:
+                        is_data_uri_an_image(image[0])
+                    except ValueError as e:
+                        example = json.dumps({"images": [["data:image/jpeg;base64,...", "filename"]]})
+                        return ErrorResponse.from_message(f'The image you send must be a data URI. Example: {example}', status_code=HTTP_422_UNPROCESSABLE_ENTITY)
             # Create the completion response
             response = self.client.chat.completions.create(
                 **filter_none(
@@ -522,8 +529,8 @@ def format_exception(e: Union[Exception, str], config: Union[ChatCompletionsConf
         message = f"{e.__class__.__name__}: {e}"
     return json.dumps({
         "error": {"message": message},
-        "model": last_provider.get("model") if model is None else model,
         **filter_none(
+            model=last_provider.get("model") if model is None else model,
            provider=last_provider.get("name") if provider is None else provider
         )
     })
diff --git a/g4f/client/__init__.py b/g4f/client/__init__.py
index 52349e72..ec3fec00 100644
--- a/g4f/client/__init__.py
+++ b/g4f/client/__init__.py
@@ -12,7 +12,7 @@ from ..image import ImageResponse, copy_images, images_dir
 from ..typing import Messages, ImageType
 from ..providers.types import ProviderType
 from ..providers.response import ResponseType, FinishReason, BaseConversation, SynthesizeData
-from ..errors import NoImageResponseError, MissingAuthError, NoValidHarFileError
+from ..errors import NoImageResponseError
 from ..providers.retry_provider import IterListProvider
 from ..providers.asyncio import to_sync_generator, async_generator_to_list
 from ..Provider.needs_auth import BingCreateImages, OpenaiAccount
@@ -192,6 +192,8 @@ class Completions:
         provider: Optional[ProviderType] = None,
         stream: Optional[bool] = False,
         proxy: Optional[str] = None,
+        image: Optional[ImageType] = None,
+        image_name: Optional[str] = None,
         response_format: Optional[dict] = None,
         max_tokens: Optional[int] = None,
         stop: Optional[Union[list[str], str]] = None,
@@ -210,7 +212,8 @@ class Completions:
             ignore_stream,
         )
         stop = [stop] if isinstance(stop, str) else stop
-
+        if image is not None:
+            kwargs["images"] = [(image, image_name)]
         response = provider.create_completion(
             model,
             messages,
@@ -390,8 +393,6 @@ class Images:
         e = None
         response = None
         if isinstance(provider_handler, IterListProvider):
-            # File pointer can be read only once, so we need to convert it to bytes
-            image = to_bytes(image)
             for provider in provider_handler.providers:
                 try:
                     response = await self._generate_image_response(provider, provider.__name__, model, prompt, image=image, **kwargs)
@@ -471,6 +472,8 @@ class AsyncCompletions:
         provider: Optional[ProviderType] = None,
         stream: Optional[bool] = False,
         proxy: Optional[str] = None,
+        image: Optional[ImageType] = None,
+        image_name: Optional[str] = None,
         response_format: Optional[dict] = None,
         max_tokens: Optional[int] = None,
         stop: Optional[Union[list[str], str]] = None,
@@ -489,7 +492,8 @@ class AsyncCompletions:
             ignore_stream,
         )
         stop = [stop] if isinstance(stop, str) else stop
-
+        if image is not None:
+            kwargs["images"] = [(image, image_name)]
         if hasattr(provider, "create_async_generator"):
             create_handler = provider.create_async_generator
         else:
diff --git a/g4f/gui/client/index.html b/g4f/gui/client/index.html
index b5fcd280..9c066eea 100644
--- a/g4f/gui/client/index.html
+++ b/g4f/gui/client/index.html
@@ -240,7 +240,7 @@
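After this change, every vision-capable provider takes an `images` keyword: an iterable of (image, image_name) pairs typed as ImagesType. The legacy single-image arguments remain supported and are wrapped into a one-element list (`kwargs["images"] = [(image, image_name)]`) by both `ChatCompletion.create` and the client. Below is a minimal usage sketch of the two calling styles, assuming ImagesType (defined in g4f/typing.py, whose hunk is cut off above) accepts the same bytes or PIL inputs as the old ImageType, and that the chosen model resolves to a vision-capable provider:

    import g4f
    from g4f.client import Client

    client = Client()
    with open("cat.jpg", "rb") as f1, open("dog.jpg", "rb") as f2:
        # New style: pass several images as (image, image_name) tuples;
        # the list travels through **kwargs to the provider unchanged.
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "What is in these images?"}],
            images=[(f1.read(), "cat.jpg"), (f2.read(), "dog.jpg")],
        )
    print(response.choices[0].message.content)

    # Legacy style: a single image plus optional name, converted internally
    # to images=[(image, image_name)].
    with open("cat.jpg", "rb") as f:
        text = g4f.ChatCompletion.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Describe this image."}],
            image=f.read(),
            image_name="cat.jpg",
        )
    print(text)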
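On the API side, ChatCompletionsConfig gains an `images` field holding [data_uri, filename] pairs; each entry is validated with is_data_uri_an_image and rejected with HTTP 422 if it is not an image data URI. A sketch of the corresponding request, assuming a locally running g4f API server (the /v1/chat/completions route and port 1337 are not shown in this diff and are assumed here):

    import base64
    import requests  # any HTTP client works; requests is assumed here

    with open("cat.jpg", "rb") as f:
        data_uri = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

    payload = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "What is in this image?"}],
        # Each entry is a [data_uri, filename] pair, mirroring
        # images: Optional[list[tuple[str, str]]] in ChatCompletionsConfig.
        "images": [[data_uri, "cat.jpg"]],
    }
    response = requests.post("http://localhost:1337/v1/chat/completions", json=payload)
    print(response.json())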