From 335c971f6a9cd071d18f9fffeb76df4eda8876d5 Mon Sep 17 00:00:00 2001 From: H Lohaus Date: Fri, 13 Dec 2024 22:20:58 +0100 Subject: Add multiple images support (#2478) * Add multiple images support * Add multiple images support in gui * Support multiple images in legacy client and in the api Fix some model names in provider model list * Fix unittests * Add vision and providers docs --- docs/vision.md | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 docs/vision.md (limited to 'docs/vision.md') diff --git a/docs/vision.md b/docs/vision.md new file mode 100644 index 00000000..34a8ca39 --- /dev/null +++ b/docs/vision.md @@ -0,0 +1,83 @@ +## Vision Support in Chat Completion + +This document gives an overview of how to use vision support in chat completions through the API and through the client. It includes examples to guide you through the process. + +### Example with the API + +To use vision support in chat completion with the API, follow the example below: + +```python +import requests +import json +from g4f.image import to_data_uri +from g4f.requests.raise_for_status import raise_for_status + +url = "http://localhost:8080/v1/chat/completions" +body = { + "model": "", + "provider": "Copilot", + "messages": [ + {"role": "user", "content": "What is in this image?"} + ], + "images": [ + ["data:image/jpeg;base64,...", "cat.jpeg"] + ] +} +response = requests.post(url, json=body, headers={"g4f-api-key": "secret"}) +raise_for_status(response) +print(response.json()) +``` + +In this example: +- `url` is the endpoint for the chat completion API. +- `body` contains the model, provider, messages, and images. +- `messages` is a list of message objects with roles and content. +- `images` is a list of `[data_uri, filename]` pairs: the image data in Data URI format and an optional filename. +- `response` stores the API response. 
+ +### Example with the Client + +To use vision support in chat completion with the client, follow the example below: + +```python +import g4f +import g4f.Provider + +def chat_completion(prompt): + client = g4f.Client(provider=g4f.Provider.Blackbox) + images = [ + [open("docs/images/waterfall.jpeg", "rb"), "waterfall.jpeg"], + [open("docs/images/cat.webp", "rb"), "cat.webp"] + ] + response = client.chat.completions.create([{"content": prompt, "role": "user"}], "", images=images) + print(response.choices[0].message.content) + +prompt = "What is in these images?" +chat_completion(prompt) +``` + +``` +**Image 1** + +* A waterfall with a rainbow +* Lush greenery surrounding the waterfall +* A stream flowing from the waterfall + +**Image 2** + +* A white cat with blue eyes +* A bird perched on a window sill +* Sunlight streaming through the window +``` + +In this example: +- `client` is a new client created with the specified provider. +- `images` is a list of `[image, filename]` pairs: the image data and an optional filename. +- `response` stores the response from the client. +- The `chat_completion` function prints the chat completion output. + +### Notes + +- Multiple images can be sent. Each image is given as a pair: the image data (in Data URI format for the API) and an optional filename. +- The client supports bytes, IO objects, and PIL images as input. +- Ensure you use a provider that supports vision and multiple images. \ No newline at end of file -- cgit v1.2.3