From 0d59789eedf3784cf4c3aaf764785a4ad91723c4 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Wed, 1 Jan 2025 14:01:33 +0100 Subject: Add File API Documentation for Python and JS Format Bucket Placeholder in GUI --- docs/file.md | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 docs/file.md (limited to 'docs') diff --git a/docs/file.md b/docs/file.md new file mode 100644 index 00000000..be20d8f0 --- /dev/null +++ b/docs/file.md @@ -0,0 +1,182 @@ +## G4F - File API Documentation with Web Download and Enhanced File Support + +This document details the enhanced G4F File API, allowing users to upload files, download files from web URLs, and process a wider range of file types for integration with language models. + +**Key Improvements:** + +* **Web URL Downloads:** Upload a `downloads.json` file to your bucket containing a list of URLs. The API will download and process these files. Example: `[{"url": "https://example.com/document.pdf"}]` + +* **Expanded File Support:** Added support for additional plain text file extensions: `.txt`, `.xml`, `.json`, `.js`, `.har`, `.sh`, `.py`, `.php`, `.css`, `.yaml`, `.sql`, `.log`, `.csv`, `.twig`, `.md`. Binary file support remains for `.pdf`, `.html`, `.docx`, `.odt`, `.epub`, `.xlsx`, and `.zip`. + +* **Server-Sent Events (SSE):** SSE are now used to provide asynchronous updates on file download and processing progress. This improves the user experience, particularly for large files and multiple downloads. + + +**API Endpoints:** + +* **Upload:** `/v1/files/{bucket_id}` (POST) + + * **Method:** POST + * **Path Parameters:** `bucket_id` (an identifier you generate yourself, for example a UUID) + * **Body:** Multipart/form-data with files OR a `downloads.json` file containing URLs. + * **Response:** JSON object with `bucket_id`, `url`, and a list of uploaded/downloaded filenames. 
+ + +* **Retrieve:** `/v1/files/{bucket_id}` (GET) + + * **Method:** GET + * **Path Parameters:** `bucket_id` + * **Query Parameters:** + * `delete_files`: (Optional, boolean, default `true`) Delete files after retrieval. + * `refine_chunks_with_spacy`: (Optional, boolean, default `false`) Apply spaCy-based refinement. + * **Response:** Streaming response with extracted text, separated by ``` markers. SSE updates are sent if the `Accept` header includes `text/event-stream`. + + +**Example Usage (Python):** + +```python +import requests +import uuid +import json + +def upload_and_process(files_or_urls, bucket_id=None): + if bucket_id is None: + bucket_id = str(uuid.uuid4()) + + if isinstance(files_or_urls, list): #URLs + files = {'files': ('downloads.json', json.dumps(files_or_urls), 'application/json')} + elif isinstance(files_or_urls, dict): #Files + files = files_or_urls + else: + raise ValueError("files_or_urls must be a list of URLs or a dictionary of files") + + upload_response = requests.post(f'http://localhost:1337/v1/files/{bucket_id}', files=files) + + if upload_response.status_code == 200: + upload_data = upload_response.json() + print(f"Upload successful. 
Bucket ID: {upload_data['bucket_id']}") + else: + print(f"Upload failed: {upload_response.status_code} - {upload_response.text}") + + response = requests.get(f'http://localhost:1337/v1/files/{bucket_id}', stream=True, headers={'Accept': 'text/event-stream'}) + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith('data:'): + try: + data = json.loads(line[5:]) #remove data: prefix + if "action" in data: + print(f"SSE Event: {data}") + elif "error" in data: + print(f"Error: {data['error']['message']}") + else: + print(f"File data received: {data}") #Assuming it's file content + except json.JSONDecodeError as e: + print(f"Error decoding JSON: {e}") + else: + print(f"Unhandled SSE event: {line}") + response.close() + +# Example with URLs +urls = [{"url": "https://github.com/xtekky/gpt4free/issues"}] +bucket_id = upload_and_process(urls) + +#Example with files +files = {'files': open('document.pdf', 'rb'), 'files': open('data.json', 'rb')} +bucket_id = upload_and_process(files) +``` + + +**Example Usage (JavaScript):** + +```javascript +function uuid() { + return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c => + (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16) + ); +} + +async function upload_files_or_urls(data) { + let bucket_id = uuid(); // Use a random generated key for your bucket + + let formData = new FormData(); + if (typeof data === "object" && data.constructor === Array) { //URLs + const blob = new Blob([JSON.stringify(data)], { type: 'application/json' }); + const file = new File([blob], 'downloads.json', { type: 'application/json' }); // Create File object + formData.append('files', file); // Append as a file + } else { //Files + Array.from(data).forEach(file => { + formData.append('files', file); + }); + } + + await fetch("/v1/files/" + bucket_id, { + method: 'POST', + body: formData + }); + + function connectToSSE(url) { + const eventSource = new EventSource(url); + 
eventSource.onmessage = (event) => { + const data = JSON.parse(event.data); + if (data.error) { + console.error("Error:", data.error.message); + } else if (data.action === "done") { + console.log("Files loaded successfully. Bucket ID:", bucket_id); + // Use bucket_id in your LLM prompt. + const prompt = `Use files from bucket. ${JSON.stringify({"bucket_id": bucket_id})} to answer this: ...your question...`; + // ... Send prompt to your language model ... + } else { + console.log("SSE Event:", data); // Update UI with progress as needed + } + }; + eventSource.onerror = (event) => { + console.error("SSE Error:", event); + eventSource.close(); + }; + } + + connectToSSE(`/v1/files/${bucket_id}`); //Retrieve and refine +} + +// Example with URLs +const urls = [{"url": "https://github.com/xtekky/gpt4free/issues"}]; +upload_files_or_urls(urls) + +// Example with files (using a file input element) +const fileInput = document.getElementById('fileInput'); +fileInput.addEventListener('change', () => { + upload_files_or_urls(fileInput.files); +}); +``` + +**Integrating with `ChatCompletion`:** + +To incorporate file uploads into your client applications, include the `tool_calls` parameter in your chat completion requests, using the `bucket_tool` function. The `bucket_id` is passed as a JSON object within your prompt. + + +```json +{ + "messages": [ + { + "role": "user", + "content": "Answer this question using the files in the specified bucket: ...your question...\n{\"bucket_id\": \"your_actual_bucket_id\"}" + } + ], + "tool_calls": [ + { + "function": { + "name": "bucket_tool" + }, + "type": "function" + } + ] +} +``` + +**Important Considerations:** + +* **Error Handling:** Implement robust error handling in both Python and JavaScript to gracefully manage potential issues during file uploads, downloads, and API interactions. +* **Dependencies:** Ensure all required packages are installed (`pip install -U g4f[files]` for Python). 
+ +--- +[Return to Home](/) \ No newline at end of file -- cgit v1.2.3