From 0d59789eedf3784cf4c3aaf764785a4ad91723c4 Mon Sep 17 00:00:00 2001
From: Heiner Lohaus <hlohaus@users.noreply.github.com>
Date: Wed, 1 Jan 2025 14:01:33 +0100
Subject: Add File API Documentation for Python and JS Format Bucket
 Placeholder in GUI

---
 docs/file.md | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 docs/file.md

(limited to 'docs')

diff --git a/docs/file.md b/docs/file.md
new file mode 100644
index 00000000..be20d8f0
--- /dev/null
+++ b/docs/file.md
@@ -0,0 +1,182 @@
+## G4F - File API Documentation with Web Download and Enhanced File Support
+
+This document details the enhanced G4F File API, allowing users to upload files, download files from web URLs, and process a wider range of file types for integration with language models.
+
+**Key Improvements:**
+
+* **Web URL Downloads:**  Upload a `downloads.json` file to your bucket containing a list of URLs. The API will download and process these files.  Example: `[{"url": "https://example.com/document.pdf"}]`
+
+* **Expanded File Support:**  Added support for additional plain text file extensions:  `.txt`, `.xml`, `.json`, `.js`, `.har`, `.sh`, `.py`, `.php`, `.css`, `.yaml`, `.sql`, `.log`, `.csv`, `.twig`, `.md`.  Binary file support remains for `.pdf`, `.html`, `.docx`, `.odt`, `.epub`, `.xlsx`, and `.zip`.
+
+* **Server-Sent Events (SSE):**  Server-sent events are now used to provide asynchronous updates on file download and processing progress. This improves the user experience, particularly for large files and multiple downloads.
+
+
+**API Endpoints:**
+
+* **Upload:** `/v1/files/{bucket_id}` (POST)
+
+    * **Method:** POST
+    * **Path Parameters:** `bucket_id` (generated by you, for example a UUID)
+    * **Body:** Multipart/form-data with files OR a `downloads.json` file containing URLs.
+    * **Response:** JSON object with `bucket_id`, `url`, and a list of uploaded/downloaded filenames.
+
+
+* **Retrieve:** `/v1/files/{bucket_id}` (GET)
+
+    * **Method:** GET
+    * **Path Parameters:** `bucket_id`
+    * **Query Parameters:**
+        * `delete_files`: (Optional, boolean, default `true`) Delete files after retrieval.
+        * `refine_chunks_with_spacy`: (Optional, boolean, default `false`) Apply spaCy-based refinement.
+    * **Response:** Streaming response with extracted text, separated by `` ``` `` markers.  SSE updates are sent if the `Accept` header includes `text/event-stream`.
+
+
+**Example Usage (Python):**
+
+```python
+import requests
+import uuid
+import json
+
+def upload_and_process(files_or_urls, bucket_id=None):
+    if bucket_id is None:
+        bucket_id = str(uuid.uuid4())
+    
+    if isinstance(files_or_urls, list): #URLs
+        files = {'files': ('downloads.json', json.dumps(files_or_urls), 'application/json')}
+    elif isinstance(files_or_urls, dict): #Files
+        files = files_or_urls
+    else:
+        raise ValueError("files_or_urls must be a list of URLs or a dictionary of files")
+
+    upload_response = requests.post(f'http://localhost:1337/v1/files/{bucket_id}', files=files)
+
+    if upload_response.status_code == 200:
+        upload_data = upload_response.json()
+        print(f"Upload successful. Bucket ID: {upload_data['bucket_id']}")
+    else:
+        print(f"Upload failed: {upload_response.status_code} - {upload_response.text}")
+
+    response = requests.get(f'http://localhost:1337/v1/files/{bucket_id}', stream=True, headers={'Accept': 'text/event-stream'})
+    for line in response.iter_lines():
+      if line:
+          line = line.decode('utf-8')
+          if line.startswith('data:'):
+              try:
+                  data = json.loads(line[5:]) #remove data: prefix
+                  if "action" in data:
+                      print(f"SSE Event: {data}")
+                  elif "error" in data:
+                      print(f"Error: {data['error']['message']}")
+                  else:
+                      print(f"File data received: {data}") #Assuming it's file content
+              except json.JSONDecodeError as e:
+                  print(f"Error decoding JSON: {e}")
+          else:
+              print(f"Unhandled SSE event: {line}")
+    response.close()
+
+# Example with URLs
+urls = [{"url": "https://github.com/xtekky/gpt4free/issues"}]
+bucket_id = upload_and_process(urls)
+
+#Example with files
+files = {'files': open('document.pdf', 'rb'), 'files': open('data.json', 'rb')}
+bucket_id = upload_and_process(files)
+```
+
+
+**Example Usage (JavaScript):**
+
+```javascript
+function uuid() {
+    return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
+      (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
+    );
+}
+
+async function upload_files_or_urls(data) {
+    let bucket_id = uuid(); // Use a random generated key for your bucket
+
+    let formData = new FormData();
+    if (typeof data === "object" && data.constructor === Array) { //URLs
+        const blob = new Blob([JSON.stringify(data)], { type: 'application/json' });
+        const file = new File([blob], 'downloads.json', { type: 'application/json' }); // Create File object
+        formData.append('files', file); // Append as a file
+    } else { //Files
+        Array.from(data).forEach(file => {
+            formData.append('files', file);
+        });
+    }
+
+    await fetch("/v1/files/" + bucket_id, {
+        method: 'POST',
+        body: formData
+    });
+
+    function connectToSSE(url) {
+        const eventSource = new EventSource(url);
+        eventSource.onmessage = (event) => {
+            const data = JSON.parse(event.data);
+            if (data.error) {
+                console.error("Error:", data.error.message);
+            } else if (data.action === "done") {
+                console.log("Files loaded successfully. Bucket ID:", bucket_id);
+                // Use bucket_id in your LLM prompt.
+                const prompt = `Use files from bucket. ${JSON.stringify({"bucket_id": bucket_id})} to answer this: ...your question...`;
+                // ... Send prompt to your language model ...
+            } else {
+                console.log("SSE Event:", data); // Update UI with progress as needed
+            }
+        };
+        eventSource.onerror = (event) => {
+            console.error("SSE Error:", event);
+            eventSource.close();
+        };
+    }
+
+    connectToSSE(`/v1/files/${bucket_id}`); //Retrieve and refine
+}
+
+// Example with URLs
+const urls = [{"url": "https://github.com/xtekky/gpt4free/issues"}];
+upload_files_or_urls(urls)
+
+// Example with files (using a file input element)
+const fileInput = document.getElementById('fileInput');
+fileInput.addEventListener('change', () => {
+    upload_files_or_urls(fileInput.files);
+});
+```
+
+**Integrating with `ChatCompletion`:**
+
+To incorporate file uploads into your client applications, include the `tool_calls` parameter in your chat completion requests, using the `bucket_tool` function.  The `bucket_id` is passed as a JSON object within your prompt.
+
+
+```json
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "Answer this question using the files in the specified bucket: ...your question...\n{\"bucket_id\": \"your_actual_bucket_id\"}"
+    }
+  ],
+  "tool_calls": [
+    {
+      "function": {
+        "name": "bucket_tool"
+      },
+      "type": "function"
+    }
+  ]
+}
+```
+
+**Important Considerations:**
+
+* **Error Handling:** Implement robust error handling in both Python and JavaScript to gracefully manage potential issues during file uploads, downloads, and API interactions.
+* **Dependencies:** Ensure all required packages are installed (`pip install -U g4f[files]` for Python).
+
+---  
+[Return to Home](/)
\ No newline at end of file
-- 
cgit v1.2.3