From 484b96d850aca9b9144f3b8dd2fb502b25356c22 Mon Sep 17 00:00:00 2001 From: H Lohaus Date: Thu, 7 Dec 2023 07:18:05 +0100 Subject: Add websearch to gui (#1314) * Add websearch to gui * Fix version_check config * Add version badge in README.md * Show version in gui * Add docker hub build * Fix gui backend, improve style --- g4f/Provider/MyShell.py | 18 +--- g4f/Provider/unfinished/AiChatting.py | 66 ++++++++++++ g4f/Provider/unfinished/__init__.py | 3 +- g4f/__init__.py | 57 +++++++---- g4f/debug.py | 3 +- g4f/gui/client/css/style.css | 3 + g4f/gui/client/html/index.html | 4 + g4f/gui/client/js/chat.v1.js | 15 +++ g4f/gui/server/backend.py | 24 +++-- g4f/gui/server/internet.py | 187 +++++++++++++++++++++++++--------- g4f/requests.py | 28 +---- g4f/webdriver.py | 48 +++++++-- 12 files changed, 333 insertions(+), 123 deletions(-) create mode 100644 g4f/Provider/unfinished/AiChatting.py (limited to 'g4f') diff --git a/g4f/Provider/MyShell.py b/g4f/Provider/MyShell.py index 2ee94bb6..b0a01016 100644 --- a/g4f/Provider/MyShell.py +++ b/g4f/Provider/MyShell.py @@ -5,7 +5,7 @@ import time, json from ..typing import CreateResult, Messages from .base_provider import BaseProvider from .helper import format_prompt -from ..webdriver import WebDriver, WebDriverSession +from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare class MyShell(BaseProvider): url = "https://app.myshell.ai/chat" @@ -25,16 +25,8 @@ class MyShell(BaseProvider): **kwargs ) -> CreateResult: with WebDriverSession(webdriver, "", proxy=proxy) as driver: - from selenium.webdriver.common.by import By - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.support import expected_conditions as EC - - driver.get(cls.url) - - # Wait for page load and cloudflare validation - WebDriverWait(driver, timeout).until( - EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)")) - ) + bypass_cloudflare(driver, cls.url, timeout) + # Send request with message data = { "botId": "4738", @@ -58,11 +50,11 @@ window._reader = response.body.pipeThrough(new TextDecoderStream()).getReader(); driver.execute_script(script.replace("{body}", json.dumps(data))) script = """ chunk = await window._reader.read(); -if (chunk['done']) { +if (chunk.done) { return null; } content = ''; -chunk['value'].split('\\n').forEach((line, index) => { +chunk.value.split('\\n').forEach((line, index) => { if (line.startsWith('data: ')) { try { const data = JSON.parse(line.substring('data: '.length)); diff --git a/g4f/Provider/unfinished/AiChatting.py b/g4f/Provider/unfinished/AiChatting.py new file mode 100644 index 00000000..a66921c1 --- /dev/null +++ b/g4f/Provider/unfinished/AiChatting.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from urllib.parse import unquote + +from ...typing import AsyncResult, Messages +from ..base_provider import BaseProvider +from ...webdriver import WebDriver +from ...requests import Session, get_session_from_browser + +class AiChatting(BaseProvider): + url = "https://www.aichatting.net" + supports_gpt_35_turbo = True + _session: Session = None + + @classmethod + def create_completion( + cls, + model: str, + messages: Messages, + stream: bool, + proxy: str = None, + timeout: int = 120, + webdriver: WebDriver = None, + **kwargs + ) -> AsyncResult: + if not cls._session: + cls._session = get_session_from_browser(cls.url, webdriver, proxy, timeout) + visitorId = unquote(cls._session.cookies.get("aichatting.website.visitorId")) + + headers = { + "accept": "application/json, text/plain, */*", + "lang": "en", + "source": "web" + } + data = { + "roleId": 0, + } + try: + response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/record/conversation/create", json=data, headers=headers) + response.raise_for_status() + conversation_id = response.json()["data"]["conversationId"] + except Exception as e: + cls.reset() + raise e + headers = { + "authority": "aga-api.aichatting.net", + "accept": "text/event-stream,application/json, text/event-stream", + "lang": "en", + "source": "web", + "vtoken": visitorId, + } + data = { + "spaceHandle": True, + "roleId": 0, + "messages": messages, + "conversationId": conversation_id, + } + response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/v2/stream", json=data, headers=headers, stream=True) + response.raise_for_status() + for chunk in response.iter_lines(): + if chunk.startswith(b"data:"): + yield chunk[5:].decode().replace("-=- --", " ").replace("-=-n--", "\n").replace("--@DONE@--", "") + + @classmethod + def reset(cls): + cls._session = None \ No newline at end of file diff --git a/g4f/Provider/unfinished/__init__.py b/g4f/Provider/unfinished/__init__.py index 22e021be..eb5e8825 100644 --- a/g4f/Provider/unfinished/__init__.py +++ b/g4f/Provider/unfinished/__init__.py @@ -1,3 +1,4 @@ from .MikuChat import MikuChat from .Komo import Komo -from .ChatAiGpt import ChatAiGpt \ No newline at end of file +from .ChatAiGpt import ChatAiGpt +from .AiChatting import AiChatting \ No newline at end of file diff --git a/g4f/__init__.py b/g4f/__init__.py index 8bfed8e3..92bce194 100644 --- a/g4f/__init__.py +++ b/g4f/__init__.py @@ -1,25 +1,44 @@ from __future__ import annotations -from requests import get -from .models import Model, ModelUtils, _all_models -from .Provider import BaseProvider, AsyncGeneratorProvider, RetryProvider -from .typing import Messages, CreateResult, AsyncResult, Union, List -from . import debug -version = '0.1.9.2' -version_check = True +import os +from requests import get +from importlib.metadata import version as get_package_version, PackageNotFoundError +from subprocess import check_output, CalledProcessError, PIPE -def check_pypi_version() -> None: - try: - response = get("https://pypi.org/pypi/g4f/json").json() - latest_version = response["info"]["version"] +from .models import Model, ModelUtils, _all_models +from .Provider import BaseProvider, AsyncGeneratorProvider, RetryProvider +from .typing import Messages, CreateResult, AsyncResult, Union, List +from . import debug - if version != latest_version: - print(f'New pypi version: {latest_version} (current: {version}) | pip install -U g4f') - return False - return True +def get_version() -> str: + # Read from package manager + try: + return get_package_version("g4f") + except PackageNotFoundError: + pass + # Read from docker environment + current_version = os.environ.get("G4F_VERSION") + if current_version: + return current_version + # Read from git repository + try: + command = ["git", "describe", "--tags", "--abbrev=0"] + return check_output(command, text=True, stderr=PIPE).strip() + except CalledProcessError: + pass + +def get_lastet_version() -> str: + response = get("https://pypi.org/pypi/g4f/json").json() + return response["info"]["version"] +def check_pypi_version() -> None: + try: + version = get_version() + latest_version = get_lastet_version() except Exception as e: print(f'Failed to check g4f pypi version: {e}') + if version != latest_version: + print(f'New pypi version: {latest_version} (current: {version}) | pip install -U g4f') def get_model_and_provider(model : Union[Model, str], provider : Union[type[BaseProvider], None], @@ -27,6 +46,9 @@ def get_model_and_provider(model : Union[Model, str], ignored : List[str] = None, ignore_working: bool = False, ignore_stream: bool = False) -> tuple[Model, type[BaseProvider]]: + if debug.version_check: + check_pypi_version() + debug.version_check = False if isinstance(model, str): if model in ModelUtils.convert: @@ -118,7 +140,4 @@ class Completion: result = provider.create_completion(model.name, [{"role": "user", "content": prompt}], stream, **kwargs) - return result if stream else ''.join(result) - -if version_check: - check_pypi_version() \ No newline at end of file + return result if stream else ''.join(result) \ No newline at end of file diff --git a/g4f/debug.py b/g4f/debug.py index 558a2428..984d973a 100644 --- a/g4f/debug.py +++ b/g4f/debug.py @@ -1 +1,2 @@ -logging = False \ No newline at end of file +logging = False +version_check = True \ No newline at end of file diff --git a/g4f/gui/client/css/style.css b/g4f/gui/client/css/style.css index 2700de8b..254a4b15 100644 --- a/g4f/gui/client/css/style.css +++ b/g4f/gui/client/css/style.css @@ -211,6 +211,9 @@ body { .convo-title { color: var(--colour-3); font-size: 14px; + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; } .message { diff --git a/g4f/gui/client/html/index.html b/g4f/gui/client/html/index.html index 66534a51..53c028d7 100644 --- a/g4f/gui/client/html/index.html +++ b/g4f/gui/client/html/index.html @@ -88,6 +88,10 @@ github ~ @gpt4free +
+ + +
diff --git a/g4f/gui/client/js/chat.v1.js b/g4f/gui/client/js/chat.v1.js index 2844b73e..2a1bdd73 100644 --- a/g4f/gui/client/js/chat.v1.js +++ b/g4f/gui/client/js/chat.v1.js @@ -628,4 +628,19 @@ observer.observe(message_input, { attributes: true }); option.value = option.text = provider; select.appendChild(option); } +})(); + +(async () => { + response = await fetch('/backend-api/v2/version') + versions = await response.json() + + document.title = 'g4f - gui - ' + versions["version"]; + text = "version ~ " + if (versions["version"] != versions["lastet_version"]) { + release_url = 'https://github.com/xtekky/gpt4free/releases/tag/' + versions["lastet_version"]; + text += '' + versions["version"] + ' 🆕'; + } else { + text += versions["version"]; + } + document.getElementById("version_text").innerHTML = text })(); \ No newline at end of file diff --git a/g4f/gui/server/backend.py b/g4f/gui/server/backend.py index e1abb764..082e31b2 100644 --- a/g4f/gui/server/backend.py +++ b/g4f/gui/server/backend.py @@ -1,8 +1,7 @@ import g4f from flask import request -from .internet import search -from .config import special_instructions +from .internet import get_search_message g4f.debug.logging = True @@ -18,6 +17,10 @@ class Backend_Api: 'function': self.providers, 'methods' : ['GET'] }, + '/backend-api/v2/version': { + 'function': self.version, + 'methods' : ['GET'] + }, '/backend-api/v2/conversation': { 'function': self._conversation, 'methods': ['POST'] @@ -45,6 +48,12 @@ class Backend_Api: provider.__name__ for provider in g4f.Provider.__providers__ if provider.working and provider is not g4f.Provider.RetryProvider ] + + def version(self): + return { + "version": g4f.get_version(), + "lastet_version": g4f.get_lastet_version(), + } def _gen_title(self): return { @@ -53,14 +62,15 @@ class Backend_Api: def _conversation(self): try: - #jailbreak = request.json['jailbreak'] - #internet_access = request.json['meta']['content']['internet_access'] - #conversation = request.json['meta']['content']['conversation'] + #jailbreak = request.json['jailbreak'] + web_search = request.json['meta']['content']['internet_access'] messages = request.json['meta']['content']['parts'] + if web_search: + messages[-1]["content"] = get_search_message(messages[-1]["content"]) model = request.json.get('model') model = model if model else g4f.models.default - provider = request.json.get('provider', 'Auto').replace('g4f.Provider.', '') - provider = provider if provider != "Auto" else None + provider = request.json.get('provider').replace('g4f.Provider.', '') + provider = provider if provider and provider != "Auto" else None if provider != None: provider = g4f.Provider.ProviderUtils.convert.get(provider) diff --git a/g4f/gui/server/internet.py b/g4f/gui/server/internet.py index 220a6e7c..9a14e25f 100644 --- a/g4f/gui/server/internet.py +++ b/g4f/gui/server/internet.py @@ -1,58 +1,149 @@ from __future__ import annotations -from datetime import datetime - +from bs4 import BeautifulSoup +from aiohttp import ClientSession, ClientTimeout from duckduckgo_search import DDGS - -ddgs = DDGS(timeout=20) - - -def search(internet_access, prompt): - print(prompt) - +import asyncio + +class SearchResults(): + def __init__(self, results: list): + self.results = results + + def __iter__(self): + yield from self.results + + def __str__(self): + search = "" + for idx, result in enumerate(self.results): + if search: + search += "\n\n\n" + search += f"Title: {result.title}\n\n" + if result.text: + search += result.text + else: + search += result.snippet + search += f"\n\nSource: [[{idx}]]({result.url})" + return search + +class SearchResultEntry(): + def __init__(self, title: str, url: str, snippet: str, text: str = None): + self.title = title + self.url = url + self.snippet = snippet + self.text = text + + def set_text(self, text: str): + self.text = text + +def scrape_text(html: str, max_words: int = None) -> str: + soup = BeautifulSoup(html, "html.parser") + for exclude in soup(["script", "style"]): + exclude.extract() + for selector in [ + "main", + ".main-content-wrapper", + ".main-content", + ".emt-container-inner", + ".content-wrapper", + "#content", + "#mainContent", + ]: + select = soup.select_one(selector) + if select: + soup = select + break + # Zdnet + for remove in [".c-globalDisclosure"]: + select = soup.select_one(remove) + if select: + select.extract() + clean_text = "" + for paragraph in soup.select("p"): + text = paragraph.get_text() + for line in text.splitlines(): + words = [] + for word in line.replace("\t", " ").split(" "): + if word: + words.append(word) + count = len(words) + if not count: + continue + if max_words: + max_words -= count + if max_words <= 0: + break + if clean_text: + clean_text += "\n" + clean_text += " ".join(words) + + return clean_text + +async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = None) -> str: try: - if not internet_access: - return [] - - results = duckduckgo_search(q=prompt) - - if not search: - return [] + async with session.get(url) as response: + if response.status == 200: + html = await response.text() + return scrape_text(html, max_words) + except: + return + +async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults: + with DDGS() as ddgs: + results = [] + for result in ddgs.text( + query, + region="wt-wt", + safesearch="moderate", + timelimit="y", + ): + results.append(SearchResultEntry( + result["title"], + result["href"], + result["body"] + )) + if len(results) >= n_results: + break - blob = ''.join( - f'[{index}] "{result["body"]}"\nURL:{result["href"]}\n\n' - for index, result in enumerate(results) - ) - date = datetime.now().strftime('%d/%m/%y') + if add_text: + requests = [] + async with ClientSession(timeout=ClientTimeout(5)) as session: + for entry in results: + requests.append(fetch_and_scrape(session, entry.url, int(max_words / (n_results - 1)))) + texts = await asyncio.gather(*requests) + + formatted_results = [] + left_words = max_words; + for i, entry in enumerate(results): + if add_text: + entry.text = texts[i] + if left_words: + left_words -= entry.title.count(" ") + 5 + if entry.text: + left_words -= entry.text.count(" ") + else: + left_words -= entry.snippet.count(" ") + if 0 > left_words: + break + formatted_results.append(entry) + + return SearchResults(formatted_results) + + +def get_search_message(prompt) -> str: + try: + search_results = asyncio.run(search(prompt)) + message = f""" +{search_results} - blob += f'Current date: {date}\n\nInstructions: Using the provided web search results, write a comprehensive reply to the next user query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject. Ignore your previous response if any.' - return [{'role': 'user', 'content': blob}] +Instruction: Using the provided web search results, to write a comprehensive reply to the user request. +Make sure to add the sources of cites using [[Number]](Url) notation after the reference. Example: [[0]](http://google.com) +If the provided search results refer to multiple subjects with the same name, write separate answers for each subject. +User request: +{prompt} +""" + return message except Exception as e: print("Couldn't search DuckDuckGo:", e) - print(e.__traceback__.tb_next) - return [] - - -def duckduckgo_search(q: str, max_results: int = 3, safesearch: str = "moderate", region: str = "us-en") -> list | None: - if region is None: - region = "us-en" - - if safesearch is None: - safesearch = "moderate" - - if q is None: - return None - - results = [] - - try: - for r in ddgs.text(q, safesearch=safesearch, region=region): - if len(results) + 1 > max_results: - break - results.append(r) - except Exception as e: - print(e) - - return results + return prompt diff --git a/g4f/requests.py b/g4f/requests.py index 8cf70ac9..00ab9488 100644 --- a/g4f/requests.py +++ b/g4f/requests.py @@ -6,10 +6,7 @@ from functools import partialmethod from typing import AsyncGenerator from urllib.parse import urlparse from curl_cffi.requests import AsyncSession, Session, Response -from .webdriver import WebDriver, WebDriverSession -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC +from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare class StreamResponse: def __init__(self, inner: Response) -> None: @@ -58,28 +55,7 @@ class StreamSession(AsyncSession): def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120): with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver: - driver.get(url) - - # Is cloudflare protection - if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": - try: - # Click button in iframe - WebDriverWait(driver, 5).until( - EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe")) - ) - driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe")) - WebDriverWait(driver, 5).until( - EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input")) - ) - driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click() - except: - pass - finally: - driver.switch_to.default_content() - # No cloudflare protection - WebDriverWait(driver, timeout).until( - EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)")) - ) + bypass_cloudflare(driver, url, timeout) cookies = dict([(cookie["name"], cookie["value"]) for cookie in driver.get_cookies()]) user_agent = driver.execute_script("return navigator.userAgent") diff --git a/g4f/webdriver.py b/g4f/webdriver.py index f0fa1fba..d274c619 100644 --- a/g4f/webdriver.py +++ b/g4f/webdriver.py @@ -1,10 +1,12 @@ from __future__ import annotations -import time from platformdirs import user_config_dir from selenium.webdriver.remote.webdriver import WebDriver from undetected_chromedriver import Chrome, ChromeOptions -import os.path +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from os import path from . import debug try: @@ -21,16 +23,47 @@ def get_browser( ) -> WebDriver: if user_data_dir == None: user_data_dir = user_config_dir("g4f") - if debug.logging: - print("Open browser with config dir:", user_data_dir) + if user_data_dir and debug.logging: + print("Open browser with config dir:", user_data_dir) if not options: options = ChromeOptions() if proxy: options.add_argument(f'--proxy-server={proxy}') driver = '/usr/bin/chromedriver' - if not os.path.isfile(driver): + if not path.isfile(driver): driver = None - return Chrome(options=options, user_data_dir=user_data_dir, driver_executable_path=driver, headless=headless) + return Chrome( + options=options, + user_data_dir=user_data_dir, + driver_executable_path=driver, + headless=headless + ) + +def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None: + # Open website + driver.get(url) + # Is cloudflare protection + if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js": + if debug.logging: + print("Cloudflare protection detected:", url) + try: + # Click button in iframe + WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe")) + ) + driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe")) + WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input")) + ) + driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click() + except: + pass + finally: + driver.switch_to.default_content() + # No cloudflare protection + WebDriverWait(driver, timeout).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)")) + ) class WebDriverSession(): def __init__( @@ -47,7 +80,7 @@ class WebDriverSession(): self.headless = headless self.virtual_display = None if has_pyvirtualdisplay and virtual_display: - self.virtual_display = Display(size=(1920,1080)) + self.virtual_display = Display(size=(1920, 1080)) self.proxy = proxy self.options = options self.default_driver = None @@ -82,7 +115,6 @@ class WebDriverSession(): self.default_driver.close() except: pass - time.sleep(0.1) self.default_driver.quit() if self.virtual_display: self.virtual_display.stop() \ No newline at end of file -- cgit v1.2.3