summary refs log tree commit diff stats
path: root/g4f/gui/server
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--  g4f/gui/server/backend.py   24
-rw-r--r--  g4f/gui/server/internet.py  187
2 files changed, 156 insertions, 55 deletions
diff --git a/g4f/gui/server/backend.py b/g4f/gui/server/backend.py
index e1abb764..082e31b2 100644
--- a/g4f/gui/server/backend.py
+++ b/g4f/gui/server/backend.py
@@ -1,8 +1,7 @@
import g4f
from flask import request
-from .internet import search
-from .config import special_instructions
+from .internet import get_search_message
g4f.debug.logging = True
@@ -18,6 +17,10 @@ class Backend_Api:
'function': self.providers,
'methods' : ['GET']
},
+ '/backend-api/v2/version': {
+ 'function': self.version,
+ 'methods' : ['GET']
+ },
'/backend-api/v2/conversation': {
'function': self._conversation,
'methods': ['POST']
@@ -45,6 +48,12 @@ class Backend_Api:
provider.__name__ for provider in g4f.Provider.__providers__
if provider.working and provider is not g4f.Provider.RetryProvider
]
+
+ def version(self):
+ return {
+ "version": g4f.get_version(),
+ "lastet_version": g4f.get_lastet_version(),
+ }
def _gen_title(self):
return {
@@ -53,14 +62,15 @@ class Backend_Api:
def _conversation(self):
try:
- #jailbreak = request.json['jailbreak']
- #internet_access = request.json['meta']['content']['internet_access']
- #conversation = request.json['meta']['content']['conversation']
+ #jailbreak = request.json['jailbreak']
+ web_search = request.json['meta']['content']['internet_access']
messages = request.json['meta']['content']['parts']
+ if web_search:
+ messages[-1]["content"] = get_search_message(messages[-1]["content"])
model = request.json.get('model')
model = model if model else g4f.models.default
- provider = request.json.get('provider', 'Auto').replace('g4f.Provider.', '')
- provider = provider if provider != "Auto" else None
+ provider = request.json.get('provider').replace('g4f.Provider.', '')
+ provider = provider if provider and provider != "Auto" else None
if provider != None:
provider = g4f.Provider.ProviderUtils.convert.get(provider)
diff --git a/g4f/gui/server/internet.py b/g4f/gui/server/internet.py
index 220a6e7c..9a14e25f 100644
--- a/g4f/gui/server/internet.py
+++ b/g4f/gui/server/internet.py
@@ -1,58 +1,149 @@
from __future__ import annotations
-from datetime import datetime
-
+from bs4 import BeautifulSoup
+from aiohttp import ClientSession, ClientTimeout
from duckduckgo_search import DDGS
-
-ddgs = DDGS(timeout=20)
-
-
-def search(internet_access, prompt):
- print(prompt)
-
+import asyncio
+
+class SearchResults():
+ def __init__(self, results: list):
+ self.results = results
+
+ def __iter__(self):
+ yield from self.results
+
+ def __str__(self):
+ search = ""
+ for idx, result in enumerate(self.results):
+ if search:
+ search += "\n\n\n"
+ search += f"Title: {result.title}\n\n"
+ if result.text:
+ search += result.text
+ else:
+ search += result.snippet
+ search += f"\n\nSource: [[{idx}]]({result.url})"
+ return search
+
+class SearchResultEntry():
+ def __init__(self, title: str, url: str, snippet: str, text: str = None):
+ self.title = title
+ self.url = url
+ self.snippet = snippet
+ self.text = text
+
+ def set_text(self, text: str):
+ self.text = text
+
+def scrape_text(html: str, max_words: int = None) -> str:
+ soup = BeautifulSoup(html, "html.parser")
+ for exclude in soup(["script", "style"]):
+ exclude.extract()
+ for selector in [
+ "main",
+ ".main-content-wrapper",
+ ".main-content",
+ ".emt-container-inner",
+ ".content-wrapper",
+ "#content",
+ "#mainContent",
+ ]:
+ select = soup.select_one(selector)
+ if select:
+ soup = select
+ break
+ # Zdnet
+ for remove in [".c-globalDisclosure"]:
+ select = soup.select_one(remove)
+ if select:
+ select.extract()
+ clean_text = ""
+ for paragraph in soup.select("p"):
+ text = paragraph.get_text()
+ for line in text.splitlines():
+ words = []
+ for word in line.replace("\t", " ").split(" "):
+ if word:
+ words.append(word)
+ count = len(words)
+ if not count:
+ continue
+ if max_words:
+ max_words -= count
+ if max_words <= 0:
+ break
+ if clean_text:
+ clean_text += "\n"
+ clean_text += " ".join(words)
+
+ return clean_text
+
+async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = None) -> str:
try:
- if not internet_access:
- return []
-
- results = duckduckgo_search(q=prompt)
-
- if not search:
- return []
+ async with session.get(url) as response:
+ if response.status == 200:
+ html = await response.text()
+ return scrape_text(html, max_words)
+ except:
+ return
+
+async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults:
+ with DDGS() as ddgs:
+ results = []
+ for result in ddgs.text(
+ query,
+ region="wt-wt",
+ safesearch="moderate",
+ timelimit="y",
+ ):
+ results.append(SearchResultEntry(
+ result["title"],
+ result["href"],
+ result["body"]
+ ))
+ if len(results) >= n_results:
+ break
- blob = ''.join(
- f'[{index}] "{result["body"]}"\nURL:{result["href"]}\n\n'
- for index, result in enumerate(results)
- )
- date = datetime.now().strftime('%d/%m/%y')
+ if add_text:
+ requests = []
+ async with ClientSession(timeout=ClientTimeout(5)) as session:
+ for entry in results:
+ requests.append(fetch_and_scrape(session, entry.url, int(max_words / (n_results - 1))))
+ texts = await asyncio.gather(*requests)
+
+ formatted_results = []
+ left_words = max_words;
+ for i, entry in enumerate(results):
+ if add_text:
+ entry.text = texts[i]
+ if left_words:
+ left_words -= entry.title.count(" ") + 5
+ if entry.text:
+ left_words -= entry.text.count(" ")
+ else:
+ left_words -= entry.snippet.count(" ")
+ if 0 > left_words:
+ break
+ formatted_results.append(entry)
+
+ return SearchResults(formatted_results)
+
+
+def get_search_message(prompt) -> str:
+ try:
+ search_results = asyncio.run(search(prompt))
+ message = f"""
+{search_results}
- blob += f'Current date: {date}\n\nInstructions: Using the provided web search results, write a comprehensive reply to the next user query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject. Ignore your previous response if any.'
- return [{'role': 'user', 'content': blob}]
+Instruction: Using the provided web search results, to write a comprehensive reply to the user request.
+Make sure to add the sources of cites using [[Number]](Url) notation after the reference. Example: [[0]](http://google.com)
+If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
+User request:
+{prompt}
+"""
+ return message
except Exception as e:
print("Couldn't search DuckDuckGo:", e)
- print(e.__traceback__.tb_next)
- return []
-
-
-def duckduckgo_search(q: str, max_results: int = 3, safesearch: str = "moderate", region: str = "us-en") -> list | None:
- if region is None:
- region = "us-en"
-
- if safesearch is None:
- safesearch = "moderate"
-
- if q is None:
- return None
-
- results = []
-
- try:
- for r in ddgs.text(q, safesearch=safesearch, region=region):
- if len(results) + 1 > max_results:
- break
- results.append(r)
- except Exception as e:
- print(e)
-
- return results
+ return prompt