From e5b7f72b719814ffa2748e8e8ed1c6713a24e1a6 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Thu, 22 Feb 2024 00:16:58 +0100 Subject: Move some modules, create providers dir Set min version for duckduckgo Make duckduckgo search async Remove get_lastet_version --- g4f/requests/__init__.py | 56 ++++++++++++++++++++++++++++++++++ g4f/requests/aiohttp.py | 30 ++++++++++++++++++ g4f/requests/curl_cffi.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++ g4f/requests/defaults.py | 13 ++++++++ 4 files changed, 176 insertions(+) create mode 100644 g4f/requests/__init__.py create mode 100644 g4f/requests/aiohttp.py create mode 100644 g4f/requests/curl_cffi.py create mode 100644 g4f/requests/defaults.py (limited to 'g4f/requests') diff --git a/g4f/requests/__init__.py b/g4f/requests/__init__.py new file mode 100644 index 00000000..d278ffaf --- /dev/null +++ b/g4f/requests/__init__.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from urllib.parse import urlparse + +try: + from curl_cffi.requests import Session + from .curl_cffi import StreamResponse, StreamSession + has_curl_cffi = True +except ImportError: + from typing import Type as Session + from .aiohttp import StreamResponse, StreamSession + has_curl_cffi = False + +from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies +from ..errors import MissingRequirementsError +from .defaults import DEFAULT_HEADERS + +def get_args_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> dict: + """ + Create a Session object using a WebDriver to handle cookies and headers. + + Args: + url (str): The URL to navigate to using the WebDriver. + webdriver (WebDriver, optional): The WebDriver instance to use. + proxy (str, optional): Proxy server to use for the Session. + timeout (int, optional): Timeout in seconds for the WebDriver. + + Returns: + Session: A Session object configured with cookies and headers from the WebDriver. + """ + with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver: + bypass_cloudflare(driver, url, timeout) + cookies = get_driver_cookies(driver) + user_agent = driver.execute_script("return navigator.userAgent") + parse = urlparse(url) + return { + 'cookies': cookies, + 'headers': { + **DEFAULT_HEADERS, + 'Authority': parse.netloc, + 'Origin': f'{parse.scheme}://{parse.netloc}', + 'Referer': url, + 'User-Agent': user_agent, + }, + } + +def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session: + if not has_curl_cffi: + raise MissingRequirementsError('Install "curl_cffi" package') + args = get_args_from_browser(url, webdriver, proxy, timeout) + return Session( + **args, + proxies={"https": proxy, "http": proxy}, + timeout=timeout, + impersonate="chrome110" + ) \ No newline at end of file diff --git a/g4f/requests/aiohttp.py b/g4f/requests/aiohttp.py new file mode 100644 index 00000000..d9bd6541 --- /dev/null +++ b/g4f/requests/aiohttp.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from aiohttp import ClientSession, ClientResponse, ClientTimeout +from typing import AsyncGenerator, Any + +from ..providers.helper import get_connector +from .defaults import DEFAULT_HEADERS + +class StreamResponse(ClientResponse): + async def iter_lines(self) -> AsyncGenerator[bytes, None]: + async for line in self.content: + yield line.rstrip(b"\r\n") + + async def json(self) -> Any: + return await super().json(content_type=None) + +class StreamSession(ClientSession): + def __init__(self, headers: dict = {}, timeout: int = None, proxies: dict = {}, impersonate = None, **kwargs): + if impersonate: + headers = { + **DEFAULT_HEADERS, + **headers + } + super().__init__( + **kwargs, + timeout=ClientTimeout(timeout) if timeout else None, + response_class=StreamResponse, + connector=get_connector(kwargs.get("connector"), proxies.get("https")), + headers=headers + ) \ No newline at end of file diff --git a/g4f/requests/curl_cffi.py b/g4f/requests/curl_cffi.py new file mode 100644 index 00000000..cfcdd63b --- /dev/null +++ b/g4f/requests/curl_cffi.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from curl_cffi.requests import AsyncSession, Response +from typing import AsyncGenerator, Any +from functools import partialmethod +import json + +class StreamResponse: + """ + A wrapper class for handling asynchronous streaming responses. + + Attributes: + inner (Response): The original Response object. + """ + + def __init__(self, inner: Response) -> None: + """Initialize the StreamResponse with the provided Response object.""" + self.inner: Response = inner + + async def text(self) -> str: + """Asynchronously get the response text.""" + return await self.inner.atext() + + def raise_for_status(self) -> None: + """Raise an HTTPError if one occurred.""" + self.inner.raise_for_status() + + async def json(self, **kwargs) -> Any: + """Asynchronously parse the JSON response content.""" + return json.loads(await self.inner.acontent(), **kwargs) + + async def iter_lines(self) -> AsyncGenerator[bytes, None]: + """Asynchronously iterate over the lines of the response.""" + async for line in self.inner.aiter_lines(): + yield line + + async def iter_content(self) -> AsyncGenerator[bytes, None]: + """Asynchronously iterate over the response content.""" + async for chunk in self.inner.aiter_content(): + yield chunk + + async def __aenter__(self): + """Asynchronously enter the runtime context for the response object.""" + inner: Response = await self.inner + self.inner = inner + self.request = inner.request + self.status: int = inner.status_code + self.reason: str = inner.reason + self.ok: bool = inner.ok + self.headers = inner.headers + self.cookies = inner.cookies + return self + + async def __aexit__(self, *args): + """Asynchronously exit the runtime context for the response object.""" + await self.inner.aclose() + +class StreamSession(AsyncSession): + """ + An asynchronous session class for handling HTTP requests with streaming. + + Inherits from AsyncSession. + """ + + def request( + self, method: str, url: str, **kwargs + ) -> StreamResponse: + """Create and return a StreamResponse object for the given HTTP request.""" + return StreamResponse(super().request(method, url, stream=True, **kwargs)) + + # Defining HTTP methods as partial methods of the request method. + head = partialmethod(request, "HEAD") + get = partialmethod(request, "GET") + post = partialmethod(request, "POST") + put = partialmethod(request, "PUT") + patch = partialmethod(request, "PATCH") + delete = partialmethod(request, "DELETE") diff --git a/g4f/requests/defaults.py b/g4f/requests/defaults.py new file mode 100644 index 00000000..6ae6d7eb --- /dev/null +++ b/g4f/requests/defaults.py @@ -0,0 +1,13 @@ +DEFAULT_HEADERS = { + 'Accept': '*/*', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US', + 'Connection': 'keep-alive', + 'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', + 'Sec-Ch-Ua-Mobile': '?0', + 'Sec-Ch-Ua-Platform': '"Windows"', + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-site', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' +} \ No newline at end of file -- cgit v1.2.3