def get_session_from_browser(
    url: str,
    webdriver: "WebDriver" = None,
    proxy: str = None,
    timeout: int = 120,
) -> "Session":
    """Load ``url`` in a real browser and return a ``Session`` that reuses its identity.

    The page is opened through ``WebDriverSession``. If the ``<body>`` element
    carries the ``no-js`` class (treated here as the marker of a Cloudflare
    challenge page), the function makes a best-effort attempt to click the
    challenge input inside the ``#turnstile-wrapper`` iframe and then waits
    until the ``no-js`` class is gone. The browser's cookies and user agent
    are then copied into a ``curl_cffi`` ``Session``.

    Args:
        url: Page to open; also used for the ``referer``, ``origin`` and
            ``authority`` headers of the returned session.
        webdriver: Forwarded unchanged as the first argument of
            ``WebDriverSession``.
        proxy: Proxy URL forwarded to ``WebDriverSession`` and used for both
            the ``http`` and ``https`` entries of the returned session.
        timeout: Seconds to wait for the challenge page to disappear; also
            used as the timeout of the returned session.

    Returns:
        A ``curl_cffi.requests.Session`` carrying the browser's cookies, its
        user agent and browser-like request headers.

    Raises:
        selenium.common.exceptions.TimeoutException: If the ``no-js`` class is
            still present on ``<body>`` after ``timeout`` seconds.
    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import urlparse

    from curl_cffi.requests import Session
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    from .webdriver import WebDriverSession

    iframe_locator = (By.CSS_SELECTOR, "#turnstile-wrapper iframe")
    checkbox_locator = (By.CSS_SELECTOR, "#challenge-stage input")
    passed_locator = (By.CSS_SELECTOR, "body:not(.no-js)")

    # NOTE(review): the second positional argument is passed as an empty
    # string, as in the original; its meaning is defined by WebDriverSession.
    with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver:
        driver.get(url)

        # Test class membership (not whole-attribute equality) so this check
        # agrees with the ``body:not(.no-js)`` selector used for the wait.
        body_classes = driver.find_element(By.TAG_NAME, "body").get_attribute("class") or ""
        if "no-js" in body_classes.split():
            try:
                # Click the challenge input inside the Turnstile iframe.
                iframe = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(iframe_locator)
                )
                driver.switch_to.frame(iframe)
                checkbox = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(checkbox_locator)
                )
                checkbox.click()
            except Exception:
                # Best effort: the challenge may need no click at all.
                # ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                pass
            finally:
                # Always leave the iframe so later lookups hit the main page.
                driver.switch_to.default_content()

            # Block until the challenge page has been replaced.
            WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located(passed_locator)
            )

        cookies = {cookie["name"]: cookie["value"] for cookie in driver.get_cookies()}
        user_agent = driver.execute_script("return navigator.userAgent")

    parsed_url = urlparse(url)
    return Session(
        cookies=cookies,
        headers={
            'accept': '*/*',
            'authority': parsed_url.netloc,
            'origin': f'{parsed_url.scheme}://{parsed_url.netloc}',
            'referer': url,
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': user_agent
        },
        proxies={"https": proxy, "http": proxy},
        timeout=timeout,
        impersonate="chrome110"
    )