summary | refs | log | tree | commit | diff | stats
path: root/tool
diff options
context:
space:
mode:
Diffstat (limited to 'tool')
-rw-r--r-- tool/provider_init.py 33
-rw-r--r-- tool/readme_table.py 103
-rw-r--r-- tool/vercel.py 103
3 files changed, 239 insertions, 0 deletions
diff --git a/tool/provider_init.py b/tool/provider_init.py
new file mode 100644
index 00000000..22f21d4d
--- /dev/null
+++ b/tool/provider_init.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+
def main():
    """Regenerate ``g4f/provider/__init__.py`` from ``create_content()``.

    The target path is relative to the current working directory and the
    file is overwritten with UTF-8 encoded text.
    """
    target = Path("g4f/provider/__init__.py")
    target.write_text(create_content(), encoding="utf-8")
+
+
def create_content():
    """Build the source text of the provider package's ``__init__.py``.

    Every ``g4f/provider/*.py`` file (relative to the current working
    directory) except ``__init__.py`` and ``base_provider.py`` contributes
    one ``from .<stem> import <stem>`` line, i.e. the file stem is used both
    as the module name and as the name imported from it.

    Returns:
        The generated module text: the ``BaseProvider`` import, one import
        per discovered module, two blank lines, and an ``__all__`` list that
        starts with ``"BaseProvider"`` followed by the discovered names.
    """
    excluded = {"__init__.py", "base_provider.py"}

    # Path.glob() does not promise any ordering, so sort the stems; otherwise
    # the generated file would differ from one filesystem/run to the next.
    classnames = sorted(
        p.stem for p in Path().glob("g4f/provider/*.py") if p.name not in excluded
    )

    import_content = "\n".join(f"from .{name} import {name}" for name in classnames)

    # BaseProvider is imported separately in the template below but is still
    # exported, and always listed first.
    exported = ["BaseProvider", *classnames]
    all_content = ",\n".join(f' "{name}"' for name in exported)
    all_content = f"__all__ = [\n{all_content},\n]"

    return f"""from .base_provider import BaseProvider
{import_content}


{all_content}
"""
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tool/readme_table.py b/tool/readme_table.py
new file mode 100644
index 00000000..a4d07adf
--- /dev/null
+++ b/tool/readme_table.py
@@ -0,0 +1,103 @@
+import re
+import sys
+from pathlib import Path
+from urllib.parse import urlparse
+
+sys.path.append(str(Path(__file__).parent.parent))
+
+from g4f import models, provider
+from g4f.provider.base_provider import BaseProvider
+
+
def main():
    """Print the provider table, a dashed separator, then the model table."""
    separator = "-" * 50
    print_providers()
    print("\n", separator, "\n")
    print_models()
+
+
def print_providers():
    """Print a Markdown table with one row per provider from get_providers().

    Each row shows a link built from the provider's ``url``, its dotted
    ``g4f.provider.<name>`` path, check/cross marks for the
    ``supports_gpt_35_turbo``, ``supports_gpt_4`` and ``supports_stream``
    flags, an Active/Inactive badge driven by ``working``, and a mark for
    ``needs_auth``.
    """
    active_badge = "![Active](https://img.shields.io/badge/Active-brightgreen)"
    inactive_badge = "![Inactive](https://img.shields.io/badge/Inactive-red)"

    def mark(flag):
        # Render a truthy/falsy flag as a check mark or a cross.
        return "✔️" if flag else "❌"

    rows = [
        "| Website| Provider| gpt-3.5 | gpt-4 | Streaming | Status | Auth |",
        "| ------ | ------- | ------- | ----- | --------- | ------ | ---- |",
    ]
    for provider_cls in get_providers():
        netloc = urlparse(provider_cls.url).netloc
        website = f"[{netloc}]({provider_cls.url})"
        provider_name = f"g4f.provider.{provider_cls.__name__}"

        has_gpt_35 = mark(provider_cls.supports_gpt_35_turbo)
        has_gpt_4 = mark(provider_cls.supports_gpt_4)
        stream = mark(provider_cls.supports_stream)
        status = active_badge if provider_cls.working else inactive_badge
        auth = mark(provider_cls.needs_auth)

        rows.append(
            f"| {website} | {provider_name} | {has_gpt_35} | {has_gpt_4} | {stream} | {status} | {auth} |"
        )

    print("\n".join(rows))
+
+
def get_providers() -> list[type[BaseProvider]]:
    """Collect the provider objects exposed by the ``provider`` module.

    Walks ``dir(provider)`` and returns the attribute for every name that
    does not start with a double underscore and is neither
    ``base_provider`` nor ``BaseProvider``. Selection is purely name-based.
    """
    skipped = ("base_provider", "BaseProvider")
    collected = []
    for attr_name in dir(provider):
        if attr_name.startswith("__") or attr_name in skipped:
            continue
        collected.append(getattr(provider, attr_name))
    return collected
+
+
def print_models():
    """Print a Markdown table with one row per model from get_models().

    Columns are: the short model name (the part of ``model.name`` after the
    last ``:`` or ``/``), the display name of the base provider, the dotted
    ``g4f.provider.<name>`` path of the model's best provider, and a link to
    that provider's website.

    Base providers and provider classes missing from the lookup tables below
    no longer abort the whole table with a ``KeyError``: an unknown base
    provider is printed under its raw identifier, and an unknown provider
    class falls back to its own ``url`` attribute.
    """
    base_provider_names = {
        "cohere": "Cohere",
        "google": "Google",
        "openai": "OpenAI",
        "anthropic": "Anthropic",
        "replicate": "Replicate",
        "huggingface": "Huggingface",
    }
    provider_urls = {
        "Bard": "https://bard.google.com/",
        "H2o": "https://www.h2o.ai/",
        "Vercel": "https://sdk.vercel.ai/",
    }

    lines = [
        "| Model | Base Provider | Provider | Website |",
        "| ----- | ------------- | -------- | ------- |",
    ]

    for model in get_models():
        # Names look like "vendor:family/model"; keep only the last segment.
        name = re.split(r":|/", model.name)[-1]

        base_provider = base_provider_names.get(
            model.base_provider, model.base_provider
        )

        best_provider = model.best_provider
        provider_name = f"g4f.provider.{best_provider.__name__}"

        provider_url = provider_urls.get(best_provider.__name__)
        if provider_url is None:
            # NOTE(review): assumes the provider class exposes a `url`
            # attribute, as print_providers() reads from provider classes;
            # confirm this holds for every best_provider.
            provider_url = best_provider.url

        netloc = urlparse(provider_url).netloc
        website = f"[{netloc}]({provider_url})"
        lines.append(f"| {name} | {base_provider} | {provider_name} | {website} |")

    print("\n".join(lines))
+
+
def get_models():
    """Return the ``models.Model`` instances defined at module level in ``models``.

    Only objects whose exact type is ``models.Model`` are kept, and the two
    models named ``gpt-3.5-turbo`` and ``gpt-4`` are left out. The result
    follows the definition order of the ``models`` module namespace.
    """
    excluded_names = ["gpt-3.5-turbo", "gpt-4"]
    selected = []
    for candidate in vars(models).values():
        if type(candidate) is not models.Model:
            continue
        if candidate.name in excluded_names:
            continue
        selected.append(candidate)
    return selected
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tool/vercel.py b/tool/vercel.py
new file mode 100644
index 00000000..7b87e298
--- /dev/null
+++ b/tool/vercel.py
@@ -0,0 +1,103 @@
+import json
+import re
+from typing import Any
+
+import quickjs
+from curl_cffi import requests
+
+session = requests.Session(impersonate="chrome107")
+
+
def get_model_info() -> dict[str, Any]:
    """Scrape the model table embedded in the sdk.vercel.ai JavaScript bundles.

    Downloads the landing page, extracts the ``static/chunks...js`` paths it
    references, then fetches those scripts one at a time until one contains
    the models object literal. That literal is evaluated in a QuickJS
    context and converted to a Python dict through JSON.

    Returns:
        The decoded models mapping, or an empty dict when no script matches.
    """
    url = "https://sdk.vercel.ai"
    paths_regex = r"static\/chunks.+?\.js"
    separator_regex = r'"\]\)<\/script><script>self\.__next_f\.push\(\[.,"'
    models_regex = r'let .="\\n\\nHuman:\",r=(.+?),.='
    stop_sequences_regex = r"(?<=stopSequences:{value:\[)\D(?<!\])"

    html = session.get(url).text

    # A path can be split across two inline <script> pushes; strip the glue.
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # scripts are always probed in the same order (a set would shuffle them).
    paths = list(
        dict.fromkeys(
            re.sub(separator_regex, "", path) for path in re.findall(paths_regex, html)
        )
    )

    for path in paths:
        # Fetch lazily: stop downloading as soon as the models chunk is found.
        script = session.get(f"{url}/_next/{path}").text
        match = re.search(models_regex, script)
        if match is None:
            continue

        # The single character following "stopSequences:{value:[" (non-digit,
        # not "]") is swapped for the string literal itself; presumably it is
        # the minified variable bound by the `let .=` prefix matched above.
        # A function replacement inserts the text verbatim; re.escape() is
        # documented as unsuitable for building replacement strings.
        models_str = re.sub(
            stop_sequences_regex, lambda _m: '"\\n\\nHuman:"', match.group(1)
        )

        # NOTE(review): this evaluates JavaScript downloaded from the remote
        # site inside the QuickJS context; treat it as running untrusted code.
        context = quickjs.Context()  # type: ignore
        json_str: str = context.eval(f"({models_str})").json()  # type: ignore
        return json.loads(json_str)  # type: ignore

    return {}
+
+
def convert_model_info(models: dict[str, Any]) -> dict[str, Any]:
    """Reduce each model entry to its ``id`` and default parameter values.

    For every ``name -> params`` pair, the result maps ``name`` to a dict
    holding ``params["id"]`` under ``"id"`` and the output of
    ``params_to_default_params(params["parameters"])`` under
    ``"default_params"``.
    """

    def _entry(params: dict[str, Any]) -> dict[str, Any]:
        # Parameters are resolved before the id is read, as in the loop form.
        defaults = params_to_default_params(params["parameters"])
        return {"id": params["id"], "default_params": defaults}

    return {model_name: _entry(params) for model_name, params in models.items()}
+
+
def params_to_default_params(parameters: dict[str, Any]):
    """Map each parameter name to its ``"value"`` entry.

    The key ``maximumLength`` is renamed to ``maxTokens``; all other keys
    are kept unchanged.
    """
    return {
        ("maxTokens" if key == "maximumLength" else key): parameter["value"]
        for key, parameter in parameters.items()
    }
+
+
def get_model_names(model_info: dict[str, Any]):
    """Return the keys of ``model_info`` as a sorted list.

    The keys ``openai:gpt-4`` and ``openai:gpt-3.5-turbo`` are left out.
    """
    skipped = ("openai:gpt-4", "openai:gpt-3.5-turbo")
    return sorted(name for name in model_info if name not in skipped)
+
+
def print_providers(model_names: list[str]):
    """Print one ``<variable> = Model(...)`` assignment per model name.

    Each name is split on ``:`` and ``/``: the first segment becomes the
    ``base_provider`` argument, and the last segment, with ``-`` turned into
    ``_`` and ``.`` removed, becomes the variable name. Every printed line is
    followed by an empty line.
    """
    to_identifier = str.maketrans({"-": "_", ".": None})
    for name in model_names:
        segments = re.split(r":|/", name)
        base_provider = segments[0]
        variable_name = segments[-1].translate(to_identifier)
        print(
            f'{variable_name} = Model(name="{name}", base_provider="{base_provider}", best_provider=Vercel,)\n'
        )
+
+
def print_convert(model_names: list[str]):
    """Print one ``"<key>": <variable>,`` mapping line per model name.

    The key is the last segment of the name after splitting on ``:`` and
    ``/``; the variable is that same segment with ``-`` turned into ``_`` and
    ``.`` removed, e.g. ``"claude-instant-v1": claude_instant_v1,``.
    """
    to_identifier = str.maketrans({"-": "_", ".": None})
    for name in model_names:
        key = re.split(r":|/", name)[-1]
        variable_name = key.translate(to_identifier)
        print(f' "{key}": {variable_name},')
+
+
def main():
    """Fetch the Vercel model info and print three generated sections.

    Output order: the converted model info as indented JSON, a dashed
    divider, the ``Model(...)`` assignments, another divider, and the
    name-to-variable mapping lines.
    """
    divider = "-------" * 40

    converted = convert_model_info(get_model_info())
    print(json.dumps(converted, indent=2))

    names = get_model_names(converted)
    print(divider)
    print_providers(names)
    print(divider)
    print_convert(names)
+
+
+if __name__ == "__main__":
+ main()