summaryrefslogtreecommitdiffstats
path: root/g4f/api/_tokenizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'g4f/api/_tokenizer.py')
-rw-r--r--g4f/api/_tokenizer.py9
1 files changed, 9 insertions, 0 deletions
diff --git a/g4f/api/_tokenizer.py b/g4f/api/_tokenizer.py
new file mode 100644
index 00000000..fd8f9d5a
--- /dev/null
+++ b/g4f/api/_tokenizer.py
@@ -0,0 +1,9 @@
+from typing import List, Tuple, Union
+
+import tiktoken
+
+def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Union[int, str]:
+ encoding = tiktoken.encoding_for_model(model)
+ encoded = encoding.encode(text)
+ num_tokens = len(encoded)
+
+ return num_tokens, encoded \ No newline at end of file