summaryrefslogtreecommitdiffstats
path: root/g4f/api/_tokenizer.py
blob: fd8f9d5a09b59270f2739a4007c94106e9e0ee05 (plain) (blame)
1
2
3
4
5
6
7
8
9
from typing import List, Tuple, Union

import tiktoken

def tokenize(text: str, model: str = 'gpt-3.5-turbo') -> Tuple[int, List[int]]:
    """Encode *text* with the tiktoken encoding selected for *model*.

    Args:
        text: The string to tokenize.
        model: Model name handed to ``tiktoken.encoding_for_model`` to
            pick the encoding.

    Returns:
        A ``(num_tokens, encoded)`` pair: the number of tokens produced
        and the encoded token sequence itself.

    Note:
        No error handling is done here; anything raised by
        ``tiktoken.encoding_for_model`` (e.g. for a model name it does
        not recognise) or by ``encode`` propagates to the caller.
    """
    encoding = tiktoken.encoding_for_model(model)
    encoded = encoding.encode(text)

    return len(encoded), encoded