From b2459a5897589d4a60743187a532e1c1d25e63a9 Mon Sep 17 00:00:00 2001 From: "t.me/xtekky" <98614666+xtekky@users.noreply.github.com> Date: Thu, 20 Apr 2023 10:22:44 +0100 Subject: phind major improvement ( stream ) removed timeout error, added data streaming. Soon integration into gpt clone --- README.md | 19 ++++++-- phind/__init__.py | 133 +++++++++++++++++++++++++++++++++++++++++--------- testing/phind_test.py | 17 ++++++- 3 files changed, 141 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index fbd910da..e1fcb152 100644 --- a/README.md +++ b/README.md @@ -102,16 +102,27 @@ print(response.completion.choices[0].text) ### Example: `phind` (use like openai pypi package) ```python -# HELP WANTED: tls_client does not accept stream and timeout gets hit with long responses - import phind -prompt = 'hello world' +prompt = 'who won the quatar world cup' + +# help needed: not getting newlines from the stream, please submit a PR if you know how to fix this +# stream completion +for result in phind.StreamingCompletion.create( + model = 'gpt-4', + prompt = prompt, + results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet) + creative = False, + detailed = False, + codeContext = ''): # up to 3000 chars of code + + print(result.completion.choices[0].text, end='', flush=True) +# normal completion result = phind.Completion.create( model = 'gpt-4', prompt = prompt, - results = phind.Search.create(prompt, actualSearch = False), # create search (set actualSearch to False to disable internet) + results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet) creative = False, detailed = False, codeContext = '') # up to 3000 chars of code diff --git a/phind/__init__.py b/phind/__init__.py index 8177c8dd..469c09b7 100644 --- a/phind/__init__.py +++ b/phind/__init__.py @@ -1,24 +1,11 @@ from urllib.parse import quote -from tls_client import Session from time import time from datetime import datetime +from queue import Queue, Empty +from threading import Thread +from re import findall -client = Session(client_identifier='chrome110') -client.headers = { - 'authority': 'www.phind.com', - 'accept': '*/*', - 'accept-language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3', - 'content-type': 'application/json', - 'origin': 'https://www.phind.com', - 'referer': 'https://www.phind.com/search', - 'sec-ch-ua': '"Chromium";v="110", "Google Chrome";v="110", "Not:A-Brand";v="99"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"macOS"', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-origin', - 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', -} +from curl_cffi.requests import post class PhindResponse: @@ -81,11 +68,19 @@ class Search: } } - return client.post('https://www.phind.com/api/bing/search', json = { - 'q' : prompt, + headers = { + 'authority' : 'www.phind.com', + 'origin' : 'https://www.phind.com', + 'referer' : 'https://www.phind.com/search', + 'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', + } + + return post('https://www.phind.com/api/bing/search', headers = headers, json = { + 'q': prompt, 'userRankList': {}, 'browserLanguage': language}).json()['rawBingResults'] + class Completion: def create( model = 'gpt-4', @@ -121,12 +116,19 @@ class Completion: } } + headers = { + 'authority' : 'www.phind.com', + 'origin' : 'https://www.phind.com', + 'referer' : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true', + 'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', + } + completion = '' - response = client.post('https://www.phind.com/api/infer/answer', json=json_data, timeout_seconds=200) + response = post('https://www.phind.com/api/infer/answer', headers = headers, json = json_data, timeout=99999) for line in response.text.split('\r\n\r\n'): completion += (line.replace('data: ', '')) - - return PhindResponse({ + + return PhindResponse({ 'id' : f'cmpl-1337-{int(time())}', 'object' : 'text_completion', 'created': int(time()), @@ -142,4 +144,89 @@ class Completion: 'completion_tokens' : len(completion), 'total_tokens' : len(prompt) + len(completion) } - }) \ No newline at end of file + }) + + +class StreamingCompletion: + message_queue = Queue() + stream_completed = False + + def request(model, prompt, results, creative, detailed, codeContext, language) -> None: + + models = { + 'gpt-4' : 'expert', + 'gpt-3.5-turbo' : 'intermediate', + 'gpt-3.5': 'intermediate', + } + + json_data = { + 'question' : prompt, + 'bingResults' : results, + 'codeContext' : codeContext, + 'options': { + 'skill' : models[model], + 'date' : datetime.now().strftime("%d/%m/%Y"), + 'language': language, + 'detailed': detailed, + 'creative': creative + } + } + + stream_req = post('https://www.phind.com/api/infer/answer', json=json_data, timeout=99999, + content_callback = StreamingCompletion.handle_stream_response, + headers = { + 'authority' : 'www.phind.com', + 'origin' : 'https://www.phind.com', + 'referer' : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true', + 'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36', + }) + + StreamingCompletion.stream_completed = True + + @staticmethod + def create( + model : str = 'gpt-4', + prompt : str = '', + results : dict = None, + creative : bool = False, + detailed : bool = False, + codeContext : str = '', + language : str = 'en'): + + if results is None: + results = Search.create(prompt, actualSearch = True) + + if len(codeContext) > 2999: + raise ValueError('codeContext must be less than 3000 characters') + + Thread(target = StreamingCompletion.request, args = [ + model, prompt, results, creative, detailed, codeContext, language]).start() + + while StreamingCompletion.stream_completed != True or not StreamingCompletion.message_queue.empty(): + try: + message = StreamingCompletion.message_queue.get(timeout=0) + for token in findall(r'(?<=data: )(.+?)(?=\r\n\r\n)', message.decode()): + yield PhindResponse({ + 'id' : f'cmpl-1337-{int(time())}', + 'object' : 'text_completion', + 'created': int(time()), + 'model' : model, + 'choices': [{ + 'text' : token, + 'index' : 0, + 'logprobs' : None, + 'finish_reason' : 'stop' + }], + 'usage': { + 'prompt_tokens' : len(prompt), + 'completion_tokens' : len(token), + 'total_tokens' : len(prompt) + len(token) + } + }) + + except Empty: + pass + + @staticmethod + def handle_stream_response(response): + StreamingCompletion.message_queue.put(response) \ No newline at end of file diff --git a/testing/phind_test.py b/testing/phind_test.py index e3148eb6..6b78f02d 100644 --- a/testing/phind_test.py +++ b/testing/phind_test.py @@ -2,6 +2,7 @@ import phind prompt = 'hello world' +# normal completion result = phind.Completion.create( model = 'gpt-4', prompt = prompt, @@ -10,4 +11,18 @@ result = phind.Completion.create( detailed = False, codeContext = '') # up to 3000 chars of code -print(result.completion.choices[0].text) \ No newline at end of file +print(result.completion.choices[0].text) + +prompt = 'who won the quatar world cup' + +# help needed: not getting newlines from the stream, please submit a PR if you know how to fix this +# stream completion +for result in phind.StreamingCompletion.create( + model = 'gpt-3.5', + prompt = prompt, + results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet) + creative = False, + detailed = False, + codeContext = ''): # up to 3000 chars of code + + print(result.completion.choices[0].text, end='', flush=True) \ No newline at end of file -- cgit v1.2.3