feat: openai_api_compatible support config stream_mode_delimiter (#2190)

Co-authored-by: wanggang <wanggy01@servyou.com.cn>
Co-authored-by: Chenhe Gu <guchenhe@gmail.com>
geosmart 2024-01-26 00:31:59 +08:00 committed by GitHub
parent 5fc1bd026a
commit 21450b8a51
3 changed files with 87 additions and 26 deletions


@@ -343,31 +343,43 @@ class OAIAPICompatLargeLanguageModel(_CommonOAI_API_Compat, LargeLanguageModel):
                 )
             )
-        for chunk in response.iter_lines(decode_unicode=True, delimiter='\n\n'):
+        # delimiter for stream response, need unicode_escape
+        import codecs
+        delimiter = credentials.get("stream_mode_delimiter", "\n\n")
+        delimiter = codecs.decode(delimiter, "unicode_escape")
+        for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
             if chunk:
                 decoded_chunk = chunk.strip().lstrip('data: ').lstrip()
                 chunk_json = None
                 try:
                     chunk_json = json.loads(decoded_chunk)
                 # stream ended
                 except json.JSONDecodeError as e:
+                    logger.error(f"decoded_chunk error,delimiter={delimiter},decoded_chunk={decoded_chunk}")
                     yield create_final_llm_result_chunk(
                         index=chunk_index + 1,
                         message=AssistantPromptMessage(content=""),
                         finish_reason="Non-JSON encountered."
                     )
                     break
                 if not chunk_json or len(chunk_json['choices']) == 0:
                     continue
                 choice = chunk_json['choices'][0]
+                finish_reason = chunk_json['choices'][0].get('finish_reason')
                 chunk_index += 1
                 if 'delta' in choice:
                     delta = choice['delta']
                     if delta.get('content') is None or delta.get('content') == '':
-                        continue
+                        if finish_reason is not None:
+                            yield create_final_llm_result_chunk(
+                                index=chunk_index,
+                                message=AssistantPromptMessage(content=choice.get('text', '')),
+                                finish_reason=finish_reason
+                            )
+                        else:
+                            continue
                 assistant_message_tool_calls = delta.get('tool_calls', None)
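
The heart of the change is the codecs.decode step above: the credential arrives as a plain string (the user types the literal characters \n\n into the provider form), and "unicode_escape" turns those escapes into real control characters before they reach iter_lines. A minimal standalone sketch of that step, using a hypothetical credentials dict and a fake SSE body in place of a live response:

import codecs

# Hypothetical credentials dict; the value is what a user would type into
# the provider form: backslash-n twice, not actual newlines.
credentials = {"stream_mode_delimiter": "\\n\\n"}

delimiter = credentials.get("stream_mode_delimiter", "\n\n")
delimiter = codecs.decode(delimiter, "unicode_escape")  # "\\n\\n" -> "\n\n"
assert delimiter == "\n\n"

# Split a fake SSE payload the way response.iter_lines(delimiter=...) would.
body = 'data: {"choices": [{"delta": {"content": "Hi"}}]}\n\ndata: [DONE]'
for chunk in body.split(delimiter):
    print(chunk)  # one "data: ..." event per chunk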
@@ -387,24 +399,22 @@ class OAIAPICompatLargeLanguageModel(_CommonOAI_API_Compat, LargeLanguageModel):
                     full_assistant_content += delta.get('content', '')
                 elif 'text' in choice:
-                    if choice.get('text') is None or choice.get('text') == '':
+                    choice_text = choice.get('text', '')
+                    if choice_text == '':
                         continue
                     # transform assistant message to prompt message
-                    assistant_prompt_message = AssistantPromptMessage(
-                        content=choice.get('text', '')
-                    )
-                    full_assistant_content += choice.get('text', '')
+                    assistant_prompt_message = AssistantPromptMessage(content=choice_text)
+                    full_assistant_content += choice_text
                 else:
                     continue
                 # check payload indicator for completion
-                if chunk_json['choices'][0].get('finish_reason') is not None:
+                if finish_reason is not None:
                     yield create_final_llm_result_chunk(
                         index=chunk_index,
                         message=assistant_prompt_message,
-                        finish_reason=chunk_json['choices'][0]['finish_reason']
+                        finish_reason=finish_reason
                     )
                 else:
                     yield LLMResultChunk(
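
Hoisting finish_reason out of the per-branch checks also covers streams whose final event carries an empty delta but a set finish_reason; previously the empty-content guard hit continue before the final chunk could be yielded. A terminal event of that shape, following the OpenAI streaming chunk format (id value hypothetical):

# Terminal stream event: no content in the delta, but finish_reason is set.
final_event = {
    "id": "chatcmpl-123",  # hypothetical id
    "object": "chat.completion.chunk",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
}

choice = final_event["choices"][0]
finish_reason = choice["finish_reason"]  # read before inspecting the delta
assert finish_reason == "stop"  # so the final result chunk is still emitted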


@@ -75,3 +75,12 @@ model_credential_schema:
           value: llm
       default: '4096'
       type: text-input
+    - variable: stream_mode_delimiter
+      label:
+        zh_Hans: 流模式返回结果的分隔符
+        en_US: Delimiter for streaming results
+      show_on:
+        - variable: __model_type
+          value: llm
+      default: '\n\n'
+      type: text-input
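
Note that the default '\n\n' is a single-quoted YAML scalar, so it loads as the literal four characters backslash-n-backslash-n rather than two newlines, which is exactly why the runtime decodes it with unicode_escape. A quick sanity check, assuming PyYAML:

import codecs
import yaml

# Single-quoted YAML does not process backslash escapes.
raw = yaml.safe_load("default: '\\n\\n'")["default"]
assert raw == "\\n\\n" and len(raw) == 4

decoded = codecs.decode(raw, "unicode_escape")
assert decoded == "\n\n" and len(decoded) == 2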


@@ -12,6 +12,7 @@ from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
 Using Together.ai's OpenAI-compatible API as testing endpoint
 """
+
 def test_validate_credentials():
     model = OAIAPICompatLargeLanguageModel()
@@ -34,6 +35,7 @@ def test_validate_credentials():
         }
     )
+
 def test_invoke_model():
     model = OAIAPICompatLargeLanguageModel()
@ -65,9 +67,47 @@ def test_invoke_model():
assert isinstance(response, LLMResult) assert isinstance(response, LLMResult)
assert len(response.message.content) > 0 assert len(response.message.content) > 0
def test_invoke_stream_model(): def test_invoke_stream_model():
model = OAIAPICompatLargeLanguageModel() model = OAIAPICompatLargeLanguageModel()
response = model.invoke(
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
credentials={
'api_key': os.environ.get('TOGETHER_API_KEY'),
'endpoint_url': 'https://api.together.xyz/v1/',
'mode': 'chat',
'stream_mode_delimiter': '\\n\\n'
},
prompt_messages=[
SystemPromptMessage(
content='You are a helpful AI assistant.',
),
UserPromptMessage(
content='Who are you?'
)
],
model_parameters={
'temperature': 1.0,
'top_k': 2,
'top_p': 0.5,
},
stop=['How'],
stream=True,
user="abc-123"
)
assert isinstance(response, Generator)
for chunk in response:
assert isinstance(chunk, LLMResultChunk)
assert isinstance(chunk.delta, LLMResultChunkDelta)
assert isinstance(chunk.delta.message, AssistantPromptMessage)
def test_invoke_stream_model_without_delimiter():
model = OAIAPICompatLargeLanguageModel()
response = model.invoke( response = model.invoke(
model='mistralai/Mixtral-8x7B-Instruct-v0.1', model='mistralai/Mixtral-8x7B-Instruct-v0.1',
credentials={ credentials={
@@ -100,6 +140,7 @@ def test_invoke_stream_model():
     assert isinstance(chunk.delta, LLMResultChunkDelta)
     assert isinstance(chunk.delta.message, AssistantPromptMessage)
+
 # using OpenAI's ChatGPT-3.5 as testing endpoint
 def test_invoke_chat_model_with_tools():
     model = OAIAPICompatLargeLanguageModel()
@@ -156,6 +197,7 @@ def test_invoke_chat_model_with_tools():
     assert isinstance(result.message, AssistantPromptMessage)
     assert len(result.message.tool_calls) > 0
+
 def test_get_num_tokens():
     model = OAIAPICompatLargeLanguageModel()
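
The new streaming tests call Together.ai live, so they need a real key in the environment. A hypothetical local driver for just these two tests, assuming pytest can discover the test module from the working directory:

import os
import sys

import pytest

# Bail out early instead of letting the tests fail on a missing key.
if not os.environ.get('TOGETHER_API_KEY'):
    sys.exit('TOGETHER_API_KEY is not set')

# -k matches both test_invoke_stream_model and
# test_invoke_stream_model_without_delimiter by name.
sys.exit(pytest.main(['-q', '-k', 'test_invoke_stream_model']))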