dify/api/tests/integration_tests/model_runtime/xinference/test_llm.py

392 lines
13 KiB
Python
Raw Permalink Normal View History

import os
from typing import Generator
import pytest
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessageTool,
SystemPromptMessage, TextPromptMessageContent,
UserPromptMessage)
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
"""FOR MOCK FIXTURES, DO NOT REMOVE"""
from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for the chat model.

    Two bad credential sets (a model UID prefixed with ``'www '`` and a pair
    of empty strings) must each raise ``CredentialsValidateFailedError``.
    The server URL and chat model UID read from the environment must then
    validate without raising.
    """
    llm = XinferenceAILargeLanguageModel()
    server_url = os.environ.get('XINFERENCE_SERVER_URL')
    chat_model_uid = os.environ.get('XINFERENCE_CHAT_MODEL_UID')

    # A UID with an extra prefix is expected to be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        prefixed_uid_credentials = {
            'server_url': server_url,
            'model_uid': 'www ' + chat_model_uid,
        }
        llm.validate_credentials(model='ChatGLM3', credentials=prefixed_uid_credentials)

    # Empty server URL and UID are expected to be rejected as well.
    with pytest.raises(CredentialsValidateFailedError):
        empty_credentials = {'server_url': '', 'model_uid': ''}
        llm.validate_credentials(model='aaaaa', credentials=empty_credentials)

    # The unmodified environment values must pass (no exception).
    environment_credentials = {'server_url': server_url, 'model_uid': chat_model_uid}
    llm.validate_credentials(model='ChatGLM3', credentials=environment_credentials)
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model with ``stream=False`` and inspect the result.

    The call must return an ``LLMResult`` whose message content is non-empty
    and whose usage reports a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
    }
    conversation = [
        SystemPromptMessage(content='You are a helpful AI assistant.'),
        UserPromptMessage(content='Hello World!'),
    ]
    sampling_parameters = {'temperature': 0.7, 'top_p': 1.0}

    result = llm.invoke(
        model='ChatGLM3',
        credentials=credentials,
        prompt_messages=conversation,
        model_parameters=sampling_parameters,
        stop=['you'],
        user='abc-123',
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model with ``stream=True`` and inspect every chunk.

    The call must return a generator. Each yielded item must be an
    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``; content must be non-empty on every chunk
    whose ``finish_reason`` is ``None``.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
    }
    conversation = [
        SystemPromptMessage(content='You are a helpful AI assistant.'),
        UserPromptMessage(content='Hello World!'),
    ]
    sampling_parameters = {'temperature': 0.7, 'top_p': 1.0}

    stream = llm.invoke(
        model='ChatGLM3',
        credentials=credentials,
        prompt_messages=conversation,
        model_parameters=sampling_parameters,
        stop=['you'],
        stream=True,
        user='abc-123',
    )

    assert isinstance(stream, Generator)

    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        assert isinstance(piece.delta, LLMResultChunkDelta)
        assert isinstance(piece.delta.message, AssistantPromptMessage)
        # Only chunks without a finish reason are required to carry content.
        if piece.delta.finish_reason is None:
            assert len(piece.delta.message.content) > 0
"""
Function calling in Xinference does not currently support stream mode
"""
# def test_invoke_stream_chat_model_with_functions():
# model = XinferenceAILargeLanguageModel()
# response = model.invoke(
# model='ChatGLM3-6b',
# credentials={
# 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
# 'model_type': 'text-generation',
# 'model_name': 'ChatGLM3',
# 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
# },
# prompt_messages=[
# SystemPromptMessage(
# content='你是一个天气机器人,可以通过调用函数来获取天气信息',
# ),
# UserPromptMessage(
# content='波士顿天气如何?'
# )
# ],
# model_parameters={
# 'temperature': 0,
# 'top_p': 1.0,
# },
# stop=['you'],
# user='abc-123',
# stream=True,
# tools=[
# PromptMessageTool(
# name='get_current_weather',
# description='Get the current weather in a given location',
# parameters={
# "type": "object",
# "properties": {
# "location": {
# "type": "string",
# "description": "The city and state e.g. San Francisco, CA"
# },
# "unit": {
# "type": "string",
# "enum": ["celsius", "fahrenheit"]
# }
# },
# "required": [
# "location"
# ]
# }
# )
# ]
# )
# assert isinstance(response, Generator)
# call: LLMResultChunk = None
# chunks = []
# for chunk in response:
# chunks.append(chunk)
# assert isinstance(chunk, LLMResultChunk)
# assert isinstance(chunk.delta, LLMResultChunkDelta)
# assert isinstance(chunk.delta.message, AssistantPromptMessage)
# assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
# if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
# call = chunk
# break
# assert call is not None
# assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
# def test_invoke_chat_model_with_functions():
# model = XinferenceAILargeLanguageModel()
# response = model.invoke(
# model='ChatGLM3-6b',
# credentials={
# 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
# 'model_type': 'text-generation',
# 'model_name': 'ChatGLM3',
# 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
# },
# prompt_messages=[
# UserPromptMessage(
# content='What is the weather like in San Francisco?'
# )
# ],
# model_parameters={
# 'temperature': 0.7,
# 'top_p': 1.0,
# },
# stop=['you'],
# user='abc-123',
# stream=False,
# tools=[
# PromptMessageTool(
# name='get_current_weather',
# description='Get the current weather in a given location',
# parameters={
# "type": "object",
# "properties": {
# "location": {
# "type": "string",
# "description": "The city and state e.g. San Francisco, CA"
# },
# "unit": {
# "type": "string",
# "enum": [
# "c",
# "f"
# ]
# }
# },
# "required": [
# "location"
# ]
# }
# )
# ]
# )
# assert isinstance(response, LLMResult)
# assert len(response.message.content) > 0
# assert response.usage.total_tokens > 0
# assert response.message.tool_calls[0].function.name == 'get_current_weather'
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for the text-generation model.

    Two bad credential sets (a model UID prefixed with ``'www '`` and a pair
    of empty strings) must each raise ``CredentialsValidateFailedError``.
    The server URL and generation model UID read from the environment must
    then validate without raising.
    """
    llm = XinferenceAILargeLanguageModel()
    server_url = os.environ.get('XINFERENCE_SERVER_URL')
    generation_model_uid = os.environ.get('XINFERENCE_GENERATION_MODEL_UID')

    # A UID with an extra prefix is expected to be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        prefixed_uid_credentials = {
            'server_url': server_url,
            'model_uid': 'www ' + generation_model_uid,
        }
        llm.validate_credentials(model='alapaca', credentials=prefixed_uid_credentials)

    # Empty server URL and UID are expected to be rejected as well.
    with pytest.raises(CredentialsValidateFailedError):
        empty_credentials = {'server_url': '', 'model_uid': ''}
        llm.validate_credentials(model='alapaca', credentials=empty_credentials)

    # The unmodified environment values must pass (no exception).
    environment_credentials = {'server_url': server_url, 'model_uid': generation_model_uid}
    llm.validate_credentials(model='alapaca', credentials=environment_credentials)
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the text-generation model with ``stream=False``.

    The call must return an ``LLMResult`` whose message content is non-empty
    and whose usage reports a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
    }
    prompt = [UserPromptMessage(content='the United States is')]
    sampling_parameters = {'temperature': 0.7, 'top_p': 1.0}

    result = llm.invoke(
        model='alapaca',
        credentials=credentials,
        prompt_messages=prompt,
        model_parameters=sampling_parameters,
        stop=['you'],
        user='abc-123',
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the text-generation model with ``stream=True``.

    The call must return a generator. Each yielded item must be an
    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``; content must be non-empty on every chunk
    whose ``finish_reason`` is ``None``.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
    }
    prompt = [UserPromptMessage(content='the United States is')]
    sampling_parameters = {'temperature': 0.7, 'top_p': 1.0}

    stream = llm.invoke(
        model='alapaca',
        credentials=credentials,
        prompt_messages=prompt,
        model_parameters=sampling_parameters,
        stop=['you'],
        stream=True,
        user='abc-123',
    )

    assert isinstance(stream, Generator)

    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        assert isinstance(piece.delta, LLMResultChunkDelta)
        assert isinstance(piece.delta.message, AssistantPromptMessage)
        # Only chunks without a finish reason are required to carry content.
        if piece.delta.finish_reason is None:
            assert len(piece.delta.message.content) > 0
def test_get_num_tokens():
    """Check the token counts reported for a fixed two-message prompt.

    The same system + user prompt is counted twice: once together with a
    ``get_current_weather`` tool definition (expected 77 tokens) and once
    without any tools (expected 21 tokens). Both results must be ``int``.
    """
    def make_credentials():
        # Rebuilt for every call so each invocation receives its own dict.
        return {
            'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
            'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
        }

    def make_prompt():
        # Rebuilt for every call so each invocation receives fresh messages.
        return [
            SystemPromptMessage(content='You are a helpful AI assistant.'),
            UserPromptMessage(content='Hello World!'),
        ]

    llm = XinferenceAILargeLanguageModel()

    weather_tool_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state e.g. San Francisco, CA",
            },
            "unit": {
                "type": "string",
                "enum": ["c", "f"],
            },
        },
        "required": ["location"],
    }

    # Count with a tool definition attached.
    count_with_tool = llm.get_num_tokens(
        model='ChatGLM3',
        credentials=make_credentials(),
        prompt_messages=make_prompt(),
        tools=[
            PromptMessageTool(
                name='get_current_weather',
                description='Get the current weather in a given location',
                parameters=weather_tool_schema,
            ),
        ],
    )
    assert isinstance(count_with_tool, int)
    assert count_with_tool == 77

    # Count for the prompt alone.
    count_without_tool = llm.get_num_tokens(
        model='ChatGLM3',
        credentials=make_credentials(),
        prompt_messages=make_prompt(),
    )
    assert isinstance(count_without_tool, int)
    assert count_without_tool == 21