From e61242a337587646b9632ce22f8b13042af4b5b1 Mon Sep 17 00:00:00 2001
From: "SiliconFlow, Inc" <chkain@gmail.com>
Date: Thu, 14 Nov 2024 20:53:35 +0800
Subject: [PATCH] feat: add vlm models from siliconflow (#10704)

---
 .../siliconflow/llm/Internvl2-26b.yaml        | 84 +++++++++++++++++++
 .../siliconflow/llm/Internvl2-8b.yaml         | 84 +++++++++++++++++++
 .../siliconflow/llm/_position.yaml            | 17 ++--
 .../llm/deepdeek-coder-v2-instruct.yaml       |  1 +
 .../siliconflow/llm/deepseek-v2-chat.yaml     |  1 +
 .../siliconflow/llm/deepseek-v2.5.yaml        |  2 +
 .../llm/hunyuan-a52b-instruct.yaml            | 84 +++++++++++++++++++
 .../siliconflow/llm/internvl2-llama3-76b.yaml | 84 +++++++++++++++++++
 .../llm/meta-mlama-3-70b-instruct.yaml        |  1 +
 .../llm/meta-mlama-3-8b-instruct.yaml         |  1 +
 .../llm/meta-mlama-3.1-70b-instruct.yaml      |  2 +-
 .../llm/qwen2-57b-a14b-instruct.yaml          |  1 +
 .../siliconflow/llm/qwen2-72b-instruct.yaml   |  1 +
 .../siliconflow/llm/qwen2-7b-instruct.yaml    |  1 +
 .../llm/qwen2-vl-72b-instruct.yaml            | 84 +++++++++++++++++++
 .../siliconflow/llm/qwen2-vl-7b-Instruct.yaml | 84 +++++++++++++++++++
 .../llm/qwen2.5-coder-32b-instruct.yaml       | 84 +++++++++++++++++++
 .../funaudio-sense-voice-small.yaml           |  5 ++
 .../speech2text/sense-voice-small.yaml        |  1 +
 19 files changed, 612 insertions(+), 10 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml

diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml
new file mode 100644
index 0000000000..f7b03e1254
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml
@@ -0,0 +1,84 @@
+model: OpenGVLab/InternVL2-26B
+label:
+  en_US: OpenGVLab/InternVL2-26B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml
new file mode 100644
index 0000000000..1e858bb4be
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml
@@ -0,0 +1,84 @@
+model: Pro/OpenGVLab/InternVL2-8B
+label:
+  en_US: Pro/OpenGVLab/InternVL2-8B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 8d1df82140..f010e4c826 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,16 +1,18 @@
+- Tencent/Hunyuan-A52B-Instruct
 - Qwen/Qwen2.5-72B-Instruct
 - Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-14B-Instruct
 - Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-32B-Instruct
 - Qwen/Qwen2.5-Coder-7B-Instruct
 - Qwen/Qwen2.5-Math-72B-Instruct
-- Qwen/Qwen2-72B-Instruct
-- Qwen/Qwen2-57B-A14B-Instruct
-- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-VL-72B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
+- Pro/Qwen/Qwen2-VL-7B-Instruct
+- OpenGVLab/InternVL2-Llama3-76B
+- OpenGVLab/InternVL2-26B
+- Pro/OpenGVLab/InternVL2-8B
 - deepseek-ai/DeepSeek-V2.5
-- deepseek-ai/DeepSeek-V2-Chat
-- deepseek-ai/DeepSeek-Coder-V2-Instruct
 - THUDM/glm-4-9b-chat
 - 01-ai/Yi-1.5-34B-Chat-16K
 - 01-ai/Yi-1.5-9B-Chat-16K
@@ -20,9 +22,6 @@
 - meta-llama/Meta-Llama-3.1-405B-Instruct
 - meta-llama/Meta-Llama-3.1-70B-Instruct
 - meta-llama/Meta-Llama-3.1-8B-Instruct
-- meta-llama/Meta-Llama-3-70B-Instruct
-- meta-llama/Meta-Llama-3-8B-Instruct
 - google/gemma-2-27b-it
 - google/gemma-2-9b-it
-- mistralai/Mistral-7B-Instruct-v0.2
-- mistralai/Mixtral-8x7B-Instruct-v0.1
+- deepseek-ai/DeepSeek-V2-Chat
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
index d5f23776ea..b13a2a751c 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '1.33'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
index 7aa684ef38..00bdb0ddf5 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '1.33'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
index 3155b70816..511cc1df9f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
@@ -4,6 +4,8 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - tool-call
+  - stream-tool-call
 model_properties:
   mode: chat
   context_size: 32768
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml
new file mode 100644
index 0000000000..c5489554a6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml
@@ -0,0 +1,84 @@
+model: Tencent/Hunyuan-A52B-Instruct
+label:
+  en_US: Tencent/Hunyuan-A52B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml
new file mode 100644
index 0000000000..65386d3175
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml
@@ -0,0 +1,84 @@
+model: OpenGVLab/InternVL2-Llama3-76B
+label:
+  en_US: OpenGVLab/InternVL2-Llama3-76B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
index d01770cb01..9825090759 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '4.13'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
index 3cd75d89e8..0133fd15d4 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '0'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
index e87871294c..f1fbb74cbd 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@@ -6,7 +6,7 @@ features:
   - agent-thought
 model_properties:
   mode: chat
-  context_size: 32768
+  context_size: 8192
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
index b2461335f8..0f56d16d9b 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '1.26'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
index e0f23bd89e..af65cfb8ed 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '4.13'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
index 47a9da8119..f0f10ae625 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@@ -37,3 +37,4 @@ pricing:
   output: '0'
   unit: '0.000001'
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml
new file mode 100644
index 0000000000..1866a684b5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml
@@ -0,0 +1,84 @@
+model: Qwen/Qwen2-VL-72B-Instruct
+label:
+  en_US: Qwen/Qwen2-VL-72B-Instruct
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which lets the user control the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml
new file mode 100644
index 0000000000..a508344689
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml
@@ -0,0 +1,84 @@
+model: Pro/Qwen/Qwen2-VL-7B-Instruct
+label:
+  en_US: Pro/Qwen/Qwen2-VL-7B-Instruct
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用于控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is set, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specifies the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml
new file mode 100644
index 0000000000..de2224a67b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml
@@ -0,0 +1,84 @@
+model: Qwen/Qwen2.5-Coder-32B-Instruct
+label:
+  en_US: Qwen/Qwen2.5-Coder-32B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results will be more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 8192
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the nucleus sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用于控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used during generation, which controls the randomness of the content generated by the model. Supports unsigned 64-bit integers; the default value is 1234. When a seed is set, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the degree of repetition in the generated text. Increasing repetition_penalty reduces repetition in the model output. 1.0 means no penalty.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specifies the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml b/api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml
new file mode 100644
index 0000000000..d4bc33c68e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml
@@ -0,0 +1,5 @@
+model: FunAudioLLM/SenseVoiceSmall
+model_type: speech2text
+model_properties:
+  file_upload_limit: 1
+  supported_file_extensions: mp3,wav
diff --git a/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml b/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml
index deceaf60f4..455b2ad467 100644
--- a/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml
@@ -3,3 +3,4 @@ model_type: speech2text
 model_properties:
   file_upload_limit: 1
   supported_file_extensions: mp3,wav
+deprecated: true