Merge e92d3237b4 into 5ff02b469f

fix:position error when creating segments (#10706 )
chore: Bump Alpine Linux to 3.20 in web dockerfile (#10671 )
2024-11-16 11:42:29 +08:00 · 2024-11-15 08:51:11 +08:00 · 2024-11-14 21:25:15 +08:00 · 2024-11-14 20:57:01 +08:00 · 2024-11-14 20:54:13 +08:00 · 2024-11-14 20:53:35 +08:00
68 changed files with 699 additions and 84 deletions
--- a/api/commands.py
+++ b/api/commands.py
@ -589,7 +589,7 @@ def upgrade_db():
            click.echo(click.style("Database migration successful!", fg="green"))

        except Exception as e:
-            logging.exception(f"Database migration failed: {e}")
+            logging.exception("Failed to execute database migration")
        finally:
            lock.release()
    else:
@ -633,7 +633,7 @@ where sites.id is null limit 1000"""
                except Exception as e:
                    failed_app_ids.append(app_id)
                    click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
-                    logging.exception(f"Fix app related site missing issue failed, error: {e}")
+                    logging.exception(f"Failed to fix app related site missing issue, app_id: {app_id}")
                    continue

            if not processed_count:
--- a/api/controllers/console/app/audio.py
+++ b/api/controllers/console/app/audio.py
@ -70,7 +70,7 @@ class ChatMessageAudioApi(Resource):
        except ValueError as e:
            raise e
        except Exception as e:
-            logging.exception(f"internal server error, {str(e)}.")
+            logging.exception("Failed to handle post request to ChatMessageAudioApi")
            raise InternalServerError()


@ -128,7 +128,7 @@ class ChatMessageTextApi(Resource):
        except ValueError as e:
            raise e
        except Exception as e:
-            logging.exception(f"internal server error, {str(e)}.")
+            logging.exception("Failed to handle post request to ChatMessageTextApi")
            raise InternalServerError()


@ -170,7 +170,7 @@ class TextModesApi(Resource):
        except ValueError as e:
            raise e
        except Exception as e:
-            logging.exception(f"internal server error, {str(e)}.")
+            logging.exception("Failed to handle get request to TextModesApi")
            raise InternalServerError()


--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@ -948,7 +948,7 @@ class DocumentRetryApi(DocumentResource):
                    raise DocumentAlreadyFinishedError()
                retry_documents.append(document)
            except Exception as e:
-                logging.exception(f"Document {document_id} retry failed: {str(e)}")
+                logging.exception(f"Failed to retry document, document id: {document_id}")
                continue
        # retry document
        DocumentService.retry_document(dataset_id, retry_documents)
--- a/api/controllers/console/workspace/models.py
+++ b/api/controllers/console/workspace/models.py
@ -72,7 +72,10 @@ class DefaultModelApi(Resource):
                    model=model_setting["model"],
                )
            except Exception as ex:
-                logging.exception(f"{model_setting['model_type']} save error: {ex}")
+                logging.exception(
+                    f"Failed to update default model, model type: {model_setting['model_type']},"
+                    f" model:{model_setting.get('model')}"
+                )
                raise ex

        return {"result": "success"}
@ -156,7 +159,10 @@ class ModelProviderModelApi(Resource):
                        credentials=args["credentials"],
                    )
                except CredentialsValidateFailedError as ex:
-                    logging.exception(f"save model credentials error: {ex}")
+                    logging.exception(
+                        f"Failed to save model credentials, tenant_id: {tenant_id},"
+                        f" model: {args.get('model')}, model_type: {args.get('model_type')}"
+                    )
                    raise ValueError(str(ex))

        return {"result": "success"}, 200
--- a/api/controllers/web/audio.py
+++ b/api/controllers/web/audio.py
@ -59,7 +59,7 @@ class AudioApi(WebApiResource):
        except ValueError as e:
            raise e
        except Exception as e:
-            logging.exception(f"internal server error: {str(e)}")
+            logging.exception("Failed to handle post request to AudioApi")
            raise InternalServerError()


@ -117,7 +117,7 @@ class TextApi(WebApiResource):
        except ValueError as e:
            raise e
        except Exception as e:
-            logging.exception(f"internal server error: {str(e)}")
+            logging.exception("Failed to handle post request to TextApi")
            raise InternalServerError()


--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@ -362,5 +362,5 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
            if e.args[0] == "I/O operation on closed file.":  # ignore this error
                raise GenerateTaskStoppedError()
            else:
-                logger.exception(e)
+                logger.exception(f"Failed to process generate task pipeline, conversation_id: {conversation.id}")
                raise e
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@ -242,7 +242,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                    start_listener_time = time.time()
                    yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id)
            except Exception as e:
-                logger.exception(e)
+                logger.exception(f"Failed to listen audio message, task_id: {task_id}")
                break
        if tts_publisher:
            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
--- a/api/core/app/apps/message_based_app_generator.py
+++ b/api/core/app/apps/message_based_app_generator.py
@ -80,7 +80,7 @@ class MessageBasedAppGenerator(BaseAppGenerator):
            if e.args[0] == "I/O operation on closed file.":  # ignore this error
                raise GenerateTaskStoppedError()
            else:
-                logger.exception(e)
+                logger.exception(f"Failed to handle response, conversation_id: {conversation.id}")
                raise e

    def _get_conversation_by_user(
--- a/api/core/app/apps/workflow/app_generator.py
+++ b/api/core/app/apps/workflow/app_generator.py
@ -298,5 +298,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
            if e.args[0] == "I/O operation on closed file.":  # ignore this error
                raise GenerateTaskStoppedError()
            else:
-                logger.exception(e)
+                logger.exception(
+                    f"Fails to process generate task pipeline, task_id: {application_generate_entity.task_id}"
+                )
                raise e
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@ -216,7 +216,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                else:
                    yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id)
            except Exception as e:
-                logger.exception(e)
+                logger.exception(f"Fails to get audio trunk, task_id: {task_id}")
                break
        if tts_publisher:
            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
--- a/api/core/app/task_pipeline/message_cycle_manage.py
+++ b/api/core/app/task_pipeline/message_cycle_manage.py
@ -86,7 +86,7 @@ class MessageCycleManage:
                    conversation.name = name
                except Exception as e:
                    if dify_config.DEBUG:
-                        logging.exception(f"generate conversation name failed: {e}")
+                        logging.exception(f"generate conversation name failed, conversation_id: {conversation_id}")
                    pass

                db.session.merge(conversation)
--- a/api/core/helper/moderation.py
+++ b/api/core/helper/moderation.py
@ -41,7 +41,7 @@ def check_moderation(model_config: ModelConfigWithCredentialsEntity, text: str)
                if moderation_result is True:
                    return True
            except Exception as ex:
-                logger.exception(ex)
+                logger.exception(f"Fails to check moderation, provider_name: {provider_name}")
                raise InvokeBadRequestError("Rate limit exceeded, please try again later.")

    return False
--- a/api/core/helper/module_import_helper.py
+++ b/api/core/helper/module_import_helper.py
@ -29,7 +29,7 @@ def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_laz
        spec.loader.exec_module(module)
        return module
    except Exception as e:
-        logging.exception(f"Failed to load module {module_name} from {py_file_path}: {str(e)}")
+        logging.exception(f"Failed to load module {module_name} from script file '{py_file_path}'")
        raise e


--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@ -554,7 +554,7 @@ class IndexingRunner:
                    qa_documents.append(qa_document)
                format_documents.extend(qa_documents)
            except Exception as e:
-                logging.exception(e)
+                logging.exception("Failed to format qa document")

            all_qa_documents.extend(format_documents)

--- a/api/core/llm_generator/llm_generator.py
+++ b/api/core/llm_generator/llm_generator.py
@ -102,7 +102,7 @@ class LLMGenerator:
        except InvokeError:
            questions = []
        except Exception as e:
-            logging.exception(e)
+            logging.exception("Failed to generate suggested questions after answer")
            questions = []

        return questions
@ -148,7 +148,7 @@ class LLMGenerator:
                error = str(e)
                error_step = "generate rule config"
            except Exception as e:
-                logging.exception(e)
+                logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}")
                rule_config["error"] = str(e)

            rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
@ -234,7 +234,7 @@ class LLMGenerator:
                error_step = "generate conversation opener"

        except Exception as e:
-            logging.exception(e)
+            logging.exception(f"Failed to generate rule config, model: {model_config.get('name')}")
            rule_config["error"] = str(e)

        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
@ -286,7 +286,9 @@ class LLMGenerator:
            error = str(e)
            return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"}
        except Exception as e:
-            logging.exception(e)
+            logging.exception(
+                f"Failed to invoke LLM model, model: {model_config.get('name')}, language: {code_language}"
+            )
            return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"}

    @classmethod
--- a/api/core/model_runtime/model_providers/azure_ai_studio/rerank/rerank.py
+++ b/api/core/model_runtime/model_providers/azure_ai_studio/rerank/rerank.py
@ -103,7 +103,7 @@ class AzureRerankModel(RerankModel):
            return RerankResult(model=model, docs=rerank_documents)

        except Exception as e:
-            logger.exception(f"Exception in Azure rerank: {e}")
+            logger.exception(f"Failed to invoke rerank model, model: {model}")
            raise

    def validate_credentials(self, model: str, credentials: dict) -> None:
--- a/api/core/model_runtime/model_providers/sagemaker/rerank/rerank.py
+++ b/api/core/model_runtime/model_providers/sagemaker/rerank/rerank.py
@ -113,7 +113,7 @@ class SageMakerRerankModel(RerankModel):
            return RerankResult(model=model, docs=rerank_documents)

        except Exception as e:
-            logger.exception(f"Exception {e}, line : {line}")
+            logger.exception(f"Failed to invoke rerank model, model: {model}")

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
--- a/api/core/model_runtime/model_providers/sagemaker/speech2text/speech2text.py
+++ b/api/core/model_runtime/model_providers/sagemaker/speech2text/speech2text.py
@ -78,7 +78,7 @@ class SageMakerSpeech2TextModel(Speech2TextModel):
            json_obj = json.loads(json_str)
            asr_text = json_obj["text"]
        except Exception as e:
-            logger.exception(f"failed to invoke speech2text model, {e}")
+            logger.exception(f"failed to invoke speech2text model, model: {model}")
            raise CredentialsValidateFailedError(str(e))

        return asr_text
--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@ -117,7 +117,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
            return TextEmbeddingResult(embeddings=all_embeddings, usage=usage, model=model)

        except Exception as e:
-            logger.exception(f"Exception {e}, line : {line}")
+            logger.exception(f"Failed to invoke text embedding model, model: {model}, line: {line}")

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
--- a/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-26b.yaml
@ -0,0 +1,84 @@
+model: OpenGVLab/InternVL2-26B
+label:
+  en_US: OpenGVLab/InternVL2-26B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/Internvl2-8b.yaml
@ -0,0 +1,84 @@
+model: Pro/OpenGVLab/InternVL2-8B
+label:
+  en_US: Pro/OpenGVLab/InternVL2-8B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@ -1,16 +1,18 @@
+- Tencent/Hunyuan-A52B-Instruct
 - Qwen/Qwen2.5-72B-Instruct
 - Qwen/Qwen2.5-32B-Instruct
 - Qwen/Qwen2.5-14B-Instruct
 - Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-32B-Instruct
 - Qwen/Qwen2.5-Coder-7B-Instruct
 - Qwen/Qwen2.5-Math-72B-Instruct
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
+- Qwen/Qwen2-VL-72B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
+- Pro/Qwen/Qwen2-VL-7B-Instruct
+- OpenGVLab/InternVL2-Llama3-76B
+- OpenGVLab/InternVL2-26B
+- Pro/OpenGVLab/InternVL2-8B
 - deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
 - THUDM/glm-4-9b-chat
 - 01-ai/Yi-1.5-34B-Chat-16K
 - 01-ai/Yi-1.5-9B-Chat-16K
@ -20,9 +22,6 @@
 - meta-llama/Meta-Llama-3.1-405B-Instruct
 - meta-llama/Meta-Llama-3.1-70B-Instruct
 - meta-llama/Meta-Llama-3.1-8B-Instruct
- meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
 - google/gemma-2-27b-it
 - google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2
- mistralai/Mixtral-8x7B-Instruct-v0.1
+- deepseek-ai/DeepSeek-V2-Chat
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepdeek-coder-v2-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '1.33'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2-chat.yaml
@ -37,3 +37,4 @@ pricing:
  output: '1.33'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/deepseek-v2.5.yaml
@ -4,6 +4,8 @@ label:
 model_type: llm
 features:
  - agent-thought
+  - tool-call
+  - stream-tool-call
 model_properties:
  mode: chat
  context_size: 32768
--- a/api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/hunyuan-a52b-instruct.yaml
@ -0,0 +1,84 @@
+model: Tencent/Hunyuan-A52B-Instruct
+label:
+  en_US: Tencent/Hunyuan-A52B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/internvl2-llama3-76b.yaml
@ -0,0 +1,84 @@
+model: OpenGVLab/InternVL2-Llama3-76B
+label:
+  en_US: OpenGVLab/InternVL2-Llama3-76B
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-70b-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '4.13'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3-8b-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '0'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/meta-mlama-3.1-70b-instruct.yaml
@ -6,7 +6,7 @@ features:
  - agent-thought
 model_properties:
  mode: chat
-  context_size: 32768
+  context_size: 8192
 parameter_rules:
  - name: temperature
    use_template: temperature
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-57b-a14b-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '1.26'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-72b-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '4.13'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-7b-instruct.yaml
@ -37,3 +37,4 @@ pricing:
  output: '0'
  unit: '0.000001'
  currency: RMB
+deprecated: true
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-72b-instruct.yaml
@ -0,0 +1,84 @@
+model: Qwen/Qwen2-VL-72B-Instruct
+label:
+  en_US: Qwen/Qwen2-VL-72B-Instruct
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2-vl-7b-Instruct.yaml
@ -0,0 +1,84 @@
+model: Pro/Qwen/Qwen2-VL-7B-Instruct
+label:
+  en_US: Pro/Qwen/Qwen2-VL-7B-Instruct
+model_type: llm
+features:
+  - vision
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 2000
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '21'
+  output: '21'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/qwen2.5-coder-32b-instruct.yaml
@ -0,0 +1,84 @@
+model: Qwen/Qwen2.5-Coder-32B-Instruct
+label:
+  en_US: Qwen/Qwen2.5-Coder-32B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说，temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值，使得更多的低概率词被选择，生成结果更加多样化；而较低的temperature值则会增强概率分布的峰值，使得高概率词更容易被选择，生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 8192
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量，它定义了生成的上限，但不保证每次都会生成到这个数量。
+      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值，例如，取值为0.8时，仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为（0,1.0)，取值越大，生成的随机性越高；取值越低，生成的确定性越高。
+      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时，采样候选集的大小。例如，取值为50时，仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大，生成的随机性越高；取值越小，生成的确定性越高。
+      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
+  - name: seed
+    required: false
+    type: int
+    default: 1234
+    label:
+      zh_Hans: 随机种子
+      en_US: Random seed
+    help:
+      zh_Hans: 生成时使用的随机数种子，用户控制模型生成内容的随机性。支持无符号64位整数，默认值为 1234。在使用seed时，模型将尽可能生成相同或相似的结果，但目前不保证每次生成的结果完全相同。
+      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      zh_Hans: 重复惩罚
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '1.26'
+  output: '1.26'
+  unit: '0.000001'
+  currency: RMB
--- a/api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/funaudio-sense-voice-small.yaml
@ -0,0 +1,5 @@
+model: FunAudioLLM/SenseVoiceSmall
+model_type: speech2text
+model_properties:
+  file_upload_limit: 1
+  supported_file_extensions: mp3,wav
--- a/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml
@ -3,3 +3,4 @@ model_type: speech2text
 model_properties:
  file_upload_limit: 1
  supported_file_extensions: mp3,wav
+deprecated: true
--- a/api/core/moderation/output_moderation.py
+++ b/api/core/moderation/output_moderation.py
@ -126,6 +126,6 @@ class OutputModeration(BaseModel):
            result: ModerationOutputsResult = moderation_factory.moderation_for_outputs(moderation_buffer)
            return result
        except Exception as e:
-            logger.exception("Moderation Output error: %s", e)
+            logger.exception(f"Moderation Output error, app_id: {app_id}")

        return None
--- a/api/core/ops/ops_trace_manager.py
+++ b/api/core/ops/ops_trace_manager.py
@ -711,7 +711,7 @@ class TraceQueueManager:
                trace_task.app_id = self.app_id
                trace_manager_queue.put(trace_task)
        except Exception as e:
-            logging.exception(f"Error adding trace task: {e}")
+            logging.exception(f"Error adding trace task, trace_type {trace_task.trace_type}")
        finally:
            self.start_timer()

@ -730,7 +730,7 @@ class TraceQueueManager:
            if tasks:
                self.send_to_celery(tasks)
        except Exception as e:
-            logging.exception(f"Error processing trace tasks: {e}")
+            logging.exception("Error processing trace tasks")

    def start_timer(self):
        global trace_manager_timer
--- a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
+++ b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
@ -242,7 +242,7 @@ class CouchbaseVector(BaseVector):
        try:
            self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
        except Exception as e:
-            logger.exception(e)
+            logger.exception(f"Failed to delete documents, ids: {ids}")

    def delete_by_document_id(self, document_id: str):
        query = f"""
--- a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
+++ b/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py
@ -79,7 +79,7 @@ class LindormVectorStore(BaseVector):
                existing_docs = self._client.mget(index=self._collection_name, body={"ids": batch_ids}, _source=False)
                return {doc["_id"] for doc in existing_docs["docs"] if doc["found"]}
            except Exception as e:
-                logger.exception(f"Error fetching batch {batch_ids}: {e}")
+                logger.exception(f"Error fetching batch {batch_ids}")
                return set()

        @retry(stop=stop_after_attempt(3), wait=wait_fixed(60))
@ -96,7 +96,7 @@ class LindormVectorStore(BaseVector):
                )
                return {doc["_id"] for doc in existing_docs["docs"] if doc["found"]}
            except Exception as e:
-                logger.exception(f"Error fetching batch {batch_ids}: {e}")
+                logger.exception(f"Error fetching batch ids: {batch_ids}")
                return set()

        if ids is None:
@ -177,7 +177,7 @@ class LindormVectorStore(BaseVector):
            else:
                logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.")
        except Exception as e:
-            logger.exception(f"Error occurred while deleting the index: {e}")
+            logger.exception(f"Error occurred while deleting the index: {self._collection_name}")
            raise e

    def text_exists(self, id: str) -> bool:
@ -201,7 +201,7 @@ class LindormVectorStore(BaseVector):
        try:
            response = self._client.search(index=self._collection_name, body=query)
        except Exception as e:
-            logger.exception(f"Error executing search: {e}")
+            logger.exception(f"Error executing vector search, query: {query}")
            raise

        docs_and_scores = []
--- a/api/core/rag/datasource/vdb/myscale/myscale_vector.py
+++ b/api/core/rag/datasource/vdb/myscale/myscale_vector.py
@ -142,7 +142,7 @@ class MyScaleVector(BaseVector):
                for r in self._client.query(sql).named_results()
            ]
        except Exception as e:
-            logging.exception(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m")
+            logging.exception(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m")  # noqa:TRY401
            return []

    def delete(self) -> None:
--- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py
+++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py
@ -158,7 +158,7 @@ class OpenSearchVector(BaseVector):
        try:
            response = self._client.search(index=self._collection_name.lower(), body=query)
        except Exception as e:
-            logger.exception(f"Error executing search: {e}")
+            logger.exception(f"Error executing vector search, query: {query}")
            raise

        docs = []
--- a/api/core/rag/embedding/cached_embedding.py
+++ b/api/core/rag/embedding/cached_embedding.py
@ -69,7 +69,7 @@ class CacheEmbedding(Embeddings):
                        except IntegrityError:
                            db.session.rollback()
                        except Exception as e:
-                            logging.exception("Failed transform embedding: %s", e)
+                            logging.exception("Failed transform embedding")
                cache_embeddings = []
                try:
                    for i, embedding in zip(embedding_queue_indices, embedding_queue_embeddings):
@ -89,7 +89,7 @@ class CacheEmbedding(Embeddings):
                    db.session.rollback()
            except Exception as ex:
                db.session.rollback()
-                logger.exception("Failed to embed documents: %s", ex)
+                logger.exception("Failed to embed documents: %s")
                raise ex

        return text_embeddings
@ -112,7 +112,7 @@ class CacheEmbedding(Embeddings):
            embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
        except Exception as ex:
            if dify_config.DEBUG:
-                logging.exception(f"Failed to embed query text: {ex}")
+                logging.exception(f"Failed to embed query text '{text[:10]}...({len(text)} chars)'")
            raise ex

        try:
@ -126,7 +126,7 @@ class CacheEmbedding(Embeddings):
            redis_client.setex(embedding_cache_key, 600, encoded_str)
        except Exception as ex:
            if dify_config.DEBUG:
-                logging.exception("Failed to add embedding to redis %s", ex)
+                logging.exception(f"Failed to add embedding to redis for the text '{text[:10]}...({len(text)} chars)'")
            raise ex

        return embedding_results
--- a/api/core/rag/extractor/word_extractor.py
+++ b/api/core/rag/extractor/word_extractor.py
@ -229,7 +229,7 @@ class WordExtractor(BaseExtractor):
                                for i in url_pattern.findall(x.text):
                                    hyperlinks_url = str(i)
                    except Exception as e:
-                        logger.exception(e)
+                        logger.exception("Failed to parse HYPERLINK xml")

        def parse_paragraph(paragraph):
            paragraph_content = []
--- a/api/core/rag/index_processor/processor/qa_index_processor.py
+++ b/api/core/rag/index_processor/processor/qa_index_processor.py
@ -159,7 +159,7 @@ class QAIndexProcessor(BaseIndexProcessor):
                    qa_documents.append(qa_document)
                format_documents.extend(qa_documents)
            except Exception as e:
-                logging.exception(e)
+                logging.exception("Failed to format qa document")

            all_qa_documents.extend(format_documents)

--- a/api/core/tools/tool/workflow_tool.py
+++ b/api/core/tools/tool/workflow_tool.py
@ -175,7 +175,7 @@ class WorkflowTool(Tool):

                            files.append(file_dict)
                    except Exception as e:
-                        logger.exception(e)
+                        logger.exception(f"Failed to transform file {file}")
            else:
                parameters_result[parameter.name] = tool_parameters.get(parameter.name)

--- a/api/core/tools/tool_file_manager.py
+++ b/api/core/tools/tool_file_manager.py
@ -98,7 +98,7 @@ class ToolFileManager:
            response.raise_for_status()
            blob = response.content
        except Exception as e:
-            logger.exception(f"Failed to download file from {file_url}: {e}")
+            logger.exception(f"Failed to download file from {file_url}")
            raise

        mimetype = guess_type(file_url)[0] or "octet/stream"
--- a/api/core/tools/tool_manager.py
+++ b/api/core/tools/tool_manager.py
@ -388,7 +388,7 @@ class ToolManager:
                    yield provider

                except Exception as e:
-                    logger.exception(f"load builtin provider {provider} error: {e}")
+                    logger.exception(f"load builtin provider {provider}")
                    continue
        # set builtin providers loaded
        cls._builtin_providers_loaded = True
--- a/api/core/tools/utils/message_transformer.py
+++ b/api/core/tools/utils/message_transformer.py
@ -40,7 +40,7 @@ class ToolFileMessageTransformer:
                        )
                    )
                except Exception as e:
-                    logger.exception(e)
+                    logger.exception(f"Failed to download image from {url}")
                    result.append(
                        ToolInvokeMessage(
                            type=ToolInvokeMessage.MessageType.TEXT,
--- a/api/core/workflow/graph_engine/graph_engine.py
+++ b/api/core/workflow/graph_engine/graph_engine.py
@ -172,7 +172,7 @@ class GraphEngine:
                                "answer"
                            ].strip()
                except Exception as e:
-                    logger.exception(f"Graph run failed: {str(e)}")
+                    logger.exception("Graph run failed")
                    yield GraphRunFailedEvent(error=str(e))
                    return

@ -692,7 +692,7 @@ class GraphEngine:
            )
            return
        except Exception as e:
-            logger.exception(f"Node {node_instance.node_data.title} run failed: {str(e)}")
+            logger.exception(f"Node {node_instance.node_data.title} run failed")
            raise e
        finally:
            db.session.close()
--- a/api/core/workflow/nodes/base/node.py
+++ b/api/core/workflow/nodes/base/node.py
@ -69,7 +69,7 @@ class BaseNode(Generic[GenericNodeData]):
        try:
            result = self._run()
        except Exception as e:
-            logger.exception(f"Node {self.node_id} failed to run: {e}")
+            logger.exception(f"Node {self.node_id} failed to run")
            result = NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                error=str(e),
--- a/api/extensions/ext_storage.py
+++ b/api/extensions/ext_storage.py
@ -70,7 +70,7 @@ class Storage:
        try:
            self.storage_runner.save(filename, data)
        except Exception as e:
-            logging.exception("Failed to save file: %s", e)
+            logging.exception(f"Failed to save file {filename}")
            raise e

    def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@ -80,42 +80,42 @@ class Storage:
            else:
                return self.load_once(filename)
        except Exception as e:
-            logging.exception("Failed to load file: %s", e)
+            logging.exception(f"Failed to load file {filename}")
            raise e

    def load_once(self, filename: str) -> bytes:
        try:
            return self.storage_runner.load_once(filename)
        except Exception as e:
-            logging.exception("Failed to load_once file: %s", e)
+            logging.exception(f"Failed to load_once file {filename}")
            raise e

    def load_stream(self, filename: str) -> Generator:
        try:
            return self.storage_runner.load_stream(filename)
        except Exception as e:
-            logging.exception("Failed to load_stream file: %s", e)
+            logging.exception(f"Failed to load_stream file {filename}")
            raise e

    def download(self, filename, target_filepath):
        try:
            self.storage_runner.download(filename, target_filepath)
        except Exception as e:
-            logging.exception("Failed to download file: %s", e)
+            logging.exception(f"Failed to download file {filename}")
            raise e

    def exists(self, filename):
        try:
            return self.storage_runner.exists(filename)
        except Exception as e:
-            logging.exception("Failed to check file exists: %s", e)
+            logging.exception(f"Failed to check file exists {filename}")
            raise e

    def delete(self, filename):
        try:
            return self.storage_runner.delete(filename)
        except Exception as e:
-            logging.exception("Failed to delete file: %s", e)
+            logging.exception(f"Failed to delete file {filename}")
            raise e


--- a/api/libs/smtp.py
+++ b/api/libs/smtp.py
@ -39,13 +39,13 @@ class SMTPClient:

            smtp.sendmail(self._from, mail["to"], msg.as_string())
        except smtplib.SMTPException as e:
-            logging.exception(f"SMTP error occurred: {str(e)}")
+            logging.exception("SMTP error occurred")
            raise
        except TimeoutError as e:
-            logging.exception(f"Timeout occurred while sending email: {str(e)}")
+            logging.exception("Timeout occurred while sending email")
            raise
        except Exception as e:
-            logging.exception(f"Unexpected error occurred while sending email: {str(e)}")
+            logging.exception(f"Unexpected error occurred while sending email to {mail['to']}")
            raise
        finally:
            if smtp:
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@ -679,7 +679,7 @@ class DatasetKeywordTable(db.Model):
                    return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder)
                return None
            except Exception as e:
-                logging.exception(str(e))
+                logging.exception(f"Failed to load keyword table from file: {file_key}")
                return None


--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@ -35,6 +35,7 @@ select = [
    "S506", # unsafe-yaml-load
    "SIM", # flake8-simplify rules
    "TRY400", # error-instead-of-exception
+    "TRY401", # verbose-log-message
    "UP", # pyupgrade rules
    "W191", # tab-indentation
    "W605", # invalid-escape-sequence
--- a/api/services/account_service.py
+++ b/api/services/account_service.py
@ -779,7 +779,7 @@ class RegisterService:
            db.session.query(Tenant).delete()
            db.session.commit()

-            logging.exception(f"Setup failed: {e}")
+            logging.exception(f"Setup account failed, email: {email}, name: {name}")
            raise ValueError(f"Setup failed: {e}")

    @classmethod
@ -821,7 +821,7 @@ class RegisterService:
            db.session.rollback()
        except Exception as e:
            db.session.rollback()
-            logging.exception(f"Register failed: {e}")
+            logging.exception("Register failed")
            raise AccountRegisterError(f"Registration failed: {e}") from e

        return account
--- a/api/services/app_service.py
+++ b/api/services/app_service.py
@ -88,7 +88,7 @@ class AppService:
            except (ProviderTokenNotInitError, LLMBadRequestError):
                model_instance = None
            except Exception as e:
-                logging.exception(e)
+                logging.exception(f"Get default model instance failed, tenant_id: {tenant_id}")
                model_instance = None

            if model_instance:
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@ -1458,6 +1458,7 @@ class SegmentService:
            pre_segment_data_list = []
            segment_data_list = []
            keywords_list = []
+            position = max_position + 1 if max_position else 1
            for segment_item in segments:
                content = segment_item["content"]
                doc_id = str(uuid.uuid4())
@ -1475,7 +1476,7 @@ class SegmentService:
                    document_id=document.id,
                    index_node_id=doc_id,
                    index_node_hash=segment_hash,
-                    position=max_position + 1 if max_position else 1,
+                    position=position,
                    content=content,
                    word_count=len(content),
                    tokens=tokens,
@ -1490,6 +1491,7 @@ class SegmentService:
                increment_word_count += segment_document.word_count
                db.session.add(segment_document)
                segment_data_list.append(segment_document)
+                position += 1

                pre_segment_data_list.append(segment_document)
                if "keywords" in segment_item:
--- a/api/services/tools/api_tools_manage_service.py
+++ b/api/services/tools/api_tools_manage_service.py
@ -195,7 +195,7 @@ class ApiToolManageService:
            # try to parse schema, avoid SSRF attack
            ApiToolManageService.parser_api_schema(schema)
        except Exception as e:
-            logger.exception(f"parse api schema error: {str(e)}")
+            logger.exception("parse api schema error")
            raise ValueError("invalid schema, please check the url you provided")

        return {"schema": schema}
--- a/api/services/tools/tools_transform_service.py
+++ b/api/services/tools/tools_transform_service.py
@ -183,7 +183,7 @@ class ToolTransformService:
        try:
            username = db_provider.user.name
        except Exception as e:
-            logger.exception(f"failed to get user name for api provider {db_provider.id}: {str(e)}")
+            logger.exception(f"failed to get user name for api provider {db_provider.id}")
        # add provider into providers
        credentials = db_provider.credentials
        result = UserToolProvider(
--- a/api/tasks/annotation/delete_annotation_index_task.py
+++ b/api/tasks/annotation/delete_annotation_index_task.py
@ -38,4 +38,4 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str
            click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green")
        )
    except Exception as e:
-        logging.exception("Annotation deleted index failed:{}".format(str(e)))
+        logging.exception("Annotation deleted index failed")
--- a/api/tasks/annotation/disable_annotation_reply_task.py
+++ b/api/tasks/annotation/disable_annotation_reply_task.py
@ -60,7 +60,7 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
            click.style("App annotations index deleted : {} latency: {}".format(app_id, end_at - start_at), fg="green")
        )
    except Exception as e:
-        logging.exception("Annotation batch deleted index failed:{}".format(str(e)))
+        logging.exception("Annotation batch deleted index failed")
        redis_client.setex(disable_app_annotation_job_key, 600, "error")
        disable_app_annotation_error_key = "disable_app_annotation_error_{}".format(str(job_id))
        redis_client.setex(disable_app_annotation_error_key, 600, str(e))
--- a/api/tasks/annotation/enable_annotation_reply_task.py
+++ b/api/tasks/annotation/enable_annotation_reply_task.py
@ -93,7 +93,7 @@ def enable_annotation_reply_task(
            click.style("App annotations added to index: {} latency: {}".format(app_id, end_at - start_at), fg="green")
        )
    except Exception as e:
-        logging.exception("Annotation batch created index failed:{}".format(str(e)))
+        logging.exception("Annotation batch created index failed")
        redis_client.setex(enable_app_annotation_job_key, 600, "error")
        enable_app_annotation_error_key = "enable_app_annotation_error_{}".format(str(job_id))
        redis_client.setex(enable_app_annotation_error_key, 600, str(e))
--- a/api/tasks/batch_create_segment_to_index_task.py
+++ b/api/tasks/batch_create_segment_to_index_task.py
@ -103,5 +103,5 @@ def batch_create_segment_to_index_task(
            click.style("Segment batch created job: {} latency: {}".format(job_id, end_at - start_at), fg="green")
        )
    except Exception as e:
-        logging.exception("Segments batch created index failed:{}".format(str(e)))
+        logging.exception("Segments batch created index failed")
        redis_client.setex(indexing_cache_key, 600, "error")
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@ -140,10 +140,10 @@ def test_extract_text_from_plain_text():
    assert text == "Hello, world!"


-def tet_extract_text_from_plain_text_non_utf8():
+def test_extract_text_from_plain_text_non_utf8():
    import tempfile

-    non_utf8_content = b"Hello world\xa9."  # \xA9 represents © in Latin-1
+    non_utf8_content = b"Hello, world\xa9."  # \xA9 represents © in Latin-1
    with tempfile.NamedTemporaryFile(delete=True) as temp_file:
        temp_file.write(non_utf8_content)
        temp_file.seek(0)
--- a/web/Dockerfile
+++ b/web/Dockerfile
@ -1,5 +1,5 @@
 # base image
-FROM node:20.11-alpine3.19 AS base
+FROM node:20-alpine3.20 AS base
 LABEL maintainer="takatost@gmail.com"

 # if you located in China, you can use aliyun mirror to speed up
Author	SHA1	Message	Date
Bowen Liang	1713943c12	Merge `e92d3237b4` into `5ff02b469f`	2024-11-15 08:51:11 +08:00
jarvis2f	5ff02b469f	fix:position error when creating segments (#10706 ) Some checks are pending Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run Details Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run Details Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run Details Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run Details Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions Details Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions Details	2024-11-14 21:25:15 +08:00
Bowen Liang	44f57ad9a8	chore: Bump Alpine Linux to 3.20 in web dockerfile (#10671 )	2024-11-14 20:57:01 +08:00
yihong	94fd6f6901	fix: typo in test (#10707 ) Signed-off-by: yihong0618 <zouzou0208@gmail.com>	2024-11-14 20:54:13 +08:00
SiliconFlow, Inc	e61242a337	feat: add vlm models from siliconflow (#10704 )	2024-11-14 20:53:35 +08:00
Bowen Liang	e92d3237b4	apply TRY401	2024-11-11 20:44:07 +08:00
Bowen Liang	2e0da361b0	add TRY401	2024-11-11 20:44:07 +08:00