feat: Jina Search & Jina Reader CSS selectors (#4523)

This commit is contained in:
rennokki 2024-05-20 11:40:46 +03:00 committed by GitHub
parent e8e213ad1e
commit 6b5685ef0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 88 additions and 3 deletions

View File

@ -6,8 +6,8 @@ identity:
zh_Hans: JinaReader
pt_BR: JinaReader
description:
en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出
pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
en_US: Convert any URL to an LLM-friendly input or perform searches on the web for grounding information. Experience improved output for your agent and RAG systems at no cost.
zh_Hans: 将任何 URL 转换为 LLM 易读的输入，或在网络上搜索以获取事实依据信息。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
pt_BR: Converta qualquer URL em uma entrada amigável ao LLM ou realize pesquisas na web para obter informações de grounding. Tenha uma experiência melhor para seu agente e sistemas RAG sem custo.
icon: icon.svg
credentials_for_provider:

View File

@ -23,6 +23,14 @@ class JinaReaderTool(BuiltinTool):
'Accept': 'application/json'
}
target_selector = tool_parameters.get('target_selector', None)
if target_selector is not None:
headers['X-Target-Selector'] = target_selector
wait_for_selector = tool_parameters.get('wait_for_selector', None)
if wait_for_selector is not None:
headers['X-Wait-For-Selector'] = wait_for_selector
response = ssrf_proxy.get(
str(URL(self._jina_reader_endpoint + url)),
headers=headers,

View File

@ -25,6 +25,32 @@ parameters:
pt_BR: used for linking to webpages
llm_description: url for scraping
form: llm
- name: target_selector
type: string
required: false
label:
en_US: Target selector
zh_Hans: 目标选择器
pt_BR: Seletor de destino
human_description:
en_US: css selector for scraping specific elements
zh_Hans: css 选择器用于抓取特定元素
pt_BR: seletor css para extrair elementos específicos
llm_description: css selector of the target element to scrape
form: form
- name: wait_for_selector
type: string
required: false
label:
en_US: Wait for selector
zh_Hans: 等待选择器
pt_BR: Aguardar por seletor
human_description:
en_US: css selector for waiting for specific elements
zh_Hans: css 选择器用于等待特定元素
pt_BR: seletor css para aguardar elementos específicos
llm_description: css selector of the target element to wait for
form: form
- name: summary
type: boolean
required: false

View File

@ -0,0 +1,30 @@
from typing import Any, Union
from urllib.parse import quote

from yarl import URL

from core.helper import ssrf_proxy
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class JinaSearchTool(BuiltinTool):
    """Built-in tool that sends a search query to the Jina Search endpoint."""

    # Base URL; the (percent-encoded) query is appended as the URL path.
    _jina_search_endpoint = 'https://s.jina.ai/'

    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Run a search for ``tool_parameters['query']`` and return the raw response.

        Args:
            user_id: Identifier of the invoking user (not used by this tool).
            tool_parameters: Tool arguments; must contain the key ``'query'``.

        Returns:
            A text message wrapping the body of the HTTP response as-is.

        Raises:
            KeyError: If ``'query'`` is missing from ``tool_parameters``.
        """
        query = tool_parameters['query']

        headers = {
            'Accept': 'application/json'
        }

        # The query is free-form text placed in the URL *path*. Characters such
        # as '?', '#', '/' or '%' would otherwise be parsed as the query-string
        # separator, the fragment (never sent to the server), extra path
        # segments, or malformed escapes -- silently truncating or altering the
        # search. Encode everything so the whole question is one path segment.
        encoded_query = quote(str(query), safe='')

        response = ssrf_proxy.get(
            # encoded=True: the string is already fully percent-encoded, so
            # yarl must not re-interpret or re-quote it.
            str(URL(self._jina_search_endpoint + encoded_query, encoded=True)),
            headers=headers,
            timeout=(10, 60)
        )

        return self.create_text_message(response.text)

View File

@ -0,0 +1,21 @@
identity:
name: jina_search
author: Dify
label:
en_US: JinaSearch
zh_Hans: JinaSearch
pt_BR: JinaSearch
description:
human:
en_US: Search on the web and get the top 5 results. Useful for grounding using information from the web.
llm: A tool for searching results on the web for grounding. Input should be a simple question.
parameters:
- name: query
type: string
required: true
label:
en_US: Question (Query)
human_description:
en_US: used to find information on the web
llm_description: simple question to ask on the web
form: llm