mirror of
https://github.com/langgenius/dify.git
synced 2024-11-16 11:42:29 +08:00
Merge branch 'feat/add-remote-file-upload-api' into deploy/dev
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
This commit is contained in:
commit
2508bf9712
58
api/controllers/common/helpers.py
Normal file
58
api/controllers/common/helpers.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
from uuid import uuid4
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class FileInfo(BaseModel):
    """Metadata describing a file referenced by an HTTP response.

    Instances are built by ``guess_file_info_from_response`` from the
    response URL and headers.
    """

    # Name of the file, including its extension.
    filename: str
    # Extension with its leading dot (e.g. ".pdf"), as produced by
    # ``os.path.splitext`` / ``mimetypes.guess_extension``.
    extension: str
    # MIME type guessed from the name or URL, or taken from Content-Type.
    mimetype: str
    # Value of the Content-Length header; -1 when the header is absent.
    size: int
|
||||
|
||||
|
||||
def guess_file_info_from_response(response: httpx.Response):
    """Derive file metadata from the URL and headers of an HTTP response.

    The filename is resolved in this order: the last path segment of the
    response URL, the ``filename`` parameter of the ``Content-Disposition``
    header, and finally a freshly generated UUID. The MIME type is guessed
    from the filename, then from the full URL, and only then taken from the
    ``Content-Type`` header (defaulting to ``application/octet-stream``).

    Args:
        response: The HTTP response (typically from a HEAD or GET request)
            whose ``url`` and ``headers`` are inspected. The body is not read.

    Returns:
        A ``FileInfo`` whose ``filename`` always carries an extension and
        whose ``size`` is the ``Content-Length`` value, or ``-1`` when that
        header is missing or not an integer.
    """
    url = str(response.url)

    # Try to extract the filename from the last segment of the URL path.
    filename = os.path.basename(urllib.parse.urlparse(url).path)

    # If the URL gave no filename, fall back to the Content-Disposition header.
    if not filename:
        content_disposition = response.headers.get("Content-Disposition")
        if content_disposition:
            # Stop at the closing quote or the next ';' so that neither the
            # trailing quote nor following parameters leak into the name
            # (a greedy ``.+`` would capture them).
            filename_match = re.search(r'filename="?([^";]+)"?', content_disposition)
            if filename_match:
                filename = filename_match.group(1).strip()

    # If there is still no filename, generate a unique one.
    if not filename:
        filename = str(uuid4())

    # Guess the MIME type from the filename first, then from the URL.
    mimetype, _ = mimetypes.guess_type(filename)
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(url)
    if mimetype is None:
        # Last resort: the Content-Type header. Drop parameters such as
        # "; charset=utf-8" so the bare media type can be mapped back to an
        # extension by mimetypes.guess_extension below.
        content_type = response.headers.get("Content-Type", "")
        mimetype = content_type.split(";", 1)[0].strip() or "application/octet-stream"

    extension = os.path.splitext(filename)[1]

    # Ensure the filename has an extension.
    if not extension:
        extension = mimetypes.guess_extension(mimetype) or ".bin"
        filename = f"{filename}{extension}"

    # A missing or malformed Content-Length is reported as -1 (unknown size)
    # instead of raising from int().
    try:
        size = int(response.headers.get("Content-Length", -1))
    except ValueError:
        size = -1

    return FileInfo(
        filename=filename,
        extension=extension,
        mimetype=mimetype,
        size=size,
    )
|
|
@ -1,15 +1,13 @@
|
|||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
from typing import cast
|
||||
from uuid import uuid4
|
||||
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource, marshal_with, reqparse
|
||||
|
||||
from controllers.common import helpers
|
||||
from core.file import helpers as file_helpers
|
||||
from core.helper import ssrf_proxy
|
||||
from fields.file_fields import file_fields, remote_file_info_fields
|
||||
from fields.file_fields import file_fields_with_signed_url, remote_file_info_fields
|
||||
from models.account import Account
|
||||
from services.file_service import FileService
|
||||
|
||||
|
@ -29,7 +27,7 @@ class RemoteFileInfoApi(Resource):
|
|||
|
||||
|
||||
class RemoteFileUploadApi(Resource):
|
||||
@marshal_with(file_fields)
|
||||
@marshal_with(file_fields_with_signed_url)
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("url", type=str, required=True, help="URL is required")
|
||||
|
@ -37,53 +35,36 @@ class RemoteFileUploadApi(Resource):
|
|||
|
||||
url = args["url"]
|
||||
|
||||
try:
|
||||
response = ssrf_proxy.get(url)
|
||||
response.raise_for_status()
|
||||
content = response.content
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 400
|
||||
response = ssrf_proxy.head(url)
|
||||
response.raise_for_status()
|
||||
|
||||
# Try to extract filename from URL
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
filename = os.path.basename(url_path)
|
||||
file_info = helpers.guess_file_info_from_response(response)
|
||||
|
||||
# If filename couldn't be extracted, use Content-Disposition header
|
||||
if not filename:
|
||||
content_disposition = response.headers.get("Content-Disposition")
|
||||
if content_disposition:
|
||||
filename_match = re.search(r'filename="?(.+)"?', content_disposition)
|
||||
if filename_match:
|
||||
filename = filename_match.group(1)
|
||||
if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
|
||||
return {"error": "File size exceeded"}, 400
|
||||
|
||||
# If still no filename, generate a unique one
|
||||
if not filename:
|
||||
unique_name = str(uuid4())
|
||||
filename = f"{unique_name}"
|
||||
|
||||
# Guess MIME type from filename first, then URL
|
||||
mimetype, _ = mimetypes.guess_type(filename)
|
||||
if mimetype is None:
|
||||
mimetype, _ = mimetypes.guess_type(url)
|
||||
if mimetype is None:
|
||||
# If guessing fails, use Content-Type from response headers
|
||||
mimetype = response.headers.get("Content-Type", "application/octet-stream")
|
||||
|
||||
# Ensure filename has an extension
|
||||
if not os.path.splitext(filename)[1]:
|
||||
extension = mimetypes.guess_extension(mimetype) or ".bin"
|
||||
filename = f"{filename}{extension}"
|
||||
response = ssrf_proxy.get(url)
|
||||
response.raise_for_status()
|
||||
content = response.content
|
||||
|
||||
try:
|
||||
user = cast(Account, current_user)
|
||||
upload_file = FileService.upload_file(
|
||||
filename=filename,
|
||||
filename=file_info.filename,
|
||||
content=content,
|
||||
mimetype=mimetype,
|
||||
mimetype=file_info.mimetype,
|
||||
user=user,
|
||||
)
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 400
|
||||
|
||||
return upload_file, 201
|
||||
return {
|
||||
"id": upload_file.id,
|
||||
"name": upload_file.name,
|
||||
"size": upload_file.size,
|
||||
"extension": upload_file.extension,
|
||||
"url": file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
|
||||
"mime_type": upload_file.mime_type,
|
||||
"created_by": upload_file.created_by,
|
||||
"created_at": upload_file.created_at,
|
||||
}, 201
|
||||
|
|
|
@ -3,7 +3,7 @@ from flask import Blueprint
|
|||
from libs.external_api import ExternalApi
|
||||
|
||||
from .files import FileApi
|
||||
from .remote_files import RemoteFileInfoApi
|
||||
from .remote_files import RemoteFileInfoApi, RemoteFileUploadApi
|
||||
|
||||
bp = Blueprint("web", __name__, url_prefix="/api")
|
||||
api = ExternalApi(bp)
|
||||
|
@ -13,5 +13,6 @@ api.add_resource(FileApi, "/files/upload")
|
|||
|
||||
# Remote files
|
||||
api.add_resource(RemoteFileInfoApi, "/remote-files/<path:url>")
|
||||
api.add_resource(RemoteFileUploadApi, "/remote-files/upload")
|
||||
|
||||
from . import app, audio, completion, conversation, feature, message, passport, saved_message, site, workflow
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
from uuid import uuid4
|
||||
|
||||
from flask_login import current_user
|
||||
from flask_restful import marshal_with, reqparse
|
||||
|
||||
from controllers.common import helpers
|
||||
from controllers.web.wraps import WebApiResource
|
||||
from core.file import helpers as file_helpers
|
||||
from core.helper import ssrf_proxy
|
||||
from fields.file_fields import file_fields, remote_file_info_fields
|
||||
from fields.file_fields import file_fields_with_signed_url, remote_file_info_fields
|
||||
from services.file_service import FileService
|
||||
|
||||
|
||||
|
@ -28,7 +26,7 @@ class RemoteFileInfoApi(WebApiResource):
|
|||
|
||||
|
||||
class RemoteFileUploadApi(WebApiResource):
|
||||
@marshal_with(file_fields)
|
||||
@marshal_with(file_fields_with_signed_url)
|
||||
def post(self):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("url", type=str, required=True, help="URL is required")
|
||||
|
@ -36,52 +34,35 @@ class RemoteFileUploadApi(WebApiResource):
|
|||
|
||||
url = args["url"]
|
||||
|
||||
try:
|
||||
response = ssrf_proxy.get(url)
|
||||
response.raise_for_status()
|
||||
content = response.content
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 400
|
||||
response = ssrf_proxy.head(url)
|
||||
response.raise_for_status()
|
||||
|
||||
# Try to extract filename from URL
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
filename = os.path.basename(url_path)
|
||||
file_info = helpers.guess_file_info_from_response(response)
|
||||
|
||||
# If filename couldn't be extracted, use Content-Disposition header
|
||||
if not filename:
|
||||
content_disposition = response.headers.get("Content-Disposition")
|
||||
if content_disposition:
|
||||
filename_match = re.search(r'filename="?(.+)"?', content_disposition)
|
||||
if filename_match:
|
||||
filename = filename_match.group(1)
|
||||
if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
|
||||
return {"error": "File size exceeded"}, 400
|
||||
|
||||
# If still no filename, generate a unique one
|
||||
if not filename:
|
||||
unique_name = str(uuid4())
|
||||
filename = f"{unique_name}"
|
||||
|
||||
# Guess MIME type from filename first, then URL
|
||||
mimetype, _ = mimetypes.guess_type(filename)
|
||||
if mimetype is None:
|
||||
mimetype, _ = mimetypes.guess_type(url)
|
||||
if mimetype is None:
|
||||
# If guessing fails, use Content-Type from response headers
|
||||
mimetype = response.headers.get("Content-Type", "application/octet-stream")
|
||||
|
||||
# Ensure filename has an extension
|
||||
if not os.path.splitext(filename)[1]:
|
||||
extension = mimetypes.guess_extension(mimetype) or ".bin"
|
||||
filename = f"{filename}{extension}"
|
||||
response = ssrf_proxy.get(url)
|
||||
response.raise_for_status()
|
||||
content = response.content
|
||||
|
||||
try:
|
||||
upload_file = FileService.upload_file(
|
||||
filename=filename,
|
||||
filename=file_info.filename,
|
||||
content=content,
|
||||
mimetype=mimetype,
|
||||
mimetype=file_info.mimetype,
|
||||
user=current_user,
|
||||
)
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 400
|
||||
|
||||
return upload_file, 201
|
||||
return {
|
||||
"id": upload_file.id,
|
||||
"name": upload_file.name,
|
||||
"size": upload_file.size,
|
||||
"extension": upload_file.extension,
|
||||
"url": file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
|
||||
"mime_type": upload_file.mime_type,
|
||||
"created_by": upload_file.created_by,
|
||||
"created_at": upload_file.created_at,
|
||||
}, 201
|
||||
|
|
|
@ -24,3 +24,15 @@ remote_file_info_fields = {
|
|||
"file_type": fields.String(attribute="file_type"),
|
||||
"file_length": fields.Integer(attribute="file_length"),
|
||||
}
|
||||
|
||||
|
||||
file_fields_with_signed_url = {
|
||||
"id": fields.String,
|
||||
"name": fields.String,
|
||||
"size": fields.Integer,
|
||||
"extension": fields.String,
|
||||
"url": fields.String,
|
||||
"mime_type": fields.String,
|
||||
"created_by": fields.String,
|
||||
"created_at": TimestampField,
|
||||
}
|
||||
|
|
|
@ -44,23 +44,12 @@ class FileService:
|
|||
if source == "datasets" and extension not in DOCUMENT_EXTENSIONS:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
# select file size limit
|
||||
if extension in IMAGE_EXTENSIONS:
|
||||
file_size_limit = dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT * 1024 * 1024
|
||||
elif extension in VIDEO_EXTENSIONS:
|
||||
file_size_limit = dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT * 1024 * 1024
|
||||
elif extension in AUDIO_EXTENSIONS:
|
||||
file_size_limit = dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT * 1024 * 1024
|
||||
else:
|
||||
file_size_limit = dify_config.UPLOAD_FILE_SIZE_LIMIT * 1024 * 1024
|
||||
|
||||
# get file size
|
||||
file_size = len(content)
|
||||
|
||||
# check if the file size is exceeded
|
||||
if file_size > file_size_limit:
|
||||
message = f"File size exceeded. {file_size} > {file_size_limit}"
|
||||
raise FileTooLargeError(message)
|
||||
if not FileService.is_file_size_within_limit(extension=extension, file_size=file_size):
|
||||
raise FileTooLargeError
|
||||
|
||||
# generate file key
|
||||
file_uuid = str(uuid.uuid4())
|
||||
|
@ -97,6 +86,19 @@ class FileService:
|
|||
|
||||
return upload_file
|
||||
|
||||
@staticmethod
def is_file_size_within_limit(*, extension: str, file_size: int) -> bool:
    """Check a file size against the upload limit for its extension.

    The limit is chosen by extension category (image, video, audio, or the
    general default) from ``dify_config``; the configured values are
    multiplied by 1024 * 1024 before comparison.

    Args:
        extension: File extension used to select the size limit category.
        file_size: Size of the file in bytes.

    Returns:
        True when ``file_size`` does not exceed the selected limit.
    """
    # Select the configured limit for the matching category.
    if extension in IMAGE_EXTENSIONS:
        limit_mb = dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT
    elif extension in VIDEO_EXTENSIONS:
        limit_mb = dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT
    elif extension in AUDIO_EXTENSIONS:
        limit_mb = dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT
    else:
        limit_mb = dify_config.UPLOAD_FILE_SIZE_LIMIT

    max_bytes = limit_mb * 1024 * 1024
    return file_size <= max_bytes
|
||||
|
||||
@staticmethod
|
||||
def upload_text(text: str, text_name: str) -> UploadFile:
|
||||
if len(text_name) > 200:
|
||||
|
|
Loading…
Reference in New Issue
Block a user