Merge branch 'feat/add-remote-file-upload-api' into deploy/dev
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions

This commit is contained in:
-LAN- 2024-10-28 17:56:27 +08:00
commit 2508bf9712
6 changed files with 135 additions and 100 deletions

View File

@ -0,0 +1,58 @@
import mimetypes
import os
import re
import urllib.parse
from uuid import uuid4
import httpx
from pydantic import BaseModel
class FileInfo(BaseModel):
filename: str
extension: str
mimetype: str
size: int
def guess_file_info_from_response(response: httpx.Response):
url = str(response.url)
# Try to extract filename from URL
parsed_url = urllib.parse.urlparse(url)
url_path = parsed_url.path
filename = os.path.basename(url_path)
# If filename couldn't be extracted, use Content-Disposition header
if not filename:
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
filename_match = re.search(r'filename="?(.+)"?', content_disposition)
if filename_match:
filename = filename_match.group(1)
# If still no filename, generate a unique one
if not filename:
unique_name = str(uuid4())
filename = f"{unique_name}"
# Guess MIME type from filename first, then URL
mimetype, _ = mimetypes.guess_type(filename)
if mimetype is None:
mimetype, _ = mimetypes.guess_type(url)
if mimetype is None:
# If guessing fails, use Content-Type from response headers
mimetype = response.headers.get("Content-Type", "application/octet-stream")
extension = os.path.splitext(filename)[1]
# Ensure filename has an extension
if not extension:
extension = mimetypes.guess_extension(mimetype) or ".bin"
filename = f"{filename}{extension}"
return FileInfo(
filename=filename,
extension=extension,
mimetype=mimetype,
size=int(response.headers.get("Content-Length", -1)),
)

View File

@ -1,15 +1,13 @@
import mimetypes
import os
import re
import urllib.parse
from typing import cast
from uuid import uuid4
from flask_login import current_user
from flask_restful import Resource, marshal_with, reqparse
from controllers.common import helpers
from core.file import helpers as file_helpers
from core.helper import ssrf_proxy
from fields.file_fields import file_fields, remote_file_info_fields
from fields.file_fields import file_fields_with_signed_url, remote_file_info_fields
from models.account import Account
from services.file_service import FileService
@ -29,7 +27,7 @@ class RemoteFileInfoApi(Resource):
class RemoteFileUploadApi(Resource):
@marshal_with(file_fields)
@marshal_with(file_fields_with_signed_url)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("url", type=str, required=True, help="URL is required")
@ -37,53 +35,36 @@ class RemoteFileUploadApi(Resource):
url = args["url"]
try:
response = ssrf_proxy.get(url)
response.raise_for_status()
content = response.content
except Exception as e:
return {"error": str(e)}, 400
response = ssrf_proxy.head(url)
response.raise_for_status()
# Try to extract filename from URL
parsed_url = urllib.parse.urlparse(url)
url_path = parsed_url.path
filename = os.path.basename(url_path)
file_info = helpers.guess_file_info_from_response(response)
# If filename couldn't be extracted, use Content-Disposition header
if not filename:
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
filename_match = re.search(r'filename="?(.+)"?', content_disposition)
if filename_match:
filename = filename_match.group(1)
if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
return {"error": "File size exceeded"}, 400
# If still no filename, generate a unique one
if not filename:
unique_name = str(uuid4())
filename = f"{unique_name}"
# Guess MIME type from filename first, then URL
mimetype, _ = mimetypes.guess_type(filename)
if mimetype is None:
mimetype, _ = mimetypes.guess_type(url)
if mimetype is None:
# If guessing fails, use Content-Type from response headers
mimetype = response.headers.get("Content-Type", "application/octet-stream")
# Ensure filename has an extension
if not os.path.splitext(filename)[1]:
extension = mimetypes.guess_extension(mimetype) or ".bin"
filename = f"{filename}{extension}"
response = ssrf_proxy.get(url)
response.raise_for_status()
content = response.content
try:
user = cast(Account, current_user)
upload_file = FileService.upload_file(
filename=filename,
filename=file_info.filename,
content=content,
mimetype=mimetype,
mimetype=file_info.mimetype,
user=user,
)
except Exception as e:
return {"error": str(e)}, 400
return upload_file, 201
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at,
}, 201

View File

@ -3,7 +3,7 @@ from flask import Blueprint
from libs.external_api import ExternalApi
from .files import FileApi
from .remote_files import RemoteFileInfoApi
from .remote_files import RemoteFileInfoApi, RemoteFileUploadApi
bp = Blueprint("web", __name__, url_prefix="/api")
api = ExternalApi(bp)
@ -13,5 +13,6 @@ api.add_resource(FileApi, "/files/upload")
# Remote files
api.add_resource(RemoteFileInfoApi, "/remote-files/<path:url>")
api.add_resource(RemoteFileUploadApi, "/remote-files/upload")
from . import app, audio, completion, conversation, feature, message, passport, saved_message, site, workflow

View File

@ -1,15 +1,13 @@
import mimetypes
import os
import re
import urllib.parse
from uuid import uuid4
from flask_login import current_user
from flask_restful import marshal_with, reqparse
from controllers.common import helpers
from controllers.web.wraps import WebApiResource
from core.file import helpers as file_helpers
from core.helper import ssrf_proxy
from fields.file_fields import file_fields, remote_file_info_fields
from fields.file_fields import file_fields_with_signed_url, remote_file_info_fields
from services.file_service import FileService
@ -28,7 +26,7 @@ class RemoteFileInfoApi(WebApiResource):
class RemoteFileUploadApi(WebApiResource):
@marshal_with(file_fields)
@marshal_with(file_fields_with_signed_url)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("url", type=str, required=True, help="URL is required")
@ -36,52 +34,35 @@ class RemoteFileUploadApi(WebApiResource):
url = args["url"]
try:
response = ssrf_proxy.get(url)
response.raise_for_status()
content = response.content
except Exception as e:
return {"error": str(e)}, 400
response = ssrf_proxy.head(url)
response.raise_for_status()
# Try to extract filename from URL
parsed_url = urllib.parse.urlparse(url)
url_path = parsed_url.path
filename = os.path.basename(url_path)
file_info = helpers.guess_file_info_from_response(response)
# If filename couldn't be extracted, use Content-Disposition header
if not filename:
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
filename_match = re.search(r'filename="?(.+)"?', content_disposition)
if filename_match:
filename = filename_match.group(1)
if not FileService.is_file_size_within_limit(extension=file_info.extension, file_size=file_info.size):
return {"error": "File size exceeded"}, 400
# If still no filename, generate a unique one
if not filename:
unique_name = str(uuid4())
filename = f"{unique_name}"
# Guess MIME type from filename first, then URL
mimetype, _ = mimetypes.guess_type(filename)
if mimetype is None:
mimetype, _ = mimetypes.guess_type(url)
if mimetype is None:
# If guessing fails, use Content-Type from response headers
mimetype = response.headers.get("Content-Type", "application/octet-stream")
# Ensure filename has an extension
if not os.path.splitext(filename)[1]:
extension = mimetypes.guess_extension(mimetype) or ".bin"
filename = f"{filename}{extension}"
response = ssrf_proxy.get(url)
response.raise_for_status()
content = response.content
try:
upload_file = FileService.upload_file(
filename=filename,
filename=file_info.filename,
content=content,
mimetype=mimetype,
mimetype=file_info.mimetype,
user=current_user,
)
except Exception as e:
return {"error": str(e)}, 400
return upload_file, 201
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": file_helpers.get_signed_file_url(upload_file_id=upload_file.id),
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at,
}, 201

View File

@ -24,3 +24,15 @@ remote_file_info_fields = {
"file_type": fields.String(attribute="file_type"),
"file_length": fields.Integer(attribute="file_length"),
}
file_fields_with_signed_url = {
"id": fields.String,
"name": fields.String,
"size": fields.Integer,
"extension": fields.String,
"url": fields.String,
"mime_type": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
}

View File

@ -44,23 +44,12 @@ class FileService:
if source == "datasets" and extension not in DOCUMENT_EXTENSIONS:
raise UnsupportedFileTypeError()
# select file size limit
if extension in IMAGE_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT * 1024 * 1024
elif extension in VIDEO_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT * 1024 * 1024
elif extension in AUDIO_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT * 1024 * 1024
else:
file_size_limit = dify_config.UPLOAD_FILE_SIZE_LIMIT * 1024 * 1024
# get file size
file_size = len(content)
# check if the file size is exceeded
if file_size > file_size_limit:
message = f"File size exceeded. {file_size} > {file_size_limit}"
raise FileTooLargeError(message)
if not FileService.is_file_size_within_limit(extension=extension, file_size=file_size):
raise FileTooLargeError
# generate file key
file_uuid = str(uuid.uuid4())
@ -97,6 +86,19 @@ class FileService:
return upload_file
@staticmethod
def is_file_size_within_limit(*, extension: str, file_size: int) -> bool:
if extension in IMAGE_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT * 1024 * 1024
elif extension in VIDEO_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT * 1024 * 1024
elif extension in AUDIO_EXTENSIONS:
file_size_limit = dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT * 1024 * 1024
else:
file_size_limit = dify_config.UPLOAD_FILE_SIZE_LIMIT * 1024 * 1024
return file_size <= file_size_limit
@staticmethod
def upload_text(text: str, text_name: str) -> UploadFile:
if len(text_name) > 200: