refactor(file_factory): improve filename and mime type determination (#9784)

This commit is contained in:
-LAN- 2024-10-24 17:07:20 +08:00 committed by GitHub
parent fc63841169
commit e54b7cda3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -179,27 +179,19 @@ def _build_from_remote_url(
if not url:
raise ValueError("Invalid file url")
mime_type = mimetypes.guess_type(url)[0] or ""
file_size = -1
filename = url.split("/")[-1].split("?")[0] or "unknown_file"
resp = ssrf_proxy.head(url, follow_redirects=True)
if resp.status_code == httpx.codes.OK:
# Try to extract filename from response headers or URL
content_disposition = resp.headers.get("Content-Disposition")
if content_disposition:
if content_disposition := resp.headers.get("Content-Disposition"):
filename = content_disposition.split("filename=")[-1].strip('"')
else:
filename = url.split("/")[-1].split("?")[0]
# Create the File object
file_size = int(resp.headers.get("Content-Length", -1))
mime_type = str(resp.headers.get("Content-Type", ""))
else:
filename = ""
file_size = -1
mime_type = ""
file_size = int(resp.headers.get("Content-Length", file_size))
mime_type = mime_type or str(resp.headers.get("Content-Type", ""))
# If filename is empty, set a default one
if not filename:
filename = "unknown_file"
# Determine file extension
extension = "." + filename.split(".")[-1] if "." in filename else ".bin"
extension = mimetypes.guess_extension(mime_type) or "." + filename.split(".")[-1] if "." in filename else ".bin"
if not mime_type:
mime_type, _ = mimetypes.guess_type(url)