From e54b7cda3d543408b045170bd854dd31002296fc Mon Sep 17 00:00:00 2001 From: -LAN- Date: Thu, 24 Oct 2024 17:07:20 +0800 Subject: [PATCH] refactor(file_factory): improve filename and mime type determination (#9784) --- api/factories/file_factory.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/api/factories/file_factory.py b/api/factories/file_factory.py index fa88e2b4fe..ead7b9a8b3 100644 --- a/api/factories/file_factory.py +++ b/api/factories/file_factory.py @@ -179,27 +179,19 @@ def _build_from_remote_url( if not url: raise ValueError("Invalid file url") + mime_type = mimetypes.guess_type(url)[0] or "" + file_size = -1 + filename = url.split("/")[-1].split("?")[0] or "unknown_file" + resp = ssrf_proxy.head(url, follow_redirects=True) if resp.status_code == httpx.codes.OK: - # Try to extract filename from response headers or URL - content_disposition = resp.headers.get("Content-Disposition") - if content_disposition: + if content_disposition := resp.headers.get("Content-Disposition"): filename = content_disposition.split("filename=")[-1].strip('"') - else: - filename = url.split("/")[-1].split("?")[0] - # Create the File object - file_size = int(resp.headers.get("Content-Length", -1)) - mime_type = str(resp.headers.get("Content-Type", "")) - else: - filename = "" - file_size = -1 - mime_type = "" + file_size = int(resp.headers.get("Content-Length", file_size)) + mime_type = mime_type or str(resp.headers.get("Content-Type", "")) - # If filename is empty, set a default one - if not filename: - filename = "unknown_file" # Determine file extension - extension = "." + filename.split(".")[-1] if "." in filename else ".bin" + extension = mimetypes.guess_extension(mime_type) or "." + filename.split(".")[-1] if "." in filename else ".bin" if not mime_type: mime_type, _ = mimetypes.guess_type(url)