Merge branch 'fix/upload-remote-image-preview' into deploy/dev

StyleZhang 2024-10-28 16:16:18 +08:00
commit 73933b3350
4 changed files with 170 additions and 167 deletions

View File

@@ -760,166 +760,168 @@ class DocumentService:
             )
             db.session.add(dataset_process_rule)
             db.session.commit()
-        position = DocumentService.get_documents_position(dataset.id)
-        document_ids = []
-        duplicate_document_ids = []
-        if document_data["data_source"]["type"] == "upload_file":
-            upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"]
-            for file_id in upload_file_list:
-                file = (
-                    db.session.query(UploadFile)
-                    .filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id)
-                    .first()
-                )
-                # raise error if file not found
-                if not file:
-                    raise FileNotExistsError()
-                file_name = file.name
-                data_source_info = {
-                    "upload_file_id": file_id,
-                }
-                # check duplicate
-                if document_data.get("duplicate", False):
-                    document = Document.query.filter_by(
-                        dataset_id=dataset.id,
-                        tenant_id=current_user.current_tenant_id,
-                        data_source_type="upload_file",
-                        enabled=True,
-                        name=file_name,
-                    ).first()
-                    if document:
-                        document.dataset_process_rule_id = dataset_process_rule.id
-                        document.updated_at = datetime.datetime.utcnow()
-                        document.created_from = created_from
-                        document.doc_form = document_data["doc_form"]
-                        document.doc_language = document_data["doc_language"]
-                        document.data_source_info = json.dumps(data_source_info)
-                        document.batch = batch
-                        document.indexing_status = "waiting"
-                        db.session.add(document)
-                        documents.append(document)
-                        duplicate_document_ids.append(document.id)
-                        continue
-                document = DocumentService.build_document(
-                    dataset,
-                    dataset_process_rule.id,
-                    document_data["data_source"]["type"],
-                    document_data["doc_form"],
-                    document_data["doc_language"],
-                    data_source_info,
-                    created_from,
-                    position,
-                    account,
-                    file_name,
-                    batch,
-                )
-                db.session.add(document)
-                db.session.flush()
-                document_ids.append(document.id)
-                documents.append(document)
-                position += 1
-        elif document_data["data_source"]["type"] == "notion_import":
-            notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"]
-            exist_page_ids = []
-            exist_document = {}
-            documents = Document.query.filter_by(
-                dataset_id=dataset.id,
-                tenant_id=current_user.current_tenant_id,
-                data_source_type="notion_import",
-                enabled=True,
-            ).all()
-            if documents:
-                for document in documents:
-                    data_source_info = json.loads(document.data_source_info)
-                    exist_page_ids.append(data_source_info["notion_page_id"])
-                    exist_document[data_source_info["notion_page_id"]] = document.id
-            for notion_info in notion_info_list:
-                workspace_id = notion_info["workspace_id"]
-                data_source_binding = DataSourceOauthBinding.query.filter(
-                    db.and_(
-                        DataSourceOauthBinding.tenant_id == current_user.current_tenant_id,
-                        DataSourceOauthBinding.provider == "notion",
-                        DataSourceOauthBinding.disabled == False,
-                        DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"',
-                    )
-                ).first()
-                if not data_source_binding:
-                    raise ValueError("Data source binding not found.")
-                for page in notion_info["pages"]:
-                    if page["page_id"] not in exist_page_ids:
-                        data_source_info = {
-                            "notion_workspace_id": workspace_id,
-                            "notion_page_id": page["page_id"],
-                            "notion_page_icon": page["page_icon"],
-                            "type": page["type"],
-                        }
-                        document = DocumentService.build_document(
-                            dataset,
-                            dataset_process_rule.id,
-                            document_data["data_source"]["type"],
-                            document_data["doc_form"],
-                            document_data["doc_language"],
-                            data_source_info,
-                            created_from,
-                            position,
-                            account,
-                            page["page_name"],
-                            batch,
-                        )
-                        db.session.add(document)
-                        db.session.flush()
-                        document_ids.append(document.id)
-                        documents.append(document)
-                        position += 1
-                    else:
-                        exist_document.pop(page["page_id"])
-            # delete not selected documents
-            if len(exist_document) > 0:
-                clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
-        elif document_data["data_source"]["type"] == "website_crawl":
-            website_info = document_data["data_source"]["info_list"]["website_info_list"]
-            urls = website_info["urls"]
-            for url in urls:
-                data_source_info = {
-                    "url": url,
-                    "provider": website_info["provider"],
-                    "job_id": website_info["job_id"],
-                    "only_main_content": website_info.get("only_main_content", False),
-                    "mode": "crawl",
-                }
-                if len(url) > 255:
-                    document_name = url[:200] + "..."
-                else:
-                    document_name = url
-                document = DocumentService.build_document(
-                    dataset,
-                    dataset_process_rule.id,
-                    document_data["data_source"]["type"],
-                    document_data["doc_form"],
-                    document_data["doc_language"],
-                    data_source_info,
-                    created_from,
-                    position,
-                    account,
-                    document_name,
-                    batch,
-                )
-                db.session.add(document)
-                db.session.flush()
-                document_ids.append(document.id)
-                documents.append(document)
-                position += 1
-        db.session.commit()
-        # trigger async task
-        if document_ids:
-            document_indexing_task.delay(dataset.id, document_ids)
-        if duplicate_document_ids:
-            duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids)
+        lock_name = "add_document_lock_dataset_id_{}".format(dataset.id)
+        with redis_client.lock(lock_name, timeout=600):
+            position = DocumentService.get_documents_position(dataset.id)
+            document_ids = []
+            duplicate_document_ids = []
+            if document_data["data_source"]["type"] == "upload_file":
+                upload_file_list = document_data["data_source"]["info_list"]["file_info_list"]["file_ids"]
+                for file_id in upload_file_list:
+                    file = (
+                        db.session.query(UploadFile)
+                        .filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id)
+                        .first()
+                    )
+                    # raise error if file not found
+                    if not file:
+                        raise FileNotExistsError()
+                    file_name = file.name
+                    data_source_info = {
+                        "upload_file_id": file_id,
+                    }
+                    # check duplicate
+                    if document_data.get("duplicate", False):
+                        document = Document.query.filter_by(
+                            dataset_id=dataset.id,
+                            tenant_id=current_user.current_tenant_id,
+                            data_source_type="upload_file",
+                            enabled=True,
+                            name=file_name,
+                        ).first()
+                        if document:
+                            document.dataset_process_rule_id = dataset_process_rule.id
+                            document.updated_at = datetime.datetime.utcnow()
+                            document.created_from = created_from
+                            document.doc_form = document_data["doc_form"]
+                            document.doc_language = document_data["doc_language"]
+                            document.data_source_info = json.dumps(data_source_info)
+                            document.batch = batch
+                            document.indexing_status = "waiting"
+                            db.session.add(document)
+                            documents.append(document)
+                            duplicate_document_ids.append(document.id)
+                            continue
+                    document = DocumentService.build_document(
+                        dataset,
+                        dataset_process_rule.id,
+                        document_data["data_source"]["type"],
+                        document_data["doc_form"],
+                        document_data["doc_language"],
+                        data_source_info,
+                        created_from,
+                        position,
+                        account,
+                        file_name,
+                        batch,
+                    )
+                    db.session.add(document)
+                    db.session.flush()
+                    document_ids.append(document.id)
+                    documents.append(document)
+                    position += 1
+            elif document_data["data_source"]["type"] == "notion_import":
+                notion_info_list = document_data["data_source"]["info_list"]["notion_info_list"]
+                exist_page_ids = []
+                exist_document = {}
+                documents = Document.query.filter_by(
+                    dataset_id=dataset.id,
+                    tenant_id=current_user.current_tenant_id,
+                    data_source_type="notion_import",
+                    enabled=True,
+                ).all()
+                if documents:
+                    for document in documents:
+                        data_source_info = json.loads(document.data_source_info)
+                        exist_page_ids.append(data_source_info["notion_page_id"])
+                        exist_document[data_source_info["notion_page_id"]] = document.id
+                for notion_info in notion_info_list:
+                    workspace_id = notion_info["workspace_id"]
+                    data_source_binding = DataSourceOauthBinding.query.filter(
+                        db.and_(
+                            DataSourceOauthBinding.tenant_id == current_user.current_tenant_id,
+                            DataSourceOauthBinding.provider == "notion",
+                            DataSourceOauthBinding.disabled == False,
+                            DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"',
+                        )
+                    ).first()
+                    if not data_source_binding:
+                        raise ValueError("Data source binding not found.")
+                    for page in notion_info["pages"]:
+                        if page["page_id"] not in exist_page_ids:
+                            data_source_info = {
+                                "notion_workspace_id": workspace_id,
+                                "notion_page_id": page["page_id"],
+                                "notion_page_icon": page["page_icon"],
+                                "type": page["type"],
+                            }
+                            document = DocumentService.build_document(
+                                dataset,
+                                dataset_process_rule.id,
+                                document_data["data_source"]["type"],
+                                document_data["doc_form"],
+                                document_data["doc_language"],
+                                data_source_info,
+                                created_from,
+                                position,
+                                account,
+                                page["page_name"],
+                                batch,
+                            )
+                            db.session.add(document)
+                            db.session.flush()
+                            document_ids.append(document.id)
+                            documents.append(document)
+                            position += 1
+                        else:
+                            exist_document.pop(page["page_id"])
+                # delete not selected documents
+                if len(exist_document) > 0:
+                    clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
+            elif document_data["data_source"]["type"] == "website_crawl":
+                website_info = document_data["data_source"]["info_list"]["website_info_list"]
+                urls = website_info["urls"]
+                for url in urls:
+                    data_source_info = {
+                        "url": url,
+                        "provider": website_info["provider"],
+                        "job_id": website_info["job_id"],
+                        "only_main_content": website_info.get("only_main_content", False),
+                        "mode": "crawl",
+                    }
+                    if len(url) > 255:
+                        document_name = url[:200] + "..."
+                    else:
+                        document_name = url
+                    document = DocumentService.build_document(
+                        dataset,
+                        dataset_process_rule.id,
+                        document_data["data_source"]["type"],
+                        document_data["doc_form"],
+                        document_data["doc_language"],
+                        data_source_info,
+                        created_from,
+                        position,
+                        account,
+                        document_name,
+                        batch,
+                    )
+                    db.session.add(document)
+                    db.session.flush()
+                    document_ids.append(document.id)
+                    documents.append(document)
+                    position += 1
+            db.session.commit()
+            # trigger async task
+            if document_ids:
+                document_indexing_task.delay(dataset.id, document_ids)
+            if duplicate_document_ids:
+                duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids)
 
         return documents, batch
 
     @staticmethod
     def check_documents_upload_quota(count: int, features: FeatureModel):
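The added lock serializes document creation per dataset, so two concurrent requests can no longer read the same get_documents_position value and insert documents with duplicate positions. Below is a minimal sketch of the same pattern, assuming redis_client is a redis-py Redis instance (whose .lock(name, timeout=...) returns a context-manager lock); the in-memory positions dict is a hypothetical stand-in for the database, not Dify code.

import redis

# Assumed setup: a redis-py client; connection details are illustrative.
redis_client = redis.Redis(host="localhost", port=6379)

# Hypothetical stand-in for the per-dataset position counter in the DB.
positions: dict[str, int] = {}

def add_documents(dataset_id: str, names: list[str]) -> list[tuple[str, int]]:
    """Assign consecutive positions to new documents, one writer at a time."""
    # One lock key per dataset, so writers to different datasets don't block
    # each other; mirrors the lock name used in the diff above.
    lock_name = "add_document_lock_dataset_id_{}".format(dataset_id)
    # timeout=600 makes the lock self-expire if the holder crashes, so a
    # failed worker cannot wedge the dataset forever.
    with redis_client.lock(lock_name, timeout=600):
        position = positions.get(dataset_id, 0)  # read-modify-write section
        created = []
        for name in names:
            created.append((name, position))
            position += 1
        positions[dataset_id] = position
        return created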

View File

@@ -25,7 +25,7 @@ import { TransferMethod } from '@/types/app'
 import { SupportUploadFileTypes } from '@/app/components/workflow/types'
 import type { FileUpload } from '@/app/components/base/features/types'
 import { formatFileSize } from '@/utils/format'
-import { fetchRemoteFileInfo } from '@/service/common'
+import { uploadRemoteFileInfo } from '@/service/common'
 import type { FileUploadConfigResponse } from '@/models/common'
 
 export const useFileSizeLimit = (fileUploadConfig?: FileUploadConfigResponse) => {
@@ -49,7 +49,7 @@ export const useFile = (fileConfig: FileUpload) => {
   const params = useParams()
   const { imgSizeLimit, docSizeLimit, audioSizeLimit, videoSizeLimit } = useFileSizeLimit(fileConfig.fileUploadConfig)
 
-  const checkSizeLimit = (fileType: string, fileSize: number) => {
+  const checkSizeLimit = useCallback((fileType: string, fileSize: number) => {
     switch (fileType) {
       case SupportUploadFileTypes.image: {
         if (fileSize > imgSizeLimit) {
@@ -120,7 +120,7 @@ export const useFile = (fileConfig: FileUpload) => {
         return true
       }
     }
-  }
+  }, [audioSizeLimit, docSizeLimit, imgSizeLimit, notify, t, videoSizeLimit])
 
   const handleAddFile = useCallback((newFile: FileEntity) => {
     const {
@@ -203,13 +203,15 @@ export const useFile = (fileConfig: FileUpload) => {
     }
     handleAddFile(uploadingFile)
-    fetchRemoteFileInfo(url).then((res) => {
+    uploadRemoteFileInfo(url).then((res) => {
       const newFile = {
         ...uploadingFile,
-        type: res.file_type,
-        size: res.file_length,
+        type: res.mime_type,
+        size: res.size,
         progress: 100,
-        supportFileType: getSupportFileType(url, res.file_type, allowedFileTypes?.includes(SupportUploadFileTypes.custom)),
+        supportFileType: getSupportFileType(res.name, res.mime_type, allowedFileTypes?.includes(SupportUploadFileTypes.custom)),
+        uploadedId: res.id,
+        url: res.url,
       }
       if (!checkSizeLimit(newFile.supportFileType, newFile.size))
         handleRemoveFile(uploadingFile.id)

View File

@@ -47,10 +47,8 @@ export const getLocaleOnServer = (): Locale => {
   }
 
   // Validate languages
-  if (!Array.isArray(languages) || languages.length === 0 || !languages.every(lang => typeof lang === 'string' && /^[\w-]+$/.test(lang))) {
-    console.error(`Invalid languages: ${languages}`)
+  if (!Array.isArray(languages) || languages.length === 0 || !languages.every(lang => typeof lang === 'string' && /^[\w-]+$/.test(lang)))
     languages = [i18n.defaultLocale]
-  }
 
   // match locale
   const matchedLocale = match(languages, locales, i18n.defaultLocale) as Locale

View File

@@ -324,9 +324,10 @@ export const verifyForgotPasswordToken: Fetcher<CommonResponse & { is_valid: boo
 export const changePasswordWithToken: Fetcher<CommonResponse, { url: string; body: { token: string; new_password: string; password_confirm: string } }> = ({ url, body }) =>
   post<CommonResponse>(url, { body })
 
-export const fetchRemoteFileInfo = (url: string) => {
-  return get<{ file_type: string; file_length: number }>(`/remote-files/${url}`)
+export const uploadRemoteFileInfo = (url: string) => {
+  return post<{ id: string; name: string; size: number; mime_type: string; url: string }>('/remote-files/upload', { body: { url } })
 }
 
 export const sendEMailLoginCode = (email: string, language = 'en-US') =>
   post<CommonResponse & { data: string }>('/email-code-login', { body: { email, language } })
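The last hunk replaces the GET probe at /remote-files/<url> with a POST to /remote-files/upload, which transfers the remote file server-side and returns metadata for the stored copy. A sketch of that request/response contract exercised directly, in Python: the path, payload, and response fields come from the hunk above, while the base URL and bearer token are assumptions about a local deployment, not part of the diff.

import requests

# Assumed values: the console API origin and a valid access token.
API_BASE = "http://localhost:5001/console/api"
TOKEN = "your-console-access-token"

def upload_remote_file_info(url: str) -> dict:
    """POST /remote-files/upload with {"url": ...}; per the diff, the
    response carries {id, name, size, mime_type, url} for the stored file."""
    resp = requests.post(
        f"{API_BASE}/remote-files/upload",
        json={"url": url},
        headers={"Authorization": f"Bearer {TOKEN}"},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()

# Usage: the returned mime_type/size feed the frontend's size-limit check.
info = upload_remote_file_info("https://example.com/image.png")
print(info["mime_type"], info["size"])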