mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
fix(scrapeURL/pdf): fix llamaparse upload
This commit is contained in:
parent
687ea69621
commit
9ace2ad071
|
@ -81,7 +81,6 @@
|
|||
"escape-html": "^1.0.3",
|
||||
"express-rate-limit": "^7.3.1",
|
||||
"express-ws": "^5.0.2",
|
||||
"form-data": "^4.0.0",
|
||||
"glob": "^10.4.2",
|
||||
"gpt3-tokenizer": "^1.1.5",
|
||||
"ioredis": "^5.4.1",
|
||||
|
|
|
@ -101,9 +101,6 @@ importers:
|
|||
express-ws:
|
||||
specifier: ^5.0.2
|
||||
version: 5.0.2(express@4.19.2)
|
||||
form-data:
|
||||
specifier: ^4.0.0
|
||||
version: 4.0.0
|
||||
glob:
|
||||
specifier: ^10.4.2
|
||||
version: 10.4.2
|
||||
|
@ -3932,8 +3929,8 @@ packages:
|
|||
engines: {node: '>=14.17'}
|
||||
hasBin: true
|
||||
|
||||
typescript@5.6.2:
|
||||
resolution: {integrity: sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==}
|
||||
typescript@5.6.3:
|
||||
resolution: {integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==}
|
||||
engines: {node: '>=14.17'}
|
||||
hasBin: true
|
||||
|
||||
|
@ -7742,7 +7739,7 @@ snapshots:
|
|||
csv-parse: 5.5.6
|
||||
gpt3-tokenizer: 1.1.5
|
||||
openai: 3.3.0
|
||||
typescript: 5.6.2
|
||||
typescript: 5.6.3
|
||||
uuid: 9.0.1
|
||||
zod: 3.23.8
|
||||
transitivePeerDependencies:
|
||||
|
@ -8320,7 +8317,7 @@ snapshots:
|
|||
|
||||
typescript@5.4.5: {}
|
||||
|
||||
typescript@5.6.2: {}
|
||||
typescript@5.6.3: {}
|
||||
|
||||
typesense@1.8.2(@babel/runtime@7.24.6):
|
||||
dependencies:
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import { createReadStream, promises as fs } from "node:fs";
|
||||
import FormData from "form-data";
|
||||
import { Meta } from "../..";
|
||||
import { EngineScrapeResult } from "..";
|
||||
import * as marked from "marked";
|
||||
|
@ -16,10 +15,26 @@ async function scrapePDFWithLlamaParse(meta: Meta, tempFilePath: string): Promis
|
|||
meta.logger.debug("Processing PDF document with LlamaIndex", { tempFilePath });
|
||||
|
||||
const uploadForm = new FormData();
|
||||
uploadForm.append("file", createReadStream(tempFilePath), {
|
||||
filename: tempFilePath,
|
||||
contentType: "application/pdf", // NOTE: request.headers["Content-Type"]?
|
||||
});
|
||||
|
||||
// This is utterly stupid but it works! - mogery
|
||||
uploadForm.append("file", {
|
||||
[Symbol.toStringTag]: "Blob",
|
||||
name: tempFilePath,
|
||||
stream() {
|
||||
return createReadStream(tempFilePath) as unknown as ReadableStream<Uint8Array>
|
||||
},
|
||||
arrayBuffer() {
|
||||
throw Error("Unimplemented in mock Blob: arrayBuffer")
|
||||
},
|
||||
size: (await fs.stat(tempFilePath)).size,
|
||||
text() {
|
||||
throw Error("Unimplemented in mock Blob: text")
|
||||
},
|
||||
slice(start, end, contentType) {
|
||||
throw Error("Unimplemented in mock Blob: slice")
|
||||
},
|
||||
type: "application/pdf",
|
||||
} as Blob);
|
||||
|
||||
const upload = await robustFetch({
|
||||
url: "https://api.cloud.llamaindex.ai/api/parsing/upload",
|
||||
|
|
|
@ -2,7 +2,6 @@ import { Logger } from "winston";
|
|||
import { z, ZodError } from "zod";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import * as Sentry from "@sentry/node";
|
||||
import FormData from "form-data";
|
||||
|
||||
export type RobustFetchParams<Schema extends z.Schema<any>> = {
|
||||
url: string;
|
||||
|
@ -38,14 +37,14 @@ export async function robustFetch<Schema extends z.Schema<any>, Output = z.infer
|
|||
method,
|
||||
headers: {
|
||||
...(body instanceof FormData
|
||||
? body.getHeaders()
|
||||
? ({})
|
||||
: body !== undefined ? ({
|
||||
"Content-Type": "application/json",
|
||||
}) : {}),
|
||||
...(headers !== undefined ? headers : {}),
|
||||
},
|
||||
...(body instanceof FormData ? ({
|
||||
body: body.getBuffer(),
|
||||
body,
|
||||
}) : body !== undefined ? ({
|
||||
body: JSON.stringify(body),
|
||||
}) : {}),
|
||||
|
|
Loading…
Reference in New Issue
Block a user