mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
feat(v1/webhook): complex webhook object w/ headers (#899)
* feat(v1/webhook): complex webhook object w/ headers * feat(js-sdk/crawl): add complex webhook support
This commit is contained in:
parent
ea1302960f
commit
32be2cf786
|
@ -220,11 +220,22 @@ const crawlerOptions = z.object({
|
||||||
|
|
||||||
export type CrawlerOptions = z.infer<typeof crawlerOptions>;
|
export type CrawlerOptions = z.infer<typeof crawlerOptions>;
|
||||||
|
|
||||||
|
export const webhookSchema = z.preprocess(x => {
|
||||||
|
if (typeof x === "string") {
|
||||||
|
return { url: x };
|
||||||
|
} else {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
}, z.object({
|
||||||
|
url: z.string().url(),
|
||||||
|
headers: z.record(z.string(), z.string()).default({}),
|
||||||
|
}).strict(strictMessage))
|
||||||
|
|
||||||
export const crawlRequestSchema = crawlerOptions.extend({
|
export const crawlRequestSchema = crawlerOptions.extend({
|
||||||
url,
|
url,
|
||||||
origin: z.string().optional().default("api"),
|
origin: z.string().optional().default("api"),
|
||||||
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
|
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
|
||||||
webhook: z.string().url().optional(),
|
webhook: webhookSchema.optional(),
|
||||||
limit: z.number().default(10000),
|
limit: z.number().default(10000),
|
||||||
}).strict(strictMessage);
|
}).strict(strictMessage);
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { logger } from "../../src/lib/logger";
|
import { logger } from "../lib/logger";
|
||||||
import { supabase_service } from "./supabase";
|
import { supabase_service } from "./supabase";
|
||||||
import { WebhookEventType } from "../types";
|
import { WebhookEventType } from "../types";
|
||||||
import { configDotenv } from "dotenv";
|
import { configDotenv } from "dotenv";
|
||||||
|
import { z } from "zod";
|
||||||
|
import { webhookSchema } from "../controllers/v1/types";
|
||||||
configDotenv();
|
configDotenv();
|
||||||
|
|
||||||
export const callWebhook = async (
|
export const callWebhook = async (
|
||||||
teamId: string,
|
teamId: string,
|
||||||
id: string,
|
id: string,
|
||||||
data: any | null,
|
data: any | null,
|
||||||
specified?: string,
|
specified?: z.infer<typeof webhookSchema>,
|
||||||
v1 = false,
|
v1 = false,
|
||||||
eventType: WebhookEventType = "crawl.page",
|
eventType: WebhookEventType = "crawl.page",
|
||||||
awaitWebhook: boolean = false
|
awaitWebhook: boolean = false
|
||||||
|
@ -20,7 +22,7 @@ export const callWebhook = async (
|
||||||
id
|
id
|
||||||
);
|
);
|
||||||
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
|
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
|
||||||
let webhookUrl = specified ?? selfHostedUrl;
|
let webhookUrl = specified ?? (selfHostedUrl ? webhookSchema.parse({ url: selfHostedUrl }) : undefined);
|
||||||
|
|
||||||
// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
|
// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
|
||||||
// and the USE_DB_AUTHENTICATION environment variable is set to true
|
// and the USE_DB_AUTHENTICATION environment variable is set to true
|
||||||
|
@ -73,7 +75,7 @@ export const callWebhook = async (
|
||||||
if (awaitWebhook) {
|
if (awaitWebhook) {
|
||||||
try {
|
try {
|
||||||
await axios.post(
|
await axios.post(
|
||||||
webhookUrl,
|
webhookUrl.url,
|
||||||
{
|
{
|
||||||
success: !v1
|
success: !v1
|
||||||
? data.success
|
? data.success
|
||||||
|
@ -92,6 +94,7 @@ export const callWebhook = async (
|
||||||
{
|
{
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
|
...webhookUrl.headers,
|
||||||
},
|
},
|
||||||
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
|
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
|
||||||
}
|
}
|
||||||
|
@ -104,7 +107,7 @@ export const callWebhook = async (
|
||||||
} else {
|
} else {
|
||||||
axios
|
axios
|
||||||
.post(
|
.post(
|
||||||
webhookUrl,
|
webhookUrl.url,
|
||||||
{
|
{
|
||||||
success: !v1
|
success: !v1
|
||||||
? data.success
|
? data.success
|
||||||
|
@ -123,6 +126,7 @@ export const callWebhook = async (
|
||||||
{
|
{
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
|
...webhookUrl.headers,
|
||||||
},
|
},
|
||||||
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
|
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document } from "./controllers/v1/types";
|
import { z } from "zod";
|
||||||
|
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document, webhookSchema } from "./controllers/v1/types";
|
||||||
import { ExtractorOptions, Document } from "./lib/entities";
|
import { ExtractorOptions, Document } from "./lib/entities";
|
||||||
import { InternalOptions } from "./scraper/scrapeURL";
|
import { InternalOptions } from "./scraper/scrapeURL";
|
||||||
|
|
||||||
|
@ -33,7 +34,7 @@ export interface WebScraperOptions {
|
||||||
origin?: string;
|
origin?: string;
|
||||||
crawl_id?: string;
|
crawl_id?: string;
|
||||||
sitemapped?: boolean;
|
sitemapped?: boolean;
|
||||||
webhook?: string;
|
webhook?: z.infer<typeof webhookSchema>;
|
||||||
v1?: boolean;
|
v1?: boolean;
|
||||||
is_scrape?: boolean;
|
is_scrape?: boolean;
|
||||||
}
|
}
|
||||||
|
|
|
@ -153,7 +153,10 @@ export interface CrawlParams {
|
||||||
allowExternalLinks?: boolean;
|
allowExternalLinks?: boolean;
|
||||||
ignoreSitemap?: boolean;
|
ignoreSitemap?: boolean;
|
||||||
scrapeOptions?: CrawlScrapeOptions;
|
scrapeOptions?: CrawlScrapeOptions;
|
||||||
webhook?: string;
|
webhook?: string | {
|
||||||
|
url: string;
|
||||||
|
headers?: Record<string, string>;
|
||||||
|
};
|
||||||
deduplicateSimilarURLs?: boolean;
|
deduplicateSimilarURLs?: boolean;
|
||||||
ignoreQueryParameters?: boolean;
|
ignoreQueryParameters?: boolean;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user