mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge branch 'main' into v1-webscraper
This commit is contained in:
commit
e7f267b6fe
4
.github/workflows/fly-direct.yml
vendored
4
.github/workflows/fly-direct.yml
vendored
|
@ -22,6 +22,7 @@ env:
|
|||
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
||||
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
||||
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
|
@ -30,8 +31,9 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: superfly/flyctl-actions/setup-flyctl@master
|
||||
- run: flyctl deploy --remote-only -a firecrawl-scraper-js
|
||||
- run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
|
||||
working-directory: ./apps/api
|
||||
env:
|
||||
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
|
|
4
.github/workflows/fly.yml
vendored
4
.github/workflows/fly.yml
vendored
|
@ -26,6 +26,7 @@ env:
|
|||
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
|
||||
jobs:
|
||||
pre-deploy-e2e-tests:
|
||||
|
@ -211,11 +212,12 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: superfly/flyctl-actions/setup-flyctl@master
|
||||
- run: flyctl deploy --remote-only -a firecrawl-scraper-js
|
||||
- run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
|
||||
working-directory: ./apps/api
|
||||
env:
|
||||
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
||||
|
||||
build-and-publish-python-sdk:
|
||||
name: Build and publish Python SDK
|
||||
|
|
|
@ -12,8 +12,10 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-l
|
|||
FROM base AS build
|
||||
RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
|
||||
|
||||
RUN apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates
|
||||
RUN pnpm install
|
||||
RUN pnpm run build
|
||||
RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \
|
||||
bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi'
|
||||
|
||||
# Install packages needed for deployment
|
||||
|
||||
|
|
|
@ -9,7 +9,8 @@
|
|||
"format": "prettier --write \"src/**/*.(js|ts)\"",
|
||||
"flyio": "node dist/src/index.js",
|
||||
"start:dev": "nodemon --exec ts-node src/index.ts",
|
||||
"build": "tsc",
|
||||
"build": "tsc && pnpm sentry:sourcemaps",
|
||||
"build:nosentry": "tsc",
|
||||
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
||||
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
||||
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
|
||||
|
@ -19,8 +20,9 @@
|
|||
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
|
||||
"mongo-docker-console": "docker exec -it mongodb mongosh",
|
||||
"run-example": "npx ts-node src/example.ts",
|
||||
"deploy:fly": "flyctl deploy",
|
||||
"deploy:fly:staging": "fly deploy -c fly.staging.toml"
|
||||
"deploy:fly": "flyctl deploy --build-secret SENTRY_AUTH_TOKEN=$(dotenv -p SENTRY_AUTH_TOKEN)",
|
||||
"deploy:fly:staging": "fly deploy -c fly.staging.toml",
|
||||
"sentry:sourcemaps": "sentry-cli sourcemaps inject --org caleb-peffer --project firecrawl-scraper-js ./dist && sentry-cli sourcemaps upload --org caleb-peffer --project firecrawl-scraper-js ./dist"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
|
@ -52,10 +54,12 @@
|
|||
"@bull-board/express": "^5.20.5",
|
||||
"@devil7softwares/pos": "^1.0.2",
|
||||
"@dqbd/tiktoken": "^1.0.13",
|
||||
"@hyperdx/node-opentelemetry": "^0.8.0",
|
||||
"@hyperdx/node-opentelemetry": "^0.8.1",
|
||||
"@logtail/node": "^0.4.12",
|
||||
"@nangohq/node": "^0.40.8",
|
||||
"@sentry/node": "^8.13.0",
|
||||
"@sentry/cli": "^2.33.1",
|
||||
"@sentry/node": "^8.26.0",
|
||||
"@sentry/profiling-node": "^8.26.0",
|
||||
"@supabase/supabase-js": "^2.44.2",
|
||||
"@types/express-ws": "^3.0.4",
|
||||
"@types/ws": "^8.5.12",
|
||||
|
@ -72,6 +76,7 @@
|
|||
"cron-parser": "^4.9.0",
|
||||
"date-fns": "^3.6.0",
|
||||
"dotenv": "^16.3.1",
|
||||
"dotenv-cli": "^7.4.2",
|
||||
"express-rate-limit": "^7.3.1",
|
||||
"express-ws": "^5.0.2",
|
||||
"form-data": "^4.0.0",
|
||||
|
|
|
@ -27,17 +27,23 @@ importers:
|
|||
specifier: ^1.0.13
|
||||
version: 1.0.15
|
||||
'@hyperdx/node-opentelemetry':
|
||||
specifier: ^0.8.0
|
||||
version: 0.8.0
|
||||
specifier: ^0.8.1
|
||||
version: 0.8.1
|
||||
'@logtail/node':
|
||||
specifier: ^0.4.12
|
||||
version: 0.4.21
|
||||
'@nangohq/node':
|
||||
specifier: ^0.40.8
|
||||
version: 0.40.8
|
||||
'@sentry/cli':
|
||||
specifier: ^2.33.1
|
||||
version: 2.33.1
|
||||
'@sentry/node':
|
||||
specifier: ^8.13.0
|
||||
version: 8.13.0
|
||||
specifier: ^8.26.0
|
||||
version: 8.26.0
|
||||
'@sentry/profiling-node':
|
||||
specifier: ^8.26.0
|
||||
version: 8.26.0
|
||||
'@supabase/supabase-js':
|
||||
specifier: ^2.44.2
|
||||
version: 2.44.2
|
||||
|
@ -86,6 +92,9 @@ importers:
|
|||
dotenv:
|
||||
specifier: ^16.3.1
|
||||
version: 16.4.5
|
||||
dotenv-cli:
|
||||
specifier: ^7.4.2
|
||||
version: 7.4.2
|
||||
express-rate-limit:
|
||||
specifier: ^7.3.1
|
||||
version: 7.3.1(express@4.19.2)
|
||||
|
@ -511,8 +520,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@hyperdx/node-opentelemetry@0.8.0':
|
||||
resolution: {integrity: sha512-2z1jQqg2czctHOgo17WETUJOX2BJJ2jqg50R/z4o4ADRCS7Ynp4n3eVMLtsJHypQeDdrInUDE0VtVoXN5b+6hw==}
|
||||
'@hyperdx/node-opentelemetry@0.8.1':
|
||||
resolution: {integrity: sha512-wNw0yQf54j/9KXVWeEOu8G6C5FT5EFlrz4dcmscTkwCvo6fQOLRZa/NbGcqugt0LSFMc0/6/Q5RDWVqDpEn0LQ==}
|
||||
hasBin: true
|
||||
|
||||
'@ioredis/commands@1.2.0':
|
||||
|
@ -815,8 +824,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-connect@0.37.0':
|
||||
resolution: {integrity: sha512-SeQktDIH5rNzjiEiazWiJAIXkmnLOnNV7wwHpahrqE0Ph+Z3heqMfxRtoMtbdJSIYLfcNZYO51AjxZ00IXufdw==}
|
||||
'@opentelemetry/instrumentation-connect@0.38.0':
|
||||
resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -845,8 +854,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-express@0.40.1':
|
||||
resolution: {integrity: sha512-+RKMvVe2zw3kIXRup9c1jFu3T4d0fs5aKy015TpiMyoCKX1UMu3Z0lfgYtuyiSTANvg5hZnDbWmQmqSPj9VTvg==}
|
||||
'@opentelemetry/instrumentation-express@0.41.1':
|
||||
resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -857,8 +866,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-fastify@0.37.0':
|
||||
resolution: {integrity: sha512-WRjwzNZgupSzbEYvo9s+QuHJRqZJjVdNxSEpGBwWK8RKLlHGwGVAu0gcc2gPamJWUJsGqPGvahAPWM18ZkWj6A==}
|
||||
'@opentelemetry/instrumentation-fastify@0.38.0':
|
||||
resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -869,6 +878,12 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-fs@0.14.0':
|
||||
resolution: {integrity: sha512-pVc8P5AgliC1DphyyBUgsxXlm2XaPH4BpYvt7rAZDMIqUpRk8gs19SioABtKqqxvFzg5jPtgJfJsdxq0Y+maLw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-generic-pool@0.36.0':
|
||||
resolution: {integrity: sha512-CExAEqJvK8jYxrhN8cl6EaGg57EGJi+qsSKouLC5lndXi68gZLOKbZIMZg4pF0kNfp/D4BFaGmA6Ap7d5WoPTw==}
|
||||
engines: {node: '>=14'}
|
||||
|
@ -881,8 +896,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-graphql@0.41.0':
|
||||
resolution: {integrity: sha512-R/gXeljgIhaRDKquVkKYT5QHPnFouM8ooyePZEP0kqyaVAedtR1V7NfAUJbxfTG5fBQa5wdmLjvu63+tzRXZCA==}
|
||||
'@opentelemetry/instrumentation-graphql@0.42.0':
|
||||
resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -899,8 +914,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-hapi@0.39.0':
|
||||
resolution: {integrity: sha512-ik2nA9Yj2s2ay+aNY+tJsKCsEx6Tsc2g/MK0iWBW5tibwrWKTy1pdVt5sB3kd5Gkimqj23UV5+FH2JFcQLeKug==}
|
||||
'@opentelemetry/instrumentation-hapi@0.40.0':
|
||||
resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -923,8 +938,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-ioredis@0.41.0':
|
||||
resolution: {integrity: sha512-rxiLloU8VyeJGm5j2fZS8ShVdB82n7VNP8wTwfUQqDwRfHCnkzGr+buKoxuhGD91gtwJ91RHkjHA1Eg6RqsUTg==}
|
||||
'@opentelemetry/instrumentation-ioredis@0.42.0':
|
||||
resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -941,8 +956,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-koa@0.41.0':
|
||||
resolution: {integrity: sha512-mbPnDt7ELvpM2S0vixYUsde7122lgegLOJQxx8iJQbB8YHal/xnTh9v7IfArSVzIDo+E+080hxZyUZD4boOWkw==}
|
||||
'@opentelemetry/instrumentation-koa@0.42.0':
|
||||
resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -965,8 +980,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-mongodb@0.45.0':
|
||||
resolution: {integrity: sha512-xnZP9+ayeB1JJyNE9cIiwhOJTzNEsRhXVdLgfzmrs48Chhhk026mQdM5CITfyXSCfN73FGAIB8d91+pflJEfWQ==}
|
||||
'@opentelemetry/instrumentation-mongodb@0.46.0':
|
||||
resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -977,8 +992,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-mongoose@0.39.0':
|
||||
resolution: {integrity: sha512-J1r66A7zJklPPhMtrFOO7/Ud2p0Pv5u8+r23Cd1JUH6fYPmftNJVsLp2urAt6PHK4jVqpP/YegN8wzjJ2mZNPQ==}
|
||||
'@opentelemetry/instrumentation-mongoose@0.40.0':
|
||||
resolution: {integrity: sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -989,8 +1004,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-mysql2@0.39.0':
|
||||
resolution: {integrity: sha512-Iypuq2z6TCfriAXCIZjRq8GTFCKhQv5SpXbmI+e60rYdXw8NHtMH4NXcGF0eKTuoCsC59IYSTUvDQYDKReaszA==}
|
||||
'@opentelemetry/instrumentation-mysql2@0.40.0':
|
||||
resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1001,8 +1016,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-mysql@0.39.0':
|
||||
resolution: {integrity: sha512-8snHPh83rhrDf31v9Kq0Nf+ts8hdr7NguuszRqZomZBHgE0+UyXZSkXHAAFZoBPPRMGyM68uaFE5hVtFl+wOcA==}
|
||||
'@opentelemetry/instrumentation-mysql@0.40.0':
|
||||
resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1013,8 +1028,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-nestjs-core@0.38.0':
|
||||
resolution: {integrity: sha512-M381Df1dM8aqihZz2yK+ugvMFK5vlHG/835dc67Sx2hH4pQEQYDA2PpFPTgc9AYYOydQaj7ClFQunESimjXDgg==}
|
||||
'@opentelemetry/instrumentation-nestjs-core@0.39.0':
|
||||
resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1031,8 +1046,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-pg@0.42.0':
|
||||
resolution: {integrity: sha512-sjgcM8CswYy8zxHgXv4RAZ09DlYhQ+9TdlourUs63Df/ek5RrB1ZbjznqW7PB6c3TyJJmX6AVtPTjAsROovEjA==}
|
||||
'@opentelemetry/instrumentation-pg@0.43.0':
|
||||
resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1049,8 +1064,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation-redis-4@0.40.0':
|
||||
resolution: {integrity: sha512-0ieQYJb6yl35kXA75LQUPhHtGjtQU9L85KlWa7d4ohBbk/iQKZ3X3CFl5jC5vNMq/GGPB3+w3IxNvALlHtrp7A==}
|
||||
'@opentelemetry/instrumentation-redis-4@0.41.0':
|
||||
resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1103,8 +1118,8 @@ packages:
|
|||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
||||
'@opentelemetry/instrumentation@0.43.0':
|
||||
resolution: {integrity: sha512-S1uHE+sxaepgp+t8lvIDuRgyjJWisAb733198kwQTUc9ZtYQ2V2gmyCtR1x21ePGVLoMiX/NWY7WA290hwkjJQ==}
|
||||
'@opentelemetry/instrumentation@0.46.0':
|
||||
resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==}
|
||||
engines: {node: '>=14'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.3.0
|
||||
|
@ -1282,8 +1297,8 @@ packages:
|
|||
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
|
||||
engines: {node: '>=14'}
|
||||
|
||||
'@prisma/instrumentation@5.16.0':
|
||||
resolution: {integrity: sha512-MVzNRW2ikWvVNnMIEgQMcwWxpFD+XF2U2h0Qz7MjutRqJxrhWexWV2aSi2OXRaU8UL5wzWw7pnjdKUzYhWauLg==}
|
||||
'@prisma/instrumentation@5.17.0':
|
||||
resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==}
|
||||
|
||||
'@protobufjs/aspromise@1.1.2':
|
||||
resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
|
||||
|
@ -1356,16 +1371,66 @@ packages:
|
|||
'@selderee/plugin-htmlparser2@0.11.0':
|
||||
resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==}
|
||||
|
||||
'@sentry/cli-darwin@2.33.1':
|
||||
resolution: {integrity: sha512-+4/VIx/E1L2hChj5nGf5MHyEPHUNHJ/HoG5RY+B+vyEutGily1c1+DM2bum7RbD0xs6wKLIyup5F02guzSzG8A==}
|
||||
engines: {node: '>=10'}
|
||||
os: [darwin]
|
||||
|
||||
'@sentry/cli-linux-arm64@2.33.1':
|
||||
resolution: {integrity: sha512-DbGV56PRKOLsAZJX27Jt2uZ11QfQEMmWB4cIvxkKcFVE+LJP4MVA+MGGRUL6p+Bs1R9ZUuGbpKGtj0JiG6CoXw==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [arm64]
|
||||
os: [linux, freebsd]
|
||||
|
||||
'@sentry/cli-linux-arm@2.33.1':
|
||||
resolution: {integrity: sha512-zbxEvQju+tgNvzTOt635le4kS/Fbm2XC2RtYbCTs034Vb8xjrAxLnK0z1bQnStUV8BkeBHtsNVrG+NSQDym2wg==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [arm]
|
||||
os: [linux, freebsd]
|
||||
|
||||
'@sentry/cli-linux-i686@2.33.1':
|
||||
resolution: {integrity: sha512-g2LS4oPXkPWOfKWukKzYp4FnXVRRSwBxhuQ9eSw2peeb58ZIObr4YKGOA/8HJRGkooBJIKGaAR2mH2Pk1TKaiA==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [x86, ia32]
|
||||
os: [linux, freebsd]
|
||||
|
||||
'@sentry/cli-linux-x64@2.33.1':
|
||||
resolution: {integrity: sha512-IV3dcYV/ZcvO+VGu9U6kuxSdbsV2kzxaBwWUQxtzxJ+cOa7J8Hn1t0koKGtU53JVZNBa06qJWIcqgl4/pCuKIg==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [x64]
|
||||
os: [linux, freebsd]
|
||||
|
||||
'@sentry/cli-win32-i686@2.33.1':
|
||||
resolution: {integrity: sha512-F7cJySvkpzIu7fnLKNHYwBzZYYwlhoDbAUnaFX0UZCN+5DNp/5LwTp37a5TWOsmCaHMZT4i9IO4SIsnNw16/zQ==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [x86, ia32]
|
||||
os: [win32]
|
||||
|
||||
'@sentry/cli-win32-x64@2.33.1':
|
||||
resolution: {integrity: sha512-8VyRoJqtb2uQ8/bFRKNuACYZt7r+Xx0k2wXRGTyH05lCjAiVIXn7DiS2BxHFty7M1QEWUCMNsb/UC/x/Cu2wuA==}
|
||||
engines: {node: '>=10'}
|
||||
cpu: [x64]
|
||||
os: [win32]
|
||||
|
||||
'@sentry/cli@2.33.1':
|
||||
resolution: {integrity: sha512-dUlZ4EFh98VFRPJ+f6OW3JEYQ7VvqGNMa0AMcmvk07ePNeK/GicAWmSQE4ZfJTTl80ul6HZw1kY01fGQOQlVRA==}
|
||||
engines: {node: '>= 10'}
|
||||
hasBin: true
|
||||
|
||||
'@sentry/core@8.13.0':
|
||||
resolution: {integrity: sha512-N9Qg4ZGxZWp8eb2eUUHVVKgjBLtFIjS805nG92s6yJmkvOpKm6mLtcUaT/iDf3Hta6nG+xRkhbE3r+Z4cbXG8w==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/node@8.13.0':
|
||||
resolution: {integrity: sha512-OeZ7K90RhyxfwfreerIi4cszzHrPRRH36STJno2+p3sIGbG5VScOccqXzYEOAqHpByxnti4KQN34BLAT2BFOEA==}
|
||||
'@sentry/core@8.26.0':
|
||||
resolution: {integrity: sha512-g/tVmTZD4GNbLFf++hKJfBpcCAtduFEMLnbfa9iT/QEZjlmP+EzY+GsH9bafM5VsNe8DiOUp+kJKWtShzlVdBA==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/opentelemetry@8.13.0':
|
||||
resolution: {integrity: sha512-NYn/HNE/SxFXe8pfnxJknhrrRzYRMHNssCoi5M1CeR5G7F2BGxxVmaGsd8j0WyTCpUS4i97G4vhYtDGxHvWN6w==}
|
||||
'@sentry/node@8.26.0':
|
||||
resolution: {integrity: sha512-N9mNLzicnfGgsq6P10ckPdTzEFusjTC7gpqPopwq5eEMF7g798hH8CcE5o6FZ4iAAR3vWliAR/jgccdoMmJMpQ==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/opentelemetry@8.26.0':
|
||||
resolution: {integrity: sha512-HBDheM/+ysfIz8R1OH4bBIxdgD7ZbQkKLJAUXkdAbBcfbpK/CTtwcplbauF5wY7Q+GYvwL/ShuDwvXRfW+gFyQ==}
|
||||
engines: {node: '>=14.18'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.9.0
|
||||
|
@ -1374,14 +1439,27 @@ packages:
|
|||
'@opentelemetry/sdk-trace-base': ^1.25.1
|
||||
'@opentelemetry/semantic-conventions': ^1.25.1
|
||||
|
||||
'@sentry/profiling-node@8.26.0':
|
||||
resolution: {integrity: sha512-yGHFoqSKe5j9fDK9n5ntJxDyZnedwjCm6fAXwIlsLJOUBqn5g7l8V1XgBPlCJLZzOG0fbvGvSo4WyBfDoSD8vQ==}
|
||||
engines: {node: '>=14.18'}
|
||||
hasBin: true
|
||||
|
||||
'@sentry/types@8.13.0':
|
||||
resolution: {integrity: sha512-r63s/H5gvQnQM9tTGBXz2xErUbxZALh4e2Lg/1aHj4zIvGLBjA2z5qWsh6TEZYbpmgAyGShLDr6+rWeUVf9yBQ==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/types@8.26.0':
|
||||
resolution: {integrity: sha512-zKmh6SWsJh630rpt7a9vP4Cm4m1C2gDTUqUiH565CajCL/4cePpNWYrNwalSqsOSL7B9OrczA1+n6a6XvND+ng==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/utils@8.13.0':
|
||||
resolution: {integrity: sha512-PxV0v9VbGWH9zP37P5w2msLUFDr287nYjoY2XVF+RSolyiTs1CQNI5ZMUO3o4MsSac/dpXxjyrZXQd72t/jRYA==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sentry/utils@8.26.0':
|
||||
resolution: {integrity: sha512-xvlPU9Hd2BlyT+FhWHGNwnxWqdVRk2AHnDtVcW4Ma0Ri5EwS+uy4Jeik5UkSv8C5RVb9VlxFmS8LN3I1MPJsLw==}
|
||||
engines: {node: '>=14.18'}
|
||||
|
||||
'@sinclair/typebox@0.27.8':
|
||||
resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==}
|
||||
|
||||
|
@ -1734,6 +1812,10 @@ packages:
|
|||
afinn-165@1.0.4:
|
||||
resolution: {integrity: sha512-7+Wlx3BImrK0HiG6y3lU4xX7SpBPSSu8T9iguPMlaueRFxjbYwAQrp9lqZUuFikqKbd/en8lVREILvP2J80uJA==}
|
||||
|
||||
agent-base@6.0.2:
|
||||
resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
|
||||
engines: {node: '>= 6.0.0'}
|
||||
|
||||
agent-base@7.1.1:
|
||||
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
|
||||
engines: {node: '>= 14'}
|
||||
|
@ -2303,6 +2385,14 @@ packages:
|
|||
domutils@3.1.0:
|
||||
resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==}
|
||||
|
||||
dotenv-cli@7.4.2:
|
||||
resolution: {integrity: sha512-SbUj8l61zIbzyhIbg0FwPJq6+wjbzdn9oEtozQpZ6kW2ihCcapKVZj49oCT3oPM+mgQm+itgvUQcG5szxVrZTA==}
|
||||
hasBin: true
|
||||
|
||||
dotenv-expand@10.0.0:
|
||||
resolution: {integrity: sha512-GopVGCpVS1UKH75VKHGuQFqS1Gusej0z4FyQkPdwjil2gNIv+LNsqBlboOzpJFZKVT95GkCyWJbBSdFEFUWI2A==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
dotenv@16.4.5:
|
||||
resolution: {integrity: sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==}
|
||||
engines: {node: '>=12'}
|
||||
|
@ -2684,6 +2774,10 @@ packages:
|
|||
engines: {node: '>=12'}
|
||||
hasBin: true
|
||||
|
||||
https-proxy-agent@5.0.1:
|
||||
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
https-proxy-agent@7.0.4:
|
||||
resolution: {integrity: sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==}
|
||||
engines: {node: '>= 14'}
|
||||
|
@ -2720,15 +2814,15 @@ packages:
|
|||
resolution: {integrity: sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==}
|
||||
engines: {node: '>=6'}
|
||||
|
||||
import-in-the-middle@1.4.2:
|
||||
resolution: {integrity: sha512-9WOz1Yh/cvO/p69sxRmhyQwrIGGSp7EIdcb+fFNVi7CzQGQB8U1/1XrKVSbEd/GNOAeM0peJtmi7+qphe7NvAw==}
|
||||
import-in-the-middle@1.11.0:
|
||||
resolution: {integrity: sha512-5DimNQGoe0pLUHbR9qK84iWaWjjbsxiqXnw6Qz64+azRgleqv9k2kTt5fw7QsOpmaGYtuxxursnPPsnTKEx10Q==}
|
||||
|
||||
import-in-the-middle@1.7.1:
|
||||
resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==}
|
||||
|
||||
import-in-the-middle@1.7.4:
|
||||
resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==}
|
||||
|
||||
import-in-the-middle@1.8.1:
|
||||
resolution: {integrity: sha512-yhRwoHtiLGvmSozNOALgjRPFI6uYsds60EoMqqnXyyv+JOIW/BrrLejuTGBt+bq0T5tLzOHrN0T7xYTm4Qt/ng==}
|
||||
|
||||
import-local@3.1.0:
|
||||
resolution: {integrity: sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==}
|
||||
engines: {node: '>=8'}
|
||||
|
@ -3548,6 +3642,10 @@ packages:
|
|||
resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==}
|
||||
engines: {node: '>= 0.4.0'}
|
||||
|
||||
node-abi@3.67.0:
|
||||
resolution: {integrity: sha512-bLn/fU/ALVBE9wj+p4Y21ZJWYFjUXLXPi/IewyLZkx3ApxKDNBWCKdReeKOtD8dWpOdDCeMyLh6ZewzcLsG2Nw==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
node-abort-controller@3.1.1:
|
||||
resolution: {integrity: sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==}
|
||||
|
||||
|
@ -3646,9 +3744,11 @@ packages:
|
|||
resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==}
|
||||
hasBin: true
|
||||
|
||||
opentelemetry-instrumentation-fetch-node@1.2.0:
|
||||
resolution: {integrity: sha512-aiSt/4ubOTyb1N5C2ZbGrBvaJOXIZhZvpRPYuUVxQJe27wJZqf/o65iPrqgLcgfeOLaQ8cS2Q+762jrYvniTrA==}
|
||||
opentelemetry-instrumentation-fetch-node@1.2.3:
|
||||
resolution: {integrity: sha512-Qb11T7KvoCevMaSeuamcLsAD+pZnavkhDnlVL0kRozfhl42dKG5Q3anUklAFKJZjY3twLR+BnRa6DlwwkIE/+A==}
|
||||
engines: {node: '>18.0.0'}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': ^1.6.0
|
||||
|
||||
option@0.2.4:
|
||||
resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==}
|
||||
|
@ -4971,7 +5071,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@hyperdx/node-opentelemetry@0.8.0':
|
||||
'@hyperdx/node-opentelemetry@0.8.1':
|
||||
dependencies:
|
||||
'@hyperdx/instrumentation-exception': 0.1.0(@opentelemetry/api@1.9.0)
|
||||
'@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0)
|
||||
|
@ -4996,6 +5096,7 @@ snapshots:
|
|||
lodash.isobject: 3.0.2
|
||||
lodash.isplainobject: 4.0.6
|
||||
lodash.isstring: 4.0.1
|
||||
node-fetch: 2.7.0
|
||||
open: 8.4.2
|
||||
ora: 5.4.1
|
||||
pino-abstract-transport: 1.2.0
|
||||
|
@ -5518,7 +5619,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-connect@0.37.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5561,7 +5662,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-express@0.40.1(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5579,7 +5680,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-fastify@0.37.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5596,6 +5697,14 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-fs@0.14.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-generic-pool@0.36.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
|
@ -5611,7 +5720,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-graphql@0.41.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5635,7 +5744,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-hapi@0.39.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5673,7 +5782,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-ioredis@0.41.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5701,14 +5810,12 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-koa@0.41.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.25.1
|
||||
'@types/koa': 2.14.0
|
||||
'@types/koa__router': 12.0.3
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
|
@ -5737,7 +5844,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-mongodb@0.45.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5755,7 +5862,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-mongoose@0.39.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5773,7 +5880,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-mysql2@0.39.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5791,7 +5898,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-mysql@0.39.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5808,7 +5915,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-nestjs-core@0.38.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5835,7 +5942,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-pg@0.42.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5862,7 +5969,7 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation-redis-4@0.40.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -5937,11 +6044,11 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@opentelemetry/instrumentation@0.43.0(@opentelemetry/api@1.9.0)':
|
||||
'@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@types/shimmer': 1.0.5
|
||||
import-in-the-middle: 1.4.2
|
||||
import-in-the-middle: 1.7.1
|
||||
require-in-the-middle: 7.3.0
|
||||
semver: 7.6.2
|
||||
shimmer: 1.2.1
|
||||
|
@ -5966,7 +6073,7 @@ snapshots:
|
|||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/api-logs': 0.52.1
|
||||
'@types/shimmer': 1.0.5
|
||||
import-in-the-middle: 1.8.1
|
||||
import-in-the-middle: 1.11.0
|
||||
require-in-the-middle: 7.3.0
|
||||
semver: 7.6.2
|
||||
shimmer: 1.2.1
|
||||
|
@ -6156,7 +6263,7 @@ snapshots:
|
|||
'@pkgjs/parseargs@0.11.0':
|
||||
optional: true
|
||||
|
||||
'@prisma/instrumentation@5.16.0':
|
||||
'@prisma/instrumentation@5.17.0':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
|
@ -6239,62 +6346,126 @@ snapshots:
|
|||
domhandler: 5.0.3
|
||||
selderee: 0.11.0
|
||||
|
||||
'@sentry/cli-darwin@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-linux-arm64@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-linux-arm@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-linux-i686@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-linux-x64@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-win32-i686@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli-win32-x64@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli@2.33.1':
|
||||
dependencies:
|
||||
https-proxy-agent: 5.0.1
|
||||
node-fetch: 2.7.0
|
||||
progress: 2.0.3
|
||||
proxy-from-env: 1.1.0
|
||||
which: 2.0.2
|
||||
optionalDependencies:
|
||||
'@sentry/cli-darwin': 2.33.1
|
||||
'@sentry/cli-linux-arm': 2.33.1
|
||||
'@sentry/cli-linux-arm64': 2.33.1
|
||||
'@sentry/cli-linux-i686': 2.33.1
|
||||
'@sentry/cli-linux-x64': 2.33.1
|
||||
'@sentry/cli-win32-i686': 2.33.1
|
||||
'@sentry/cli-win32-x64': 2.33.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- supports-color
|
||||
|
||||
'@sentry/core@8.13.0':
|
||||
dependencies:
|
||||
'@sentry/types': 8.13.0
|
||||
'@sentry/utils': 8.13.0
|
||||
|
||||
'@sentry/node@8.13.0':
|
||||
'@sentry/core@8.26.0':
|
||||
dependencies:
|
||||
'@sentry/types': 8.26.0
|
||||
'@sentry/utils': 8.26.0
|
||||
|
||||
'@sentry/node@8.26.0':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/context-async-hooks': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-connect': 0.37.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-express': 0.40.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-fastify': 0.37.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-graphql': 0.41.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-hapi': 0.39.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-connect': 0.38.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-express': 0.41.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-fastify': 0.38.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-fs': 0.14.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-graphql': 0.42.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-hapi': 0.40.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-http': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-ioredis': 0.41.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-koa': 0.41.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mongodb': 0.45.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mongoose': 0.39.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mysql': 0.39.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mysql2': 0.39.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-nestjs-core': 0.38.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-pg': 0.42.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-redis-4': 0.40.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-ioredis': 0.42.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-koa': 0.42.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mongodb': 0.46.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mongoose': 0.40.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mysql': 0.40.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-mysql2': 0.40.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-nestjs-core': 0.39.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-pg': 0.43.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation-redis-4': 0.41.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.25.1
|
||||
'@prisma/instrumentation': 5.16.0
|
||||
'@sentry/core': 8.13.0
|
||||
'@sentry/opentelemetry': 8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
|
||||
'@sentry/types': 8.13.0
|
||||
'@sentry/utils': 8.13.0
|
||||
'@prisma/instrumentation': 5.17.0
|
||||
'@sentry/core': 8.26.0
|
||||
'@sentry/opentelemetry': 8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
|
||||
'@sentry/types': 8.26.0
|
||||
'@sentry/utils': 8.26.0
|
||||
import-in-the-middle: 1.11.0
|
||||
optionalDependencies:
|
||||
opentelemetry-instrumentation-fetch-node: 1.2.0
|
||||
opentelemetry-instrumentation-fetch-node: 1.2.3(@opentelemetry/api@1.9.0)
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@sentry/opentelemetry@8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
|
||||
'@sentry/opentelemetry@8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.25.1
|
||||
'@sentry/core': 8.13.0
|
||||
'@sentry/types': 8.13.0
|
||||
'@sentry/utils': 8.13.0
|
||||
'@sentry/core': 8.26.0
|
||||
'@sentry/types': 8.26.0
|
||||
'@sentry/utils': 8.26.0
|
||||
|
||||
'@sentry/profiling-node@8.26.0':
|
||||
dependencies:
|
||||
'@sentry/core': 8.26.0
|
||||
'@sentry/node': 8.26.0
|
||||
'@sentry/types': 8.26.0
|
||||
'@sentry/utils': 8.26.0
|
||||
detect-libc: 2.0.3
|
||||
node-abi: 3.67.0
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
'@sentry/types@8.13.0': {}
|
||||
|
||||
'@sentry/types@8.26.0': {}
|
||||
|
||||
'@sentry/utils@8.13.0':
|
||||
dependencies:
|
||||
'@sentry/types': 8.13.0
|
||||
|
||||
'@sentry/utils@8.26.0':
|
||||
dependencies:
|
||||
'@sentry/types': 8.26.0
|
||||
|
||||
'@sinclair/typebox@0.27.8': {}
|
||||
|
||||
'@sinonjs/commons@3.0.1':
|
||||
|
@ -6681,6 +6852,12 @@ snapshots:
|
|||
|
||||
afinn-165@1.0.4: {}
|
||||
|
||||
agent-base@6.0.2:
|
||||
dependencies:
|
||||
debug: 4.3.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
agent-base@7.1.1:
|
||||
dependencies:
|
||||
debug: 4.3.5
|
||||
|
@ -7232,8 +7409,7 @@ snapshots:
|
|||
|
||||
destroy@1.2.0: {}
|
||||
|
||||
detect-libc@2.0.3:
|
||||
optional: true
|
||||
detect-libc@2.0.3: {}
|
||||
|
||||
detect-newline@3.1.0: {}
|
||||
|
||||
|
@ -7270,6 +7446,15 @@ snapshots:
|
|||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
|
||||
dotenv-cli@7.4.2:
|
||||
dependencies:
|
||||
cross-spawn: 7.0.3
|
||||
dotenv: 16.4.5
|
||||
dotenv-expand: 10.0.0
|
||||
minimist: 1.2.8
|
||||
|
||||
dotenv-expand@10.0.0: {}
|
||||
|
||||
dotenv@16.4.5: {}
|
||||
|
||||
duck@0.1.12:
|
||||
|
@ -7708,6 +7893,13 @@ snapshots:
|
|||
- debug
|
||||
- supports-color
|
||||
|
||||
https-proxy-agent@5.0.1:
|
||||
dependencies:
|
||||
agent-base: 6.0.2
|
||||
debug: 4.3.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
https-proxy-agent@7.0.4:
|
||||
dependencies:
|
||||
agent-base: 7.1.1
|
||||
|
@ -7747,7 +7939,14 @@ snapshots:
|
|||
parent-module: 1.0.1
|
||||
resolve-from: 4.0.0
|
||||
|
||||
import-in-the-middle@1.4.2:
|
||||
import-in-the-middle@1.11.0:
|
||||
dependencies:
|
||||
acorn: 8.12.0
|
||||
acorn-import-attributes: 1.9.5(acorn@8.12.0)
|
||||
cjs-module-lexer: 1.3.1
|
||||
module-details-from-path: 1.0.3
|
||||
|
||||
import-in-the-middle@1.7.1:
|
||||
dependencies:
|
||||
acorn: 8.12.0
|
||||
acorn-import-assertions: 1.9.0(acorn@8.12.0)
|
||||
|
@ -7762,13 +7961,6 @@ snapshots:
|
|||
cjs-module-lexer: 1.3.1
|
||||
module-details-from-path: 1.0.3
|
||||
|
||||
import-in-the-middle@1.8.1:
|
||||
dependencies:
|
||||
acorn: 8.12.0
|
||||
acorn-import-attributes: 1.9.5(acorn@8.12.0)
|
||||
cjs-module-lexer: 1.3.1
|
||||
module-details-from-path: 1.0.3
|
||||
|
||||
import-local@3.1.0:
|
||||
dependencies:
|
||||
pkg-dir: 4.2.0
|
||||
|
@ -8647,6 +8839,10 @@ snapshots:
|
|||
|
||||
netmask@2.0.2: {}
|
||||
|
||||
node-abi@3.67.0:
|
||||
dependencies:
|
||||
semver: 7.6.2
|
||||
|
||||
node-abort-controller@3.1.1: {}
|
||||
|
||||
node-domexception@1.0.0: {}
|
||||
|
@ -8749,10 +8945,10 @@ snapshots:
|
|||
|
||||
opener@1.5.2: {}
|
||||
|
||||
opentelemetry-instrumentation-fetch-node@1.2.0:
|
||||
opentelemetry-instrumentation-fetch-node@1.2.3(@opentelemetry/api@1.9.0):
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/instrumentation': 0.43.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/instrumentation': 0.46.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.25.1
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
|
|
@ -15,6 +15,7 @@ import { redlock } from "../../../src/services/redlock";
|
|||
import { getValue } from "../../../src/services/redis";
|
||||
import { setValue } from "../../../src/services/redis";
|
||||
import { validate } from "uuid";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
function normalizedApiIsUuid(potentialUuid: string): boolean {
|
||||
// Check if the string is a valid UUID
|
||||
|
@ -34,6 +35,7 @@ function setTrace(team_id: string, api_key: string) {
|
|||
api_key,
|
||||
});
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(`Error setting trace attributes: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
@ -49,6 +51,7 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
|
|||
api_key: normalizedApi,
|
||||
});
|
||||
if (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`);
|
||||
return {
|
||||
success: false,
|
||||
|
@ -58,7 +61,10 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
|
|||
};
|
||||
}
|
||||
if (!data || data.length === 0) {
|
||||
if (error) {
|
||||
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
|
||||
Sentry.captureException(error);
|
||||
}
|
||||
// TODO: change this error code ?
|
||||
return {
|
||||
success: false,
|
||||
|
@ -152,7 +158,8 @@ export async function supaAuthenticateUser(
|
|||
);
|
||||
}
|
||||
} catch (error) {
|
||||
Logger.error(`Error with auth function: ${error.message}`);
|
||||
Sentry.captureException(error);
|
||||
Logger.error(`Error with auth function: ${error}`);
|
||||
// const {
|
||||
// success,
|
||||
// teamId: tId,
|
||||
|
@ -268,7 +275,7 @@ export async function supaAuthenticateUser(
|
|||
|
||||
return {
|
||||
success: false,
|
||||
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
|
||||
error: `Rate limit exceeded. Consumed (req/min): ${rateLimiterRes.consumedPoints}, Remaining (req/min): ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
|
||||
status: 429,
|
||||
};
|
||||
}
|
||||
|
@ -302,7 +309,10 @@ export async function supaAuthenticateUser(
|
|||
.eq("key", normalizedApi);
|
||||
|
||||
if (error || !data || data.length === 0) {
|
||||
if (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
|
||||
}
|
||||
return {
|
||||
success: false,
|
||||
error: "Unauthorized: Invalid token",
|
||||
|
|
|
@ -4,6 +4,7 @@ import { RateLimiterMode } from "../../../src/types";
|
|||
import { supabase_service } from "../../../src/services/supabase";
|
||||
import { Logger } from "../../../src/lib/logger";
|
||||
import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function crawlCancelController(req: Request, res: Response) {
|
||||
try {
|
||||
|
@ -50,6 +51,7 @@ export async function crawlCancelController(req: Request, res: Response) {
|
|||
status: "cancelled"
|
||||
});
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -4,7 +4,29 @@ import { RateLimiterMode } from "../../../src/types";
|
|||
import { getScrapeQueue } from "../../../src/services/queue-service";
|
||||
import { Logger } from "../../../src/lib/logger";
|
||||
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
|
||||
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
|
||||
import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function getJobs(ids: string[]) {
|
||||
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const supabaseData = await supabaseGetJobsById(ids);
|
||||
|
||||
supabaseData.forEach(x => {
|
||||
const job = jobs.find(y => y.id === x.job_id);
|
||||
if (job) {
|
||||
job.returnvalue = x.docs;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
jobs.forEach(job => {
|
||||
job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
|
||||
});
|
||||
|
||||
return jobs;
|
||||
}
|
||||
|
||||
export async function crawlStatusController(req: Request, res: Response) {
|
||||
try {
|
||||
|
@ -28,19 +50,7 @@ export async function crawlStatusController(req: Request, res: Response) {
|
|||
|
||||
const jobIDs = await getCrawlJobs(req.params.jobId);
|
||||
|
||||
const jobs = (await Promise.all(jobIDs.map(async x => {
|
||||
const job = await getScrapeQueue().getJob(x);
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const supabaseData = await supabaseGetJobById(job.id);
|
||||
|
||||
if (supabaseData) {
|
||||
job.returnvalue = supabaseData.docs;
|
||||
}
|
||||
}
|
||||
|
||||
return job;
|
||||
}))).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
|
||||
|
||||
|
@ -54,6 +64,7 @@ export async function crawlStatusController(req: Request, res: Response) {
|
|||
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
|
||||
});
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ import { Logger } from "../../../src/lib/logger";
|
|||
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
|
||||
import { getScrapeQueue } from "../../../src/services/queue-service";
|
||||
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function crawlController(req: Request, res: Response) {
|
||||
try {
|
||||
|
@ -38,16 +39,50 @@ export async function crawlController(req: Request, res: Response) {
|
|||
}
|
||||
}
|
||||
|
||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||
await checkTeamCredits(team_id, 1);
|
||||
if (!creditsCheckSuccess) {
|
||||
return res.status(402).json({ error: "Insufficient credits" });
|
||||
const crawlerOptions = {
|
||||
...defaultCrawlerOptions,
|
||||
...req.body.crawlerOptions,
|
||||
};
|
||||
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
|
||||
|
||||
if (Array.isArray(crawlerOptions.includes)) {
|
||||
for (const x of crawlerOptions.includes) {
|
||||
try {
|
||||
new RegExp(x);
|
||||
} catch (e) {
|
||||
return res.status(400).json({ error: e.message });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(crawlerOptions.excludes)) {
|
||||
for (const x of crawlerOptions.excludes) {
|
||||
try {
|
||||
new RegExp(x);
|
||||
} catch (e) {
|
||||
return res.status(400).json({ error: e.message });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const limitCheck = req.body?.crawlerOptions?.limit ?? 1;
|
||||
const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } =
|
||||
await checkTeamCredits(team_id, limitCheck);
|
||||
|
||||
if (!creditsCheckSuccess) {
|
||||
return res.status(402).json({ error: "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at hello@firecrawl.com" });
|
||||
}
|
||||
|
||||
// TODO: need to do this to v1
|
||||
crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
|
||||
|
||||
let url = req.body.url;
|
||||
if (!url) {
|
||||
return res.status(400).json({ error: "Url is required" });
|
||||
}
|
||||
if (typeof url !== "string") {
|
||||
return res.status(400).json({ error: "URL must be a string" });
|
||||
}
|
||||
try {
|
||||
url = checkAndUpdateURL(url).url;
|
||||
} catch (e) {
|
||||
|
@ -57,19 +92,12 @@ export async function crawlController(req: Request, res: Response) {
|
|||
}
|
||||
|
||||
if (isUrlBlocked(url)) {
|
||||
return res
|
||||
.status(403)
|
||||
.json({
|
||||
return res.status(403).json({
|
||||
error:
|
||||
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
|
||||
});
|
||||
}
|
||||
|
||||
const mode = req.body.mode ?? "crawl";
|
||||
|
||||
const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
|
||||
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
|
||||
|
||||
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
|
||||
// try {
|
||||
// const a = new WebScraperDataProvider();
|
||||
|
@ -119,10 +147,12 @@ export async function crawlController(req: Request, res: Response) {
|
|||
|
||||
await saveCrawl(id, sc);
|
||||
|
||||
const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
|
||||
const sitemap = sc.crawlerOptions?.ignoreSitemap
|
||||
? null
|
||||
: await crawler.tryGetSitemap();
|
||||
|
||||
if (sitemap !== null) {
|
||||
const jobs = sitemap.map(x => {
|
||||
if (sitemap !== null && sitemap.length > 0) {
|
||||
const jobs = sitemap.map((x) => {
|
||||
const url = x.url;
|
||||
const uuid = uuidv4();
|
||||
return {
|
||||
|
@ -140,16 +170,30 @@ export async function crawlController(req: Request, res: Response) {
|
|||
opts: {
|
||||
jobId: uuid,
|
||||
priority: 20,
|
||||
}
|
||||
},
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
await lockURLs(id, jobs.map(x => x.data.url));
|
||||
await addCrawlJobs(id, jobs.map(x => x.opts.jobId));
|
||||
await lockURLs(
|
||||
id,
|
||||
jobs.map((x) => x.data.url)
|
||||
);
|
||||
await addCrawlJobs(
|
||||
id,
|
||||
jobs.map((x) => x.opts.jobId)
|
||||
);
|
||||
if (Sentry.isInitialized()) {
|
||||
for (const job of jobs) {
|
||||
// add with sentry instrumentation
|
||||
await addScrapeJob(job.data as any, {}, job.opts.jobId);
|
||||
}
|
||||
} else {
|
||||
await getScrapeQueue().addBulk(jobs);
|
||||
}
|
||||
} else {
|
||||
await lockURL(id, sc, url);
|
||||
const job = await addScrapeJob({
|
||||
const job = await addScrapeJob(
|
||||
{
|
||||
url,
|
||||
mode: "single_urls",
|
||||
crawlerOptions: crawlerOptions,
|
||||
|
@ -157,14 +201,17 @@ export async function crawlController(req: Request, res: Response) {
|
|||
pageOptions: pageOptions,
|
||||
origin: req.body.origin ?? defaultOrigin,
|
||||
crawl_id: id,
|
||||
}, {
|
||||
},
|
||||
{
|
||||
priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
|
||||
});
|
||||
}
|
||||
);
|
||||
await addCrawlJob(id, job.id);
|
||||
}
|
||||
|
||||
res.json({ jobId: id });
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import { Logger } from "../../../src/lib/logger";
|
|||
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
|
||||
import { addScrapeJob } from "../../../src/services/queue-jobs";
|
||||
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function crawlPreviewController(req: Request, res: Response) {
|
||||
try {
|
||||
|
@ -129,6 +130,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
|
|||
|
||||
res.json({ jobId: id });
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -9,9 +9,10 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Impo
|
|||
import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
|
||||
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
|
||||
import { addScrapeJob } from '../../services/queue-jobs';
|
||||
import { scrapeQueueEvents } from '../../services/queue-service';
|
||||
import { getScrapeQueue } from '../../services/queue-service';
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from '../../lib/logger';
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function scrapeHelper(
|
||||
jobId: string,
|
||||
|
@ -48,10 +49,24 @@ export async function scrapeHelper(
|
|||
}, {}, jobId);
|
||||
|
||||
let doc;
|
||||
|
||||
const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
|
||||
try {
|
||||
doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]; //60 seconds timeout
|
||||
doc = (await new Promise((resolve, reject) => {
|
||||
const start = Date.now();
|
||||
const int = setInterval(async () => {
|
||||
if (Date.now() >= start + timeout) {
|
||||
clearInterval(int);
|
||||
reject(new Error("Job wait "));
|
||||
} else if (await job.getState() === "completed") {
|
||||
clearInterval(int);
|
||||
resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
|
||||
}
|
||||
}, 1000);
|
||||
}))[0]
|
||||
} catch (e) {
|
||||
if (e instanceof Error && e.message.startsWith("Job wait")) {
|
||||
span.setAttribute("timedOut", true);
|
||||
return {
|
||||
success: false,
|
||||
error: "Request timed out",
|
||||
|
@ -61,6 +76,13 @@ export async function scrapeHelper(
|
|||
throw e;
|
||||
}
|
||||
}
|
||||
span.setAttribute("result", JSON.stringify(doc));
|
||||
return null;
|
||||
});
|
||||
|
||||
if (err !== null) {
|
||||
return err;
|
||||
}
|
||||
|
||||
await job.remove();
|
||||
|
||||
|
@ -112,11 +134,15 @@ export async function scrapeController(req: Request, res: Response) {
|
|||
let timeout = req.body.timeout ?? defaultTimeout;
|
||||
|
||||
if (extractorOptions.mode.includes("llm-extraction")) {
|
||||
if (typeof extractorOptions.extractionSchema !== "object" || extractorOptions.extractionSchema === null) {
|
||||
return res.status(400).json({ error: "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified" });
|
||||
}
|
||||
|
||||
pageOptions.onlyMainContent = true;
|
||||
timeout = req.body.timeout ?? 90000;
|
||||
}
|
||||
|
||||
const checkCredits = async () => {
|
||||
// checkCredits
|
||||
try {
|
||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1);
|
||||
if (!creditsCheckSuccess) {
|
||||
|
@ -128,10 +154,6 @@ export async function scrapeController(req: Request, res: Response) {
|
|||
earlyReturn = true;
|
||||
return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
await checkCredits();
|
||||
|
||||
const jobId = uuidv4();
|
||||
|
||||
|
@ -198,6 +220,7 @@ export async function scrapeController(req: Request, res: Response) {
|
|||
|
||||
return res.status(result.returnCode).json(result);
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -9,7 +9,9 @@ import { search } from "../../search";
|
|||
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from "../../lib/logger";
|
||||
import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
|
||||
import { getScrapeQueue } from "../../services/queue-service";
|
||||
import { addScrapeJob } from "../../services/queue-jobs";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function searchHelper(
|
||||
jobId: string,
|
||||
|
@ -90,22 +92,44 @@ export async function searchHelper(
|
|||
},
|
||||
opts: {
|
||||
jobId: uuid,
|
||||
priority: 10,
|
||||
priority: 20,
|
||||
}
|
||||
};
|
||||
})
|
||||
|
||||
const jobs = await getScrapeQueue().addBulk(jobDatas);
|
||||
let jobs = [];
|
||||
if (Sentry.isInitialized()) {
|
||||
for (const job of jobDatas) {
|
||||
// add with sentry instrumentation
|
||||
jobs.push(await addScrapeJob(job.data as any, {}, job.opts.jobId));
|
||||
}
|
||||
} else {
|
||||
jobs = await getScrapeQueue().addBulk(jobDatas);
|
||||
await getScrapeQueue().addBulk(jobs);
|
||||
}
|
||||
|
||||
const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
|
||||
const docs = (await Promise.all(jobs.map(x => new Promise((resolve, reject) => {
|
||||
const start = Date.now();
|
||||
const int = setInterval(async () => {
|
||||
if (Date.now() >= start + 60000) {
|
||||
clearInterval(int);
|
||||
reject(new Error("Job wait "));
|
||||
} else if (await x.getState() === "completed") {
|
||||
clearInterval(int);
|
||||
resolve((await getScrapeQueue().getJob(x.id)).returnvalue);
|
||||
}
|
||||
}, 1000);
|
||||
})))).map(x => x[0]);
|
||||
|
||||
if (docs.length === 0) {
|
||||
return { success: true, error: "No search results found", returnCode: 200 };
|
||||
}
|
||||
|
||||
await Promise.all(jobs.map(x => x.remove()));
|
||||
|
||||
// make sure doc.content is not empty
|
||||
const filteredDocs = docs.filter(
|
||||
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
|
||||
(doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0
|
||||
);
|
||||
|
||||
if (filteredDocs.length === 0) {
|
||||
|
@ -151,6 +175,7 @@ export async function searchController(req: Request, res: Response) {
|
|||
return res.status(402).json({ error: "Insufficient credits" });
|
||||
}
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
|
@ -181,6 +206,11 @@ export async function searchController(req: Request, res: Response) {
|
|||
});
|
||||
return res.status(result.returnCode).json(result);
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.message.startsWith("Job wait")) {
|
||||
return res.status(408).json({ error: "Request timed out" });
|
||||
}
|
||||
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import { Request, Response } from "express";
|
||||
import { Logger } from "../../../src/lib/logger";
|
||||
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
|
||||
import { getScrapeQueue } from "../../../src/services/queue-service";
|
||||
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
|
||||
import { getJobs } from "./crawl-status";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
export async function crawlJobStatusPreviewController(req: Request, res: Response) {
|
||||
try {
|
||||
|
@ -22,19 +22,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
|
|||
// }
|
||||
// }
|
||||
|
||||
const jobs = (await Promise.all(jobIDs.map(async x => {
|
||||
const job = await getScrapeQueue().getJob(x);
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const supabaseData = await supabaseGetJobById(job.id);
|
||||
|
||||
if (supabaseData) {
|
||||
job.returnvalue = supabaseData.docs;
|
||||
}
|
||||
}
|
||||
|
||||
return job;
|
||||
}))).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
|
||||
|
||||
|
@ -48,6 +36,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
|
|||
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
|
||||
});
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
import "dotenv/config";
|
||||
import "./services/sentry"
|
||||
import * as Sentry from "@sentry/node";
|
||||
import express from "express";
|
||||
import bodyParser from "body-parser";
|
||||
import cors from "cors";
|
||||
import "dotenv/config";
|
||||
import { getScrapeQueue } from "./services/queue-service";
|
||||
import { v0Router } from "./routes/v0";
|
||||
import { initSDK } from "@hyperdx/node-opentelemetry";
|
||||
|
@ -17,6 +19,8 @@ import { v1Router } from "./routes/v1";
|
|||
import expressWs from "express-ws";
|
||||
import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
|
||||
|
||||
|
||||
|
||||
const { createBullBoard } = require("@bull-board/api");
|
||||
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
||||
const { ExpressAdapter } = require("@bull-board/express");
|
||||
|
@ -120,6 +124,7 @@ if (cluster.isMaster) {
|
|||
waitingJobs,
|
||||
});
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.error(error);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
|
@ -171,6 +176,7 @@ if (cluster.isMaster) {
|
|||
}, timeout);
|
||||
}
|
||||
} catch (error) {
|
||||
Sentry.captureException(error);
|
||||
Logger.debug(error);
|
||||
}
|
||||
};
|
||||
|
@ -183,6 +189,8 @@ if (cluster.isMaster) {
|
|||
res.send({ isProduction: global.isProduction });
|
||||
});
|
||||
|
||||
Sentry.setupExpressErrorHandler(app);
|
||||
|
||||
Logger.info(`Worker ${process.pid} started`);
|
||||
}
|
||||
|
||||
|
@ -195,3 +203,5 @@ if (cluster.isMaster) {
|
|||
// sq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
|
||||
// sq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ export async function generateCompletions(
|
|||
return completionResult;
|
||||
} catch (error) {
|
||||
Logger.error(`Error generating completions: ${error}`);
|
||||
throw new Error(`Error generating completions: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
default:
|
||||
throw new Error("Invalid client");
|
||||
|
|
|
@ -15,7 +15,7 @@ const defaultPrompt =
|
|||
function prepareOpenAIDoc(
|
||||
document: Document,
|
||||
mode: "markdown" | "raw-html"
|
||||
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] {
|
||||
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
|
||||
|
||||
let markdown = document.markdown;
|
||||
|
||||
|
@ -27,9 +27,10 @@ function prepareOpenAIDoc(
|
|||
|
||||
// Check if the markdown content exists in the document
|
||||
if (!extractionTarget) {
|
||||
throw new Error(
|
||||
`${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
|
||||
);
|
||||
return null;
|
||||
// throw new Error(
|
||||
// `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
|
||||
// );
|
||||
}
|
||||
|
||||
|
||||
|
@ -64,7 +65,16 @@ export async function generateOpenAICompletions({
|
|||
mode: "markdown" | "raw-html";
|
||||
}): Promise<Document> {
|
||||
const openai = client as OpenAI;
|
||||
const [content, numTokens] = prepareOpenAIDoc(document, mode);
|
||||
const preparedDoc = prepareOpenAIDoc(document, mode);
|
||||
|
||||
if (preparedDoc === null) {
|
||||
return {
|
||||
...document,
|
||||
warning: "LLM extraction was not performed since the document's content is empty or missing.",
|
||||
};
|
||||
}
|
||||
|
||||
const [content, numTokens] = preparedDoc;
|
||||
|
||||
const completion = await openai.chat.completions.create({
|
||||
model,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
export const defaultOrigin = "api";
|
||||
|
||||
export const defaultTimeout = 45000; // 45 seconds
|
||||
export const defaultTimeout = 60000; // 60 seconds
|
||||
|
||||
export const defaultPageOptions = {
|
||||
onlyMainContent: false,
|
||||
|
@ -12,7 +12,8 @@ export const defaultPageOptions = {
|
|||
};
|
||||
|
||||
export const defaultCrawlerOptions = {
|
||||
allowBackwardCrawling: false
|
||||
allowBackwardCrawling: false,
|
||||
limit: 10000
|
||||
}
|
||||
|
||||
export const defaultCrawlPageOptions = {
|
||||
|
|
|
@ -26,6 +26,9 @@ export type PageOptions = {
|
|||
removeTags?: string | string[];
|
||||
onlyIncludeTags?: string | string[];
|
||||
includeLinks?: boolean;
|
||||
useFastMode?: boolean; // beta
|
||||
disableJSDom?: boolean; // beta
|
||||
atsv?: boolean; // beta
|
||||
};
|
||||
|
||||
export type ExtractorOptions = {
|
||||
|
@ -68,6 +71,7 @@ export type WebScraperOptions = {
|
|||
concurrentRequests?: number;
|
||||
bullJobId?: string;
|
||||
priority?: number;
|
||||
teamId?: string;
|
||||
};
|
||||
|
||||
export interface DocumentUrl {
|
||||
|
@ -144,4 +148,5 @@ export interface FireEngineOptions{
|
|||
blockMedia?: boolean;
|
||||
blockAds?: boolean;
|
||||
disableJsDom?: boolean;
|
||||
atsv?: boolean; // beta
|
||||
}
|
||||
|
|
|
@ -12,7 +12,6 @@ import { Document } from "../lib/entities";
|
|||
import { supabase_service } from "../services/supabase";
|
||||
import { Logger } from "../lib/logger";
|
||||
import { ScrapeEvents } from "../lib/scrape-events";
|
||||
import { getScrapeQueue } from "../services/queue-service";
|
||||
|
||||
export async function startWebScraperPipeline({
|
||||
job,
|
||||
|
@ -95,6 +94,7 @@ export async function runWebScraper({
|
|||
crawlerOptions: crawlerOptions,
|
||||
pageOptions: pageOptions,
|
||||
priority,
|
||||
teamId: team_id
|
||||
});
|
||||
}
|
||||
const docs = (await provider.getDocuments(false, (progress: Progress) => {
|
||||
|
|
|
@ -53,8 +53,8 @@ export class WebCrawler {
|
|||
this.jobId = jobId;
|
||||
this.initialUrl = initialUrl;
|
||||
this.baseUrl = new URL(initialUrl).origin;
|
||||
this.includes = includes ?? [];
|
||||
this.excludes = excludes ?? [];
|
||||
this.includes = Array.isArray(includes) ? includes : [];
|
||||
this.excludes = Array.isArray(excludes) ? excludes : [];
|
||||
this.limit = limit;
|
||||
this.robotsTxtUrl = `${this.baseUrl}/robots.txt`;
|
||||
this.robots = robotsParser(this.robotsTxtUrl, "");
|
||||
|
@ -69,7 +69,13 @@ export class WebCrawler {
|
|||
public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
|
||||
return sitemapLinks
|
||||
.filter((link) => {
|
||||
const url = new URL(link.trim(), this.baseUrl);
|
||||
let url: URL;
|
||||
try {
|
||||
url = new URL(link.trim(), this.baseUrl);
|
||||
} catch (error) {
|
||||
Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
const path = url.pathname;
|
||||
|
||||
const depth = getURLDepth(url.toString());
|
||||
|
@ -102,7 +108,12 @@ export class WebCrawler {
|
|||
|
||||
// Normalize the initial URL and the link to account for www and non-www versions
|
||||
const normalizedInitialUrl = new URL(this.initialUrl);
|
||||
const normalizedLink = new URL(link);
|
||||
let normalizedLink;
|
||||
try {
|
||||
normalizedLink = new URL(link);
|
||||
} catch (_) {
|
||||
return false;
|
||||
}
|
||||
const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, '');
|
||||
const linkHostname = normalizedLink.hostname.replace(/^www\./, '');
|
||||
|
||||
|
@ -261,9 +272,18 @@ export class WebCrawler {
|
|||
public filterURL(href: string, url: string): string | null {
|
||||
let fullUrl = href;
|
||||
if (!href.startsWith("http")) {
|
||||
try {
|
||||
fullUrl = new URL(href, this.baseUrl).toString();
|
||||
} catch (_) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
let urlObj;
|
||||
try {
|
||||
urlObj = new URL(fullUrl);
|
||||
} catch (_) {
|
||||
return null;
|
||||
}
|
||||
const urlObj = new URL(fullUrl);
|
||||
const path = urlObj.pathname;
|
||||
|
||||
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
|
||||
|
|
|
@ -16,7 +16,6 @@ import {
|
|||
replacePathsWithAbsolutePaths,
|
||||
} from "./utils/replacePaths";
|
||||
import { generateCompletions } from "../../lib/LLM-extraction";
|
||||
import { getScrapeQueue } from "../../../src/services/queue-service";
|
||||
import { fetchAndProcessDocx } from "./utils/docxProcessor";
|
||||
import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
|
||||
import { Logger } from "../../lib/logger";
|
||||
|
@ -45,6 +44,7 @@ export class WebScraperDataProvider {
|
|||
private allowBackwardCrawling: boolean = false;
|
||||
private allowExternalContentLinks: boolean = false;
|
||||
private priority?: number;
|
||||
private teamId?: string;
|
||||
|
||||
authorize(): void {
|
||||
throw new Error("Method not implemented.");
|
||||
|
@ -75,6 +75,7 @@ export class WebScraperDataProvider {
|
|||
this.extractorOptions,
|
||||
existingHTML,
|
||||
this.priority,
|
||||
this.teamId,
|
||||
);
|
||||
processedUrls++;
|
||||
if (inProgress) {
|
||||
|
@ -613,6 +614,7 @@ export class WebScraperDataProvider {
|
|||
this.allowExternalContentLinks =
|
||||
options.crawlerOptions?.allowExternalContentLinks ?? false;
|
||||
this.priority = options.priority;
|
||||
this.teamId = options.teamId ?? null;
|
||||
|
||||
// make sure all urls start with https://
|
||||
this.urls = this.urls.map((url) => {
|
||||
|
|
|
@ -5,6 +5,7 @@ import { generateRequestParams } from "../single_url";
|
|||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
import { universalTimeout } from "../global";
|
||||
import { Logger } from "../../../lib/logger";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
/**
|
||||
* Scrapes a URL with Fire-Engine
|
||||
|
@ -22,21 +23,23 @@ export async function scrapWithFireEngine({
|
|||
waitFor = 0,
|
||||
screenshot = false,
|
||||
fullPageScreenshot = false,
|
||||
pageOptions = { parsePDF: true },
|
||||
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
|
||||
fireEngineOptions = {},
|
||||
headers,
|
||||
options,
|
||||
priority,
|
||||
teamId,
|
||||
}: {
|
||||
url: string;
|
||||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
fullPageScreenshot?: boolean;
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
|
||||
fireEngineOptions?: FireEngineOptions;
|
||||
headers?: Record<string, string>;
|
||||
options?: any;
|
||||
priority?: number;
|
||||
teamId?: string;
|
||||
}): Promise<FireEngineResponse> {
|
||||
const logParams = {
|
||||
url,
|
||||
|
@ -51,11 +54,11 @@ export async function scrapWithFireEngine({
|
|||
|
||||
try {
|
||||
const reqParams = await generateRequestParams(url);
|
||||
const waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||
const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
|
||||
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||
const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
|
||||
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||
|
||||
|
||||
let endpoint = "/scrape";
|
||||
|
@ -70,8 +73,30 @@ export async function scrapWithFireEngine({
|
|||
`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
|
||||
);
|
||||
|
||||
if (pageOptions?.useFastMode) {
|
||||
fireEngineOptionsParam.engine = "tlsclient";
|
||||
engine = "tlsclient";
|
||||
}
|
||||
|
||||
const response = await axios.post(
|
||||
// atsv is only available for beta customers
|
||||
const betaCustomersString = process.env.BETA_CUSTOMERS;
|
||||
const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : [];
|
||||
|
||||
if (pageOptions?.atsv && betaCustomers.includes(teamId)) {
|
||||
fireEngineOptionsParam.atsv = true;
|
||||
} else {
|
||||
pageOptions.atsv = false;
|
||||
}
|
||||
|
||||
const axiosInstance = axios.create({
|
||||
headers: { "Content-Type": "application/json" }
|
||||
});
|
||||
|
||||
const startTime = Date.now();
|
||||
const _response = await Sentry.startSpan({
|
||||
name: "Call to fire-engine"
|
||||
}, async span => {
|
||||
return await axiosInstance.post(
|
||||
process.env.FIRE_ENGINE_BETA_URL + endpoint,
|
||||
{
|
||||
url: url,
|
||||
|
@ -80,38 +105,69 @@ export async function scrapWithFireEngine({
|
|||
fullPageScreenshot: fullPageScreenshotParam,
|
||||
headers: headers,
|
||||
pageOptions: pageOptions,
|
||||
disableJsDom: pageOptions?.disableJsDom ?? false,
|
||||
priority,
|
||||
engine,
|
||||
instantReturn: true,
|
||||
...fireEngineOptionsParam,
|
||||
},
|
||||
{
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
timeout: universalTimeout + waitParam,
|
||||
...(Sentry.isInitialized() ? ({
|
||||
"sentry-trace": Sentry.spanToTraceHeader(span),
|
||||
"baggage": Sentry.spanToBaggageHeader(span),
|
||||
}) : {}),
|
||||
}
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
if (response.status !== 200) {
|
||||
let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
|
||||
while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) {
|
||||
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
|
||||
checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
|
||||
}
|
||||
|
||||
if (checkStatusResponse.data.processing) {
|
||||
Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`);
|
||||
axiosInstance.delete(
|
||||
process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, {
|
||||
validateStatus: (status) => true
|
||||
}
|
||||
).catch((error) => {
|
||||
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`);
|
||||
});
|
||||
|
||||
Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
|
||||
logParams.error_message = "Request timed out";
|
||||
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
|
||||
}
|
||||
|
||||
if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
|
||||
Logger.debug(
|
||||
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
|
||||
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}`
|
||||
);
|
||||
|
||||
logParams.error_message = response.data?.pageError;
|
||||
logParams.response_code = response.data?.pageStatusCode;
|
||||
logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
|
||||
logParams.response_code = checkStatusResponse.data?.pageStatusCode;
|
||||
|
||||
if(response.data && response.data?.pageStatusCode !== 200) {
|
||||
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
|
||||
if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
|
||||
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.data?.pageStatusCode}`);
|
||||
}
|
||||
|
||||
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
|
||||
|
||||
return {
|
||||
html: "",
|
||||
screenshot: "",
|
||||
pageStatusCode: response.data?.pageStatusCode,
|
||||
pageError: response.data?.pageError,
|
||||
pageStatusCode,
|
||||
pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
|
||||
};
|
||||
}
|
||||
|
||||
const contentType = response.headers["content-type"];
|
||||
const contentType = checkStatusResponse.data.responseHeaders?.["content-type"];
|
||||
|
||||
if (contentType && contentType.includes("application/pdf")) {
|
||||
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
|
||||
url,
|
||||
|
@ -122,18 +178,19 @@ export async function scrapWithFireEngine({
|
|||
logParams.error_message = pageError;
|
||||
return { html: content, screenshot: "", pageStatusCode, pageError };
|
||||
} else {
|
||||
const data = response.data;
|
||||
const data = checkStatusResponse.data;
|
||||
|
||||
logParams.success =
|
||||
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
|
||||
data.pageStatusCode === 404;
|
||||
logParams.html = data.content ?? "";
|
||||
logParams.response_code = data.pageStatusCode;
|
||||
logParams.error_message = data.pageError;
|
||||
logParams.error_message = data.pageError ?? data.error;
|
||||
return {
|
||||
html: data.content ?? "",
|
||||
screenshot: data.screenshot ?? "",
|
||||
pageStatusCode: data.pageStatusCode,
|
||||
pageError: data.pageError,
|
||||
pageError: data.pageError ?? data.error,
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
|
@ -43,6 +43,9 @@ export async function scrapWithScrapingBee(
|
|||
transparent_status_code: "True",
|
||||
},
|
||||
});
|
||||
Logger.info(
|
||||
`⛏️ ScrapingBee: Scraping ${url}`
|
||||
);
|
||||
const contentType = response.headers["content-type"];
|
||||
if (contentType && contentType.includes("application/pdf")) {
|
||||
logParams.success = true;
|
||||
|
|
|
@ -126,6 +126,7 @@ export async function scrapSingleUrl(
|
|||
extractorOptions?: ExtractorOptions,
|
||||
existingHtml?: string,
|
||||
priority?: number,
|
||||
teamId?: string
|
||||
): Promise<Document> {
|
||||
pageOptions = {
|
||||
includeMarkdown: pageOptions.includeMarkdown ?? true,
|
||||
|
@ -179,7 +180,7 @@ export async function scrapSingleUrl(
|
|||
case "fire-engine;chrome-cdp":
|
||||
|
||||
let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright";
|
||||
if(method === "fire-engine;chrome-cdp"){
|
||||
if (method === "fire-engine;chrome-cdp") {
|
||||
engine = "chrome-cdp";
|
||||
}
|
||||
|
||||
|
@ -193,8 +194,10 @@ export async function scrapSingleUrl(
|
|||
headers: pageOptions.headers,
|
||||
fireEngineOptions: {
|
||||
engine: engine,
|
||||
atsv: pageOptions.atsv,
|
||||
},
|
||||
priority,
|
||||
teamId,
|
||||
});
|
||||
scraperResponse.text = response.html;
|
||||
scraperResponse.screenshot = response.screenshot;
|
||||
|
|
|
@ -15,6 +15,8 @@ const socialMediaBlocklist = [
|
|||
'whatsapp.com',
|
||||
'wechat.com',
|
||||
'telegram.org',
|
||||
'researchhub.com',
|
||||
'youtube.com'
|
||||
];
|
||||
|
||||
const allowedKeywords = [
|
||||
|
|
|
@ -49,7 +49,7 @@ export async function checkAlerts() {
|
|||
};
|
||||
|
||||
const checkAll = async () => {
|
||||
// await checkActiveJobs();
|
||||
await checkActiveJobs();
|
||||
await checkWaitingQueue();
|
||||
};
|
||||
|
||||
|
|
|
@ -2,16 +2,47 @@ import { Job, Queue } from "bullmq";
|
|||
import { getScrapeQueue } from "./queue-service";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { WebScraperOptions } from "../types";
|
||||
import * as Sentry from "@sentry/node";
|
||||
|
||||
async function addScrapeJobRaw(
|
||||
webScraperOptions: any,
|
||||
options: any,
|
||||
jobId: string,
|
||||
): Promise<Job> {
|
||||
return await getScrapeQueue().add(jobId, webScraperOptions, {
|
||||
...options,
|
||||
priority: webScraperOptions.crawl_id ? 20 : 10,
|
||||
jobId,
|
||||
});
|
||||
}
|
||||
|
||||
export async function addScrapeJob(
|
||||
webScraperOptions: WebScraperOptions,
|
||||
options: any = {},
|
||||
jobId: string = uuidv4(),
|
||||
): Promise<Job> {
|
||||
return await getScrapeQueue().add(jobId, webScraperOptions, {
|
||||
priority: webScraperOptions.crawl_id ? 20 : 10,
|
||||
...options,
|
||||
jobId,
|
||||
if (Sentry.isInitialized()) {
|
||||
const size = JSON.stringify(webScraperOptions).length;
|
||||
return await Sentry.startSpan({
|
||||
name: "Add scrape job",
|
||||
op: "queue.publish",
|
||||
attributes: {
|
||||
"messaging.message.id": jobId,
|
||||
"messaging.destination.name": getScrapeQueue().name,
|
||||
"messaging.message.body.size": size,
|
||||
},
|
||||
}, async (span) => {
|
||||
return await addScrapeJobRaw({
|
||||
...webScraperOptions,
|
||||
sentry: {
|
||||
trace: Sentry.spanToTraceHeader(span),
|
||||
baggage: Sentry.spanToBaggageHeader(span),
|
||||
size,
|
||||
},
|
||||
}, options, jobId);
|
||||
});
|
||||
} else {
|
||||
return await addScrapeJobRaw(webScraperOptions, options, jobId);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,6 @@ export function getScrapeQueue() {
|
|||
}
|
||||
|
||||
|
||||
import { QueueEvents } from 'bullmq';
|
||||
|
||||
export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection });
|
||||
// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
|
||||
// import { QueueEvents } from 'bullmq';
|
||||
// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });
|
|
@ -1,4 +1,6 @@
|
|||
import "dotenv/config";
|
||||
import "./sentry"
|
||||
import * as Sentry from "@sentry/node";
|
||||
import { CustomError } from "../lib/custom-error";
|
||||
import {
|
||||
getScrapeQueue,
|
||||
|
@ -48,6 +50,7 @@ const processJobInternal = async (token: string, job: Job) => {
|
|||
await job.extendLock(token, jobLockExtensionTime);
|
||||
}, jobLockExtendInterval);
|
||||
|
||||
let err = null;
|
||||
try {
|
||||
const result = await processJob(job, token);
|
||||
try{
|
||||
|
@ -60,11 +63,14 @@ const processJobInternal = async (token: string, job: Job) => {
|
|||
}
|
||||
} catch (error) {
|
||||
console.log("Job failed, error:", error);
|
||||
|
||||
Sentry.captureException(error);
|
||||
err = error;
|
||||
await job.moveToFailed(error, token, false);
|
||||
} finally {
|
||||
clearInterval(extendLockInterval);
|
||||
}
|
||||
|
||||
return err;
|
||||
};
|
||||
|
||||
let isShuttingDown = false;
|
||||
|
@ -74,7 +80,7 @@ process.on("SIGINT", () => {
|
|||
isShuttingDown = true;
|
||||
});
|
||||
|
||||
const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<void>) => {
|
||||
const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<any>) => {
|
||||
const worker = new Worker(queueName, null, {
|
||||
connection: redisConnection,
|
||||
lockDuration: 1 * 60 * 1000, // 1 minute
|
||||
|
@ -102,7 +108,47 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
|
|||
|
||||
const job = await worker.getNextJob(token);
|
||||
if (job) {
|
||||
if (job.data && job.data.sentry && Sentry.isInitialized()) {
|
||||
Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
|
||||
Sentry.startSpan({
|
||||
name: "Scrape job",
|
||||
attributes: {
|
||||
job: job.id,
|
||||
worker: process.env.FLY_MACHINE_ID ?? worker.id,
|
||||
},
|
||||
}, async (span) => {
|
||||
await Sentry.startSpan({
|
||||
name: "Process scrape job",
|
||||
op: "queue.process",
|
||||
attributes: {
|
||||
"messaging.message.id": job.id,
|
||||
"messaging.destination.name": getScrapeQueue().name,
|
||||
"messaging.message.body.size": job.data.sentry.size,
|
||||
"messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
|
||||
"messaging.message.retry.count": job.attemptsMade,
|
||||
}
|
||||
}, async () => {
|
||||
const res = await processJobInternal(token, job);
|
||||
if (res !== null) {
|
||||
span.setStatus({ code: 2 }); // ERROR
|
||||
} else {
|
||||
span.setStatus({ code: 1 }); // OK
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
} else {
|
||||
Sentry.startSpan({
|
||||
name: "Scrape job",
|
||||
attributes: {
|
||||
job: job.id,
|
||||
worker: process.env.FLY_MACHINE_ID ?? worker.id,
|
||||
},
|
||||
}, () => {
|
||||
processJobInternal(token, job);
|
||||
});
|
||||
}
|
||||
|
||||
await sleep(gotJobInterval);
|
||||
} else {
|
||||
await sleep(connectionMonitorInterval);
|
||||
|
@ -115,6 +161,20 @@ workerFun(scrapeQueueName, processJobInternal);
|
|||
async function processJob(job: Job, token: string) {
|
||||
Logger.info(`🐂 Worker taking job ${job.id}`);
|
||||
|
||||
// Check if the job URL is researchhub and block it immediately
|
||||
// TODO: remove this once solve the root issue
|
||||
if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com") )) {
|
||||
Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`);
|
||||
const data = {
|
||||
success: false,
|
||||
docs: [],
|
||||
project_id: job.data.project_id,
|
||||
error: "URL is blocked. Suspecious activity detected. Please contact hello@firecrawl.com if you believe this is an error.",
|
||||
};
|
||||
await job.moveToCompleted(data.docs, token, false);
|
||||
return data;
|
||||
}
|
||||
|
||||
try {
|
||||
job.updateProgress({
|
||||
current: 1,
|
||||
|
@ -123,6 +183,7 @@ async function processJob(job: Job, token: string) {
|
|||
current_url: "",
|
||||
});
|
||||
const start = Date.now();
|
||||
|
||||
const { success, message, docs } = await startWebScraperPipeline({
|
||||
job,
|
||||
token,
|
||||
|
@ -276,6 +337,12 @@ async function processJob(job: Job, token: string) {
|
|||
} catch (error) {
|
||||
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
|
||||
|
||||
Sentry.captureException(error, {
|
||||
data: {
|
||||
job: job.id
|
||||
},
|
||||
})
|
||||
|
||||
if (error instanceof CustomError) {
|
||||
// Here we handle the error, then save the failed job
|
||||
Logger.error(error.message); // or any other error handling
|
||||
|
|
|
@ -103,7 +103,7 @@ export function getRateLimiter(
|
|||
plan?: string
|
||||
) {
|
||||
|
||||
if (token.includes("a01ccae") || token.includes("6254cf9")) {
|
||||
if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673")) {
|
||||
return testSuiteRateLimiter;
|
||||
}
|
||||
|
||||
|
|
18
apps/api/src/services/sentry.ts
Normal file
18
apps/api/src/services/sentry.ts
Normal file
|
@ -0,0 +1,18 @@
|
|||
// Import with `import * as Sentry from "@sentry/node"` if you are using ESM
|
||||
import * as Sentry from "@sentry/node";
|
||||
import { nodeProfilingIntegration } from "@sentry/profiling-node";
|
||||
import { Logger } from "../lib/logger";
|
||||
|
||||
if (process.env.SENTRY_DSN) {
|
||||
Logger.info("Setting up Sentry...");
|
||||
Sentry.init({
|
||||
dsn: process.env.SENTRY_DSN,
|
||||
integrations: [
|
||||
nodeProfilingIntegration(),
|
||||
],
|
||||
tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045,
|
||||
profilesSampleRate: 1.0,
|
||||
serverName: process.env.FLY_MACHINE_ID,
|
||||
environment: process.env.SENTRY_ENVIRONMENT ?? "production",
|
||||
});
|
||||
}
|
|
@ -2,12 +2,22 @@
|
|||
"compilerOptions": {
|
||||
"rootDir": "./src",
|
||||
"lib": ["es6","DOM"],
|
||||
"target": "ES2020", // or higher
|
||||
|
||||
// or higher
|
||||
"target": "ES2020",
|
||||
|
||||
"module": "commonjs",
|
||||
"esModuleInterop": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist/src",
|
||||
"moduleResolution": "node",
|
||||
"baseUrl": ".",
|
||||
|
||||
"paths": {
|
||||
"*": ["node_modules/*", "src/types/*"],
|
||||
},
|
||||
|
||||
"inlineSources": true
|
||||
},
|
||||
"include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"]
|
||||
}
|
||||
|
|
2
apps/redis/.dockerignore
Normal file
2
apps/redis/.dockerignore
Normal file
|
@ -0,0 +1,2 @@
|
|||
.git
|
||||
fly.toml
|
6
apps/redis/Dockerfile
Normal file
6
apps/redis/Dockerfile
Normal file
|
@ -0,0 +1,6 @@
|
|||
ARG REDIS_VERSION=7.2.5
|
||||
FROM bitnami/redis:${REDIS_VERSION}
|
||||
|
||||
COPY start-redis-server.sh /usr/bin/start-redis-server.sh
|
||||
|
||||
CMD ["/usr/bin/start-redis-server.sh"]
|
2
apps/redis/Procfile
Normal file
2
apps/redis/Procfile
Normal file
|
@ -0,0 +1,2 @@
|
|||
redis: /usr/bin/start-redis-server.sh
|
||||
metrics: /usr/local/bin/redis_exporter -redis.addr localhost:6379 -web.listen-address ":9091"
|
48
apps/redis/README.md
Normal file
48
apps/redis/README.md
Normal file
|
@ -0,0 +1,48 @@
|
|||
The official repository for Running Redis on Fly.io. Find the accompanying Docker image at [flyio/redis](https://hub.docker.com/repository/docker/flyio/redis).
|
||||
|
||||
## Usage
|
||||
|
||||
This installation requires setting a password on Redis. To do that, run `fly secrets set REDIS_PASSWORD=mypassword` before deploying. Keep
|
||||
track of this password - it won't be visible again after deployment!
|
||||
|
||||
If you need no customizations, you can deploy using the official Docker image. See `fly.toml` in this repository for an example to get started with.
|
||||
## Runtime requirements
|
||||
|
||||
By default, this Redis installation will only accept connections on the private IPv6 network, on the standard port 6379.
|
||||
|
||||
If you want to access it from the public internet, add a `[[services]]` section to your `fly.toml`. An example is included in this repo for accessing Redis on port 10000.
|
||||
|
||||
|
||||
We recommend adding persistent storage for Redis data. If you skip this step, data will be lost across deploys or restarts. For Fly apps, the volume needs to be in the same region as the app instances. For example:
|
||||
|
||||
```cmd
|
||||
flyctl volumes create redis_server --region ord
|
||||
```
|
||||
```out
|
||||
Name: redis_server
|
||||
Region: ord
|
||||
Size GB: 10
|
||||
Created at: 02 Nov 20 19:55 UTC
|
||||
```
|
||||
|
||||
To connect this volume to the app, `fly.toml` includes a `[mounts]` entry.
|
||||
|
||||
```
|
||||
[mounts]
|
||||
source = "redis_server"
|
||||
destination = "/data"
|
||||
```
|
||||
|
||||
When the app starts, that volume will be mounted on /data.
|
||||
|
||||
## Cutting a release
|
||||
|
||||
If you have write access to this repo, you can ship a prerelease or full release with:
|
||||
|
||||
```
|
||||
scripts/bump_version.sh
|
||||
```
|
||||
or
|
||||
```
|
||||
scripts/bump_version.sh prerel
|
||||
```
|
22
apps/redis/fly.toml
Normal file
22
apps/redis/fly.toml
Normal file
|
@ -0,0 +1,22 @@
|
|||
app = 'firecrawl-dragonfly'
|
||||
primary_region = 'iad'
|
||||
|
||||
[[mounts]]
|
||||
source = 'firecrawl_redis'
|
||||
destination = '/data'
|
||||
|
||||
[[services]]
|
||||
protocol = 'tcp'
|
||||
internal_port = 6379
|
||||
|
||||
[[services.tcp_checks]]
|
||||
interval = '10s'
|
||||
timeout = '2s'
|
||||
|
||||
[[vm]]
|
||||
size = 'performance-4x'
|
||||
memory = '32gb'
|
||||
|
||||
[[metrics]]
|
||||
port = 9091
|
||||
path = '/metrics'
|
91
apps/redis/scripts/bump_version.sh
Executable file
91
apps/redis/scripts/bump_version.sh
Executable file
|
@ -0,0 +1,91 @@
|
|||
#!/usr/bin/env bash
#
# bump_version.sh — compute the next semver version, then create and push an
# annotated release tag.
#
# Usage:
#   scripts/bump_version.sh [major|minor|patch]   # full release (from 'main')
#   scripts/bump_version.sh prerel                # pre-release ('prerelease' branch)
#
# Requires a clean working tree that is in sync with its upstream branch.

set -euo pipefail

# Remote that tags are pushed to; override with ORIGIN=<remote>.
ORIGIN=${ORIGIN:-origin}

# Version component to bump (major|minor|patch); defaults to patch.
bump=${1:-patch}

prerel=${2:-none}

# Shorthand: "bump_version.sh prerel" means a patch bump cut as a pre-release.
if [[ $bump == "prerel" ]]; then
  bump="patch"
  prerel="prerel"
fi

# Refuse to release from a dirty tree or one that differs from the remote.
if [[ $(git status --porcelain) != "" ]]; then
  echo "Error: repo is dirty. Run git status, clean repo and try again."
  exit 1
elif [[ $(git status --porcelain -b | grep -e "ahead" -e "behind") != "" ]]; then
  echo "Error: repo has unpushed commits. Push commits to remote and try again."
  exit 1
fi

BRANCH="$(git rev-parse --abbrev-ref HEAD)"
if [[ "$prerel" == "prerel" && "$BRANCH" != "prerelease" ]]; then
  # echo "❌ Sorry, you can only cut a pre-release from the 'prerelease' branch"
  # echo "Run 'git checkout prerelease && git pull origin prerelease' and try again."
  # exit 1
  echo "⚠️ Pre-releases should be cut from the 'prerelease' branch"
  echo "Please make sure you're not overwriting someone else's prerelease!"
  echo
  read -p "Release anyway? " -n 1 -r
  echo
  # Proceed only on an explicit y/Y. The previous pattern (=~ ^[^Yy]$) did
  # not match an empty reply, so pressing Enter released anyway; now it aborts.
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo Aborting.
    exit 1
  fi
fi

if [[ "$prerel" != "prerel" && "$BRANCH" != "main" ]]; then
  echo "❌ Sorry, you can only cut a release from the 'main' branch"
  echo "Run 'git checkout main && git pull origin main' and try again."
  exit 1
fi

# Warn when upstream has commits that this release would not include.
git fetch
if [[ "$(git rev-parse HEAD 2>&1)" != "$(git rev-parse '@{u}' 2>&1)" ]]; then
  echo "There are upstream commits that won't be included in this release."
  echo "You probably want to exit, run 'git pull', then release."
  echo
  read -p "Release anyway? " -n 1 -r
  echo
  # Proceed only on an explicit y/Y; an empty reply (just Enter) aborts.
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo Aborting.
    exit 1
  fi
fi

dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Latest existing version, without the leading "v".
previous_version="$("$dir"/../scripts/version.sh -s)"

# Compute the next version with the bundled semver helper.
if [[ $prerel == "prerel" ]]; then
  prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version")
  if [[ $prerelversion == "" ]]; then
    # First pre-release on top of the next bump: X.Y.Z-pre-1.
    new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version")
    new_version=$("$dir"/../scripts/semver bump prerel pre-1 "$new_version")
  else
    # Already a pre-release: increment its pre-N counter.
    prerel=pre-$((${prerelversion#pre-} + 1))
    new_version=$("$dir"/../scripts/semver bump prerel "$prerel" "$previous_version")
  fi
else
  prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version")
  if [[ $prerelversion == "" ]]; then
    new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version")
  else
    # Promote the current pre-release by dropping its prerelease suffix.
    new_version=${previous_version//-$prerelversion/}
  fi
fi

new_version="v$new_version"

echo "Bumping version from v${previous_version} to ${new_version}"

# Final confirmation before tagging and pushing.
read -p "Are you sure? " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
  git tag -m "release ${new_version}" -a "$new_version" && git push "${ORIGIN}" tag "$new_version"
  echo "done"
fi
|
200
apps/redis/scripts/semver
Executable file
200
apps/redis/scripts/semver
Executable file
|
@ -0,0 +1,200 @@
|
|||
#!/usr/bin/env bash
# semver — query, compare and bump semantic version strings.

set -o errexit -o nounset -o pipefail

# Matches X.Y.Z(-PRERELEASE)(+BUILD) with an optional leading v/V.
# Capture groups: 1=major, 2=minor, 3=patch, 4=prerelease (incl. leading "-"),
# 6=build (incl. leading "+"); groups 5/7 are nested dot-separated repeats.
SEMVER_REGEX="^[vV]?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\-[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?(\\+[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?$"

PROG=semver
PROG_VERSION=2.1.0

# Help text printed by --help and on usage errors.
USAGE="\
Usage:
  $PROG bump (major|minor|patch|release|prerel <prerel>|build <build>) <version>
  $PROG compare <version> <other_version>
  $PROG get (major|minor|patch|release|prerel|build) <version>
  $PROG --help
  $PROG --version

Arguments:
  <version> A version must match the following regex pattern:
            \"${SEMVER_REGEX}\".
            In english, the version must match X.Y.Z(-PRERELEASE)(+BUILD)
            where X, Y and Z are positive integers, PRERELEASE is an optional
            string composed of alphanumeric characters and hyphens and
            BUILD is also an optional string composed of alphanumeric
            characters and hyphens.

  <other_version> See <version> definition.

  <prerel> String that must be composed of alphanumeric characters and hyphens.

  <build> String that must be composed of alphanumeric characters and hyphens.

Options:
  -v, --version Print the version of this tool.
  -h, --help Print this help message.

Commands:
  bump Bump <version> by one of major, minor, patch, prerel, build
       or a forced potentially conflicting version. The bumped version is
       shown to stdout.

  compare Compare <version> with <other_version>, output to stdout the
          following values: -1 if <other_version> is newer, 0 if equal, 1 if
          older.

  get Extract given part of <version>, where part is one of major, minor,
      patch, prerel, build."
|
||||
|
||||
# Write a (backslash-escape-interpreting) message to stderr and abort.
function error {
  printf '%b\n' "$1" >&2
  exit 1
}
|
||||
|
||||
# Print the usage text to stderr and exit non-zero (error inlined).
function usage-help {
  echo -e "$USAGE" >&2
  exit 1
}
|
||||
|
||||
# Print "semver: <version>" and exit successfully.
function usage-version {
  printf '%s: %s\n' "$PROG" "$PROG_VERSION"
  exit 0
}
|
||||
|
||||
# Validate $1 against SEMVER_REGEX.
# With one argument: echo the (possibly v-prefixed) version unchanged.
# With two arguments: store the parsed components into the array variable
# named by $2, as (major minor patch prerel build) — prerel keeps its
# leading "-" and build its leading "+".
# Exits via error() if the version does not match.
function validate-version {
  local version=$1
  if [[ "$version" =~ $SEMVER_REGEX ]]; then
    # if a second argument is passed, store the result in var named by $2
    if [ "$#" -eq "2" ]; then
      local major=${BASH_REMATCH[1]}
      local minor=${BASH_REMATCH[2]}
      local patch=${BASH_REMATCH[3]}
      local prere=${BASH_REMATCH[4]}
      # BASH_REMATCH[5] (nested prerelease group) is skipped on purpose.
      local build=${BASH_REMATCH[6]}
      # eval writes into the caller-named array; $2 is a trusted identifier.
      eval "$2=(\"$major\" \"$minor\" \"$patch\" \"$prere\" \"$build\")"
    else
      echo "$version"
    fi
  else
    error "version $version does not match the semver scheme 'X.Y.Z(-PRERELEASE)(+BUILD)'. See help for more information."
  fi
}
|
||||
|
||||
# Compare two semver strings: echo -1 if $1 < $2, 0 if equal, 1 if $1 > $2.
# Build metadata (parts index 4) is never examined, so it does not affect
# ordering.
function compare-version {
  # Parse both operands into arrays V and V_ (exits on invalid input).
  validate-version "$1" V
  validate-version "$2" V_

  # MAJOR, MINOR and PATCH should compare numerically
  for i in 0 1 2; do
    local diff=$((${V[$i]} - ${V_[$i]}))
    if [[ $diff -lt 0 ]]; then
      echo -1; return 0
    elif [[ $diff -gt 0 ]]; then
      echo 1; return 0
    fi
  done

  # PREREL should compare with the ASCII order.
  # A version without a prerelease ranks above the same version with one.
  if [[ -z "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then
    echo 1; return 0;
  elif [[ -n "${V[3]}" ]] && [[ -z "${V_[3]}" ]]; then
    echo -1; return 0;
  elif [[ -n "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then
    # [[ > ]] / [[ < ]] compare lexicographically (note: locale-dependent).
    if [[ "${V[3]}" > "${V_[3]}" ]]; then
      echo 1; return 0;
    elif [[ "${V[3]}" < "${V_[3]}" ]]; then
      echo -1; return 0;
    fi
  fi

  echo 0
}
|
||||
|
||||
# Bump one component of a version and print the result, then exit 0.
#   command-bump (major|minor|patch|release) <version>
#   command-bump (prerel|build) <string> <version>
# "release" drops the prerelease/build suffixes; prerel/build replace them.
function command-bump {
  local new; local version; local sub_version; local command;

  # Dispatch on argument count: only prerel/build take a sub-value.
  case $# in
    2) case $1 in
         major|minor|patch|release) command=$1; version=$2;;
         *) usage-help;;
       esac ;;
    3) case $1 in
         prerel|build) command=$1; sub_version=$2 version=$3 ;;
         *) usage-help;;
       esac ;;
    *) usage-help;;
  esac

  # Parse <version> into the "parts" array: (major minor patch prere build).
  validate-version "$version" parts
  # shellcheck disable=SC2154
  local major="${parts[0]}"
  local minor="${parts[1]}"
  local patch="${parts[2]}"
  local prere="${parts[3]}"
  local build="${parts[4]}"

  case "$command" in
    major) new="$((major + 1)).0.0";;
    minor) new="${major}.$((minor + 1)).0";;
    patch) new="${major}.${minor}.$((patch + 1))";;
    release) new="${major}.${minor}.${patch}";;
    # validate-version re-checks the rebuilt string; prere already carries
    # its leading "-", so the build case keeps any existing prerelease.
    prerel) new=$(validate-version "${major}.${minor}.${patch}-${sub_version}");;
    build) new=$(validate-version "${major}.${minor}.${patch}${prere}+${sub_version}");;
    *) usage-help ;;
  esac

  echo "$new"
  exit 0
}
|
||||
|
||||
# Validate both operands, then print -1/0/1 comparing $1 against $2.
function command-compare {
  # Exactly two versions are required; anything else is a usage error
  # (usage-help does not return).
  if [[ $# -ne 2 ]]; then
    usage-help
  fi

  local first; local second;
  first=$(validate-version "$1")
  second=$(validate-version "$2")

  compare-version "$first" "$second"
  exit 0
}
|
||||
|
||||
|
||||
# shellcheck disable=SC2034
# Print one component of <version>: major, minor, patch, prerel, or build.
# prerel and build are printed without their leading "-" / "+".
function command-get {
  local part version

  if [[ "$#" -ne "2" ]] || [[ -z "$1" ]] || [[ -z "$2" ]]; then
    usage-help
    exit 0
  fi

  part="$1"
  version="$2"

  # Fill the "parts" array via validate-version (exits on invalid input).
  validate-version "$version" parts
  local major="${parts[0]}"
  local minor="${parts[1]}"
  local patch="${parts[2]}"
  # :1 strips the leading "-" from prerel and the leading "+" from build.
  local prerel="${parts[3]:1}"
  local build="${parts[4]:1}"

  case "$part" in
    # ${!part} is indirect expansion: it echoes the local named by $part.
    # NOTE(review): "release" is accepted here but no local "release" exists,
    # so under "set -o nounset" it looks like it would abort — confirm.
    major|minor|patch|release|prerel|build) echo "${!part}" ;;
    *) usage-help ;;
  esac

  exit 0
}
|
||||
|
||||
# Entry point: reject an empty command line, then dispatch to the handlers
# (each handler exits the script itself).
case $# in
  0) echo "Unknown command: $*"; usage-help;;
esac

case $1 in
  --help|-h) echo -e "$USAGE"; exit 0;;
  --version|-v) usage-version ;;
  bump) shift; command-bump "$@";;
  get) shift; command-get "$@";;
  compare) shift; command-compare "$@";;
  *) echo "Unknown arguments: $*"; usage-help;;
esac
|
5
apps/redis/scripts/version.sh
Executable file
5
apps/redis/scripts/version.sh
Executable file
|
@ -0,0 +1,5 @@
|
|||
#!/usr/bin/env bash
#
# version.sh — print the latest release version, without the leading "v".
# Arguments (e.g. the "-s" flag passed by bump_version.sh) are accepted and
# ignored, preserving the existing call sites.

# Remote to fetch tags from; override with ORIGIN=<remote>.
ORIGIN=${ORIGIN:-origin}

# Refresh tags first. This must be a separate statement, not a pipe: the
# original piped the (already-discarded) fetch output into "git tag", which
# never reads stdin, and the pipe let the listing start before the fetch
# finished. Fetch failures (e.g. offline) fall through to the local tags.
git fetch --tags "${ORIGIN}" &>/dev/null || true

# Sort tags semver-aware ("-pre" suffixes order before the release), drop dev
# tags and the bare v1/v2 tags, take the newest, and strip the leading "v".
version=$(git -c "versionsort.prereleasesuffix=-pre" tag -l --sort=version:refname | grep -v dev | grep -vE '^v2$' | grep -vE '^v1$' | tail -n1 | cut -c 2-)

echo "$version"
|
30
apps/redis/start-redis-server.sh
Executable file
30
apps/redis/start-redis-server.sh
Executable file
|
@ -0,0 +1,30 @@
|
|||
#!/bin/bash
#
# start-redis-server.sh — container entrypoint: tune the kernel, then launch
# redis-server configured from the environment:
#   REDIS_PASSWORD    optional password (--requirepass)
#   MAXMEMORY_POLICY  eviction policy (default: volatile-lru)
#   APPENDONLY        AOF persistence, "yes"/"no" (default: no)
#   FLY_VM_MEMORY_MB  VM memory in MB, used to size maxmemory (default: 512)
#   NOSAVE            if set non-empty, disables RDB snapshots
#   SAVE              RDB save points (default: "3600 1 300 100 60 10000")

set -e

# Kernel tuning recommended for Redis; tolerate failure (unprivileged runs).
sysctl vm.overcommit_memory=1 || true
sysctl net.core.somaxconn=1024 || true

PW_ARG=""
if [[ ! -z "${REDIS_PASSWORD}" ]]; then
  PW_ARG="--requirepass $REDIS_PASSWORD"
fi

# Set maxmemory-policy to 'allkeys-lru' for caching servers that should always evict old keys
: ${MAXMEMORY_POLICY:="volatile-lru"}
: ${APPENDONLY:="no"}
: ${FLY_VM_MEMORY_MB:=512}
if [ "${NOSAVE}" = "" ] ; then
  : ${SAVE:="3600 1 300 100 60 10000"}
fi
# Cap maxmemory at 80% of the VM's memory (comment previously said 10%,
# which contradicted the arithmetic below).
MAXMEMORY=$(($FLY_VM_MEMORY_MB*80/100))

# -p: the directory already exists after a restart on the persistent volume,
# and a plain mkdir would abort the entrypoint under "set -e".
mkdir -p /data/redis

# $PW_ARG is deliberately unquoted so an empty value expands to no argument.
redis-server $PW_ARG \
  --dir /data/redis \
  --maxmemory "${MAXMEMORY}mb" \
  --maxmemory-policy $MAXMEMORY_POLICY \
  --appendonly $APPENDONLY \
  --save "$SAVE"
|
Loading…
Reference in New Issue
Block a user