mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Add proxy and media blocking configurations
Updated the environment variables and application settings to include proxy configuration and an optional media-blocking setting. The proxy settings let users route requests through a proxy service, while media blocking is an optional feature that can help save bandwidth. Changes were made in the .env.example, docker-compose.yaml, and main.py files.
This commit is contained in:
parent
6a5b9ca314
commit
b001aded46
|
@ -35,4 +35,11 @@ STRIPE_PRICE_ID_SCALE=
|
|||
HYPERDX_API_KEY=
|
||||
HDX_NODE_BETA_MODE=1
|
||||
|
||||
FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta
|
||||
FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta
|
||||
|
||||
# Proxy Settings (Alternatively, you can use a proxy service like oxylabs, which rotates IPs for you on every request)
|
||||
PROXY_SERVER=
|
||||
PROXY_USERNAME=
|
||||
PROXY_PASSWORD=
|
||||
# set if you'd like to block media requests to save proxy bandwidth
|
||||
BLOCK_MEDIA=
|
|
@ -2,9 +2,16 @@ from fastapi import FastAPI
|
|||
from playwright.async_api import async_playwright, Browser
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from os import environ
|
||||
|
||||
PROXY_SERVER = environ.get('PROXY_SERVER', None)
|
||||
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
|
||||
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
|
||||
BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() == 'TRUE'
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class UrlModel(BaseModel):
|
||||
url: str
|
||||
wait: int = None
|
||||
|
@ -27,7 +34,18 @@ async def shutdown_event():
|
|||
|
||||
@app.post("/html")
|
||||
async def root(body: UrlModel):
|
||||
context = await browser.new_context()
|
||||
context = None
|
||||
if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
|
||||
context = await browser.new_context(proxy={"server": PROXY_SERVER,
|
||||
"username": PROXY_USERNAME,
|
||||
"password": PROXY_PASSWORD})
|
||||
else:
|
||||
context = await browser.new_context()
|
||||
|
||||
if BLOCK_MEDIA:
|
||||
await context.route("**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
|
||||
handler=lambda route, request: route.abort())
|
||||
|
||||
page = await context.new_page()
|
||||
await page.goto(body.url, timeout=15000) # Set max timeout to 15s
|
||||
if body.wait: # Check if wait parameter is provided in the request body
|
||||
|
|
|
@ -5,6 +5,10 @@ services:
|
|||
build: apps/playwright-service
|
||||
environment:
|
||||
- PORT=3000
|
||||
- PROXY_SERVER=${PROXY_SERVER}
|
||||
- PROXY_USERNAME=${PROXY_USERNAME}
|
||||
- PROXY_PASSWORD=${PROXY_PASSWORD}
|
||||
- BLOCK_MEDIA=${BLOCK_MEDIA}
|
||||
networks:
|
||||
- backend
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user