diff --git a/Dockerfile b/Dockerfile
index e9ee3c8..b39d7ae 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,6 @@
 # Use Node.js 18 slim image (Debian-based)
 FROM node:18-slim
+
 # Install necessary tools and libraries
 RUN apt-get update && apt-get install -y \
     chromium \
@@ -13,23 +14,31 @@ RUN apt-get update && apt-get install -y \
     && apt-get update \
     && apt-get install -y google-chrome-stable \
     && rm -rf /var/lib/apt/lists/*
+
 # Set environment variables
-ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
-ENV PUPPETEER_EXECUTABLE_PATH /usr/bin/google-chrome-stable
+ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
+ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
+
 # Set working directory
 WORKDIR /app
+
 # Copy package.json and package-lock.json
 COPY backend/functions/package*.json ./
+
 # Install dependencies
 RUN npm ci
+
 # Copy the rest of the application code
 COPY backend/functions .
+
 # Build the application
 RUN npm run build
-# Expose the port the app runs on
-EXPOSE 3000
-# Start the application
-CMD ["node", "build/server.js"]
 # Create local storage directory and set permissions
 RUN mkdir -p /app/local-storage && chmod 777 /app/local-storage
+
+# Expose the port the app runs on
+EXPOSE 3000
+
+# Start the application
+CMD ["node", "build/server.js"]
diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts
index 153b74b..55969c3 100644
--- a/backend/functions/src/cloud-functions/crawler.ts
+++ b/backend/functions/src/cloud-functions/crawler.ts
@@ -612,6 +612,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
     }
 
     async crawl(req: Request, res: Response) {
+        this.logger.info(`Crawl request received for URL: ${req.url}`);
         console.log('Crawl method called with request:', req.url);
         const ctx = { req, res };
         console.log(`req.headers: ${JSON.stringify(req.headers)}`);
@@ -730,6 +731,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
     }
 
     async *scrap(urlToCrawl: URL, crawlOpts?: ExtraScrappingOptions, crawlerOpts?: CrawlerOptions) {
+        this.logger.info(`Starting scrap for URL: ${urlToCrawl.toString()}`);
         console.log('Starting scrap for URL:', urlToCrawl.toString());
         console.log('Crawl options:', crawlOpts);
         console.log('Crawler options:', crawlerOpts);
diff --git a/local-storage/instant-screenshots/pageshot-93b6e8f2-0326-4aab-adb6-1b246913c7c4.png b/local-storage/instant-screenshots/pageshot-93b6e8f2-0326-4aab-adb6-1b246913c7c4.png
new file mode 100644
index 0000000..6444f5f
Binary files /dev/null and b/local-storage/instant-screenshots/pageshot-93b6e8f2-0326-4aab-adb6-1b246913c7c4.png differ
diff --git a/local-storage/instant-screenshots/pageshot-dba8f147-a3e2-4e6e-a27b-42819243773a.png b/local-storage/instant-screenshots/pageshot-dba8f147-a3e2-4e6e-a27b-42819243773a.png
new file mode 100644
index 0000000..2885066
Binary files /dev/null and b/local-storage/instant-screenshots/pageshot-dba8f147-a3e2-4e6e-a27b-42819243773a.png differ
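
For context (this note is not part of the patch): a minimal sketch of how the two ENV values set in the Dockerfile are typically consumed when Puppeteer is launched against the system-installed Chrome. `launchBrowser` is a hypothetical helper, not a function from this diff; the exact launch site in the crawler's service layer may differ.

```ts
import puppeteer, { Browser } from 'puppeteer';

// PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true keeps `npm ci` from fetching a
// bundled Chromium that would go unused; PUPPETEER_EXECUTABLE_PATH points
// Puppeteer at the Chrome the Dockerfile installs
// (/usr/bin/google-chrome-stable).
async function launchBrowser(): Promise<Browser> {
    return puppeteer.launch({
        executablePath: process.env.PUPPETEER_EXECUTABLE_PATH,
        // Commonly needed flags when running Chrome inside a container.
        args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
}
```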