mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Update ui component to v1
This commit is contained in:
parent
63264644e1
commit
7caaee28c5
|
@ -20,7 +20,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Set up your Firecrawl API key:
|
2. Set up your Firecrawl API key:
|
||||||
Open `src/components/FirecrawlComponent.tsx` and replace the placeholder API key:
|
Open `src/components/ingestion.tsx` and replace the placeholder API key:
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
const FIRECRAWL_API_KEY = "your-api-key-here";
|
const FIRECRAWL_API_KEY = "your-api-key-here";
|
||||||
|
@ -36,7 +36,7 @@ This template provides an easy way to spin up a UI for Firecrawl using React. It
|
||||||
|
|
||||||
## Customization
|
## Customization
|
||||||
|
|
||||||
The main Firecrawl component is located in `src/components/FirecrawlComponent.tsx`. You can modify this file to customize the UI or add additional features.
|
The main Firecrawl component is located in `src/components/ingestion.tsx`. You can modify this file to customize the UI or add additional features.
|
||||||
|
|
||||||
## Security Considerations
|
## Security Considerations
|
||||||
|
|
||||||
|
|
105
apps/ui/ingestion-ui/package-lock.json
generated
105
apps/ui/ingestion-ui/package-lock.json
generated
|
@ -11,6 +11,7 @@
|
||||||
"@radix-ui/react-checkbox": "^1.1.1",
|
"@radix-ui/react-checkbox": "^1.1.1",
|
||||||
"@radix-ui/react-collapsible": "^1.1.0",
|
"@radix-ui/react-collapsible": "^1.1.0",
|
||||||
"@radix-ui/react-label": "^2.1.0",
|
"@radix-ui/react-label": "^2.1.0",
|
||||||
|
"@radix-ui/react-radio-group": "^1.2.0",
|
||||||
"@radix-ui/react-slot": "^1.1.0",
|
"@radix-ui/react-slot": "^1.1.0",
|
||||||
"class-variance-authority": "^0.7.0",
|
"class-variance-authority": "^0.7.0",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
|
@ -1192,6 +1193,32 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-collection": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-GZsZslMJEyo1VKm5L1ZJY8tGDxZNPAoUeQUIbKeJfoi7Q4kmig5AsgLMYYuyYbfjd8fBmFORAIwYAkXMnXZgZw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.0",
|
||||||
|
"@radix-ui/react-context": "1.1.0",
|
||||||
|
"@radix-ui/react-primitive": "2.0.0",
|
||||||
|
"@radix-ui/react-slot": "1.1.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-compose-refs": {
|
"node_modules/@radix-ui/react-compose-refs": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.0.tgz",
|
||||||
|
@ -1220,6 +1247,21 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-direction": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-BUuBvgThEiAXh2DWu93XsT+a3aWrGqolGlqqw5VU1kG7p/ZH2cuDlM1sRLNnY3QcBS69UIz2mcKhMxDsdewhjg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-id": {
|
"node_modules/@radix-ui/react-id": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.0.tgz",
|
||||||
|
@ -1304,6 +1346,69 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-radio-group": {
|
||||||
|
"version": "1.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.2.0.tgz",
|
||||||
|
"integrity": "sha512-yv+oiLaicYMBpqgfpSPw6q+RyXlLdIpQWDHZbUKURxe+nEh53hFXPPlfhfQQtYkS5MMK/5IWIa76SksleQZSzw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.0",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.0",
|
||||||
|
"@radix-ui/react-context": "1.1.0",
|
||||||
|
"@radix-ui/react-direction": "1.1.0",
|
||||||
|
"@radix-ui/react-presence": "1.1.0",
|
||||||
|
"@radix-ui/react-primitive": "2.0.0",
|
||||||
|
"@radix-ui/react-roving-focus": "1.1.0",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.1.0",
|
||||||
|
"@radix-ui/react-use-previous": "1.1.0",
|
||||||
|
"@radix-ui/react-use-size": "1.1.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-roving-focus": {
|
||||||
|
"version": "1.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz",
|
||||||
|
"integrity": "sha512-EA6AMGeq9AEeQDeSH0aZgG198qkfHSbvWTf1HvoDmOB5bBG/qTxjYMWUKMnYiV6J/iP/J8MEFSuB2zRU2n7ODA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.0",
|
||||||
|
"@radix-ui/react-collection": "1.1.0",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.0",
|
||||||
|
"@radix-ui/react-context": "1.1.0",
|
||||||
|
"@radix-ui/react-direction": "1.1.0",
|
||||||
|
"@radix-ui/react-id": "1.1.0",
|
||||||
|
"@radix-ui/react-primitive": "2.0.0",
|
||||||
|
"@radix-ui/react-use-callback-ref": "1.1.0",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.1.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-slot": {
|
"node_modules/@radix-ui/react-slot": {
|
||||||
"version": "1.1.0",
|
"version": "1.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.0.tgz",
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
"@radix-ui/react-checkbox": "^1.1.1",
|
"@radix-ui/react-checkbox": "^1.1.1",
|
||||||
"@radix-ui/react-collapsible": "^1.1.0",
|
"@radix-ui/react-collapsible": "^1.1.0",
|
||||||
"@radix-ui/react-label": "^2.1.0",
|
"@radix-ui/react-label": "^2.1.0",
|
||||||
|
"@radix-ui/react-radio-group": "^1.2.0",
|
||||||
"@radix-ui/react-slot": "^1.1.0",
|
"@radix-ui/react-slot": "^1.1.0",
|
||||||
"class-variance-authority": "^0.7.0",
|
"class-variance-authority": "^0.7.0",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
|
|
|
@ -1,9 +1,35 @@
|
||||||
|
import { useState } from "react";
|
||||||
import FirecrawlComponent from "./components/ingestion";
|
import FirecrawlComponent from "./components/ingestion";
|
||||||
|
import FirecrawlComponentV1 from "./components/ingestionV1";
|
||||||
|
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
|
||||||
|
import { Label } from "@/components/ui/label";
|
||||||
|
|
||||||
/**
 * Root component.
 *
 * Shows a radio group for choosing between the V0 and V1 Firecrawl ingestion
 * components and renders whichever one is currently selected. V1 is selected
 * initially.
 *
 * The radio group is controlled by `selectedComponent`, so the checked item
 * and the rendered component always come from the same piece of state.
 */
function App() {
  const [selectedComponent, setSelectedComponent] = useState<"v0" | "v1">("v1");

  // The radio group reports the selected value as a plain string; accept only
  // the two versions we know how to render instead of asserting the type.
  const handleVersionChange = (value: string) => {
    if (value === "v0" || value === "v1") {
      setSelectedComponent(value);
    }
  };

  return (
    <>
      <div className="flex justify-center items-center space-x-2 p-4">
        <RadioGroup
          value={selectedComponent}
          onValueChange={handleVersionChange}
          className="flex space-x-6 mt-6"
        >
          <div className="flex items-center space-x-2 p-2">
            <RadioGroupItem value="v0" id="v0"></RadioGroupItem>
            <Label htmlFor="v0">Firecrawl Component V0</Label>
          </div>
          <div className="flex items-center space-x-2 p-2">
            <RadioGroupItem value="v1" id="v1"></RadioGroupItem>
            <Label htmlFor="v1">Firecrawl Component V1</Label>
          </div>
        </RadioGroup>
      </div>
      {selectedComponent === "v1" ? (
        <FirecrawlComponentV1 />
      ) : (
        <FirecrawlComponent />
      )}
    </>
  );
}
|
||||||
|
|
603
apps/ui/ingestion-ui/src/components/ingestionV1.tsx
Normal file
603
apps/ui/ingestion-ui/src/components/ingestionV1.tsx
Normal file
|
@ -0,0 +1,603 @@
|
||||||
|
import { useState, ChangeEvent, FormEvent, useEffect } from "react";
|
||||||
|
import {
|
||||||
|
Card,
|
||||||
|
CardHeader,
|
||||||
|
CardTitle,
|
||||||
|
CardContent,
|
||||||
|
CardFooter,
|
||||||
|
} from "@/components/ui/card";
|
||||||
|
import { Input } from "@/components/ui/input";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { Checkbox } from "@/components/ui/checkbox";
|
||||||
|
import { Label } from "@/components/ui/label";
|
||||||
|
import {
|
||||||
|
Collapsible,
|
||||||
|
CollapsibleContent,
|
||||||
|
CollapsibleTrigger,
|
||||||
|
} from "@/components/ui/collapsible";
|
||||||
|
import { ChevronDown, ChevronLeft, ChevronRight } from "lucide-react";
|
||||||
|
|
||||||
|
//! Hardcoded values (not recommended for production)
|
||||||
|
//! Highly recommended to move all Firecrawl API calls to the backend (e.g. Next.js API route)
|
||||||
|
const FIRECRAWL_API_URL = "https://api.firecrawl.dev"; // Replace with your actual API URL whether it is local or using Firecrawl Cloud
|
||||||
|
const FIRECRAWL_API_KEY = "fc-YOUR_API_KEY"; // Replace with your actual API key
|
||||||
|
|
||||||
|
/**
 * Values backing the ingestion form. Text inputs are stored as the raw
 * strings the user typed; any numeric parsing happens when a request is built.
 */
interface FormData {
  url: string;
  crawlSubPages: boolean;
  search: string;
  limit: string;
  maxDepth: string;
  excludePaths: string;
  includePaths: string;
  extractMainContent: boolean;
}

/**
 * Crawler tuning options.
 * NOTE(review): only referenced from `RequestBody.crawlerOptions`, which no
 * request built in this file populates — presumably kept from the V0
 * component; confirm before removing.
 */
interface CrawlerOptions {
  includes?: string[];
  excludes?: string[];
  maxDepth?: number;
  limit?: number;
  returnOnlyUrls: boolean;
}

/** Per-page scrape settings that can be nested in a request body. */
interface ScrapeOptions {
  formats?: string[];
  onlyMainContent?: boolean;
}

/**
 * Page-level options.
 * NOTE(review): only referenced from `RequestBody.pageOptions`, which no
 * request built in this file populates — confirm before removing.
 */
interface PageOptions {
  onlyMainContent: boolean;
}

/**
 * JSON body sent to the Firecrawl endpoints. Only `url` is mandatory; the
 * remaining fields are optional and are omitted from the serialized JSON when
 * left `undefined`.
 */
interface RequestBody {
  url: string;
  crawlerOptions?: CrawlerOptions;
  pageOptions?: PageOptions;
  search?: string;
  excludePaths?: string[];
  includePaths?: string[];
  maxDepth?: number;
  limit?: number;
  scrapeOptions?: ScrapeOptions;
  formats?: string[];
}

/**
 * Metadata attached to a scrape result. `pageError` is set on the failure
 * placeholders this component builds itself; the index signature lets
 * additional string/number metadata keys through.
 */
interface ScrapeResultMetadata {
  title: string;
  description: string;
  language: string;
  sourceURL: string;
  pageStatusCode: number;
  pageError?: string;
  [key: string]: string | number | undefined;
}

/**
 * Payload of a single scrape.
 *
 * Every request in this file asks for `formats: ["markdown"]` only, so
 * `markdown` and `metadata` are the fields the UI relies on. The other
 * representations are optional: a response is not guaranteed to carry formats
 * that were never requested.
 */
interface ScrapeResultData {
  markdown: string;
  content?: string;
  html?: string;
  rawHtml?: string;
  metadata: ScrapeResultMetadata;
  llm_extraction?: Record<string, unknown>;
  warning?: string;
}

/** Result envelope for one URL: a success flag plus the scraped payload. */
interface ScrapeResult {
  success: boolean;
  data: ScrapeResultData;
}
|
||||||
|
|
||||||
|
export default function FirecrawlComponentV1() {
|
||||||
|
// Form inputs; all fields start empty / unchecked.
const [formData, setFormData] = useState<FormData>({
  url: "",
  crawlSubPages: false,
  search: "",
  limit: "",
  maxDepth: "",
  excludePaths: "",
  includePaths: "",
  extractMainContent: false,
});

// Request lifecycle flags.
const [loading, setLoading] = useState(false);
const [scrapingSelectedLoading, setScrapingSelectedLoading] = useState(false);

// URLs returned by the map request, and the subset ticked for scraping.
const [crawledUrls, setCrawledUrls] = useState<string[]>([]);
const [selectedUrls, setSelectedUrls] = useState<string[]>([]);

// Scrape results keyed by URL.
const [scrapeResults, setScrapeResults] = useState<Record<string, ScrapeResult>>(
  {}
);

// "Advanced Options" panel starts expanded.
const [isCollapsibleOpen, setIsCollapsibleOpen] = useState(true);

// Progress shown in the status bar; `total` is null until it is known.
const [crawlStatus, setCrawlStatus] = useState<{
  current: number;
  total: number | null;
}>({ current: 0, total: null });
const [elapsedTime, setElapsedTime] = useState(0);
const [showCrawlStatus, setShowCrawlStatus] = useState(false);
const [isScraping, setIsScraping] = useState(false);

// Pagination of the crawled-URL list (pages are 1-based).
const [currentPage, setCurrentPage] = useState(1);
const urlsPerPage = 10;
|
||||||
|
|
||||||
|
// Elapsed-time ticker: while `loading` is true, reveal the status bar and add
// one to `elapsedTime` every second. The interval is cleared when `loading`
// changes or the component unmounts.
useEffect(() => {
  if (!loading) {
    // Nothing was started, so there is nothing to clean up.
    return undefined;
  }

  setShowCrawlStatus(true);
  // Let the handle type be inferred from `setInterval` rather than naming the
  // Node-only `NodeJS.Timeout`, which needs Node typings in browser code.
  const timer = setInterval(() => {
    setElapsedTime((prevTime) => prevTime + 1);
  }, 1000);

  return () => {
    clearInterval(timer);
  };
}, [loading]);
|
||||||
|
|
||||||
|
// Shared change handler for the form inputs. Stores the input's new value
// under the key given by its `name` attribute (the `checked` flag for
// checkboxes, the text value otherwise). Edits to "limit" or "search" also
// recompute `crawlSubPages`: it becomes true when the typed value, the limit
// or the search is non-empty, and false otherwise.
const handleChange = (e: ChangeEvent<HTMLInputElement>) => {
  const {
    name: field,
    value: text,
    type: inputType,
    checked: isChecked,
  } = e.target;
  const nextValue = inputType === "checkbox" ? isChecked : text;
  const affectsCrawlToggle = field === "limit" || field === "search";

  setFormData((previous) => {
    const updated = { ...previous, [field]: nextValue };

    if (affectsCrawlToggle) {
      updated.crawlSubPages =
        Boolean(text) || Boolean(updated.limit) || Boolean(updated.search);
    }

    return updated;
  });
};
|
||||||
|
|
||||||
|
/**
 * Form submit handler.
 *
 * Resets all result/progress state, then sends one request:
 * - "Crawl Sub-pages" checked: POST `/v1/map` with the URL plus the optional
 *   search term and limit. The returned links fill the URL list and are all
 *   pre-selected.
 * - otherwise: POST `/v1/scrape` for the single URL as markdown. The response
 *   is stored as that URL's scrape result.
 *
 * Network/HTTP failures are logged and surfaced as a single failed result
 * stored under the key "error". `loading` is always cleared at the end.
 */
const handleSubmit = async (e: FormEvent<HTMLFormElement>) => {
  e.preventDefault();

  const isMapRequest = formData.crawlSubPages;

  setLoading(true);
  setIsCollapsibleOpen(false);
  setElapsedTime(0);
  setCrawlStatus({ current: 0, total: null });
  setIsScraping(!isMapRequest);
  setCrawledUrls([]);
  setSelectedUrls([]);
  setScrapeResults({});
  setScrapingSelectedLoading(false);
  setShowCrawlStatus(false);
  // Go back to the first page: a page index left over from a previous, longer
  // URL list would otherwise slice past the end and show an empty page.
  setCurrentPage(1);

  try {
    const endpoint = `${FIRECRAWL_API_URL}/v1/${isMapRequest ? "map" : "scrape"}`;

    // Forward the limit only when it parses to a positive integer. A NaN
    // would be serialized as `null` by JSON.stringify.
    const parsedLimit = Number.parseInt(formData.limit, 10);
    const limit =
      Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : undefined;

    const requestBody: RequestBody = isMapRequest
      ? {
          url: formData.url,
          search: formData.search || undefined,
          limit,
        }
      : {
          url: formData.url,
          formats: ["markdown"],
        };

    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(requestBody),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data = await response.json();

    if (!isMapRequest) {
      setScrapeResults({ [formData.url]: data });
      setCrawlStatus({ current: 1, total: 1 });
      return;
    }

    if (data.success === true && Array.isArray(data.links)) {
      const links: string[] = data.links;
      setCrawledUrls(links);
      setSelectedUrls(links);
      setCrawlStatus({ current: links.length, total: links.length });
    } else {
      console.error("Unexpected response format from map endpoint", data);
    }
  } catch (error) {
    console.error("Error:", error);
    setScrapeResults({
      error: {
        success: false,
        data: {
          metadata: {
            pageError: "Error occurred while fetching data",
            title: "",
            description: "",
            language: "",
            sourceURL: "",
            pageStatusCode: 0,
          },
          markdown: "",
          content: "",
          html: "",
          rawHtml: "",
          llm_extraction: {},
        },
      },
    });
  } finally {
    setLoading(false);
  }
};
|
||||||
|
|
||||||
|
// Scrapes one URL as markdown via POST `/v1/scrape`. Never rejects: an HTTP
// or network failure is logged and returned as a `success: false` result
// whose `metadata.pageError` carries the error message.
const scrapeUrlAsMarkdown = async (url: string): Promise<ScrapeResult> => {
  try {
    const response = await fetch(`${FIRECRAWL_API_URL}/v1/scrape`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${FIRECRAWL_API_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        url: url,
        formats: ["markdown"],
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data: ScrapeResult = await response.json();
    return data;
  } catch (error) {
    console.error(`Error scraping ${url}:`, error);
    return {
      success: false,
      data: {
        markdown: "",
        content: "",
        html: "",
        rawHtml: "",
        metadata: {
          title: "",
          description: "",
          language: "",
          sourceURL: url,
          pageStatusCode: 0,
          pageError: error instanceof Error ? error.message : String(error),
        },
        llm_extraction: {},
      },
    };
  }
};

/**
 * Scrapes every currently selected URL, one request at a time.
 *
 * After each URL finishes, whether it succeeded or failed, its result is
 * merged into `scrapeResults` and the progress counter advances. Failed URLs
 * therefore show up in the results list too, and the counter reflects every
 * processed URL. `loading` and `isScraping` are cleared when the loop ends.
 */
const handleScrapeSelected = async () => {
  // Snapshot the selection so the progress total and the loop agree.
  const urlsToScrape = [...selectedUrls];

  setLoading(true);
  setElapsedTime(0);
  setCrawlStatus({ current: 0, total: urlsToScrape.length });
  setIsScraping(true);
  setScrapingSelectedLoading(true);

  for (const [index, url] of urlsToScrape.entries()) {
    const result = await scrapeUrlAsMarkdown(url);
    // Functional updates: this async loop outlives the render that created
    // it, so the closed-over `scrapeResults` may be stale.
    setScrapeResults((prev) => ({ ...prev, [url]: result }));
    setCrawlStatus((prev) => ({ ...prev, current: index + 1 }));
  }

  setLoading(false);
  setIsScraping(false);
};
|
||||||
|
|
||||||
|
// Switches the crawled-URL list to the given page number.
const handlePageChange = (newPage: number) => setCurrentPage(newPage);
|
||||||
|
|
||||||
|
// Crawled URLs visible on the current page (pages are 1-based, `urlsPerPage`
// entries each).
const pageStart = (currentPage - 1) * urlsPerPage;
const paginatedUrls = crawledUrls.slice(pageStart, pageStart + urlsPerPage);
|
||||||
|
|
||||||
|
// Render: the input card (URL field, advanced options, crawl status bar,
// paginated URL picker, "Scrape Selected URLs" button) followed by one card
// per scrape result. A failed result shows its recorded `pageError` message,
// and missing `data` / `markdown` in a result is tolerated instead of
// throwing during render.
return (
  <div className="max-w-2xl mx-auto p-4">
    <Card>
      <CardHeader className="flex items-start justify-between mb-0 pb-4">
        <CardTitle className="flex items-center justify-between w-full space-x-2">
          <span className="text-base">Extract web content (V1)</span>
          <a
            href="https://www.firecrawl.dev"
            className="text-xs text-gray-500 font-normal px-3 py-1 bg-zinc-100 rounded-xl hover:bg-zinc-200 transition-colors"
          >
            Powered by Firecrawl 🔥
          </a>
        </CardTitle>
        <div className="text-sm text-gray-500 w-11/12 items-center">
          Use this component to quickly give your users the ability to connect
          their AI apps to web data with Firecrawl. Learn more on the{" "}
          <a
            href="https://docs.firecrawl.dev/"
            className="text-sm text-blue-500"
          >
            Firecrawl docs!
          </a>
        </div>
      </CardHeader>
      <CardContent className="space-y-4">
        <form onSubmit={handleSubmit}>
          <div className="flex items-center space-x-2">
            <Input
              placeholder="https://www.firecrawl.dev/"
              className="flex-grow"
              name="url"
              value={formData.url}
              onChange={handleChange}
            />
            <Button type="submit" variant="default" disabled={loading}>
              {loading ? (
                <div
                  role="status"
                  className="flex items-center justify-between space-x-2"
                >
                  <svg
                    className="animate-spin h-4 w-4 text-white"
                    xmlns="http://www.w3.org/2000/svg"
                    fill="none"
                    viewBox="0 0 24 24"
                  >
                    <circle
                      className="opacity-25"
                      cx="12"
                      cy="12"
                      r="10"
                      stroke="currentColor"
                      strokeWidth="4"
                    ></circle>
                    <path
                      className="opacity-75"
                      fill="currentColor"
                      d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
                    ></path>
                  </svg>
                  <span className="sr-only">Loading...</span>
                </div>
              ) : (
                "Run"
              )}
            </Button>
          </div>
          <Collapsible
            open={isCollapsibleOpen}
            onOpenChange={setIsCollapsibleOpen}
            className="mt-2"
          >
            <CollapsibleTrigger asChild>
              <Button variant="ghost" className="w-full justify-between pl-2">
                Advanced Options
                <ChevronDown className="h-4 w-4 opacity-50" />
              </Button>
            </CollapsibleTrigger>
            <CollapsibleContent className="space-y-4 mt-4 px-2">
              <div className="flex items-center space-x-2">
                <Checkbox
                  id="crawlSubPages"
                  name="crawlSubPages"
                  checked={formData.crawlSubPages}
                  onCheckedChange={(checked: boolean) =>
                    setFormData((prev) => ({
                      ...prev,
                      crawlSubPages: checked,
                    }))
                  }
                />
                <label htmlFor="crawlSubPages" className="text-sm">
                  Crawl Sub-pages
                </label>
              </div>

              <div className="grid grid-cols-2 gap-4">
                <div>
                  <Label
                    htmlFor="search"
                    className="block text-left w-full pb-2"
                  >
                    Search for specific pages in crawl *
                  </Label>
                  <Input
                    id="search"
                    name="search"
                    placeholder="python sdk"
                    value={formData.search}
                    onChange={handleChange}
                  />
                </div>
                <div>
                  <Label
                    htmlFor="limit"
                    className="block text-left w-full pb-2"
                  >
                    Limit *
                  </Label>
                  <Input
                    id="limit"
                    name="limit"
                    placeholder="10"
                    value={formData.limit}
                    onChange={handleChange}
                  />
                </div>
              </div>
            </CollapsibleContent>
          </Collapsible>
        </form>
        {showCrawlStatus && (
          <div className="flex items-center justify-between mb-2 space-x-2 bg-gray-100 p-2 rounded-md">
            <div className="flex items-center space-x-2">
              {!isScraping &&
                crawledUrls.length > 0 &&
                !scrapingSelectedLoading && (
                  <>
                    <Checkbox
                      id="selectAll"
                      checked={selectedUrls.length === crawledUrls.length}
                      onCheckedChange={(checked) => {
                        if (checked) {
                          setSelectedUrls([...crawledUrls]);
                        } else {
                          setSelectedUrls([]);
                        }
                      }}
                    />
                    <label
                      htmlFor="selectAll"
                      className="text-sm cursor-pointer"
                    >
                      {selectedUrls.length === crawledUrls.length
                        ? `Unselect All (${selectedUrls.length})`
                        : `Select All (${selectedUrls.length})`}
                    </label>
                  </>
                )}
            </div>
            <div className="text-sm text-gray-600">
              {isScraping
                ? `Scraped ${crawlStatus.current} page(s) in ${elapsedTime}s`
                : `Crawled ${crawlStatus.current} pages in ${elapsedTime}s`}
            </div>
          </div>
        )}

        {crawledUrls.length > 0 &&
          !scrapingSelectedLoading &&
          !isScraping && (
            <>
              <ul className="pl-2">
                {paginatedUrls.map((url, index) => (
                  <li
                    key={index}
                    className="flex items-center space-x-2 my-2 text-sm"
                  >
                    <Checkbox
                      checked={selectedUrls.includes(url)}
                      onCheckedChange={() =>
                        setSelectedUrls((prev) =>
                          prev.includes(url)
                            ? prev.filter((u) => u !== url)
                            : [...prev, url]
                        )
                      }
                    />
                    <span className="flex items-center max-w-lg">
                      {url.length > 70 ? `${url.slice(0, 70)}...` : url}
                    </span>
                  </li>
                ))}
              </ul>
              <div className="flex items-center justify-between mt-4">
                <Button
                  variant="outline"
                  className="px-2"
                  onClick={() => handlePageChange(currentPage - 1)}
                  disabled={currentPage === 1}
                >
                  <ChevronLeft className="h-5 w-5" />
                </Button>
                <span className="text-sm text-gray-500">
                  Page {currentPage} of{" "}
                  {Math.ceil(crawledUrls.length / urlsPerPage)}
                </span>
                <Button
                  variant="outline"
                  className="px-2"
                  onClick={() => handlePageChange(currentPage + 1)}
                  disabled={currentPage * urlsPerPage >= crawledUrls.length}
                >
                  <ChevronRight className="h-5 w-5 " />
                </Button>
              </div>
            </>
          )}
      </CardContent>
      <CardFooter className="w-full flex justify-center">
        {crawledUrls.length > 0 && !scrapingSelectedLoading && (
          <Button
            variant="default"
            className="w-full"
            onClick={handleScrapeSelected}
            disabled={loading || selectedUrls.length === 0}
          >
            Scrape Selected URLs
          </Button>
        )}
      </CardFooter>
    </Card>

    {Object.keys(scrapeResults).length > 0 && (
      <div className="mt-4">
        <h2 className="text-base font-bold ">Scrape Results</h2>
        <p className="text-sm text-gray-500">
          You can do whatever you want with the scrape results. Here is a
          basic showcase of the markdown.
        </p>
        <div className="flex flex-col gap-4 mt-4 w-full">
          {Object.entries(scrapeResults).map(([url, result]) => (
            <Card key={url} className="relative p-4 w-full">
              <CardTitle className="text-sm font-normal flex flex-col">
                <span>{result.data?.metadata?.title}</span>
                <span className="text-xs text-gray-500">
                  {url
                    .replace(/^(https?:\/\/)?(www\.)?/, "")
                    .replace(/\/$/, "")}
                </span>
              </CardTitle>
              <CardContent className="relative px-0 pt-2 !text-xs w-full">
                <div className=" overflow-y-auto h-32 bg-zinc-100 rounded-md p-2 w-full">
                  {result.success ? (
                    <>
                      <pre className="text-xs whitespace-pre-wrap">
                        {(result.data?.markdown ?? "").trim()}
                      </pre>
                    </>
                  ) : (
                    <>
                      <p className="text-red-500">
                        Failed to scrape this URL
                      </p>
                      <p className="text-zinc-500 font-mono">
                        {result.data?.metadata?.pageError ?? "Unknown error"}
                      </p>
                    </>
                  )}
                </div>
              </CardContent>
            </Card>
          ))}
        </div>
      </div>
    )}
  </div>
);
|
||||||
|
}
|
42
apps/ui/ingestion-ui/src/components/ui/radio-group.tsx
Normal file
42
apps/ui/ingestion-ui/src/components/ui/radio-group.tsx
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
import * as React from "react"
|
||||||
|
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group"
|
||||||
|
import { Circle } from "lucide-react"
|
||||||
|
|
||||||
|
import { cn } from "@/lib/utils"
|
||||||
|
|
||||||
|
// Wrapper around the Radix radio-group root. Applies the default
// "grid gap-2" classes, merges any caller-supplied `className` through `cn`,
// and forwards the ref and all remaining props to the primitive.
const RadioGroup = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
>(({ className, ...rest }, forwardedRef) => (
  <RadioGroupPrimitive.Root
    ref={forwardedRef}
    className={cn("grid gap-2", className)}
    {...rest}
  />
))
RadioGroup.displayName = RadioGroupPrimitive.Root.displayName
|
||||||
|
|
||||||
|
// Wrapper around a single Radix radio item. Applies the base item classes,
// merges any caller-supplied `className` through `cn`, forwards the ref and
// remaining props, and renders a filled circle icon inside the primitive's
// indicator.
const RadioGroupItem = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
>(({ className, ...rest }, forwardedRef) => {
  const itemClassName = cn(
    "aspect-square h-4 w-4 rounded-full border border-primary text-primary ring-offset-background focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
    className
  )

  return (
    <RadioGroupPrimitive.Item
      ref={forwardedRef}
      className={itemClassName}
      {...rest}
    >
      <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
        <Circle className="h-2.5 w-2.5 fill-current text-current" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  )
})
RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName
|
||||||
|
|
||||||
|
export { RadioGroup, RadioGroupItem }
|
Loading…
Reference in New Issue
Block a user