fix(v1): update readme - v1.0.1

rafaelsideguide 2024-08-28 15:15:29 -03:00
parent 9e87d05b77
commit c7b3365ffd
4 changed files with 76 additions and 87 deletions


@@ -18,29 +18,30 @@ npm install @mendable/firecrawl-js
Here's an example of how to use the SDK with error handling:
```js
-import FirecrawlApp from "@mendable/firecrawl-js";
+import FirecrawlApp, { CrawlParams, CrawlStatusResponse } from '@mendable/firecrawl-js';

// Initialize the FirecrawlApp with your API key
-const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY" });
+const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});

-// Scrape a single URL
-const url = "https://mendable.ai";
-const scrapedData = await app.scrapeUrl(url);
+// Scrape a website
+const scrapeResponse = await app.scrapeUrl('https://firecrawl.dev', {
+  formats: ['markdown', 'html'],
+});
+
+if (scrapeResponse) {
+  console.log(scrapeResponse)
+}

// Crawl a website
-const crawlUrl = "https://mendable.ai";
-const params = {
-  crawlerOptions: {
-    excludes: ["blog/"],
-    includes: [], // leave empty for all pages
-    limit: 1000,
-  },
-  pageOptions: {
-    onlyMainContent: true,
-  },
-};
-const crawlResult = await app.crawlUrl(crawlUrl, params);
+const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
+  limit: 100,
+  scrapeOptions: {
+    formats: ['markdown', 'html'],
+  }
+} as CrawlParams, true, 30) as CrawlStatusResponse;
+
+if (crawlResponse) {
+  console.log(crawlResponse)
+}
```
### Scraping a URL
@@ -57,28 +58,16 @@ const scrapedData = await app.scrapeUrl(url);
To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
```js
-const crawlUrl = "https://example.com";
-const params = {
-  crawlerOptions: {
-    excludes: ["blog/"],
-    includes: [], // leave empty for all pages
-    limit: 1000,
-  },
-  pageOptions: {
-    onlyMainContent: true,
-  },
-};
-const waitUntilDone = true;
-const pollInterval = 5;
-const crawlResult = await app.crawlUrl(
-  crawlUrl,
-  params,
-  waitUntilDone,
-  pollInterval
-);
+const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
+  limit: 100,
+  scrapeOptions: {
+    formats: ['markdown', 'html'],
+  }
+} as CrawlParams, true, 30) as CrawlStatusResponse;
+
+if (crawlResponse) {
+  console.log(crawlResponse)
+}
```
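The truthiness check above only guards against an empty result; transport or API failures surface as a rejected promise on the awaited call. A minimal defensive sketch built on the `crawlUrl` call shown above (the wrapper itself and its error handling are illustrative, not part of the SDK):

```js
// Illustrative wrapper around the crawl call shown above; assumes the
// promise rejects on transport or API errors.
async function safeCrawl(app, url) {
  try {
    return await app.crawlUrl(url, {
      limit: 100,
      scrapeOptions: { formats: ['markdown', 'html'] },
    }, true, 30);
  } catch (err) {
    console.error(`Crawl of ${url} failed:`, err); // surface the failure
    return null;
  }
}
```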
### Checking Crawl Status
@@ -86,7 +75,7 @@ const crawlResult = await app.crawlUrl(
To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
```js
-const status = await app.checkCrawlStatus(jobId);
+const status = await app.checkCrawlStatus(id);
```
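The `id` passed here is the job id handed back when a crawl is started without waiting for completion. A minimal polling sketch, assuming the non-waiting call resolves to an object with an `id` field and that status responses carry a `status` string such as `'completed'` (both are assumptions, not guaranteed by the snippet above):

```js
// Start a crawl without waiting for completion, then poll manually.
// That the returned object carries an `id`, and that `status` reaches
// 'completed', are assumptions for illustration.
const job = await app.crawlUrl('https://firecrawl.dev', { limit: 100 }, false);

let crawlStatus = await app.checkCrawlStatus(job.id);
while (crawlStatus.status !== 'completed') {
  await new Promise((resolve) => setTimeout(resolve, 5000)); // 5s between checks
  crawlStatus = await app.checkCrawlStatus(job.id);
}
console.log(crawlStatus);
```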
### Extracting structured data from a URL
@@ -123,17 +112,13 @@ const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
console.log(scrapeResult.data["llm_extraction"]);
```
-### Search for a query
-With the `search` method, you can search for a query in a search engine and get the top results along with the page content for each result. The method takes the query as a parameter and returns the search results.
+### Map a Website
+Use `mapUrl` to generate a list of URLs from a website. The `params` argument lets you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
```js
-const query = "what is mendable?";
-const searchResults = await app.search(query, {
-  pageOptions: {
-    fetchPageContent: true, // Fetch the page content for each search result
-  },
-});
+const mapResult = await app.mapUrl('https://example.com') as MapResponse;
+console.log(mapResult)
```
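A sketch of passing mapping options as a second argument; the option names `includeSubdomains` and `ignoreSitemap` are illustrative assumptions rather than parameters confirmed by the snippet above:

```js
// The option names below are assumptions for illustration only.
const mapWithOptions = await app.mapUrl('https://example.com', {
  includeSubdomains: false, // assumed: leave subdomains out of the map
  ignoreSitemap: true,      // assumed: skip the site's sitemap
});
console.log(mapWithOptions);
```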
## Error Handling


@@ -1,6 +1,6 @@
{
  "name": "@mendable/firecrawl-js",
-  "version": "1.0.0",
+  "version": "1.0.1",
  "description": "JavaScript SDK for Firecrawl API",
  "main": "build/cjs/index.js",
  "types": "types/index.d.ts",


@@ -18,23 +18,28 @@ pip install firecrawl-py
Here's an example of how to use the SDK:
```python
-from firecrawl import FirecrawlApp
+from firecrawl.firecrawl import FirecrawlApp

# Initialize the FirecrawlApp with your API key
-app = FirecrawlApp(api_key='your_api_key')
+app = FirecrawlApp(api_key="fc-YOUR_API_KEY")

-# Scrape a single URL
-url = 'https://mendable.ai'
-scraped_data = app.scrape_url(url)
+# Scrape a website:
+scrape_status = app.scrape_url(
+  'https://firecrawl.dev',
+  params={'formats': ['markdown', 'html']}
+)
+print(scrape_status)

-# Crawl a website
-crawl_url = 'https://mendable.ai'
-params = {
-  'pageOptions': {
-    'onlyMainContent': True
-  }
-}
-crawl_result = app.crawl_url(crawl_url, params=params)
+# Crawl a website:
+crawl_status = app.crawl_url(
+  'https://firecrawl.dev',
+  params={
+    'limit': 100,
+    'scrapeOptions': {'formats': ['markdown', 'html']}
+  },
+  wait_until_done=True,
+  poll_interval=30
+)
+print(crawl_status)
```
### Scraping a URL
@@ -72,15 +77,6 @@ data = app.scrape_url('https://news.ycombinator.com', {
print(data["llm_extraction"])
```
-### Search for a query
-
-Used to search the web, get the most relevant results, scrap each page and return the markdown.
-
-```python
-query = 'what is mendable?'
-search_result = app.search(query)
-```
### Crawling a Website
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
@@ -88,18 +84,16 @@ To crawl a website, use the `crawl_url` method. It takes the starting URL and op
The `wait_until_done` parameter determines whether the method should wait for the crawl job to complete before returning the result. If set to `True`, the method will periodically check the status of the crawl job until it is completed or the specified `timeout` (in seconds) is reached. If set to `False`, the method will return immediately with the job ID, and you can manually check the status of the crawl job using the `check_crawl_status` method.
```python
-crawl_url = 'https://example.com'
-params = {
-  'crawlerOptions': {
-    'excludes': ['blog/*'],
-    'includes': [], # leave empty for all pages
-    'limit': 1000,
-  },
-  'pageOptions': {
-    'onlyMainContent': True
-  }
-}
-crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
+crawl_status = app.crawl_url(
+  'https://firecrawl.dev',
+  params={
+    'limit': 100,
+    'scrapeOptions': {'formats': ['markdown', 'html']}
+  },
+  wait_until_done=True,
+  poll_interval=30
+)
+print(crawl_status)
```
If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.
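Because a failed or stopped job raises an exception rather than returning an error value, the blocking call is naturally wrapped in a `try`/`except` block. A minimal sketch (the broad `Exception` catch is an assumption; the SDK's concrete exception types are not spelled out here):

```python
# A guard around the blocking crawl shown above. Catching Exception is
# deliberately broad; the SDK's concrete exception types are an assumption.
try:
    crawl_status = app.crawl_url(
        'https://firecrawl.dev',
        params={'limit': 100, 'scrapeOptions': {'formats': ['markdown', 'html']}},
        wait_until_done=True,
        poll_interval=30
    )
    print(crawl_status)
except Exception as exc:
    print(f"Crawl failed or was stopped: {exc}")
```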
@@ -109,8 +103,18 @@ If `wait_until_done` is set to `True`, the `crawl_url` method will return the cr
To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
```python
-job_id = crawl_result['jobId']
-status = app.check_crawl_status(job_id)
+id = crawl_result['id']
+status = app.check_crawl_status(id)
```
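Combined with `wait_until_done=False`, this enables a manual polling loop. A sketch, assuming the non-blocking call returns a dictionary containing the job `'id'` (as above) and that `'completed'` is the terminal status value (an assumption):

```python
import time

# Start the crawl without blocking, then poll its status manually.
crawl_result = app.crawl_url(
    'https://firecrawl.dev',
    params={'limit': 100},
    wait_until_done=False
)

status = app.check_crawl_status(crawl_result['id'])
while status.get('status') != 'completed':  # terminal value is an assumption
    time.sleep(5)  # wait five seconds between checks
    status = app.check_crawl_status(crawl_result['id'])
print(status)
```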
+### Map a Website
+
+Use `map_url` to generate a list of URLs from a website. The `params` argument lets you customize the mapping process, including options to exclude subdomains or to utilize the sitemap.
+
+```python
+# Map a website:
+map_result = app.map_url('https://example.com')
+print(map_result)
+```
## Error Handling


@@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp

-__version__ = "1.0.0"
+__version__ = "1.0.1"
# Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl")