mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #880 from mendableai/python-sdk/next-handler
[SDK] Added next handler for python sdk (js is ok)
This commit is contained in:
commit
766377621e
|
@ -13,7 +13,7 @@ import os
|
||||||
|
|
||||||
from .firecrawl import FirecrawlApp # noqa
|
from .firecrawl import FirecrawlApp # noqa
|
||||||
|
|
||||||
__version__ = "1.4.0"
|
__version__ = "1.5.0"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
|
|
@ -189,17 +189,38 @@ class FirecrawlApp:
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
status_data = response.json()
|
||||||
|
if status_data['status'] == 'completed':
|
||||||
|
if 'data' in status_data:
|
||||||
|
data = status_data['data']
|
||||||
|
while 'next' in status_data:
|
||||||
|
next_url = status_data.get('next')
|
||||||
|
if not next_url:
|
||||||
|
logger.warning("Expected 'next' URL is missing.")
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
status_response = self._get_request(next_url, headers)
|
||||||
|
if status_response.status_code != 200:
|
||||||
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
||||||
|
break
|
||||||
|
status_data = status_response.json()
|
||||||
|
data.extend(status_data.get('data', []))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during pagination request: {e}")
|
||||||
|
break
|
||||||
|
status_data.pop('next', None)
|
||||||
|
status_data['data'] = data
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'success': True,
|
'success': True,
|
||||||
'status': data.get('status'),
|
'status': status_data.get('status'),
|
||||||
'total': data.get('total'),
|
'total': status_data.get('total'),
|
||||||
'completed': data.get('completed'),
|
'completed': status_data.get('completed'),
|
||||||
'creditsUsed': data.get('creditsUsed'),
|
'creditsUsed': status_data.get('creditsUsed'),
|
||||||
'expiresAt': data.get('expiresAt'),
|
'expiresAt': status_data.get('expiresAt'),
|
||||||
'next': data.get('next'),
|
'data': status_data.get('data'),
|
||||||
'data': data.get('data'),
|
'error': status_data.get('error'),
|
||||||
'error': data.get('error')
|
'next': status_data.get('next', None)
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'check crawl status')
|
self._handle_error(response, 'check crawl status')
|
||||||
|
@ -377,17 +398,38 @@ class FirecrawlApp:
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
status_data = response.json()
|
||||||
|
if status_data['status'] == 'completed':
|
||||||
|
if 'data' in status_data:
|
||||||
|
data = status_data['data']
|
||||||
|
while 'next' in status_data:
|
||||||
|
next_url = status_data.get('next')
|
||||||
|
if not next_url:
|
||||||
|
logger.warning("Expected 'next' URL is missing.")
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
status_response = self._get_request(next_url, headers)
|
||||||
|
if status_response.status_code != 200:
|
||||||
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
||||||
|
break
|
||||||
|
status_data = status_response.json()
|
||||||
|
data.extend(status_data.get('data', []))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during pagination request: {e}")
|
||||||
|
break
|
||||||
|
status_data.pop('next', None)
|
||||||
|
status_data['data'] = data
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'success': True,
|
'success': True,
|
||||||
'status': data.get('status'),
|
'status': status_data.get('status'),
|
||||||
'total': data.get('total'),
|
'total': status_data.get('total'),
|
||||||
'completed': data.get('completed'),
|
'completed': status_data.get('completed'),
|
||||||
'creditsUsed': data.get('creditsUsed'),
|
'creditsUsed': status_data.get('creditsUsed'),
|
||||||
'expiresAt': data.get('expiresAt'),
|
'expiresAt': status_data.get('expiresAt'),
|
||||||
'next': data.get('next'),
|
'data': status_data.get('data'),
|
||||||
'data': data.get('data'),
|
'error': status_data.get('error'),
|
||||||
'error': data.get('error')
|
'next': status_data.get('next', None)
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'check batch scrape status')
|
self._handle_error(response, 'check batch scrape status')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user