mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Add support for the tbs, filter, lang, country, and location options in Serper search, and pass country through to Google search.
This commit is contained in:
parent
26c861db5a
commit
f2690f6909
|
@ -29,7 +29,16 @@ export async function searchHelper(
|
|||
const tbs = searchOptions.tbs ?? null;
|
||||
const filter = searchOptions.filter ?? null;
|
||||
|
||||
let res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter});
|
||||
let res = await search({
|
||||
query: query,
|
||||
advanced: advanced,
|
||||
num_results: searchOptions.limit ?? 7,
|
||||
tbs: tbs,
|
||||
filter: filter,
|
||||
lang: searchOptions.lang ?? "en",
|
||||
country: searchOptions.country ?? "us",
|
||||
location: searchOptions.location,
|
||||
});
|
||||
|
||||
let justSearch = pageOptions.fetchPageContent === false;
|
||||
|
||||
|
|
|
@ -20,6 +20,9 @@ export type SearchOptions = {
|
|||
limit?: number;
|
||||
tbs?: string;
|
||||
filter?: string;
|
||||
lang?: string;
|
||||
country?: string;
|
||||
location?: string;
|
||||
};
|
||||
|
||||
export type WebScraperOptions = {
|
||||
|
|
|
@ -17,11 +17,12 @@ function get_useragent(): string {
|
|||
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
|
||||
}
|
||||
|
||||
async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) {
|
||||
async function _req(term: string, results: number, lang: string, country: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) {
|
||||
const params = {
|
||||
"q": term,
|
||||
"num": results, // Number of results to return
|
||||
"hl": lang,
|
||||
"gl": country,
|
||||
"start": start,
|
||||
};
|
||||
if (tbs) {
|
||||
|
@ -50,7 +51,7 @@ async function _req(term: string, results: number, lang: string, start: number,
|
|||
|
||||
|
||||
|
||||
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
|
||||
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
|
||||
const escaped_term = querystring.escape(term);
|
||||
|
||||
let proxies = null;
|
||||
|
@ -70,7 +71,7 @@ export async function google_search(term: string, advanced = false, num_results
|
|||
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
|
||||
while (start < num_results && attempts < maxAttempts) {
|
||||
try {
|
||||
const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout, tbs, filter);
|
||||
const resp = await _req(escaped_term, num_results - start, lang, country, start, proxies, timeout, tbs, filter);
|
||||
const $ = cheerio.load(resp.data);
|
||||
const result_block = $("div.g");
|
||||
if (result_block.length === 0) {
|
||||
|
|
|
@ -9,6 +9,8 @@ export async function search({
|
|||
tbs = null,
|
||||
filter = null,
|
||||
lang = "en",
|
||||
country = "us",
|
||||
location = undefined,
|
||||
proxy = null,
|
||||
sleep_interval = 0,
|
||||
timeout = 5000,
|
||||
|
@ -19,13 +21,15 @@ export async function search({
|
|||
tbs?: string;
|
||||
filter?: string;
|
||||
lang?: string;
|
||||
country?: string;
|
||||
location?: string;
|
||||
proxy?: string;
|
||||
sleep_interval?: number;
|
||||
timeout?: number;
|
||||
}) : Promise<SearchResult[]> {
|
||||
try {
|
||||
if (process.env.SERPER_API_KEY && !tbs) {
|
||||
return await serper_search(query, num_results);
|
||||
return await serper_search(query, {num_results, tbs, filter, lang, country, location});
|
||||
}
|
||||
return await google_search(
|
||||
query,
|
||||
|
@ -34,6 +38,7 @@ export async function search({
|
|||
tbs,
|
||||
filter,
|
||||
lang,
|
||||
country,
|
||||
proxy,
|
||||
sleep_interval,
|
||||
timeout
|
||||
|
|
|
@ -4,10 +4,23 @@ import { SearchResult } from "../../src/lib/entities";
|
|||
|
||||
dotenv.config();
|
||||
|
||||
export async function serper_search(q, num_results): Promise<SearchResult[]> {
|
||||
export async function serper_search(q, options: {
|
||||
tbs?: string;
|
||||
filter?: string;
|
||||
lang?: string;
|
||||
country?: string;
|
||||
location?: string;
|
||||
num_results: number;
|
||||
page?: number;
|
||||
}): Promise<SearchResult[]> {
|
||||
let data = JSON.stringify({
|
||||
q: q,
|
||||
num: num_results,
|
||||
hl: options.lang,
|
||||
gl: options.country,
|
||||
location: options.location,
|
||||
tbs: options.tbs,
|
||||
num: options.num_results,
|
||||
page: options.page ?? 1,
|
||||
});
|
||||
|
||||
let config = {
|
||||
|
|
Loading…
Reference in New Issue
Block a user