mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
fix: ignore match all target selectors for performance
This commit is contained in:
parent
a08218506e
commit
8b7af6d076
|
@ -237,6 +237,7 @@ export class CrawlerOptions extends AutoCastable {
|
|||
instance.targetSelector ??= targetSelector;
|
||||
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
|
||||
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
|
||||
instance.targetSelector = filterSelector(instance.targetSelector);
|
||||
const overrideUserAgent = ctx?.req.get('x-user-agent');
|
||||
instance.userAgent ??= overrideUserAgent;
|
||||
|
||||
|
@ -286,3 +287,20 @@ export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
|
|||
return instance;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Removes selector entries that begin with, or embed, a match-all pattern.
 *
 * Each entry may itself be a comma-separated selector list. The entry is
 * dropped as a whole when ANY of its comma-separated parts (after trimming
 * surrounding whitespace):
 *   - starts with `*`,
 *   - starts with `:`, or
 *   - contains `*:` anywhere.
 *
 * @param s - A single selector string, an array of selector strings, or
 *   nothing at all.
 * @returns The falsy input unchanged (`undefined` or `''`); otherwise a new
 *   array holding the surviving entries in their original order. Note that a
 *   truthy string input is always returned wrapped in an array, and the
 *   result may be an empty array when every entry is rejected.
 */
function filterSelector(s?: string | string[]) {
    if (!s) {
        // Nothing to filter: hand the falsy value back untouched.
        return s;
    }

    const entries = Array.isArray(s) ? s : [s];

    return entries.filter((entry) => {
        // Only the parts are trimmed for inspection; a surviving entry is
        // returned exactly as it was passed in.
        const parts = entry.split(',').map((part) => part.trim());
        const hasViolation = parts.some(
            (part) => part.startsWith('*') || part.startsWith(':') || part.includes('*:')
        );

        return !hasViolation;
    });
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user