mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 11:42:32 +08:00
puppeteer stealth
This commit is contained in:
parent
33d7cfc41c
commit
dbeb69582a
320
backend/functions/package-lock.json
generated
320
backend/functions/package-lock.json
generated
|
@ -27,6 +27,8 @@
|
|||
"minio": "^7.1.3",
|
||||
"openai": "^4.20.0",
|
||||
"puppeteer": "^22.6.3",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"stripe": "^11.11.0",
|
||||
"tiktoken": "^1.0.10",
|
||||
"turndown": "^7.1.3",
|
||||
|
@ -2526,6 +2528,14 @@
|
|||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/debug": {
|
||||
"version": "4.1.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
|
||||
"integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
|
||||
"dependencies": {
|
||||
"@types/ms": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/express": {
|
||||
"version": "4.17.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
|
||||
|
@ -2673,6 +2683,11 @@
|
|||
"integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/@types/ms": {
|
||||
"version": "0.7.34",
|
||||
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz",
|
||||
"integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "18.19.31",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.31.tgz",
|
||||
|
@ -3234,6 +3249,14 @@
|
|||
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
||||
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
||||
},
|
||||
"node_modules/arr-union": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
|
||||
"integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/array-buffer-byte-length": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
|
||||
|
@ -4076,6 +4099,21 @@
|
|||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/clone-deep": {
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
|
||||
"integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
|
||||
"dependencies": {
|
||||
"for-own": "^0.1.3",
|
||||
"is-plain-object": "^2.0.1",
|
||||
"kind-of": "^3.0.2",
|
||||
"lazy-cache": "^1.0.3",
|
||||
"shallow-clone": "^0.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/co": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
||||
|
@ -4466,8 +4504,6 @@
|
|||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
||||
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
||||
"dev": true,
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
|
@ -5739,6 +5775,25 @@
|
|||
"is-callable": "^1.1.3"
|
||||
}
|
||||
},
|
||||
"node_modules/for-in": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz",
|
||||
"integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/for-own": {
|
||||
"version": "0.1.5",
|
||||
"resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
|
||||
"integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
|
||||
"dependencies": {
|
||||
"for-in": "^1.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
|
@ -6786,6 +6841,11 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/is-buffer": {
|
||||
"version": "1.1.6",
|
||||
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
|
||||
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
|
||||
},
|
||||
"node_modules/is-callable": {
|
||||
"version": "1.2.7",
|
||||
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
|
||||
|
@ -6839,6 +6899,14 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/is-extendable": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
|
||||
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/is-extglob": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
|
||||
|
@ -6948,6 +7016,17 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/is-plain-object": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
|
||||
"integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
|
||||
"dependencies": {
|
||||
"isobject": "^3.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/is-regex": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
|
||||
|
@ -7064,6 +7143,14 @@
|
|||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/isobject": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
|
||||
"integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/istanbul-lib-coverage": {
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
||||
|
@ -8049,6 +8136,17 @@
|
|||
"json-buffer": "3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/kind-of": {
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
|
||||
"integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
|
||||
"dependencies": {
|
||||
"is-buffer": "^1.1.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/klaw": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/klaw/-/klaw-3.0.0.tgz",
|
||||
|
@ -8184,6 +8282,14 @@
|
|||
"unicode-9.0.0": "0.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/lazy-cache": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
|
||||
"integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/lazystream": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
|
||||
|
@ -8504,6 +8610,19 @@
|
|||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/merge-deep": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
|
||||
"integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
|
||||
"dependencies": {
|
||||
"arr-union": "^3.1.0",
|
||||
"clone-deep": "^0.2.4",
|
||||
"kind-of": "^3.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/merge-descriptors": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
|
||||
|
@ -8672,6 +8791,26 @@
|
|||
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
|
||||
"integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
|
||||
},
|
||||
"node_modules/mixin-object": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
|
||||
"integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
|
||||
"dependencies": {
|
||||
"for-in": "^0.1.3",
|
||||
"is-extendable": "^0.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/mixin-object/node_modules/for-in": {
|
||||
"version": "0.1.8",
|
||||
"resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
|
||||
"integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/mkdirp": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
|
||||
|
@ -9719,6 +9858,150 @@
|
|||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra": {
|
||||
"version": "3.3.6",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
|
||||
"integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
|
||||
"dependencies": {
|
||||
"@types/debug": "^4.1.0",
|
||||
"debug": "^4.1.1",
|
||||
"deepmerge": "^4.2.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/puppeteer": "*",
|
||||
"puppeteer": "*",
|
||||
"puppeteer-core": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/puppeteer": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer-core": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra-plugin": {
|
||||
"version": "3.2.3",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz",
|
||||
"integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==",
|
||||
"dependencies": {
|
||||
"@types/debug": "^4.1.0",
|
||||
"debug": "^4.1.1",
|
||||
"merge-deep": "^3.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=9.11.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"playwright-extra": "*",
|
||||
"puppeteer-extra": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"playwright-extra": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer-extra": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra-plugin-stealth": {
|
||||
"version": "2.11.2",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz",
|
||||
"integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"puppeteer-extra-plugin": "^3.2.3",
|
||||
"puppeteer-extra-plugin-user-preferences": "^2.4.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"playwright-extra": "*",
|
||||
"puppeteer-extra": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"playwright-extra": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer-extra": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra-plugin-user-data-dir": {
|
||||
"version": "2.4.1",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz",
|
||||
"integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"fs-extra": "^10.0.0",
|
||||
"puppeteer-extra-plugin": "^3.2.3",
|
||||
"rimraf": "^3.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"playwright-extra": "*",
|
||||
"puppeteer-extra": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"playwright-extra": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer-extra": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/fs-extra": {
|
||||
"version": "10.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
|
||||
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
|
||||
"dependencies": {
|
||||
"graceful-fs": "^4.2.0",
|
||||
"jsonfile": "^6.0.1",
|
||||
"universalify": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer-extra-plugin-user-preferences": {
|
||||
"version": "2.4.1",
|
||||
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz",
|
||||
"integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"deepmerge": "^4.2.2",
|
||||
"puppeteer-extra-plugin": "^3.2.3",
|
||||
"puppeteer-extra-plugin-user-data-dir": "^2.4.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"playwright-extra": "*",
|
||||
"puppeteer-extra": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"playwright-extra": {
|
||||
"optional": true
|
||||
},
|
||||
"puppeteer-extra": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/pure-rand": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
|
||||
|
@ -10314,6 +10597,39 @@
|
|||
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
|
||||
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
|
||||
},
|
||||
"node_modules/shallow-clone": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
|
||||
"integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
|
||||
"dependencies": {
|
||||
"is-extendable": "^0.1.1",
|
||||
"kind-of": "^2.0.1",
|
||||
"lazy-cache": "^0.2.3",
|
||||
"mixin-object": "^2.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/shallow-clone/node_modules/kind-of": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
|
||||
"integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
|
||||
"dependencies": {
|
||||
"is-buffer": "^1.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/shallow-clone/node_modules/lazy-cache": {
|
||||
"version": "0.2.7",
|
||||
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
|
||||
"integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/shebang-command": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
||||
|
|
|
@ -47,6 +47,8 @@
|
|||
"minio": "^7.1.3",
|
||||
"openai": "^4.20.0",
|
||||
"puppeteer": "^22.6.3",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"stripe": "^11.11.0",
|
||||
"tiktoken": "^1.0.10",
|
||||
"turndown": "^7.1.3",
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
|
||||
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError, ParamValidationError } from 'civkit';
|
||||
import { singleton } from 'tsyringe';
|
||||
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
||||
import _ from 'lodash';
|
||||
|
@ -32,11 +32,11 @@ export class CrawlerHost extends RPCHost {
|
|||
const toBeTurnedToMd = snapshot.parsed?.content;
|
||||
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
|
||||
|
||||
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
|
||||
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text?.trim();
|
||||
|
||||
const formatted = {
|
||||
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
||||
url: snapshot.href.trim(),
|
||||
url: snapshot.href?.trim(),
|
||||
content: contentText.trim(),
|
||||
|
||||
toString() {
|
||||
|
@ -80,7 +80,15 @@ ${this.content}
|
|||
},
|
||||
) {
|
||||
const noSlashURL = ctx.req.url.slice(1);
|
||||
const urlToCrawl = new URL(normalizeUrl(noSlashURL));
|
||||
let urlToCrawl;
|
||||
try {
|
||||
urlToCrawl = new URL(normalizeUrl(noSlashURL.trim()));
|
||||
} catch (err) {
|
||||
throw new ParamValidationError({
|
||||
message: `${err}`,
|
||||
path: 'url'
|
||||
});
|
||||
}
|
||||
const screenshotEnabled = Boolean(ctx.req.headers['x-screenshot']);
|
||||
const noCache = Boolean(ctx.req.headers['x-no-cache']);
|
||||
|
||||
|
@ -125,7 +133,7 @@ ${this.content}
|
|||
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
||||
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
||||
lastScrapped = scrapped;
|
||||
if (!scrapped?.parsed?.content) {
|
||||
if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -143,7 +151,7 @@ ${this.content}
|
|||
|
||||
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
||||
lastScrapped = scrapped;
|
||||
if (!scrapped?.parsed?.content) {
|
||||
if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ initializeApp();
|
|||
|
||||
import { loadModulesDynamically, registry } from './shared';
|
||||
import path from 'path';
|
||||
import { ApplicationError } from 'civkit';
|
||||
loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
|
||||
|
||||
Object.assign(exports, registry.exportAll());
|
||||
|
@ -31,4 +32,12 @@ Object.assign(exports, registry.exportGrouped({
|
|||
registry.title = 'reader';
|
||||
registry.version = '0.1.0';
|
||||
|
||||
process.on('unhandledRejection', () => 'no big deal');
|
||||
process.on('unhandledRejection', (err) => {
|
||||
// Walk around Firebase runtime bug.
|
||||
if (err instanceof ApplicationError) {
|
||||
// Application error shall not crash the process;
|
||||
return;
|
||||
}
|
||||
|
||||
throw err;
|
||||
});
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
import { AssertionFailureError, AsyncService, Defer, HashManager, marshalErrorLike } from 'civkit';
|
||||
import { container, singleton } from 'tsyringe';
|
||||
import puppeteer, { Browser } from 'puppeteer';
|
||||
import type { Browser } from 'puppeteer';
|
||||
import { Logger } from '../shared/services/logger';
|
||||
import genericPool from 'generic-pool';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
import { Crawled } from '../db/crawled';
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import puppeteerStealth from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
|
||||
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
||||
|
@ -31,6 +33,12 @@ export interface PageSnapshot {
|
|||
}
|
||||
const md5Hasher = new HashManager('md5', 'hex');
|
||||
|
||||
puppeteer.use(puppeteerStealth());
|
||||
// const puppeteerUAOverride = require('puppeteer-extra-plugin-stealth/evasions/user-agent-override');
|
||||
// puppeteer.use(puppeteerUAOverride({
|
||||
// userAgent: `Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`
|
||||
// }))
|
||||
|
||||
@singleton()
|
||||
export class PuppeteerControl extends AsyncService {
|
||||
|
||||
|
@ -77,9 +85,10 @@ export class PuppeteerControl extends AsyncService {
|
|||
headless: true,
|
||||
timeout: 10_000
|
||||
}).catch((err) => {
|
||||
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
||||
this.logger.error(`Unknown firebase issue, just die fast.`, { err });
|
||||
process.nextTick(() => {
|
||||
process.exit(1);
|
||||
this.emit('error', err);
|
||||
// process.exit(1);
|
||||
});
|
||||
return Promise.reject(err);
|
||||
});
|
||||
|
@ -100,7 +109,7 @@ export class PuppeteerControl extends AsyncService {
|
|||
const preparations = [];
|
||||
|
||||
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
||||
preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
||||
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
||||
preparations.push(page.setBypassCSP(true));
|
||||
preparations.push(page.setViewport({ width: 1920, height: 1080 }));
|
||||
preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {
|
||||
|
|
Loading…
Reference in New Issue
Block a user