chore: clean code

This commit is contained in:
Han Xiao 2024-04-11 15:29:57 -07:00
parent b29a569d39
commit c1743db305
2 changed files with 5 additions and 6 deletions

View File

@@ -26,6 +26,7 @@
},
"main": "build/index.js",
"dependencies": {
"@esm2cjs/normalize-url": "^8.0.0",
"@google-cloud/translate": "^8.2.0",
"@mozilla/readability": "^0.5.0",
"@napi-rs/canvas": "^0.1.44",

View File

@@ -5,6 +5,7 @@ import _ from 'lodash';
import { PageSnapshot, PuppeteerControl } from '../services/puppeteer';
import TurnDownService from 'turndown';
import { Request, Response } from 'express';
import normalizeUrl from "@esm2cjs/normalize-url";
@singleton()
@@ -57,11 +58,8 @@ ${contentText.trim()}
res: Response,
},
) {
const url = new URL(ctx.req.url, `${ctx.req.protocol}://${ctx.req.headers.host}`);
const rawPath = url.pathname.split('/').filter(Boolean);
const host = rawPath.shift();
const urlToCrawl = new URL(`${ctx.req.protocol}://${host}/${rawPath.join('/')}`);
urlToCrawl.search = url.search;
const noSlashURL = ctx.req.url.slice(1);
const urlToCrawl = new URL(normalizeUrl(noSlashURL));
if (!ctx.req.accepts('text/plain') && ctx.req.accepts('text/event-stream')) {
const sseStream = new OutputServerEventStream();
@@ -88,7 +86,7 @@ ${contentText.trim()}
});
}
} catch (err: any) {
this.logger.error(`Failed to crawl ${url}`, { err: marshalErrorLike(err) });
this.logger.error(`Failed to crawl ${urlToCrawl.toString()}`, { err: marshalErrorLike(err) });
sseStream.write({
event: 'error',
data: marshalErrorLike(err),