finish responding properly

This commit is contained in:
Harsh Gupta 2024-08-15 15:42:39 +05:30
parent 7677ec77ce
commit a7fbe3cb38

View File

@ -1,5 +1,5 @@
import { import {
assignTransferProtocolMeta, marshalErrorLike, marshalErrorLike,
RPCHost, RPCReflection, RPCHost, RPCReflection,
HashManager, HashManager,
AssertionFailureError, ParamValidationError, Defer, AssertionFailureError, ParamValidationError, Defer,
@ -28,6 +28,27 @@ const md5Hasher = new HashManager('md5', 'hex');
// const logger = new Logger('Crawler'); // const logger = new Logger('Crawler');
import { TransferProtocolMetadata } from 'civkit';
/**
 * Writes `data` to the HTTP response, applying whatever transfer metadata is present.
 *
 * Steps, in order:
 *  1. sets the status code when `meta.code` is truthy;
 *  2. sets the content type when `meta.contentType` is truthy;
 *  3. sets every entry of `meta.headers` whose value is not `undefined`;
 *  4. sends `data` as the response body.
 *
 * @param res - Response object to write to.
 * @param data - Payload handed to `res.send`.
 * @param meta - Optional status code, content type and headers for the response.
 * @returns The same `data` value that was sent, so the call can be used in a `return` statement.
 */
function sendResponse<T>(res: Response, data: T, meta: TransferProtocolMetadata): T {
    const { code, contentType, headers } = meta;

    if (code) res.status(code);
    if (contentType) res.type(contentType);

    if (headers) {
        Object.entries(headers).forEach(([name, headerValue]) => {
            // Entries explicitly set to undefined are skipped rather than written out.
            if (headerValue !== undefined) {
                res.setHeader(name, headerValue);
            }
        });
    }

    res.send(data);

    return data;
}
export interface ExtraScrappingOptions extends ScrappingOptions { export interface ExtraScrappingOptions extends ScrappingOptions {
withIframe?: boolean; withIframe?: boolean;
targetSelector?: string | string[]; targetSelector?: string | string[];
@ -596,6 +617,8 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
async crawl(req: Request, res: Response) { async crawl(req: Request, res: Response) {
console.log('Crawl method called with request:', req.url); console.log('Crawl method called with request:', req.url);
// res.setHeader('Access-Control-Allow-Origin', '*');
// res.send('Helloooooooo!');
// const rpcReflect: RPCReflection = {}; // const rpcReflect: RPCReflection = {};
const ctx = { req, res }; const ctx = { req, res };
console.log(`req.headers: ${JSON.stringify(req.headers)}`); console.log(`req.headers: ${JSON.stringify(req.headers)}`);
@ -610,7 +633,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
return this.getIndex(); return this.getIndex();
} }
return assignTransferProtocolMeta(`${this.getIndex()}`, return sendResponse(res, `${this.getIndex()}`,
{ contentType: 'text/plain', envelope: null } { contentType: 'text/plain', envelope: null }
); );
} }
@ -717,17 +740,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
if (crawlerOptions.timeout === undefined) { if (crawlerOptions.timeout === undefined) {
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`, return sendResponse(res, `${formatted}`,
{ code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'screenshotUrl') } } { code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'screenshotUrl') } }
); );
} }
if (crawlerOptions.respondWith === 'pageshot' && Reflect.get(formatted, 'pageshotUrl')) { if (crawlerOptions.respondWith === 'pageshot' && Reflect.get(formatted, 'pageshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`, return sendResponse(res, `${formatted}`,
{ code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'pageshotUrl') } } { code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'pageshotUrl') } }
); );
} }
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null }); return sendResponse(res, `${formatted}`, { contentType: 'text/plain', envelope: null });
} }
} }
@ -737,17 +760,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl); const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) { if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`, return sendResponse(res, `${formatted}`,
{ code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'screenshotUrl') } } { code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'screenshotUrl') } }
); );
} }
if (crawlerOptions.respondWith === 'pageshot' && Reflect.get(formatted, 'pageshotUrl')) { if (crawlerOptions.respondWith === 'pageshot' && Reflect.get(formatted, 'pageshotUrl')) {
return assignTransferProtocolMeta(`${formatted}`, return sendResponse(res, `${formatted}`,
{ code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'pageshotUrl') } } { code: 302, envelope: null, headers: { Location: Reflect.get(formatted, 'pageshotUrl') } }
); );
} }
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null }); return sendResponse(res, `${formatted}`, { contentType: 'text/plain', envelope: null });
} }
getUrlDigest(urlToCrawl: URL) { getUrlDigest(urlToCrawl: URL) {