mirror of
https://github.com/intergalacticalvariable/reader.git
synced 2024-11-16 03:32:25 +08:00
wip
This commit is contained in:
parent
8b9ecf2e60
commit
89d6d49f06
0
.github/workflows/.keep
vendored
Normal file
0
.github/workflows/.keep
vendored
Normal file
131
.gitignore
vendored
131
.gitignore
vendored
|
@ -1,130 +1,3 @@
|
|||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
package-lock.json
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
.DS_Store
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
[submodule "thinapps-shared"]
|
||||
path = thinapps-shared
|
||||
url = git@github.com:jina-ai/thinapps-shared.git
|
10
.vscode/exensions.json
vendored
Normal file
10
.vscode/exensions.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"editorconfig.editorconfig",
|
||||
"octref.vetur",
|
||||
"redhat.vscode-yaml",
|
||||
"dbaeumer.vscode-eslint",
|
||||
"esbenp.prettier-vscode",
|
||||
"streetsidesoftware.code-spell-checker"
|
||||
]
|
||||
}
|
60
.vscode/launch.json
vendored
Normal file
60
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,60 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Debug Fullstack: attach",
|
||||
"request": "attach",
|
||||
"cwd": "${workspaceFolder}/backend/functions",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node",
|
||||
"preLaunchTask": "Fullstack:debug"
|
||||
},
|
||||
{
|
||||
"name": "Debug Fullstack: attach: with proxy",
|
||||
"request": "attach",
|
||||
"cwd": "${workspaceFolder}/backend/functions",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node",
|
||||
"preLaunchTask": "Fullstack:debug:with-proxy"
|
||||
},
|
||||
{
|
||||
"name": "Attach",
|
||||
"port": 9229,
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
},
|
||||
{
|
||||
"name": "Attach by Process ID",
|
||||
"processId": "${command:PickProcess}",
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
},
|
||||
{
|
||||
"name": "Debug Fullstack",
|
||||
"request": "launch",
|
||||
"runtimeArgs": [
|
||||
"emulators:start",
|
||||
"--import=../.firebase-emu",
|
||||
"--export-on-exit=../.firebase-emu",
|
||||
],
|
||||
"cwd": "${workspaceFolder}/backend/functions",
|
||||
"runtimeExecutable": "${workspaceFolder}/node_modules/.bin/firebase",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node",
|
||||
"preLaunchTask": "Fullstack:prepare",
|
||||
"killBehavior": "polite"
|
||||
},
|
||||
]
|
||||
}
|
105
.vscode/settings.json
vendored
Normal file
105
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,105 @@
|
|||
{
|
||||
"editor.wordWrap": "on",
|
||||
"editor.wordWrapColumn": 120,
|
||||
"files.trimTrailingWhitespace": true,
|
||||
"files.trimFinalNewlines": true,
|
||||
"[javascript]": {
|
||||
"editor.defaultFormatter": "vscode.typescript-language-features"
|
||||
},
|
||||
"[vue]": {
|
||||
"editor.defaultFormatter": "Vue.volar"
|
||||
},
|
||||
"[jsonc]": {
|
||||
"editor.defaultFormatter": "vscode.json-language-features"
|
||||
},
|
||||
"[typescript]": {
|
||||
"editor.defaultFormatter": "vscode.typescript-language-features"
|
||||
},
|
||||
"[json]": {
|
||||
"editor.defaultFormatter": "vscode.json-language-features"
|
||||
},
|
||||
"[yaml]": {
|
||||
"editor.defaultFormatter": "redhat.vscode-yaml"
|
||||
},
|
||||
"[markdown]": {
|
||||
"files.trimTrailingWhitespace": false
|
||||
},
|
||||
"typescript.tsdk": "node_modules/typescript/lib",
|
||||
"vetur.format.defaultFormatter.ts": "vscode-typescript",
|
||||
"vetur.format.defaultFormatter.js": "vscode-typescript",
|
||||
"typescript.preferences.quoteStyle": "single",
|
||||
"typescript.format.semicolons": "insert",
|
||||
"typescript.preferences.importModuleSpecifier": "project-relative",
|
||||
"typescript.locale": "en",
|
||||
"cSpell.enabled": true,
|
||||
"cSpell.words": [
|
||||
"Apiextensions",
|
||||
"apihubble",
|
||||
"auths",
|
||||
"AUTOCASTABLE",
|
||||
"Autocasting",
|
||||
"backchannel",
|
||||
"bodyparser",
|
||||
"bson",
|
||||
"BUILDKIT",
|
||||
"buildx",
|
||||
"castable",
|
||||
"cmdl",
|
||||
"Commandline",
|
||||
"conpty",
|
||||
"cpid",
|
||||
"deferreds",
|
||||
"DEVBOT",
|
||||
"dockerhub",
|
||||
"entrypoint",
|
||||
"ENVIROMENT",
|
||||
"finetuner",
|
||||
"fpath",
|
||||
"fswalk",
|
||||
"Grafana",
|
||||
"Hasher",
|
||||
"istio",
|
||||
"jina",
|
||||
"jinahub",
|
||||
"jinameta",
|
||||
"Knative",
|
||||
"kourier",
|
||||
"kube",
|
||||
"kubectl",
|
||||
"Kubernetes",
|
||||
"kwargs",
|
||||
"letsencrypt",
|
||||
"liveconfigs",
|
||||
"LOGNAME",
|
||||
"metas",
|
||||
"Mgmt",
|
||||
"middlewares",
|
||||
"minikube",
|
||||
"minio",
|
||||
"ndjson",
|
||||
"nodelib",
|
||||
"oidc",
|
||||
"openapi",
|
||||
"paramtypes",
|
||||
"penv",
|
||||
"pino",
|
||||
"prebuild",
|
||||
"quickstart",
|
||||
"reinit",
|
||||
"sslip",
|
||||
"subval",
|
||||
"Succ",
|
||||
"timedout",
|
||||
"TOTP",
|
||||
"tsbuildinfo",
|
||||
"tsyringe",
|
||||
"typeclass",
|
||||
"upsert",
|
||||
"upserted",
|
||||
"userinfo",
|
||||
"Vecs",
|
||||
"vectorize",
|
||||
"WECHAT",
|
||||
"WXPAY"
|
||||
],
|
||||
}
|
156
.vscode/tasks.json
vendored
Normal file
156
.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,156 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "build",
|
||||
"group": "build",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend/functions"
|
||||
},
|
||||
"problemMatcher": [],
|
||||
"label": "Backend:rebuild",
|
||||
"detail": "Backend:rebuild"
|
||||
},
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "emu:reset",
|
||||
"group": "build",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend/functions"
|
||||
},
|
||||
"problemMatcher": [],
|
||||
"label": "Backend:reset-emulator",
|
||||
"detail": "Backend:reset-emulator"
|
||||
},
|
||||
{
|
||||
"type": "typescript",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend/functions"
|
||||
},
|
||||
"tsconfig": "backend/functions/tsconfig.json",
|
||||
"option": "watch",
|
||||
"isBackground": true,
|
||||
"problemMatcher": [
|
||||
"$tsc-watch"
|
||||
],
|
||||
"group": "build",
|
||||
"label": "Backend:build:watch"
|
||||
},
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "emu:debug",
|
||||
"group": "none",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend/functions"
|
||||
},
|
||||
"problemMatcher": [
|
||||
{
|
||||
"base": "$tsc",
|
||||
"background": {
|
||||
"activeOnStart": false,
|
||||
"beginsPattern": "shutdown requested|Starting emulators",
|
||||
"endsPattern": "Debugger listening"
|
||||
}
|
||||
}
|
||||
],
|
||||
"label": "Backend:start-emulator-debug",
|
||||
"detail": "Backend:start-emulator-debug",
|
||||
"dependsOn": [
|
||||
"Backend:build:watch"
|
||||
],
|
||||
"isBackground": true,
|
||||
},
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "dev",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/webapp",
|
||||
},
|
||||
"group": "build",
|
||||
"label": "Frontend:start:dev",
|
||||
"detail": "Frontend:start:dev",
|
||||
"isBackground": true,
|
||||
"problemMatcher": {
|
||||
"base": "$vite",
|
||||
"background": {
|
||||
"activeOnStart": true,
|
||||
"endsPattern": "OK",
|
||||
"beginsPattern": "vite"
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "dev",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/webapp",
|
||||
"env": {
|
||||
"FIREBASE_EMULATE": "true",
|
||||
}
|
||||
},
|
||||
"group": "build",
|
||||
"label": "Frontend:start:emu",
|
||||
"detail": "Frontend:start:emu",
|
||||
"isBackground": true,
|
||||
"problemMatcher": {
|
||||
"base": "$vite",
|
||||
"background": {
|
||||
"activeOnStart": true,
|
||||
"endsPattern": "OK",
|
||||
"beginsPattern": "vite"
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "npm",
|
||||
"script": "emu:debug2",
|
||||
"group": "none",
|
||||
"options": {
|
||||
"cwd": "${workspaceFolder}/backend/functions",
|
||||
"env": {
|
||||
"https_proxy": "http://127.0.0.1:7890",
|
||||
"http_proxy": "http://127.0.0.1:7890",
|
||||
"all_proxy": "socks5://127.0.0.1:7890"
|
||||
}
|
||||
},
|
||||
"problemMatcher": [
|
||||
{
|
||||
"base": "$tsc",
|
||||
"background": {
|
||||
"activeOnStart": false,
|
||||
"beginsPattern": "shutdown requested|Starting emulators",
|
||||
"endsPattern": "Debugger listening"
|
||||
}
|
||||
}
|
||||
],
|
||||
"label": "Backend:start-emulator-debug:with-proxy",
|
||||
"detail": "Backend:start-emulator-debug:with-proxy",
|
||||
"dependsOn": [
|
||||
"Backend:build:watch"
|
||||
],
|
||||
"isBackground": true,
|
||||
},
|
||||
{
|
||||
"label": "Fullstack:prepare",
|
||||
"dependsOn": [
|
||||
"Frontend:start:emu",
|
||||
"Backend:build:watch",
|
||||
],
|
||||
},
|
||||
{
|
||||
"label": "Fullstack:debug",
|
||||
"dependsOn": [
|
||||
// "Frontend:start:emu",
|
||||
"Backend:start-emulator-debug",
|
||||
],
|
||||
},
|
||||
{
|
||||
"label": "Fullstack:debug:with-proxy",
|
||||
"dependsOn": [
|
||||
"Frontend:start:emu",
|
||||
"Backend:start-emulator-debug:with-proxy",
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
113
README.md
113
README.md
|
@ -1 +1,112 @@
|
|||
# url2text
|
||||
# Url2Text
|
||||
|
||||
## Development Guide
|
||||
|
||||
### Prerequisite
|
||||
- Node v18 (The build fails for Node version >18)
|
||||
- Yarn
|
||||
- Firebase CLI (`npm install -g firebase-tools`)
|
||||
|
||||
### Installation
|
||||
|
||||
Clone the url2text repo and initialize its submodule by running the commands:
|
||||
|
||||
```bash
|
||||
git clone git@github.com:jina-ai/url2text.git
cd url2text
|
||||
git submodule init
|
||||
git submodule update
|
||||
```
|
||||
|
||||
After a successful clone, install the packages for backend and the webapp.
|
||||
|
||||
For backend, go to the `backend/functions` directory and install the npm dependencies.
|
||||
|
||||
```bash
|
||||
cd backend/functions
|
||||
npm install
|
||||
```
|
||||
|
||||
For the frontend (webapp), go to the `webapp` directory and install the yarn dependencies.
|
||||
|
||||
```bash
|
||||
cd webapp
|
||||
yarn
|
||||
```
|
||||
|
||||
### Configure
|
||||
|
||||
**Establish localhost connection:**
|
||||
|
||||
Once the packages are installed, open the `App.vue` file inside `webapp/src/` and uncomment the following line:
|
||||
|
||||
```js
|
||||
connectFunctionsEmulator(functions, 'localhost', 5001);
|
||||
```
|
||||
|
||||
### Run The Application Now
|
||||
|
||||
To run the backend server, inside the `backend/functions` dir run the below command:
|
||||
|
||||
```bash
|
||||
npm run serve
|
||||
```
|
||||
|
||||
To run the frontend app, inside the `webapp` dir run the below command:
|
||||
|
||||
```bash
|
||||
yarn dev
|
||||
```
|
||||
|
||||
### Known Errors
|
||||
|
||||
1. If you encounter the error 'npm ERR! /bin/sh: pkg-config: command not found' on macOS, run the command `brew install pkg-config cairo libpng jpeg giflib pango librsvg`.
|
||||
|
||||
## Best practices
|
||||
|
||||
### Directory structure
|
||||
|
||||
There are three folders:
|
||||
1. `webapp` is the frontend project of `Url2Text`; knowledge requirements:
|
||||
- Vue 3
|
||||
- Quasar
|
||||
- ...
|
||||
|
||||
2. `backend` contains source code of backend logic, knowledge requirements:
|
||||
- Nodejs
|
||||
- Firebase
|
||||
- ...
|
||||
|
||||
3. `scripts` folder includes custom scripts we might need during the development or for production, currently we have the following scripts:
|
||||
- `translate` is responsible for translating and updating our i18n language files in frontend project.
|
||||
|
||||
### Best practices of frontend
|
||||
1. The **Quasar docs** are your `best friend`. Since the frontend project depends heavily on the `Quasar` framework, it is recommended to use its predefined classes and components and to avoid defining your own custom classes.
|
||||
2. **Double-check** the UI output in both `Dark mode` and `Light mode`. Again, use predefined classes and props.
|
||||
3. **Plugins in boot** folder: create a corresponding file in the `boot` folder for each plugin and register it in `quasar.config.js`:
|
||||
```js
|
||||
module.exports = configure(function() {
|
||||
return {
|
||||
...
|
||||
boot: [
|
||||
'i18n',
|
||||
'axios',
|
||||
'firebase',
|
||||
'addressbar-color',
|
||||
'quasar-lang-pack'
|
||||
],
|
||||
...
|
||||
}
|
||||
})
|
||||
|
||||
```
|
||||
|
||||
### Best practices of backend
|
||||
1. **Remember to deploy your functions** by running:
|
||||
```bash
|
||||
# deploy all functions
|
||||
firebase deploy --only functions
|
||||
|
||||
# deploy a specific function
|
||||
firebase deploy --only functions:{function name}
|
||||
|
||||
```
|
||||
|
|
5
backend/.firebaserc
Normal file
5
backend/.firebaserc
Normal file
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"projects": {
|
||||
"default": "reader-6b7dc"
|
||||
}
|
||||
}
|
75
backend/.gitignore
vendored
Normal file
75
backend/.gitignore
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
firebase-debug.log*
|
||||
firebase-debug.*.log*
|
||||
|
||||
# Firebase cache
|
||||
.firebase/
|
||||
|
||||
# Firebase config
|
||||
|
||||
# Uncomment this if you'd like others to create their own Firebase project.
|
||||
# For a team working on the same Firebase project(s), it is recommended to leave
|
||||
# it commented so all members can deploy to the same project(s) in .firebaserc.
|
||||
# .firebaserc
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (http://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
.env
|
||||
.secret.local
|
||||
|
||||
toy*.ts
|
||||
|
||||
.DS_Store
|
||||
build/
|
||||
.firebase-emu/
|
||||
*.log
|
||||
.DS_Store
|
44
backend/firebase.json
Normal file
44
backend/firebase.json
Normal file
|
@ -0,0 +1,44 @@
|
|||
{
|
||||
"firestore": {
|
||||
"rules": "firestore.rules",
|
||||
"indexes": "firestore.indexes.json"
|
||||
},
|
||||
"functions": [
|
||||
{
|
||||
"source": "functions",
|
||||
"codebase": "default",
|
||||
"ignore": [
|
||||
"node_modules",
|
||||
"src",
|
||||
".git",
|
||||
"firebase-debug.log",
|
||||
"firebase-debug.*.log"
|
||||
],
|
||||
"predeploy": [
|
||||
"npm --prefix \"$RESOURCE_DIR\" run build:clean",
|
||||
"npm --prefix \"$RESOURCE_DIR\" run build"
|
||||
]
|
||||
}
|
||||
],
|
||||
"storage": {
|
||||
"rules": "storage.rules"
|
||||
},
|
||||
"emulators": {
|
||||
"ui": {
|
||||
"enabled": true
|
||||
},
|
||||
"singleProjectMode": true,
|
||||
"functions": {
|
||||
"port": 5001
|
||||
},
|
||||
"auth": {
|
||||
"port": 9099
|
||||
},
|
||||
"firestore": {
|
||||
"port": 9098
|
||||
},
|
||||
"storage": {
|
||||
"port": 9097
|
||||
}
|
||||
}
|
||||
}
|
19
backend/firestore.indexes.json
Normal file
19
backend/firestore.indexes.json
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"indexes": [
|
||||
{
|
||||
"collectionGroup": "prompts",
|
||||
"queryScope": "COLLECTION_GROUP",
|
||||
"fields": [
|
||||
{
|
||||
"fieldPath": "id",
|
||||
"order": "ASCENDING"
|
||||
},
|
||||
{
|
||||
"fieldPath": "isPublic",
|
||||
"order": "ASCENDING"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"fieldOverrides": []
|
||||
}
|
32
backend/firestore.rules
Normal file
32
backend/firestore.rules
Normal file
|
@ -0,0 +1,32 @@
|
|||
rules_version = '2';
|
||||
service cloud.firestore {
|
||||
match /databases/{database}/documents {
|
||||
// match /questions/{document=**} {
|
||||
// allow read: if request.auth != null
|
||||
// }
|
||||
|
||||
// match /answers/{userId}/profiles/default {
|
||||
// allow read, write: if request.auth != null && request.auth.uid == userId
|
||||
// }
|
||||
|
||||
match /credits/{userId}/{document=**} {
|
||||
allow read: if request.auth != null && request.auth.uid == userId
|
||||
}
|
||||
|
||||
match /users/{userId}/prompts/{document=**} {
|
||||
allow read: if request.auth != null && request.auth.uid == userId
|
||||
}
|
||||
|
||||
// match /users/{userId}/profiles/{document=**} {
|
||||
// allow read: if request.auth != null && request.auth.uid == userId
|
||||
// }
|
||||
|
||||
match /users/{userId}/creditHistory/{document=**} {
|
||||
allow read: if request.auth != null && request.auth.uid == userId
|
||||
}
|
||||
|
||||
match /{document=**} {
|
||||
allow read, write: if false;
|
||||
}
|
||||
}
|
||||
}
|
36
backend/functions/.editorconfig
Normal file
36
backend/functions/.editorconfig
Normal file
|
@ -0,0 +1,36 @@
|
|||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
indent_style = space
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
indent_size = 4
|
||||
quote_type = single
|
||||
max_line_length = 120
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
|
||||
[*.ts]
|
||||
indent_size = 4
|
||||
|
||||
[*.js]
|
||||
indent_size = 2
|
||||
|
||||
[*.vue]
|
||||
indent_size = 2
|
||||
|
||||
[*.*sx]
|
||||
indent_size = 2
|
||||
|
||||
[*.*ml]
|
||||
indent_size = 2
|
||||
|
||||
[*.json]
|
||||
indent_size = 2
|
||||
|
||||
[*.md]
|
||||
indent_size = 2
|
||||
trim_trailing_whitespace = false
|
0
backend/functions/.env.example
Normal file
0
backend/functions/.env.example
Normal file
27
backend/functions/.vscode/launch.json
vendored
Normal file
27
backend/functions/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Attach by Process ID",
|
||||
"processId": "${command:PickProcess}",
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
},
|
||||
{
|
||||
"name": "Attach",
|
||||
"port": 9229,
|
||||
"request": "attach",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"type": "node"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
10
backend/functions/.vscode/settings.json
vendored
Normal file
10
backend/functions/.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"cSpell.words": [
|
||||
"AIHTTP",
|
||||
"Castable",
|
||||
"civkit",
|
||||
"Firestore",
|
||||
"openai"
|
||||
],
|
||||
"typescript.tsdk": "node_modules/typescript/lib"
|
||||
}
|
Binary file not shown.
72
backend/functions/package.json
Normal file
72
backend/functions/package.json
Normal file
|
@ -0,0 +1,72 @@
|
|||
{
|
||||
"name": "url2text",
|
||||
"scripts": {
|
||||
"lint": "eslint --ext .js,.ts .",
|
||||
"build": "tsc -p .",
|
||||
"build:watch": "tsc --watch",
|
||||
"build:clean": "rm -rf ./build",
|
||||
"shell": "npm run build && firebase functions:shell",
|
||||
"emu:stage": "cd .. && tar -czvf firebase-emu-preset.tgz .firebase-emu",
|
||||
"emu:reset": "rm -rf ../.firebase-emu && tar -xzf ../firebase-emu-preset.tgz --directory ../",
|
||||
"emu:start": "firebase emulators:start --import ../.firebase-emu --export-on-exit",
|
||||
"emu:debug": "firebase emulators:start --import ../.firebase-emu --export-on-exit --inspect-functions",
|
||||
"emu:debug2": "firebase emulators:start --import ../.firebase-emu --export-on-exit --inspect-functions",
|
||||
"emu:kill": "killall java",
|
||||
"serve": "npm run build && npm run emu:start",
|
||||
"debug": "npm run build && npm run emu:start -- --inspect-functions",
|
||||
"from-scratch": "npm run build && rm -rf ../.firebase-emu && firebase emulators:start --export-on-exit",
|
||||
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
||||
"start": "npm run shell",
|
||||
"deploy": "firebase deploy --only functions",
|
||||
"logs": "firebase functions:log"
|
||||
},
|
||||
"engines": {
|
||||
"node": "20"
|
||||
},
|
||||
"main": "build/index.js",
|
||||
"dependencies": {
|
||||
"@google-cloud/translate": "^8.2.0",
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"@napi-rs/canvas": "^0.1.44",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"archiver": "^6.0.1",
|
||||
"axios": "^1.3.3",
|
||||
"bcrypt": "^5.1.0",
|
||||
"civkit": "^0.6.5-be430ac",
|
||||
"cors": "^2.8.5",
|
||||
"dayjs": "^1.11.9",
|
||||
"express": "^4.19.2",
|
||||
"firebase-admin": "^11.5.0",
|
||||
"firebase-functions": "^4.8.0",
|
||||
"generic-pool": "^3.9.0",
|
||||
"htmlparser2": "^9.0.0",
|
||||
"jose": "^5.1.0",
|
||||
"langdetect": "^0.2.1",
|
||||
"minio": "^7.1.3",
|
||||
"openai": "^4.20.0",
|
||||
"puppeteer": "^22.6.3",
|
||||
"stripe": "^11.11.0",
|
||||
"tiktoken": "^1.0.10",
|
||||
"turndown": "^7.1.3",
|
||||
"undici": "^5.24.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/archiver": "^5.3.4",
|
||||
"@types/bcrypt": "^5.0.0",
|
||||
"@types/cors": "^2.8.17",
|
||||
"@types/generic-pool": "^3.8.1",
|
||||
"@types/node": "^18",
|
||||
"@typescript-eslint/eslint-plugin": "^5.12.0",
|
||||
"@typescript-eslint/parser": "^5.12.0",
|
||||
"eslint": "^8.9.0",
|
||||
"eslint-config-google": "^0.14.0",
|
||||
"eslint-plugin-import": "^2.25.4",
|
||||
"firebase-functions-test": "^3.0.0",
|
||||
"replicate": "^0.16.1",
|
||||
"typescript": "^5.1.6"
|
||||
},
|
||||
"private": true,
|
||||
"exports": {
|
||||
".": "./build/index.js"
|
||||
}
|
||||
}
|
69
backend/functions/src/cloud-functions/crawler.ts
Normal file
69
backend/functions/src/cloud-functions/crawler.ts
Normal file
|
@ -0,0 +1,69 @@
|
|||
import { marshalErrorLike, RPCHost, RPCReflection } from 'civkit';
|
||||
import { singleton } from 'tsyringe';
|
||||
import { CloudHTTPv2, Logger, OutputServerEventStream, Param, RPCReflect } from '../shared';
|
||||
import _ from 'lodash';
|
||||
import { PuppeteerControl } from '../services/puppeteer';
|
||||
import TurnDownService from 'turndown';
|
||||
|
||||
|
||||
/**
 * RPC host exposing the `crawl` endpoint.
 *
 * `crawl` drives `PuppeteerControl.scrap` for the requested URL, converts the
 * HTML content of every snapshot to Markdown with Turndown and streams the
 * result to the client as server-sent events.
 */
@singleton()
export class CrawlerHost extends RPCHost {
    logger = this.globalLogger.child({ service: this.constructor.name });

    turnDownService = new TurnDownService();

    constructor(
        protected globalLogger: Logger,
        protected puppeteerControl: PuppeteerControl,
    ) {
        super(...arguments);
    }

    /** Waits for the injected dependencies, then marks this service as ready. */
    override async init() {
        await this.dependencyReady();

        this.emit('ready');
    }

    /**
     * Crawls `url` and streams every usable snapshot as a `data` event.
     *
     * @param rpcReflect RPC reflection handle; the SSE stream is handed to it
     *   right away so events are delivered while the page is still loading.
     * @param url The page to crawl (required request parameter).
     * @returns The server-sent event stream, already ended.
     */
    @CloudHTTPv2({
        exportInGroup: ['crawler'],
        httpMethod: ['get', 'post'],
        returnType: OutputServerEventStream,
    })
    async crawl(
        @RPCReflect() rpcReflect: RPCReflection,
        @Param('url', { required: true }) url: string
    ) {
        await this.serviceReady();
        const sseStream = new OutputServerEventStream();

        rpcReflect.return(sseStream);

        try {
            for await (const scrapped of this.puppeteerControl.scrap(url)) {
                // A snapshot is either a raw HTML string or a parsed object carrying
                // the HTML in `.content`; anything without content is skipped.
                const content = typeof scrapped.snapshot === 'string' ? scrapped.snapshot : (scrapped.snapshot as any)?.content;
                if (!content) {
                    continue;
                }
                // Interpolating the snapshot object itself would only print "[object Object]".
                this.logger.info(`Scrapped: ${url} (${String(content).length} chars of content)`);

                const text = this.turnDownService.turndown(content);
                sseStream.write({
                    event: 'data',
                    data: text,
                });
            }
        } catch (err: any) {
            const marshalled = marshalErrorLike(err);
            this.logger.error(`Failed to crawl ${url}`, { err: marshalled });
            // Send the same marshalled form that is logged instead of the raw Error
            // object. NOTE(review): assumes the stream serializes `data` as JSON,
            // where a raw Error would lose its message — confirm.
            sseStream.write({
                event: 'error',
                data: marshalled,
            });
        } finally {
            // Always terminate the stream, even if reporting the error itself failed.
            sseStream.end();
        }

        return sseStream;
    }

}
|
13
backend/functions/src/fetch.d.ts
vendored
Normal file
13
backend/functions/src/fetch.d.ts
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Declares undici's fetch primitives as ambient globals, typed from the
// `undici` package.
declare global {
    export const {
        fetch,
        FormData,
        Headers,
        Request,
        Response,
        File,
    }: typeof import('undici');
    // `RequestInit` used to be listed twice here; each name is now re-exported once.
    export type { FormData, Headers, Request, RequestInit, Response, File } from 'undici';
}

// Keeps this file a module, which is required for `declare global` to be a
// valid global augmentation.
export { };
|
33
backend/functions/src/index.ts
Normal file
33
backend/functions/src/index.ts
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Entry point of the Firebase functions bundle.
// Statement order matters here: the metadata polyfill and the Admin SDK are set
// up before any of the shared modules are imported.
import 'reflect-metadata';
import * as functions from 'firebase-functions';
import { initializeApp } from 'firebase-admin/app';
initializeApp();

import secretExposer from './shared/services/secrets';

// Auth trigger fired when a user account is created; currently a placeholder.
export const onUserCreated = functions
    .runWith({ secrets: [...secretExposer.bundle], memory: '512MB' })
    .auth.user()
    .onCreate(async (_user) => {
        return null;
    });

// Blocking auth trigger evaluated before sign-in; currently a placeholder.
export const onUserLogin = functions
    .runWith({ secrets: [...secretExposer.bundle], memory: '512MB' })
    .auth.user()
    .beforeSignIn(async (_user, _ctx) => {
        return;
    });

import { loadModulesDynamically, registry } from './shared';
import path from 'path';

// Load every module under ./cloud-functions, then publish the grouped exports
// of the registry on this CommonJS module.
const cloudFunctionsDir = path.resolve(__dirname, 'cloud-functions');
loadModulesDynamically(cloudFunctionsDir);

const groupedExports = registry.exportGrouped({
    memory: '1GiB',
    timeoutSeconds: 540,
});
Object.assign(exports, groupedExports);

registry.title = 'url2text';
registry.version = '0.1.0';
|
152
backend/functions/src/services/puppeteer.ts
Normal file
152
backend/functions/src/services/puppeteer.ts
Normal file
|
@ -0,0 +1,152 @@
|
|||
import { AsyncService, Defer } from 'civkit';
|
||||
import { container, singleton } from 'tsyringe';
|
||||
import puppeteer, { Browser } from 'puppeteer';
|
||||
import { Logger } from '../shared/services/logger';
|
||||
import genericPool from 'generic-pool';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
|
||||
|
||||
// Source text of Mozilla's Readability.js, read once at module load so it can be
// injected into pages as a script.
const READABILITY_JS = fs.readFileSync(
    require.resolve('@mozilla/readability/Readability.js'),
    'utf-8',
);
|
||||
|
||||
/**
 * Owns the shared Puppeteer browser and hands out isolated pages for scraping.
 *
 * Each page lives in its own browser context, gets Mozilla's Readability
 * injected into every document, and reports parsed snapshots back to Node
 * through a `snapshot` event emitted on the page object.
 */
@singleton()
export class PuppeteerControl extends AsyncService {

    browser!: Browser;
    logger = this.globalLogger.child({ service: this.constructor.name });

    // NOTE(review): `scrap` destroys every page it acquires, so pages are never
    // returned to the pool for reuse; the pool effectively only caps concurrency.
    pagePool = genericPool.createPool({
        create: async () => {
            const page = await this.newPage();
            return page;
        },
        destroy: async (page) => {
            // Each page has a dedicated context (see `newPage`), so close the context.
            await page.browserContext().close();
        },
        validate: async (page) => {
            return this.browser.connected && !page.isClosed();
        }
    }, {
        // Roughly one page per GiB of free memory (sampled once at construction),
        // and never fewer than one. The previous `freemem / 1024 * 1024 * 1024`
        // multiplied instead of dividing, which made the limit effectively unbounded.
        max: Math.max(1, Math.ceil(os.freemem() / (1024 * 1024 * 1024))),
        min: 0,
    });

    constructor(protected globalLogger: Logger) {
        super(...arguments);
    }

    /**
     * (Re)launches the browser and marks the service ready.
     * Emits `crippled` when the browser disconnects later on.
     */
    override async init() {
        await this.dependencyReady();

        if (this.browser) {
            await this.browser.close();
        }
        this.browser = await puppeteer.launch({
            // A headed browser cannot start on a display-less host such as Cloud Functions.
            headless: true,
            args: ['--no-sandbox', '--disable-setuid-sandbox'],
        });
        this.browser.once('disconnected', () => {
            this.logger.warn(`Browser disconnected`);
            this.emit('crippled');
        });

        this.emit('ready');
    }

    /**
     * Creates a page in a fresh browser context and wires up snapshot reporting.
     *
     * Inside the page, `window.giveSnapshot()` returns the Readability parse of
     * the current document, and `window.reportSnapshot(x)` forwards `x` to Node,
     * where it is re-emitted as a `snapshot` event on the returned page.
     */
    async newPage() {
        await this.serviceReady();
        const dedicatedContext = await this.browser.createBrowserContext();

        const page = await dedicatedContext.newPage();
        await page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`);
        await page.setViewport({ width: 1920, height: 1080 });
        await page.exposeFunction('reportSnapshot', (snapshot: any) => {
            page.emit('snapshot', snapshot);
        });

        await page.evaluateOnNewDocument(READABILITY_JS);

        // Everything below runs inside the browser page, not in Node. The
        // @ts-expect-error directives cover DOM globals unknown to this project's
        // TypeScript `lib` setting.
        await page.evaluateOnNewDocument(() => {
            // Assign the function. The former `window.giveSnapshot() = ...` tried to
            // call the not-yet-defined function and threw before defining anything.
            // @ts-expect-error
            window.giveSnapshot = () => {
                // @ts-expect-error
                return new Readability(document.cloneNode(true)).parse();
            };
            let aftershot: any;
            const handlePageLoad = () => {
                // @ts-expect-error
                if (document.readyState !== 'complete' && document.readyState !== 'interactive') {
                    return;
                }

                // @ts-expect-error
                const parsed = window.giveSnapshot();
                console.log(parsed);
                if (parsed) {
                    // @ts-expect-error
                    window.reportSnapshot(parsed);
                } else {
                    // Nothing parseable yet: retry once, 500ms after the latest attempt.
                    if (aftershot) {
                        clearTimeout(aftershot);
                    }
                    aftershot = setTimeout(() => {
                        // @ts-expect-error
                        window.reportSnapshot(window.giveSnapshot());
                    }, 500);
                }
            };
            // setInterval(handlePageLoad, 1000);
            // @ts-expect-error
            document.addEventListener('readystatechange', handlePageLoad);
            // The `load` event is dispatched on `window`, not on `document`.
            // @ts-expect-error
            window.addEventListener('load', handlePageLoad);
        });

        // TODO: further setup the page;

        return page;
    }

    /**
     * Navigates a pooled page to `url` and yields `{ snapshot, screenshot }` for
     * every snapshot the page reports, followed by one final pair once navigation
     * has settled. The page is destroyed when the generator finishes.
     *
     * A navigation failure is logged. It is rethrown only when nothing was yielded
     * yet, so partially loaded pages still end gracefully while a completely
     * failed crawl is visible to the caller.
     *
     * @param url Address to load (30s timeout, waits for `networkidle2`).
     */
    async *scrap(url: string) {
        const page = await this.pagePool.acquire();
        let snapshot: unknown;
        let nextSnapshotDeferred = Defer();
        let finalized = false;
        let yielded = false;
        const hdl = (s: any) => {
            if (snapshot === s) {
                return;
            }
            snapshot = s;
            // Wake the loop below, then arm a fresh deferred for the next snapshot.
            nextSnapshotDeferred.resolve(s);
            nextSnapshotDeferred = Defer();
        };
        page.on('snapshot', hdl);
        const gotoPromise = page.goto(url, { waitUntil: 'networkidle2', timeout: 30_000 });
        // Two-argument `then` rather than `finally`: the promise derived from
        // `finally` re-rejects on failure and nobody would handle that rejection.
        const markFinalized = () => {
            finalized = true;
        };
        gotoPromise.then(markFinalized, markFinalized);

        try {
            while (true) {
                await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
                const screenshot = await page.screenshot();
                if (finalized) {
                    // Surfaces a navigation error, if any, before the final snapshot.
                    await gotoPromise;
                    snapshot = await page.evaluate('window.giveSnapshot()');
                    yielded = true;
                    yield { snapshot, screenshot };
                    break;
                }
                yielded = true;
                yield { snapshot, screenshot };
            }
        } catch (err) {
            this.logger.warn(`Failed to fully load ${url}`, { err });
            if (!yielded) {
                throw err;
            }
        } finally {
            page.off('snapshot', hdl);
            await this.pagePool.destroy(page);
        }

    }

}
|
||||
|
||||
// Default export: the PuppeteerControl instance resolved from the tsyringe container.
export default container.resolve(PuppeteerControl);
|
1
backend/functions/src/shared
Symbolic link
1
backend/functions/src/shared
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../thinapps-shared/backend
|
9
backend/functions/src/types.d.ts
vendored
Normal file
9
backend/functions/src/types.d.ts
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
/** Minimal ambient typings for the `langdetect` package. */
declare module 'langdetect' {
    /**
     * One entry of the array returned by `detect`.
     * NOTE(review): presumably `lang` is a language code and `prob` its
     * probability — confirm against the package.
     */
    export interface DetectionResult {
        lang: string;
        prob: number;
    }

    /** Returns a single string for `text`, or `null`. */
    export function detectOne(text: string): string | null;

    /** Returns the list of detection results for `text`. */
    export function detect(text: string): DetectionResult[];
}
|
21
backend/functions/tsconfig.json
Normal file
21
backend/functions/tsconfig.json
Normal file
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"noImplicitReturns": true,
|
||||
"noUnusedLocals": true,
|
||||
"outDir": "build",
|
||||
"sourceMap": true,
|
||||
"strict": true,
|
||||
"allowJs": true,
|
||||
"target": "es2022",
|
||||
"lib": ["es2022"],
|
||||
"skipLibCheck": true,
|
||||
"useDefineForClassFields": false,
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"esModuleInterop": true,
|
||||
"noImplicitOverride": true,
|
||||
},
|
||||
"compileOnSave": true,
|
||||
"include": ["src"]
|
||||
}
|
8
backend/storage.rules
Normal file
8
backend/storage.rules
Normal file
|
@ -0,0 +1,8 @@
|
|||
rules_version = '2';
|
||||
service firebase.storage {
|
||||
match /b/{bucket}/o {
|
||||
match /{allPaths=**} {
|
||||
allow read, write: if false;
|
||||
}
|
||||
}
|
||||
}
|
15
package.json
Normal file
15
package.json
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "url2text",
|
||||
"version": "1.0.0",
|
||||
"description": "### Prerequisite - Node v18 (The build fails for Node version >18) - Yarn - Firebase CLI (`npm install -g firebase-tools`)",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"firebase-tools": "^12.4.2",
|
||||
"typescript": "^5.1.6"
|
||||
}
|
||||
}
|
1
thinapps-shared
Submodule
1
thinapps-shared
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 9f0fa1dd7f8cfcea4c8d79252319b151fae6ed19
|
Loading…
Reference in New Issue
Block a user