# Mirror of https://github.com/RockChinQ/QChatGPT.git
# (synced 2024-11-16 19:57:04 +08:00; 42 lines, 1.5 KiB, Python)
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import os
|
|
import random
|
|
import sys
|
|
|
|
|
|
# Pool of browser-like User-Agent header values; one entry is picked at random
# for the outgoing request.
user_agents = [
    # Windows 10, Chrome
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # Windows 10, Firefox
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0",
    # macOS 10.15.7, Chrome
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # macOS 10.15.7, Safari-style "Version/" token
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36",
    # Ubuntu Linux, Firefox
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0",
]
|
|
|
|
# Script body: download the page whose URL is given as the first command-line
# argument and print the text extracted from its HTML to stdout.
if len(sys.argv) < 2:
    # Exit with a usage message (written to stderr, non-zero status) instead
    # of letting sys.argv[1] raise a bare IndexError.
    sys.exit("usage: {} <url>".format(sys.argv[0]))

r = requests.get(
    sys.argv[1],
    headers={
        # Send a randomly chosen browser-like User-Agent with the request.
        "User-Agent": random.choice(user_agents),
    },
    # requests applies no timeout by default, so a server that stops
    # responding would otherwise block this script indefinitely.
    timeout=30,
)
soup = BeautifulSoup(r.text, 'html.parser')

# Text content of the parsed document, with the markup removed.
raw = soup.get_text()

import re  # only needed by the optional clean-up below

# Optional whitespace clean-up, currently disabled.
# Strip every line:
# raw = '\n'.join([line.strip() for line in raw.split('\n')])
# Remove all lines that are empty or contain only whitespace:
# raw = re.sub(r'\n\s*\n', '\n', raw)

print(raw)