chore: commit some test files

RockChinQ 2023-07-31 16:24:39 +08:00
parent 749d0219fb
commit 2b9612e933
3 changed files with 223 additions and 0 deletions

tests/bs_test/bs_test.py Normal file

@@ -0,0 +1,42 @@
# Fetch the URL given on the command line with a random desktop User-Agent
# and print the page's visible text.
import requests
from bs4 import BeautifulSoup
import os
import random
import sys
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
]

r = requests.get(
    sys.argv[1],
    headers={
        "User-Agent": random.choice(user_agents)
    }
)
soup = BeautifulSoup(r.text, 'html.parser')
# print(soup.get_text())
raw = soup.get_text()
import re  # only used by the commented-out cleanup below

# strip each line
# raw = '\n'.join([line.strip() for line in raw.split('\n')])
# # drop blank lines and lines that contain only whitespace
# raw = re.sub(r'\n\s*\n', '\n', raw)
print(raw)
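# Example invocation (hypothetical URL; assumes requests and beautifulsoup4 are installed):
#   python tests/bs_test/bs_test.py https://example.com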


@@ -0,0 +1,57 @@
import os
import sys
import paramiko
import time
import select
class sshClient:
    # Create an SSH client and open an interactive shell on the server.
    def __init__(self, host, port, user, password):
        self.trans = paramiko.Transport((host, port))
        self.trans.start_client()
        self.trans.auth_password(username=user, password=password)
        self.channel = self.trans.open_session()
        self.channel.get_pty()
        self.channel.invoke_shell()

    # Send a command to the server.
    def sendCmd(self, cmd):
        self.channel.sendall(cmd)

    # The server can be slow to respond, so wait up to `timeout` seconds for output.
    def recvResponse(self, timeout):
        data = b''
        while True:
            try:
                # Use select to keep reading until no more data arrives within the timeout.
                readable, w, e = select.select([self.channel], [], [], timeout)
                if self.channel in readable:
                    data += self.channel.recv(1024)
                else:
                    sys.stdout.write(data.decode())
                    sys.stdout.flush()
                    return data.decode()
            except TimeoutError:
                sys.stdout.write(data.decode())
                sys.stdout.flush()
                return data.decode()

    # Close the client.
    def close(self):
        self.channel.close()
        self.trans.close()


host = 'host'
port = 22  # your port
user = 'root'
pwd = 'pass'

ssh = sshClient(host, port, user, pwd)
response = ssh.recvResponse(1)  # read the login banner / prompt
ssh.sendCmd("ls\n")
ssh.sendCmd("cd /home\n")
response = ssh.recvResponse(1)
ssh.sendCmd("ls\n")
response = ssh.recvResponse(1)
ssh.close()
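# A simpler alternative sketch for one-off commands (not what this test exercises):
# paramiko's higher-level SSHClient runs a single command without the manual
# select/recv loop above; host and credentials are placeholders.
#
#   client = paramiko.SSHClient()
#   client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
#   client.connect(host, port=port, username=user, password=pwd)
#   stdin, stdout, stderr = client.exec_command("ls /home")
#   print(stdout.read().decode())
#   client.close()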


@@ -0,0 +1,124 @@
import tiktoken
import openai
import json
import os
openai.api_key = os.getenv("OPENAI_API_KEY")
def encode(text: str, model: str):
    enc = tiktoken.get_encoding("cl100k_base")
    assert enc.decode(enc.encode("hello world")) == "hello world"
    # To get the tokeniser corresponding to a specific model in the OpenAI API:
    enc = tiktoken.encoding_for_model(model)
    return enc.encode(text)
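# Quick usage sketch (the model name here is just an example):
#   encode("hello world", "gpt-3.5-turbo") returns the token ids for the string,
#   so len(...) gives its token count.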
# def ask(prompt: str, model: str = "gpt-3.5-turbo"):
#     # To get the tokeniser corresponding to a specific model in the OpenAI API:
#     enc = tiktoken.encoding_for_model(model)
#     resp = openai.ChatCompletion.create(
#         model=model,
#         messages=[
#             {
#                 "role": "user",
#                 "content": prompt
#             }
#         ]
#     )
#     return enc.encode(prompt), enc.encode(resp['choices'][0]['message']['content']), resp
def ask(
    messages: list,
    model: str = "gpt-3.5-turbo"
):
    enc = tiktoken.encoding_for_model(model)
    resp = openai.ChatCompletion.create(
        model=model,
        messages=messages
    )
    # Rough reconstruction of the prompt text, used only to estimate its token count.
    txt = ""
    for r in messages:
        txt += r['role'] + r['content'] + "\n"
    txt += "assistant: "
    return enc.encode(txt), enc.encode(resp['choices'][0]['message']['content']), resp
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens
messages = [
    {
        "role": "user",
        "content": "你叫什么名字?"
    }, {
        "role": "assistant",
        "content": "我是AI助手没有具体的名字。你可以叫我GPT-3。有什么可以帮到你的吗"
    }, {
        "role": "user",
        "content": "你是由谁开发的?"
    }, {
        "role": "assistant",
        "content": "我是由OpenAI开发的一家人工智能研究实验室。OpenAI的使命是促进人工智能的发展使其为全人类带来积极影响。我是由OpenAI团队使用GPT-3模型训练而成的。"
    }, {
        "role": "user",
        "content": "很高兴见到你。"
    }
]
pro, rep, resp = ask(messages)
print(len(pro), len(rep))  # locally estimated prompt / completion token counts
print(resp)
print(resp['choices'][0]['message']['content'])
print(num_tokens_from_messages(messages, model="gpt-3.5-turbo"))
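# Hedged check sketch: the ChatCompletion response also reports token usage
# (field names per the openai 0.x API), which can be compared against the
# local estimates printed above.
print(resp['usage']['prompt_tokens'], resp['usage']['completion_tokens'])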