mirror of
https://github.com/langgenius/dify.git
synced 2024-11-16 11:42:29 +08:00
Merge c03f8f55ed
into 5ff02b469f
This commit is contained in:
commit
b2e94e49cd
7
evaluate/code-generator/.env.example
Normal file
7
evaluate/code-generator/.env.example
Normal file
|
@ -0,0 +1,7 @@
|
|||
# MODEL_PROVIDER=anthropic
|
||||
# MODEL_NAME=claude-3-5-sonnet-20241022
|
||||
MODEL_PROVIDER=openai
|
||||
MODEL_NAME=gpt-4o-mini
|
||||
CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194
|
||||
CODE_EXECUTION_API_KEY=dify-sandbox
|
||||
CONSOLE_API_URL=http://127.0.0.1:5001
|
2
evaluate/code-generator/.gitignore
vendored
Normal file
2
evaluate/code-generator/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
.env
|
||||
.env.local
|
68
evaluate/code-generator/README.md
Normal file
68
evaluate/code-generator/README.md
Normal file
|
@ -0,0 +1,68 @@
|
|||
# Code Generator Evaluator
|
||||
|
||||
## Getting Started
|
||||
1. Move to the evaluator directory
|
||||
|
||||
```bash
|
||||
cd dify/evaluate/code-generator
|
||||
```
|
||||
|
||||
2. Set up your `.env` file with required variables
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
3. Add your test cases to `testdata/testcases.json`
|
||||
|
||||
|
||||
4. Execute the evaluator
|
||||
|
||||
```bash
|
||||
# For Linux
|
||||
./bin/evaluate-code-linux
|
||||
|
||||
# For macOS (Intel)
|
||||
./bin/evaluate-code-mac
|
||||
|
||||
# For macOS (Apple Silicon)
|
||||
./bin/evaluate-code-mac-arm64
|
||||
|
||||
# For Windows
|
||||
./bin/evaluate-code.exe
|
||||
```
|
||||
|
||||
|
||||
## Build Instructions
|
||||
|
||||
### 1. Prepare Build Script
|
||||
First, grant execution permissions to the build script:
|
||||
```bash
|
||||
chmod +x build.sh
|
||||
```
|
||||
|
||||
### 2. Prerequisites
|
||||
- Go 1.23 or higher (the version declared in `go.mod`)
|
||||
- Properly configured `GOPATH`
|
||||
|
||||
### 3. Build Process
|
||||
Run the cross-platform build with the following command:
|
||||
```bash
|
||||
./build.sh
|
||||
```
|
||||
|
||||
## Running the Evaluator
|
||||
Execute the Code Generator evaluation on your platform using:
|
||||
|
||||
```bash
|
||||
# For Linux
|
||||
./bin/evaluate-code-linux
|
||||
|
||||
# For macOS (Intel)
|
||||
./bin/evaluate-code-mac
|
||||
|
||||
# For macOS (Apple Silicon)
|
||||
./bin/evaluate-code-mac-arm64
|
||||
|
||||
# For Windows
|
||||
./bin/evaluate-code.exe
|
||||
```
|
62
evaluate/code-generator/auth/login.go
Normal file
62
evaluate/code-generator/auth/login.go
Normal file
|
@ -0,0 +1,62 @@
|
|||
package auth
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LoginRequest is the JSON body that Login posts to the console login
// endpoint.
type LoginRequest struct {
	Email    string `json:"email"`
	Password string `json:"password"`
}
|
||||
|
||||
// LoginResponse is the JSON document Login decodes from the console login
// endpoint. Login treats any Result other than "success" as a failed login
// and returns Data.AccessToken otherwise.
type LoginResponse struct {
	Result string `json:"result"`
	Data   struct {
		AccessToken  string `json:"access_token"`
		RefreshToken string `json:"refresh_token"`
	} `json:"data"`
}
|
||||
|
||||
func Login(email, password string) (string, error) {
|
||||
client := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
loginPayload := LoginRequest{
|
||||
Email: email,
|
||||
Password: password,
|
||||
}
|
||||
|
||||
loginJSON, err := json.Marshal(loginPayload)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to convert to JSON: %w", err)
|
||||
}
|
||||
baseUrl := os.Getenv("CONSOLE_API_URL")
|
||||
loginReq, err := http.NewRequest("POST", baseUrl+"/console/api/login", bytes.NewBuffer(loginJSON))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
loginReq.Header.Set("Content-Type", "application/json")
|
||||
|
||||
loginResp, err := client.Do(loginReq)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer loginResp.Body.Close()
|
||||
|
||||
var loginResult LoginResponse
|
||||
if err := json.NewDecoder(loginResp.Body).Decode(&loginResult); err != nil {
|
||||
return "", fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
|
||||
if loginResult.Result != "success" {
|
||||
return "", fmt.Errorf("login failed")
|
||||
}
|
||||
|
||||
return loginResult.Data.AccessToken, nil
|
||||
}
|
BIN
evaluate/code-generator/bin/evaluate-code-linux
Executable file
BIN
evaluate/code-generator/bin/evaluate-code-linux
Executable file
Binary file not shown.
BIN
evaluate/code-generator/bin/evaluate-code-mac
Executable file
BIN
evaluate/code-generator/bin/evaluate-code-mac
Executable file
Binary file not shown.
BIN
evaluate/code-generator/bin/evaluate-code-mac-arm64
Executable file
BIN
evaluate/code-generator/bin/evaluate-code-mac-arm64
Executable file
Binary file not shown.
BIN
evaluate/code-generator/bin/evaluate-code.exe
Executable file
BIN
evaluate/code-generator/bin/evaluate-code.exe
Executable file
Binary file not shown.
17
evaluate/code-generator/build.sh
Executable file
17
evaluate/code-generator/build.sh
Executable file
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash
# Cross-compiles the evaluator in ./cmd/ for Linux, macOS (amd64 and arm64)
# and Windows, writing the binaries to ./bin.

# Stop at the first failing command so a broken build is never followed by
# the "Build complete!" message.
set -euo pipefail

mkdir -p bin

echo "Building for Linux (amd64)..."
GOOS=linux GOARCH=amd64 go build -o bin/evaluate-code-linux ./cmd/

echo "Building for macOS (amd64)..."
GOOS=darwin GOARCH=amd64 go build -o bin/evaluate-code-mac ./cmd/

echo "Building for macOS (arm64)..."
GOOS=darwin GOARCH=arm64 go build -o bin/evaluate-code-mac-arm64 ./cmd/

echo "Building for Windows (amd64)..."
GOOS=windows GOARCH=amd64 go build -o bin/evaluate-code.exe ./cmd/

echo "Build complete! Binaries are in the bin directory."
|
121
evaluate/code-generator/cmd/main.go
Normal file
121
evaluate/code-generator/cmd/main.go
Normal file
|
@ -0,0 +1,121 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"evaluate/auth"
|
||||
"evaluate/coderuntime"
|
||||
"evaluate/testdata"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/joho/godotenv"
|
||||
"golang.org/x/term"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := godotenv.Load("./.env"); err != nil {
|
||||
fmt.Printf("Failed to load .env file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Print("Please enter your email address: ")
|
||||
var email string
|
||||
fmt.Scanln(&email)
|
||||
|
||||
fmt.Print("Please enter your password: ")
|
||||
password, err := term.ReadPassword(int(syscall.Stdin))
|
||||
if err != nil {
|
||||
fmt.Printf("\nFailed to read password: %v\n", err)
|
||||
return
|
||||
}
|
||||
fmt.Println()
|
||||
accessToken, err := auth.Login(email, string(password))
|
||||
testCases, err := testdata.LoadTestCases("./testdata/testcases.json")
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to load test cases: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
metrics := testdata.NewTestMetrics()
|
||||
|
||||
modelProvider := os.Getenv("MODEL_PROVIDER")
|
||||
modelName := os.Getenv("MODEL_NAME")
|
||||
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
fmt.Printf("📱 Model Provider: %s\n", modelProvider)
|
||||
fmt.Printf("🤖 Model Name: %s\n", modelName)
|
||||
fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━")
|
||||
|
||||
for _, tc := range testCases {
|
||||
fmt.Printf("\nExecuting test case: %s\n", tc.Name)
|
||||
|
||||
codegenRequest := coderuntime.GenerateCodeRequest{
|
||||
Instruction: tc.Instruction,
|
||||
CodeLanguage: tc.CodeLanguage,
|
||||
NoVariable: false,
|
||||
ModelConfig: coderuntime.ModelConfig{
|
||||
Provider: modelProvider,
|
||||
Name: modelName,
|
||||
Mode: "chat",
|
||||
CompletionParams: coderuntime.CompletionParams{
|
||||
Temperature: 0.7,
|
||||
MaxTokens: 0,
|
||||
TopP: 0,
|
||||
Echo: false,
|
||||
Stop: []string{},
|
||||
PresencePenalty: 0,
|
||||
FrequencyPenalty: 0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
generatedCode, err := coderuntime.GenerateCode(
|
||||
codegenRequest,
|
||||
coderuntime.AccessToken{
|
||||
Value: accessToken,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
metrics.AddResult(testdata.TestResult{
|
||||
TestCase: tc,
|
||||
Success: false,
|
||||
Error: err,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
language := generatedCode.Language
|
||||
if language == "python" {
|
||||
language += "3"
|
||||
}
|
||||
|
||||
request := coderuntime.SandboxRequest{
|
||||
Language: language,
|
||||
Code: generatedCode.Code,
|
||||
EnableNetwork: true,
|
||||
}
|
||||
|
||||
result, err := coderuntime.ExecuteCode(request, tc.Inputs)
|
||||
if result.Error != nil {
|
||||
metrics.AddResult(testdata.TestResult{
|
||||
TestCase: tc,
|
||||
Success: false,
|
||||
Error: result.Error,
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
normalizedResult := strings.ReplaceAll(strings.ReplaceAll(result.Body, " ", ""), "\n", "")
|
||||
normalizedTruth := strings.ReplaceAll(strings.ReplaceAll(tc.GroundTruth, " ", ""), "\n", "")
|
||||
|
||||
metrics.AddResult(testdata.TestResult{
|
||||
TestCase: tc,
|
||||
Success: normalizedResult == normalizedTruth,
|
||||
ActualValue: result.Body,
|
||||
})
|
||||
}
|
||||
|
||||
metrics.Finish()
|
||||
metrics.PrintSummary()
|
||||
}
|
142
evaluate/code-generator/coderuntime/code-execute.go
Normal file
142
evaluate/code-generator/coderuntime/code-execute.go
Normal file
|
@ -0,0 +1,142 @@
|
|||
package coderuntime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SandboxRequest is the JSON body ExecuteCode posts to the sandbox run
// endpoint. Preload is omitted from the JSON when empty.
type SandboxRequest struct {
	Language      string `json:"language"`
	Code          string `json:"code"`
	Preload       string `json:"preload,omitempty"`
	EnableNetwork bool   `json:"enable_network"`
}
|
||||
|
||||
// ExecutionResult is what ExecuteCode reports for one sandbox run.
type ExecutionResult struct {
	// StatusCode is the HTTP status returned by the sandbox.
	StatusCode int
	// Body is the raw response body, replaced by the formatted JSON result
	// when the program output could be extracted.
	Body string
	// Error is set when the sandbox reported an execution error or the
	// output could not be processed.
	Error error
}
|
||||
|
||||
// ExtractResult pulls the JSON object enclosed between the first and the last
// "<<RESULT>>" marker out of response and returns it re-indented.
//
// It returns an error when response does not contain two markers, or when
// the enclosed text is not a JSON object.
func ExtractResult(response string) (string, error) {
	const resultTag = "<<RESULT>>"

	first := strings.Index(response, resultTag)
	last := strings.LastIndex(response, resultTag)
	// first == last means only one marker is present; slicing between the
	// "two" markers would then have start > end and panic.
	if first == -1 || first == last {
		return "", fmt.Errorf("invalid result format")
	}

	jsonStr := response[first+len(resultTag) : last]

	var result map[string]interface{}
	if err := json.Unmarshal([]byte(jsonStr), &result); err != nil {
		return "", fmt.Errorf("failed to parse JSON: %w", err)
	}

	// Format output
	prettyJSON, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return "", fmt.Errorf("failed to format JSON: %w", err)
	}

	return string(prettyJSON), nil
}
|
||||
|
||||
func ExecuteCode(request SandboxRequest, inputs map[string]interface{}) (ExecutionResult, error) {
|
||||
apiKey := os.Getenv("CODE_EXECUTION_API_KEY")
|
||||
endpoint := os.Getenv("CODE_EXECUTION_ENDPOINT")
|
||||
|
||||
if apiKey == "" || endpoint == "" {
|
||||
fmt.Println("必要な環境変数が設定されていません")
|
||||
return ExecutionResult{}, fmt.Errorf("missing required environment variables")
|
||||
}
|
||||
var transformer TemplateTransformer
|
||||
switch request.Language {
|
||||
case "python3":
|
||||
transformer = NewPython3TemplateTransformer()
|
||||
case "javascript":
|
||||
transformer = NewJavaScriptTemplateTransformer()
|
||||
default:
|
||||
return ExecutionResult{}, fmt.Errorf("unsupported language: %s", request.Language)
|
||||
}
|
||||
// transformer := NewPython3TemplateTransformer()
|
||||
|
||||
finalCode, preload, err := transformer.TransformCaller(request.Code, inputs)
|
||||
if err != nil {
|
||||
return ExecutionResult{}, fmt.Errorf("failed to transform code: %v", err)
|
||||
}
|
||||
|
||||
execRequest := SandboxRequest{
|
||||
Language: request.Language,
|
||||
Code: finalCode,
|
||||
Preload: preload,
|
||||
EnableNetwork: request.EnableNetwork,
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(execRequest)
|
||||
if err != nil {
|
||||
return ExecutionResult{}, fmt.Errorf("failed to convert to JSON: %v", err)
|
||||
}
|
||||
|
||||
url := endpoint + "/v1/sandbox/run"
|
||||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return ExecutionResult{}, fmt.Errorf("failed to create request: %v", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("X-Api-Key", apiKey)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return ExecutionResult{}, fmt.Errorf("failed to send request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return ExecutionResult{}, fmt.Errorf("failed to read response: %v", err)
|
||||
}
|
||||
|
||||
result := ExecutionResult{
|
||||
StatusCode: resp.StatusCode,
|
||||
Body: string(body),
|
||||
}
|
||||
|
||||
if resp.StatusCode == 200 {
|
||||
var response struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Data struct {
|
||||
Error string `json:"error"`
|
||||
Stdout string `json:"stdout"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(body, &response); err != nil {
|
||||
return result, fmt.Errorf("failed to parse response: %v", err)
|
||||
}
|
||||
|
||||
if response.Data.Error != "" {
|
||||
result.Error = fmt.Errorf("execution error: %s", response.Data.Error)
|
||||
} else if prettyResult, err := ExtractResult(response.Data.Stdout); err != nil {
|
||||
result.Error = fmt.Errorf("failed to process result: %v", err)
|
||||
} else {
|
||||
result.Body = prettyResult
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
|
||||
}
|
73
evaluate/code-generator/coderuntime/generate-code.go
Normal file
73
evaluate/code-generator/coderuntime/generate-code.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package coderuntime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
)
|
||||
|
||||
func GenerateCode(request GenerateCodeRequest, accessToken AccessToken) (*GenerateCodeResponse, error) {
|
||||
baseUrl := os.Getenv("CONSOLE_API_URL")
|
||||
url := baseUrl + "/console/api/rule-code-generate"
|
||||
|
||||
jsonData, err := json.Marshal(request)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("JSON encoding error: %v", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("request creation error: %v", err)
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer "+accessToken.Value)
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("request sending error: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var response GenerateCodeResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
|
||||
return nil, fmt.Errorf("response decoding error: %v", err)
|
||||
}
|
||||
|
||||
return &response, nil
|
||||
}
|
||||
|
||||
type GenerateCodeRequest struct {
|
||||
Instruction string `json:"instruction"`
|
||||
CodeLanguage string `json:"code_language"`
|
||||
NoVariable bool `json:"no_variable"`
|
||||
ModelConfig ModelConfig `json:"model_config"`
|
||||
}
|
||||
// AccessToken wraps the token string that GenerateCode sends in the
// Authorization header as "Bearer <Value>".
type AccessToken struct {
	Value string
}
|
||||
// GenerateCodeResponse is the JSON document GenerateCode decodes from the
// console code-generation endpoint.
type GenerateCodeResponse struct {
	Code     string `json:"code"`
	Error    string `json:"error"`
	Language string `json:"language"`
}
|
||||
|
||||
type ModelConfig struct {
|
||||
Provider string `json:"provider"`
|
||||
Name string `json:"name"`
|
||||
Mode string `json:"mode"`
|
||||
CompletionParams CompletionParams `json:"completion_params"`
|
||||
}
|
||||
|
||||
// CompletionParams is the "completion_params" section of a ModelConfig.
// Every field is always encoded; a nil Stop encodes as JSON null, so callers
// that need an empty array must pass an empty, non-nil slice.
type CompletionParams struct {
	Temperature      float64  `json:"temperature"`
	MaxTokens        int      `json:"max_tokens"`
	TopP             float64  `json:"top_p"`
	Echo             bool     `json:"echo"`
	Stop             []string `json:"stop"`
	PresencePenalty  float64  `json:"presence_penalty"`
	FrequencyPenalty float64  `json:"frequency_penalty"`
}
|
|
@ -0,0 +1,28 @@
|
|||
package coderuntime
|
||||
|
||||
type JavaScriptTemplateTransformer struct {
|
||||
*BaseTemplateTransformer
|
||||
}
|
||||
|
||||
func NewJavaScriptTemplateTransformer() *JavaScriptTemplateTransformer {
|
||||
t := &JavaScriptTemplateTransformer{}
|
||||
t.BaseTemplateTransformer = NewBaseTemplateTransformer(t)
|
||||
return t
|
||||
}
|
||||
func (j *JavaScriptTemplateTransformer) GetRunnerScript() string {
|
||||
return `
|
||||
// declare main function
|
||||
{{code}}
|
||||
|
||||
// decode and prepare input object
|
||||
const inputs_obj = JSON.parse(Buffer.from('{{inputs}}', 'base64').toString('utf-8'))
|
||||
|
||||
// execute main function
|
||||
const output_obj = main(inputs_obj)
|
||||
|
||||
// convert output to json and print
|
||||
const output_json = JSON.stringify(output_obj, null, 4)
|
||||
const result = '<<RESULT>>' + output_json + '<<RESULT>>'
|
||||
console.log(result)
|
||||
`
|
||||
}
|
32
evaluate/code-generator/coderuntime/python3_transformer.go
Normal file
32
evaluate/code-generator/coderuntime/python3_transformer.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package coderuntime
|
||||
|
||||
type Python3TemplateTransformer struct {
|
||||
*BaseTemplateTransformer
|
||||
}
|
||||
|
||||
func NewPython3TemplateTransformer() *Python3TemplateTransformer {
|
||||
t := &Python3TemplateTransformer{}
|
||||
t.BaseTemplateTransformer = NewBaseTemplateTransformer(t)
|
||||
return t
|
||||
}
|
||||
|
||||
func (p *Python3TemplateTransformer) GetRunnerScript() string {
|
||||
return `
|
||||
# declare main function
|
||||
{{code}}
|
||||
|
||||
import json
|
||||
from base64 import b64decode
|
||||
|
||||
# decode and prepare input dict
|
||||
inputs_obj = json.loads(b64decode('{{inputs}}').decode('utf-8'))
|
||||
|
||||
# execute main function
|
||||
output_obj = main(**inputs_obj)
|
||||
|
||||
# convert output to json and print
|
||||
output_json = json.dumps(output_obj, indent=4)
|
||||
result = f'''<<RESULT>>{output_json}<<RESULT>>'''
|
||||
print(result)
|
||||
`
|
||||
}
|
63
evaluate/code-generator/coderuntime/template_transformer.go
Normal file
63
evaluate/code-generator/coderuntime/template_transformer.go
Normal file
|
@ -0,0 +1,63 @@
|
|||
package coderuntime
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TemplateTransformer turns generated code plus its inputs into a program
// the sandbox can run.
type TemplateTransformer interface {
	// TransformCaller returns the runnable script, the preload script and
	// any error from encoding inputs.
	TransformCaller(code string, inputs map[string]interface{}) (string, string, error)
	// GetRunnerScript returns the language-specific script template.
	GetRunnerScript() string
	// GetPreloadScript returns the script sent as the sandbox preload.
	GetPreloadScript() string
}
|
||||
|
||||
type BaseTemplateTransformer struct {
|
||||
CodePlaceholder string
|
||||
InputsPlaceholder string
|
||||
ResultTag string
|
||||
transformer TemplateTransformer
|
||||
}
|
||||
|
||||
func NewBaseTemplateTransformer(t TemplateTransformer) *BaseTemplateTransformer {
|
||||
return &BaseTemplateTransformer{
|
||||
CodePlaceholder: "{{code}}",
|
||||
InputsPlaceholder: "{{inputs}}",
|
||||
ResultTag: "<<RESULT>>",
|
||||
transformer: t,
|
||||
}
|
||||
}
|
||||
|
||||
func (t *BaseTemplateTransformer) GetRunnerScript() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (t *BaseTemplateTransformer) GetPreloadScript() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (t *BaseTemplateTransformer) TransformCaller(code string, inputs map[string]interface{}) (string, string, error) {
|
||||
inputsJSON, err := json.Marshal(inputs)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
encoder := json.NewEncoder(&buf)
|
||||
encoder.SetEscapeHTML(false)
|
||||
if err := encoder.Encode(inputs); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
inputsJSON = bytes.TrimSpace(buf.Bytes()) // 末尾の改行を削除
|
||||
|
||||
inputsBase64 := base64.StdEncoding.EncodeToString(inputsJSON)
|
||||
|
||||
runnerScript := t.transformer.GetRunnerScript()
|
||||
runnerScript = strings.ReplaceAll(runnerScript, t.CodePlaceholder, code)
|
||||
runnerScript = strings.ReplaceAll(runnerScript, t.InputsPlaceholder, inputsBase64)
|
||||
|
||||
preloadScript := t.GetPreloadScript()
|
||||
|
||||
return runnerScript, preloadScript, nil
|
||||
}
|
10
evaluate/code-generator/go.mod
Normal file
10
evaluate/code-generator/go.mod
Normal file
|
@ -0,0 +1,10 @@
|
|||
module evaluate
|
||||
|
||||
go 1.23.0
|
||||
|
||||
require (
|
||||
github.com/joho/godotenv v1.5.1
|
||||
golang.org/x/term v0.26.0
|
||||
)
|
||||
|
||||
require golang.org/x/sys v0.27.0 // indirect
|
6
evaluate/code-generator/go.sum
Normal file
6
evaluate/code-generator/go.sum
Normal file
|
@ -0,0 +1,6 @@
|
|||
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
|
||||
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU=
|
||||
golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E=
|
62
evaluate/code-generator/testdata/metrics.go
vendored
Normal file
62
evaluate/code-generator/testdata/metrics.go
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
package testdata
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
type TestMetrics struct {
|
||||
TotalTests int
|
||||
SuccessfulTests int
|
||||
FailedTests int
|
||||
StartTime time.Time
|
||||
EndTime time.Time
|
||||
Results []TestResult
|
||||
}
|
||||
|
||||
func NewTestMetrics() *TestMetrics {
|
||||
return &TestMetrics{
|
||||
StartTime: time.Now(),
|
||||
Results: make([]TestResult, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *TestMetrics) AddResult(result TestResult) {
|
||||
m.TotalTests++
|
||||
if result.Success {
|
||||
m.SuccessfulTests++
|
||||
} else {
|
||||
m.FailedTests++
|
||||
}
|
||||
m.Results = append(m.Results, result)
|
||||
}
|
||||
|
||||
func (m *TestMetrics) Finish() {
|
||||
m.EndTime = time.Now()
|
||||
}
|
||||
|
||||
func (m *TestMetrics) PrintSummary() {
|
||||
duration := m.EndTime.Sub(m.StartTime)
|
||||
accuracy := float64(m.SuccessfulTests) / float64(m.TotalTests) * 100
|
||||
fmt.Printf("\n=== Detailed Results ===\n")
|
||||
for _, result := range m.Results {
|
||||
if result.Success {
|
||||
fmt.Printf("✅ %s\n", result.TestCase.Name)
|
||||
} else {
|
||||
fmt.Printf("❌ %s\n", result.TestCase.Name)
|
||||
if result.Error != nil {
|
||||
fmt.Printf(" Error: %v\n", result.Error)
|
||||
} else {
|
||||
fmt.Printf(" Expected: %s\n Actual: %s\n",
|
||||
result.TestCase.GroundTruth, result.ActualValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Printf("\n=== Test Execution Summary ===\n")
|
||||
fmt.Printf("Total Tests: %d\n", m.TotalTests)
|
||||
fmt.Printf("Successful: %d\n", m.SuccessfulTests)
|
||||
fmt.Printf("Failed: %d\n", m.FailedTests)
|
||||
fmt.Printf("Accuracy: %.2f%%\n", accuracy)
|
||||
fmt.Printf("Execution Time: %.2f seconds\n", duration.Seconds())
|
||||
|
||||
}
|
35
evaluate/code-generator/testdata/testcase.go
vendored
Normal file
35
evaluate/code-generator/testdata/testcase.go
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
package testdata
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
)
|
||||
|
||||
// TestCase is one entry of the test-case JSON file read by LoadTestCases.
type TestCase struct {
	Name         string                 `json:"name"`
	Inputs       map[string]interface{} `json:"inputs"`
	Instruction  string                 `json:"instruction"`
	CodeLanguage string                 `json:"code_language"`
	GroundTruth  string                 `json:"ground_truth"`
}
|
||||
|
||||
type TestResult struct {
|
||||
TestCase TestCase
|
||||
Success bool
|
||||
ActualValue string
|
||||
Error error
|
||||
}
|
||||
|
||||
func LoadTestCases(filePath string) ([]TestCase, error) {
|
||||
file, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var testCases []TestCase
|
||||
if err := json.Unmarshal(file, &testCases); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return testCases, nil
|
||||
}
|
20
evaluate/code-generator/testdata/testcases.json
vendored
Normal file
20
evaluate/code-generator/testdata/testcases.json
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
[
|
||||
{
|
||||
"name": "Positive Number Check",
|
||||
"inputs": {
|
||||
"x": 10
|
||||
},
|
||||
"instruction": "if x > 0: return 'positive'",
|
||||
"code_language": "python",
|
||||
"ground_truth": "{\"result\": \"positive\"}"
|
||||
},
|
||||
{
|
||||
"name": "Negative Number Check",
|
||||
"inputs": {
|
||||
"x": -5
|
||||
},
|
||||
"instruction": "if x > 0: return 'positive' else: return 'negative'",
|
||||
"code_language": "python",
|
||||
"ground_truth": "{\"result\": \"negative\"}"
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue
Block a user