diff --git a/evaluate/code-generator/.env.example b/evaluate/code-generator/.env.example new file mode 100644 index 0000000000..211c230686 --- /dev/null +++ b/evaluate/code-generator/.env.example @@ -0,0 +1,7 @@ +# MODEL_PROVIDER=anthropic +# MODEL_NAME=claude-3-5-sonnet-20241022 +MODEL_PROVIDER=openai +MODEL_NAME=gpt-4o-mini +CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194 +CODE_EXECUTION_API_KEY=dify-sandbox +CONSOLE_API_URL=http://127.0.0.1:5001 \ No newline at end of file diff --git a/evaluate/code-generator/.gitignore b/evaluate/code-generator/.gitignore new file mode 100644 index 0000000000..e3d34d2fbe --- /dev/null +++ b/evaluate/code-generator/.gitignore @@ -0,0 +1,2 @@ +.env +.env.local \ No newline at end of file diff --git a/evaluate/code-generator/README.md b/evaluate/code-generator/README.md new file mode 100644 index 0000000000..a1df4fe696 --- /dev/null +++ b/evaluate/code-generator/README.md @@ -0,0 +1,68 @@ +# Code Generator Evaluator + +## Getting Started +1. Move to the evaluator directory + +```bash +cd dify/evaluate/code-generator +``` + +2. Set up your `.env` file with required variables +```bash +cp .env.example .env +``` + +3. Add your test cases to `testdata/testcases.json` + + +4. Execute the evaluator + +```bash +# For Linux +./bin/evaluate-code-linux + +# For macOS (Intel) +./bin/evaluate-code-mac + +# For macOS (Apple Silicon) +./bin/evaluate-code-mac-arm64 + +# For Windows +./bin/evaluate-code.exe +``` + + +## Build Instructions + +### 1. Prepare Build Script +First, grant execution permissions to the build script: +```bash +chmod +x build.sh +``` + +### 2. Prerequisites +- Go 1.23 or higher +- Properly configured `GOPATH` + +### 3.
// LoginRequest is the JSON payload posted to the console login endpoint.
type LoginRequest struct {
	Email    string `json:"email"`
	Password string `json:"password"`
}

// LoginResponse mirrors the JSON body decoded from the console login
// endpoint's reply.
type LoginResponse struct {
	Result string `json:"result"`
	Data   struct {
		AccessToken  string `json:"access_token"`
		RefreshToken string `json:"refresh_token"`
	} `json:"data"`
}

// Login posts the given credentials to $CONSOLE_API_URL/console/api/login and
// returns the access token from the response.
//
// An error is returned when CONSOLE_API_URL is unset, the request cannot be
// built or sent, the server answers with a non-2xx status, the body is not
// valid JSON, the "result" field is not "success", or no access token is
// present in the response.
func Login(email, password string) (string, error) {
	baseURL := os.Getenv("CONSOLE_API_URL")
	if baseURL == "" {
		// Fail early with a clear message instead of an opaque
		// "unsupported protocol scheme" error from the HTTP client.
		return "", fmt.Errorf("CONSOLE_API_URL is not set")
	}

	loginJSON, err := json.Marshal(LoginRequest{Email: email, Password: password})
	if err != nil {
		return "", fmt.Errorf("failed to convert to JSON: %w", err)
	}

	loginReq, err := http.NewRequest(http.MethodPost, baseURL+"/console/api/login", bytes.NewReader(loginJSON))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	loginReq.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 10 * time.Second}
	loginResp, err := client.Do(loginReq)
	if err != nil {
		return "", fmt.Errorf("failed to send request: %w", err)
	}
	defer loginResp.Body.Close()

	// Reject error statuses before trusting the body: an error page or an
	// error payload must never be mistaken for a successful login.
	if loginResp.StatusCode < 200 || loginResp.StatusCode >= 300 {
		return "", fmt.Errorf("login failed: unexpected status %s", loginResp.Status)
	}

	var loginResult LoginResponse
	if err := json.NewDecoder(loginResp.Body).Decode(&loginResult); err != nil {
		return "", fmt.Errorf("failed to decode response: %w", err)
	}

	if loginResult.Result != "success" {
		return "", fmt.Errorf("login failed: result %q", loginResult.Result)
	}
	if loginResult.Data.AccessToken == "" {
		// A "success" without a token is unusable for later authenticated calls.
		return "", fmt.Errorf("login failed: response contained no access token")
	}

	return loginResult.Data.AccessToken, nil
}
\ No newline at end of file diff --git a/evaluate/code-generator/cmd/main.go b/evaluate/code-generator/cmd/main.go new file mode 100644 index 0000000000..ab7076fa4a --- /dev/null +++ b/evaluate/code-generator/cmd/main.go @@ -0,0 +1,121 @@ +package main + +import ( + "evaluate/auth" + "evaluate/coderuntime" + "evaluate/testdata" + "fmt" + "os" + "strings" + "syscall" + + "github.com/joho/godotenv" + "golang.org/x/term" +) + +func main() { + if err := godotenv.Load("./.env"); err != nil { + fmt.Printf("Failed to load .env file: %v\n", err) + return + } + + fmt.Print("Please enter your email address: ") + var email string + fmt.Scanln(&email) + + fmt.Print("Please enter your password: ") + password, err := term.ReadPassword(int(syscall.Stdin)) + if err != nil { + fmt.Printf("\nFailed to read password: %v\n", err) + return + } + fmt.Println() + accessToken, err := auth.Login(email, string(password)) + testCases, err := testdata.LoadTestCases("./testdata/testcases.json") + if err != nil { + fmt.Printf("Failed to load test cases: %v\n", err) + return + } + + metrics := testdata.NewTestMetrics() + + modelProvider := os.Getenv("MODEL_PROVIDER") + modelName := os.Getenv("MODEL_NAME") + + fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━") + fmt.Printf("📱 Model Provider: %s\n", modelProvider) + fmt.Printf("🤖 Model Name: %s\n", modelName) + fmt.Println("━━━━━━━━━━━━━━━━━━━━━━━━━━━") + + for _, tc := range testCases { + fmt.Printf("\nExecuting test case: %s\n", tc.Name) + + codegenRequest := coderuntime.GenerateCodeRequest{ + Instruction: tc.Instruction, + CodeLanguage: tc.CodeLanguage, + NoVariable: false, + ModelConfig: coderuntime.ModelConfig{ + Provider: modelProvider, + Name: modelName, + Mode: "chat", + CompletionParams: coderuntime.CompletionParams{ + Temperature: 0.7, + MaxTokens: 0, + TopP: 0, + Echo: false, + Stop: []string{}, + PresencePenalty: 0, + FrequencyPenalty: 0, + }, + }, + } + + generatedCode, err := coderuntime.GenerateCode( + codegenRequest, + 
// ExtractResult pulls the JSON object that the runner script printed between
// the opening and closing result tags out of the sandbox's stdout and returns
// it re-indented.
//
// The payload is taken from just after the first tag up to the last tag. An
// error is returned when fewer than two tags are present, when the enclosed
// text is not a JSON object, or when it cannot be re-encoded.
func ExtractResult(response string) (string, error) {
	const resultTag = "<>"

	openIdx := strings.Index(response, resultTag)
	closeIdx := strings.LastIndex(response, resultTag)

	// Compare the raw indexes before adding the tag length. A missing tag
	// gives -1, and a single tag makes both indexes equal; either case would
	// otherwise slice with start > end and panic.
	if openIdx == -1 || closeIdx < openIdx+len(resultTag) {
		return "", fmt.Errorf("invalid result format")
	}

	jsonStr := response[openIdx+len(resultTag) : closeIdx]

	var result map[string]interface{}
	if err := json.Unmarshal([]byte(jsonStr), &result); err != nil {
		return "", fmt.Errorf("failed to parse JSON: %w", err)
	}

	// Format output
	prettyJSON, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return "", fmt.Errorf("failed to format JSON: %w", err)
	}

	return string(prettyJSON), nil
}
// GenerateCode posts request as JSON to
// $CONSOLE_API_URL/console/api/rule-code-generate, authenticating with
// accessToken as a bearer token, and returns the decoded response.
//
// An error is returned when CONSOLE_API_URL is unset, the request cannot be
// encoded, built or sent, the server answers with a non-2xx status, the body
// cannot be decoded, or the decoded response carries a non-empty "error"
// field. On error the returned response is nil.
//
// NOTE(review): the HTTP client has no timeout, so a stalled server blocks
// the caller indefinitely. Adding one needs the "time" package, which this
// file does not import.
func GenerateCode(request GenerateCodeRequest, accessToken AccessToken) (*GenerateCodeResponse, error) {
	baseURL := os.Getenv("CONSOLE_API_URL")
	if baseURL == "" {
		return nil, fmt.Errorf("CONSOLE_API_URL is not set")
	}
	url := baseURL + "/console/api/rule-code-generate"

	jsonData, err := json.Marshal(request)
	if err != nil {
		return nil, fmt.Errorf("JSON encoding error: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(jsonData))
	if err != nil {
		return nil, fmt.Errorf("request creation error: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+accessToken.Value)

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request sending error: %w", err)
	}
	defer resp.Body.Close()

	// An error status must not be decoded as a (mostly empty) success
	// response, otherwise the caller goes on to execute empty code.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		var apiErr struct {
			Message string `json:"message"`
		}
		// Best effort only: the error body is not guaranteed to be JSON.
		_ = json.NewDecoder(resp.Body).Decode(&apiErr)
		if apiErr.Message != "" {
			return nil, fmt.Errorf("code generation request failed: %s: %s", resp.Status, apiErr.Message)
		}
		return nil, fmt.Errorf("code generation request failed: %s", resp.Status)
	}

	var response GenerateCodeResponse
	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
		return nil, fmt.Errorf("response decoding error: %w", err)
	}

	// Surface a failure reported in the body instead of handing back a
	// response whose Code cannot be used.
	if response.Error != "" {
		return nil, fmt.Errorf("code generation error: %s", response.Error)
	}

	return &response, nil
}

// GenerateCodeRequest is the JSON payload sent to the code generation endpoint.
type GenerateCodeRequest struct {
	Instruction  string      `json:"instruction"`
	CodeLanguage string      `json:"code_language"`
	NoVariable   bool        `json:"no_variable"`
	ModelConfig  ModelConfig `json:"model_config"`
}

// AccessToken wraps the bearer token sent in the Authorization header.
type AccessToken struct {
	Value string
}

// GenerateCodeResponse is the JSON body decoded from the code generation
// endpoint's reply.
type GenerateCodeResponse struct {
	Code     string `json:"code"`
	Error    string `json:"error"`
	Language string `json:"language"`
}

// ModelConfig selects the model used for generation and its parameters.
type ModelConfig struct {
	Provider         string           `json:"provider"`
	Name             string           `json:"name"`
	Mode             string           `json:"mode"`
	CompletionParams CompletionParams `json:"completion_params"`
}

// CompletionParams holds the sampling parameters serialized under
// "completion_params".
type CompletionParams struct {
	Temperature      float64  `json:"temperature"`
	MaxTokens        int      `json:"max_tokens"`
	TopP             float64  `json:"top_p"`
	Echo             bool     `json:"echo"`
	Stop             []string `json:"stop"`
	PresencePenalty  float64  `json:"presence_penalty"`
	FrequencyPenalty float64  `json:"frequency_penalty"`
}
// TemplateTransformer turns user code plus its inputs into the script (and
// optional preload script) that is sent to the sandbox.
type TemplateTransformer interface {
	TransformCaller(code string, inputs map[string]any) (string, string, error)
	GetRunnerScript() string
	GetPreloadScript() string
}

// BaseTemplateTransformer holds the placeholder strings shared by all
// language transformers and implements the substitution in TransformCaller.
// Language-specific types embed it and supply their own GetRunnerScript.
type BaseTemplateTransformer struct {
	CodePlaceholder   string
	InputsPlaceholder string
	ResultTag         string
	// transformer is the concrete implementation whose scripts are used;
	// the embedded base cannot see methods defined on the embedding type.
	transformer TemplateTransformer
}

// NewBaseTemplateTransformer returns a base transformer with the default
// placeholders that takes its scripts from t.
func NewBaseTemplateTransformer(t TemplateTransformer) *BaseTemplateTransformer {
	return &BaseTemplateTransformer{
		CodePlaceholder:   "{{code}}",
		InputsPlaceholder: "{{inputs}}",
		ResultTag:         "<>",
		transformer:       t,
	}
}

// GetRunnerScript returns the empty default runner script.
func (t *BaseTemplateTransformer) GetRunnerScript() string {
	return ""
}

// GetPreloadScript returns the empty default preload script.
func (t *BaseTemplateTransformer) GetPreloadScript() string {
	return ""
}

// TransformCaller fills the runner script template: the code placeholder is
// replaced with code and the inputs placeholder with the base64-encoded JSON
// of inputs (encoded without HTML escaping). It returns the filled runner
// script and the preload script, or an error if inputs cannot be encoded.
//
// Nil inputs are encoded as an empty JSON object so the runner scripts
// receive an object rather than null.
func (t *BaseTemplateTransformer) TransformCaller(code string, inputs map[string]any) (string, string, error) {
	if inputs == nil {
		inputs = map[string]any{}
	}

	var buf bytes.Buffer
	encoder := json.NewEncoder(&buf)
	encoder.SetEscapeHTML(false)
	if err := encoder.Encode(inputs); err != nil {
		return "", "", err
	}
	// Encode appends a newline; drop it before base64-encoding.
	inputsJSON := bytes.TrimSpace(buf.Bytes())
	inputsBase64 := base64.StdEncoding.EncodeToString(inputsJSON)

	// Fall back to the base's own (empty) scripts when no concrete
	// transformer was supplied, instead of dereferencing a nil interface.
	var impl TemplateTransformer = t
	if t.transformer != nil {
		impl = t.transformer
	}

	// Replace both placeholders in one pass so that a literal inputs
	// placeholder inside the user's code is left untouched.
	replacer := strings.NewReplacer(
		t.CodePlaceholder, code,
		t.InputsPlaceholder, inputsBase64,
	)
	runnerScript := replacer.Replace(impl.GetRunnerScript())

	// Take the preload script from the concrete transformer, as is done
	// for the runner script, so an overriding implementation is honored.
	preloadScript := impl.GetPreloadScript()

	return runnerScript, preloadScript, nil
}
+golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= diff --git a/evaluate/code-generator/testdata/metrics.go b/evaluate/code-generator/testdata/metrics.go new file mode 100644 index 0000000000..7fc90c0737 --- /dev/null +++ b/evaluate/code-generator/testdata/metrics.go @@ -0,0 +1,62 @@ +package testdata + +import ( + "fmt" + "time" +) + +type TestMetrics struct { + TotalTests int + SuccessfulTests int + FailedTests int + StartTime time.Time + EndTime time.Time + Results []TestResult +} + +func NewTestMetrics() *TestMetrics { + return &TestMetrics{ + StartTime: time.Now(), + Results: make([]TestResult, 0), + } +} + +func (m *TestMetrics) AddResult(result TestResult) { + m.TotalTests++ + if result.Success { + m.SuccessfulTests++ + } else { + m.FailedTests++ + } + m.Results = append(m.Results, result) +} + +func (m *TestMetrics) Finish() { + m.EndTime = time.Now() +} + +func (m *TestMetrics) PrintSummary() { + duration := m.EndTime.Sub(m.StartTime) + accuracy := float64(m.SuccessfulTests) / float64(m.TotalTests) * 100 + fmt.Printf("\n=== Detailed Results ===\n") + for _, result := range m.Results { + if result.Success { + fmt.Printf("✅ %s\n", result.TestCase.Name) + } else { + fmt.Printf("❌ %s\n", result.TestCase.Name) + if result.Error != nil { + fmt.Printf(" Error: %v\n", result.Error) + } else { + fmt.Printf(" Expected: %s\n Actual: %s\n", + result.TestCase.GroundTruth, result.ActualValue) + } + } + } + fmt.Printf("\n=== Test Execution Summary ===\n") + fmt.Printf("Total Tests: %d\n", m.TotalTests) + fmt.Printf("Successful: %d\n", m.SuccessfulTests) + fmt.Printf("Failed: %d\n", m.FailedTests) + fmt.Printf("Accuracy: %.2f%%\n", accuracy) + fmt.Printf("Execution Time: %.2f seconds\n", duration.Seconds()) + +} diff --git a/evaluate/code-generator/testdata/testcase.go b/evaluate/code-generator/testdata/testcase.go new file mode 100644 index 0000000000..86c5baf56c --- 
// TestCase describes one evaluation scenario as stored in the test case file.
type TestCase struct {
	Name         string                 `json:"name"`
	Inputs       map[string]interface{} `json:"inputs"`
	Instruction  string                 `json:"instruction"`
	CodeLanguage string                 `json:"code_language"`
	GroundTruth  string                 `json:"ground_truth"`
}

// TestResult pairs a test case with its outcome: whether it succeeded, the
// value actually produced, and any error recorded for it.
type TestResult struct {
	TestCase    TestCase
	Success     bool
	ActualValue string
	Error       error
}

// LoadTestCases reads the file at filePath and decodes its contents as a JSON
// array of test cases. A read or decode failure is returned unchanged, with a
// nil slice.
func LoadTestCases(filePath string) ([]TestCase, error) {
	var loaded []TestCase

	raw, err := os.ReadFile(filePath)
	if err == nil {
		err = json.Unmarshal(raw, &loaded)
	}
	if err != nil {
		return nil, err
	}

	return loaded, nil
}