-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathevals_action.yml
More file actions
92 lines (77 loc) · 3.14 KB
/
evals_action.yml
File metadata and controls
92 lines (77 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# This is a sample GitHub Actions workflow file that runs prompt evaluations
# on pull requests when prompt files are changed. It uses the `gh-models` CLI to evaluate prompts
# and comments the results back on the pull request.
# The workflow is triggered by pull requests that modify any `.prompt.yml` files.
name: Run evaluations for changed prompts

# Token scopes requested for this workflow's GITHUB_TOKEN.
# `pull-requests: write` is needed by the final step, which posts a comment.
permissions:
  models: read
  contents: read
  pull-requests: write

# Run only for pull requests that touch at least one `*.prompt.yml` file.
on:
  pull_request:
    paths:
      - '**/*.prompt.yml'

jobs:
  evaluate-model:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch full history; the diff below compares against origin/<base>.
          fetch-depth: 0

      - name: Setup gh-models
        run: gh extension install github/gh-models
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Outputs:
      #   first_prompt    - path of the first changed prompt file (if any)
      #   skip_evaluation - 'true' when no evaluable prompt file changed
      - name: Find changed prompt files
        id: find-prompts
        env:
          # Passed through the environment instead of being expanded into the
          # script text, so the ref name is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Get the first changed file that matches the *.prompt.yml pattern.
          # --diff-filter=d drops deleted files, which cannot be evaluated.
          changed_prompt=$(git diff --name-only --diff-filter=d "origin/${BASE_REF}..HEAD" | grep '\.prompt\.yml$' | head -1)
          if [[ -z "$changed_prompt" ]]; then
            echo "No prompt files found in the changes"
            echo "skip_evaluation=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "first_prompt=$changed_prompt" >> "$GITHUB_OUTPUT"
          echo "Found changed prompt file: $changed_prompt"

      # Writes the evaluator output to eval_output.txt and the job summary,
      # and sets the `eval_status` output to 'success' or 'failure'.
      - name: Run model evaluation
        id: eval
        # Honor the skip flag set by the previous step.
        if: steps.find-prompts.outputs.skip_evaluation != 'true'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # The file name comes from the pull request, so it is untrusted:
          # hand it over as an environment variable, never as script text.
          PROMPT_FILE: ${{ steps.find-prompts.outputs.first_prompt }}
        run: |
          set -e
          echo "## Model Evaluation Results" >> "$GITHUB_STEP_SUMMARY"
          echo "Evaluating: $PROMPT_FILE" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          if gh models eval "$PROMPT_FILE" > eval_output.txt 2>&1; then
            echo "✅ All evaluations passed!" >> "$GITHUB_STEP_SUMMARY"
            cat eval_output.txt >> "$GITHUB_STEP_SUMMARY"
            echo "eval_status=success" >> "$GITHUB_OUTPUT"
          else
            echo "❌ Some evaluations failed!" >> "$GITHUB_STEP_SUMMARY"
            cat eval_output.txt >> "$GITHUB_STEP_SUMMARY"
            echo "eval_status=failure" >> "$GITHUB_OUTPUT"
            # Fail the job so the pull request check goes red.
            exit 1
          fi

      - name: Comment on PR with evaluation results
        # Without a status-check function an implicit success() applies, which
        # would skip this step exactly when the evaluation failed. Run whenever
        # the eval step produced a status (i.e. eval_output.txt exists).
        if: github.event_name == 'pull_request' && !cancelled() && steps.eval.outputs.eval_status != ''
        uses: actions/github-script@v7
        env:
          EVAL_STATUS: ${{ steps.eval.outputs.eval_status }}
        with:
          script: |
            const fs = require('fs');
            const output = fs.readFileSync('eval_output.txt', 'utf8');
            const evalStatus = process.env.EVAL_STATUS;
            const statusMessage = evalStatus === 'success'
              ? '✅ Evaluation passed'
              : '❌ Evaluation failed';
            // Await the request so an API error fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: `## ${statusMessage}
            \`\`\`
            ${output}
            \`\`\`
            Review the evaluation results above for more details.`
            });