gh-aw/pkg/workflow/compiler.go at main · github/gh-aw

History

786 lines (682 loc) · 36.6 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

package workflow

import (

_ "embed"

"errors"

"fmt"

"os"

"path/filepath"

"regexp"

"slices"

"strings"

"time"

"github.com/github/gh-aw/pkg/console"

"github.com/github/gh-aw/pkg/gitutil"

"github.com/github/gh-aw/pkg/logger"

"github.com/github/gh-aw/pkg/stringutil"

"go.yaml.in/yaml/v3"

)

var log = logger.New("workflow:compiler")

// heredocDelimiterRE matches randomized heredoc delimiters of the form GH_AW_<NAME>_<16hexchars>_EOF.

// Used to normalize delimiters when comparing compiled output to skip unnecessary writes.

var heredocDelimiterRE = regexp.MustCompile(`GH_AW_([A-Z0-9_]+)_[0-9a-f]{16}_EOF`)

// normalizeHeredocDelimiters replaces randomized heredoc delimiter tokens with a stable

// placeholder so that two compilations of the same workflow compare as equal even though

// each run embeds different random tokens.

func normalizeHeredocDelimiters(content string) string {

return heredocDelimiterRE.ReplaceAllString(content, "GH_AW_${1}_NORM_EOF")

}

const (

// MaxLockFileSize is the maximum allowed size for generated lock workflow files (500KB)

MaxLockFileSize = 512000 // 500KB in bytes

// MaxExpressionSize is the maximum allowed size for GitHub Actions expression values (21KB)

// This includes environment variable values, if conditions, and other expression contexts

// See: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration

MaxExpressionSize = 21000 // 21KB in bytes

// MaxPromptChunkSize is the maximum size for each chunk when splitting prompt text (20KB)

// This limit ensures each heredoc block stays under GitHub Actions step size limits (21KB)

MaxPromptChunkSize = 20000 // 20KB limit for each chunk

// MaxPromptChunks is the maximum number of chunks allowed when splitting prompt text

// This prevents excessive step generation for extremely large prompt texts

MaxPromptChunks = 5 // Maximum number of chunks

)

//go:embed schemas/github-workflow.json

var githubWorkflowSchema string

// formatCompilerMessage creates a formatted compiler message string (for warnings printed to stderr)

// filePath: the file path to include in the message (typically markdownPath or lockFile)

// msgType: the message type ("error" or "warning")

// message: the message text

func formatCompilerMessage(filePath string, msgType string, message string) string {

return console.FormatError(console.CompilerError{

Position: console.ErrorPosition{

File: filePath,

Line: 0,

Column: 0,

Type: msgType,

Message: message,

})

}

// CompileWorkflow compiles a workflow markdown file into a GitHub Actions YAML file.

// It reads the file from disk, parses frontmatter and markdown sections, and generates

// the corresponding workflow YAML. Returns the compiled workflow data or an error.

// The compilation process includes:

// - Reading and parsing the markdown file

// - Extracting frontmatter configuration

// - Validating workflow configuration

// - Generating GitHub Actions YAML

// - Writing the compiled workflow to a .lock.yml file

// This is the main entry point for compiling workflows from disk. For compiling

// pre-parsed workflow data, use CompileWorkflowData instead.

func (c *Compiler) CompileWorkflow(markdownPath string) error {

// Store markdownPath for use in dynamic tool generation

c.markdownPath = markdownPath

// Parse the markdown file

log.Printf("Parsing workflow file")

workflowData, err := c.ParseWorkflowFile(markdownPath)

if err != nil {

// ParseWorkflowFile already returns formatted compiler errors; pass them through.

if isFormattedCompilerError(err) {

return err

}

// Fallback for any unformatted error that slipped through.

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

return c.CompileWorkflowData(workflowData, markdownPath)

}

// validateWorkflowData performs comprehensive validation of workflow configuration

// including expressions, features, permissions, and configurations.

func (c *Compiler) validateWorkflowData(workflowData *WorkflowData, markdownPath string) error {

// Validate expression safety - check that all GitHub Actions expressions are in the allowed list

log.Printf("Validating expression safety")

if err := validateExpressionSafety(workflowData.MarkdownContent); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate expressions in runtime-import files at compile time

log.Printf("Validating runtime-import files")

// Go up from .github/workflows/file.md to repo root

workflowDir := filepath.Dir(markdownPath) // .github/workflows

githubDir := filepath.Dir(workflowDir) // .github

workspaceDir := filepath.Dir(githubDir) // repo root

if err := validateRuntimeImportFiles(workflowData.MarkdownContent, workspaceDir); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate feature flags

log.Printf("Validating feature flags")

if err := validateFeatures(workflowData); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Check for action-mode feature flag override

if workflowData.Features != nil {

if actionModeVal, exists := workflowData.Features["action-mode"]; exists {

if actionModeStr, ok := actionModeVal.(string); ok && actionModeStr != "" {

mode := ActionMode(actionModeStr)

if !mode.IsValid() {

return formatCompilerError(markdownPath, "error", fmt.Sprintf("invalid action-mode feature flag '%s'. Must be 'dev', 'release', or 'script'", actionModeStr), nil)

}

log.Printf("Overriding action mode from feature flag: %s", mode)

c.SetActionMode(mode)

}

// Parse permissions once for all permission-related validation checks below.

// WorkflowData.Permissions contains the raw YAML string (including "permissions:" prefix).

// Parsing once here avoids redundant YAML parsing in each validator.

workflowPermissions := NewPermissionsParser(workflowData.Permissions).ToPermissions()

// Validate dangerous permissions

log.Printf("Validating dangerous permissions")

if err := validateDangerousPermissions(workflowData, workflowPermissions); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate GitHub App-only permissions require a GitHub App to be configured

log.Printf("Validating GitHub App-only permissions")

if err := validateGitHubAppOnlyPermissions(workflowData, workflowPermissions); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate tools.github.github-app.permissions does not use "write"

log.Printf("Validating GitHub MCP app permissions (no write)")

if err := validateGitHubMCPAppPermissionsNoWrite(workflowData); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Warn when github-app.permissions is set in contexts that don't support it

warnGitHubAppPermissionsUnsupportedContexts(workflowData)

// Validate agent file exists if specified in engine config

log.Printf("Validating agent file if specified")

if err := c.validateAgentFile(workflowData, markdownPath); err != nil {

return err

}

// Validate sandbox configuration

log.Printf("Validating sandbox configuration")

if err := validateSandboxConfig(workflowData); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate safe-outputs target configuration

log.Printf("Validating safe-outputs target fields")

if err := validateSafeOutputsTarget(workflowData.SafeOutputs); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate safe-outputs max configuration

log.Printf("Validating safe-outputs max fields")

if err := validateSafeOutputsMax(workflowData.SafeOutputs); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate safe-outputs allowed-domains configuration

log.Printf("Validating safe-outputs allowed-domains")

if err := c.validateSafeOutputsAllowedDomains(workflowData.SafeOutputs); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate safe-job needs: declarations against known generated job IDs

log.Printf("Validating safe-job needs declarations")

if err := validateSafeJobNeeds(workflowData); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Emit warnings for push-to-pull-request-branch misconfiguration

log.Printf("Validating push-to-pull-request-branch configuration")

c.validatePushToPullRequestBranchWarnings(workflowData.SafeOutputs, workflowData.CheckoutConfigs)

// Validate network allowed domains configuration

log.Printf("Validating network allowed domains")

if err := c.validateNetworkAllowedDomains(workflowData.NetworkPermissions); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate network firewall configuration

log.Printf("Validating network firewall configuration")

if err := validateNetworkFirewallConfig(workflowData.NetworkPermissions); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate labels configuration

log.Printf("Validating labels")

if err := validateLabels(workflowData); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Validate workflow-level concurrency group expression

log.Printf("Validating workflow-level concurrency configuration")

if workflowData.Concurrency != "" {

// Extract the group expression from the concurrency YAML

// The Concurrency field contains the full YAML (e.g., "concurrency:\n group: \"...\"")

// We need to extract just the group value

groupExpr := extractConcurrencyGroupFromYAML(workflowData.Concurrency)

if groupExpr != "" {

if err := validateConcurrencyGroupExpression(groupExpr); err != nil {

return formatCompilerError(markdownPath, "error", "workflow-level concurrency validation failed: "+err.Error(), err)

}

// Validate concurrency.job-discriminator expression

if workflowData.ConcurrencyJobDiscriminator != "" {

if err := validateConcurrencyGroupExpression(workflowData.ConcurrencyJobDiscriminator); err != nil {

return formatCompilerError(markdownPath, "error", "concurrency.job-discriminator validation failed: "+err.Error(), err)

}

// Validate engine-level concurrency group expression

log.Printf("Validating engine-level concurrency configuration")

if workflowData.EngineConfig != nil && workflowData.EngineConfig.Concurrency != "" {

// Extract the group expression from the engine concurrency YAML

groupExpr := extractConcurrencyGroupFromYAML(workflowData.EngineConfig.Concurrency)

if groupExpr != "" {

if err := validateConcurrencyGroupExpression(groupExpr); err != nil {

return formatCompilerError(markdownPath, "error", "engine.concurrency validation failed: "+err.Error(), err)

}

// Validate safe-outputs concurrency group expression

if workflowData.SafeOutputs != nil && workflowData.SafeOutputs.ConcurrencyGroup != "" {

if err := validateConcurrencyGroupExpression(workflowData.SafeOutputs.ConcurrencyGroup); err != nil {

return formatCompilerError(markdownPath, "error", "safe-outputs.concurrency-group validation failed: "+err.Error(), err)

}

// Warn when the user has specified custom workflow-level concurrency with cancel-in-progress: true

// AND the workflow has the bot self-cancel risk combination (issue_comment triggers + GitHub App

// safe-outputs). In this case the auto-generated bot-actor isolation cannot be applied because the

// user's concurrency expression is preserved as-is. The user must add the bot-actor isolation

// themselves (e.g. prepend `contains(github.actor, '[bot]') && github.run_id ||` to their group key).

if workflowData.Concurrency != "" &&

strings.Contains(workflowData.Concurrency, "cancel-in-progress: true") &&

hasBotSelfCancelRisk(workflowData) {

fmt.Fprintln(os.Stderr, formatCompilerMessage(markdownPath, "warning",

"Custom workflow-level concurrency with cancel-in-progress: true may cause self-cancellation.\n"+

"safe-outputs.github-app can post comments that re-trigger this workflow via issue_comment,\n"+

"and those passive bot-authored runs can collide with the primary run's concurrency group.\n"+

"Add `contains(github.actor, '[bot]') && github.run_id ||` at the start of your concurrency\n"+

"group expression to route bot-triggered runs to a unique key and prevent self-cancellation.\n"+

"See: https://gh.io/gh-aw/reference/concurrency for details."))

c.IncrementWarningCount()

}

// Emit warning for sandbox.agent: false (disables agent sandbox firewall)

if isAgentSandboxDisabled(workflowData) {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("⚠️ WARNING: Agent sandbox disabled (sandbox.agent: false). This removes firewall protection. The AI agent will have direct network access without firewall filtering. The MCP gateway remains enabled. Only use this for testing or in controlled environments where you trust the AI agent completely."))

c.IncrementWarningCount()

}

// Validate: threat detection requires sandbox.agent to be enabled (detection runs inside AWF)

if workflowData.SafeOutputs != nil && workflowData.SafeOutputs.ThreatDetection != nil && isAgentSandboxDisabled(workflowData) {

return formatCompilerError(markdownPath, "error", "threat detection requires sandbox.agent to be enabled. Threat detection runs inside the agent sandbox (AWF) with fully blocked network. Either enable sandbox.agent or use 'threat-detection: false' to disable the threat-detection configuration in safe-outputs.", errors.New("threat detection requires sandbox.agent"))

}

// Emit warning when assign-to-agent is used with github-app: but no explicit github-token:.

// GitHub App tokens are rejected by the Copilot assignment API — a PAT is required.

// The token fallback chain (GH_AW_AGENT_TOKEN || GH_AW_GITHUB_TOKEN || GITHUB_TOKEN) is used automatically.

if workflowData.SafeOutputs != nil &&

workflowData.SafeOutputs.AssignToAgent != nil &&

workflowData.SafeOutputs.GitHubApp != nil &&

workflowData.SafeOutputs.AssignToAgent.GitHubToken == "" {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage(

"assign-to-agent does not support GitHub App tokens. "+

"The Copilot assignment API requires a fine-grained PAT. "+

"The token fallback chain (GH_AW_AGENT_TOKEN || GH_AW_GITHUB_TOKEN || GITHUB_TOKEN) will be used automatically. "+

"Add github-token: to your assign-to-agent config to specify a different token."))

c.IncrementWarningCount()

}

// Emit experimental warning for rate-limit feature

if workflowData.RateLimit != nil {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Using experimental feature: rate-limit"))

c.IncrementWarningCount()

}

// Emit experimental warning for dispatch_repository feature

if workflowData.SafeOutputs != nil && workflowData.SafeOutputs.DispatchRepository != nil {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Using experimental feature: dispatch_repository"))

c.IncrementWarningCount()

}

// Validate workflow_run triggers have branch restrictions

log.Printf("Validating workflow_run triggers for branch restrictions")

if err := c.validateWorkflowRunBranches(workflowData, markdownPath); err != nil {

return err

}

// Reuse the permissions parsed earlier (workflowPermissions) for all subsequent checks.

// This avoids an additional NewPermissionsParser call here.

cachedPermissions := workflowPermissions

// Validate permissions against GitHub MCP toolsets

log.Printf("Validating permissions for GitHub MCP toolsets")

if workflowData.ParsedTools != nil && workflowData.ParsedTools.GitHub != nil {

// Check if GitHub tool was explicitly configured in frontmatter

// If permissions exist but tools.github was NOT explicitly configured,

// skip validation and let the GitHub MCP server handle permission issues

hasPermissions := workflowData.Permissions != ""

log.Printf("Permission validation check: hasExplicitGitHubTool=%v, hasPermissions=%v",

workflowData.HasExplicitGitHubTool, hasPermissions)

// Skip validation if permissions exist but GitHub tool was auto-added (not explicit)

if hasPermissions && !workflowData.HasExplicitGitHubTool {

log.Printf("Skipping permission validation: permissions exist but tools.github not explicitly configured")

} else {

// Validate permissions using the typed GitHub tool configuration

validationResult := ValidatePermissions(cachedPermissions, workflowData.ParsedTools.GitHub)

if validationResult.HasValidationIssues {

// Format the validation message

message := FormatValidationMessage(validationResult, c.strictMode)

if len(validationResult.MissingPermissions) > 0 {

if c.strictMode {

// In strict mode, missing permissions are errors

return formatCompilerError(markdownPath, "error", message, nil)

} else {

// In non-strict mode, missing permissions are warnings

fmt.Fprintln(os.Stderr, formatCompilerMessage(markdownPath, "warning", message))

c.IncrementWarningCount()

}

// Emit warning if id-token: write permission is detected

log.Printf("Checking for id-token: write permission")

if level, exists := cachedPermissions.Get(PermissionIdToken); exists && level == PermissionWrite {

warningMsg := `This workflow grants id-token: write permission

OIDC tokens can authenticate to cloud providers (AWS, Azure, GCP).

Ensure proper audience validation and trust policies are configured.`

fmt.Fprintln(os.Stderr, formatCompilerMessage(markdownPath, "warning", warningMsg))

c.IncrementWarningCount()

}

// Validate GitHub tools against enabled toolsets

log.Printf("Validating GitHub tools against enabled toolsets")

if workflowData.ParsedTools != nil && workflowData.ParsedTools.GitHub != nil {

// Extract allowed tools and enabled toolsets from ParsedTools

allowedTools := workflowData.ParsedTools.GitHub.Allowed.ToStringSlice()

enabledToolsets := ParseGitHubToolsets(strings.Join(workflowData.ParsedTools.GitHub.Toolset.ToStringSlice(), ","))

// Validate that all allowed tools have their toolsets enabled

if err := ValidateGitHubToolsAgainstToolsets(allowedTools, enabledToolsets); err != nil {

return formatCompilerError(markdownPath, "error", err.Error(), err)

}

// Print informational message if "projects" toolset is explicitly specified

// (not when implied by "all", as users unlikely intend to use projects with "all")

originalToolsets := workflowData.ParsedTools.GitHub.Toolset.ToStringSlice()

if slices.Contains(originalToolsets, "projects") {

fmt.Fprintln(os.Stderr, console.FormatInfoMessage("The 'projects' toolset requires additional authentication."))

fmt.Fprintln(os.Stderr, console.FormatInfoMessage("See: https://github.github.com/gh-aw/reference/auth-projects/"))

}

// Validate permissions for agentic-workflows tool

log.Printf("Validating permissions for agentic-workflows tool")

if _, hasAgenticWorkflows := workflowData.Tools["agentic-workflows"]; hasAgenticWorkflows {

// Check if actions: read permission exists

actionsLevel, hasActions := cachedPermissions.Get(PermissionActions)

if !hasActions || actionsLevel == PermissionNone {

// Missing actions: read permission

message := "ERROR: Missing required permission for agentic-workflows tool:\n"

message += " - actions: read\n\n"

message += "The agentic-workflows tool requires actions: read permission to access GitHub Actions data.\n\n"

message += "Suggested fix: Add the following to your workflow frontmatter:\n"

message += "permissions:\n"

message += " actions: read"

return formatCompilerError(markdownPath, "error", message, nil)

}

// Validate resources field — GitHub Actions expression syntax is not allowed.

log.Printf("Validating resources field")

if workflowData.ParsedFrontmatter != nil {

for _, r := range workflowData.ParsedFrontmatter.Resources {

if strings.Contains(r, "${{") {

return formatCompilerError(markdownPath, "error",

fmt.Sprintf("resources entry %q contains GitHub Actions expression syntax (${{) which is not allowed; use static paths only", r), nil)

}

// Validate dispatch-workflow configuration (independent of agentic-workflows tool)

log.Print("Validating dispatch-workflow configuration")

if err := c.validateDispatchWorkflow(workflowData, markdownPath); err != nil {

return formatCompilerError(markdownPath, "error", fmt.Sprintf("dispatch-workflow validation failed: %v", err), err)

}

// Validate dispatch_repository configuration (independent of agentic-workflows tool)

log.Print("Validating dispatch_repository configuration")

if err := c.validateDispatchRepository(workflowData, markdownPath); err != nil {

return formatCompilerError(markdownPath, "error", fmt.Sprintf("dispatch_repository validation failed: %v", err), err)

}

// Validate call-workflow configuration (independent of agentic-workflows tool)

log.Print("Validating call-workflow configuration")

if err := c.validateCallWorkflow(workflowData, markdownPath); err != nil {

return formatCompilerError(markdownPath, "error", fmt.Sprintf("call-workflow validation failed: %v", err), err)

}

return nil

}

// generateAndValidateYAML generates GitHub Actions YAML and validates

// the output size and format.

func (c *Compiler) generateAndValidateYAML(workflowData *WorkflowData, markdownPath string, lockFile string) (string, []string, []string, error) {

// Generate the YAML content along with the collected body secrets and action refs

// (returned to avoid a second scan of the full YAML in the caller for safe update enforcement).

yamlContent, bodySecrets, bodyActions, err := c.generateYAML(workflowData, markdownPath)

if err != nil {

return "", nil, nil, formatCompilerError(markdownPath, "error", fmt.Sprintf("failed to generate YAML: %v", err), err)

}

// Always validate expression sizes - this is a hard limit from GitHub Actions (21KB)

// that cannot be bypassed, so we validate it unconditionally

log.Print("Validating expression sizes")

if err := c.validateExpressionSizes(yamlContent); err != nil {

// Store error first so we can write invalid YAML before returning

formattedErr := formatCompilerError(markdownPath, "error", fmt.Sprintf("expression size validation failed: %v", err), err)

// Write the invalid YAML to a .invalid.yml file for inspection

invalidFile := strings.TrimSuffix(lockFile, ".lock.yml") + ".invalid.yml"

if writeErr := os.WriteFile(invalidFile, []byte(yamlContent), 0644); writeErr == nil {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Invalid workflow YAML written to: "+console.ToRelativePath(invalidFile)))

}

return "", nil, nil, formattedErr

}

// Template injection validation and GitHub Actions schema validation both require a

// parsed representation of the compiled YAML. Parse it once here and share the

// result between the two validators to avoid redundant yaml.Unmarshal calls.

// Fast-path: if the YAML contains no unsafe context expressions we can skip the

// parse (and template-injection check) entirely for the common case.

needsTemplateCheck := unsafeContextRegex.MatchString(yamlContent)

needsSchemaCheck := !c.skipValidation

var parsedWorkflow map[string]any

if needsTemplateCheck || needsSchemaCheck {

log.Print("Parsing compiled YAML for validation")

if parseErr := yaml.Unmarshal([]byte(yamlContent), &parsedWorkflow); parseErr != nil {

// If parsing fails here the subsequent validators would also fail; keep going

// so we surface the root error from the right validator.

parsedWorkflow = nil

}

// Validate for template injection vulnerabilities - detect unsafe expression usage in run: commands

if needsTemplateCheck {

log.Print("Validating for template injection vulnerabilities")

var templateErr error

if parsedWorkflow != nil {

templateErr = validateNoTemplateInjectionFromParsed(parsedWorkflow)

} else {

templateErr = validateNoTemplateInjection(yamlContent)

}

if templateErr != nil {

// Store error first so we can write invalid YAML before returning

formattedErr := formatCompilerError(markdownPath, "error", templateErr.Error(), templateErr)

// Write the invalid YAML to a .invalid.yml file for inspection

invalidFile := strings.TrimSuffix(lockFile, ".lock.yml") + ".invalid.yml"

if writeErr := os.WriteFile(invalidFile, []byte(yamlContent), 0644); writeErr == nil {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Workflow with template injection risks written to: "+console.ToRelativePath(invalidFile)))

}

return "", nil, nil, formattedErr

}

// Validate against GitHub Actions schema (unless skipped)

if needsSchemaCheck {

log.Print("Validating workflow against GitHub Actions schema")

var schemaErr error

if parsedWorkflow != nil {

schemaErr = c.validateGitHubActionsSchemaFromParsed(parsedWorkflow)

} else {

schemaErr = c.validateGitHubActionsSchema(yamlContent)

}

if schemaErr != nil {

// Try to point at the exact line of the failing field in the source markdown.

// extractSchemaErrorField unwraps the error chain to find the top-level field

// name (e.g. "timeout-minutes"), which findFrontmatterFieldLine then locates in

// the source frontmatter so the error is IDE-navigable.

fieldLine := 1

if fieldName := extractSchemaErrorField(schemaErr); fieldName != "" {

frontmatterLines := strings.Split(workflowData.FrontmatterYAML, "\n")

if line := findFrontmatterFieldLine(frontmatterLines, 2, fieldName); line > 0 {

fieldLine = line

}

// Store error first so we can write invalid YAML before returning

formattedErr := formatCompilerErrorWithPosition(markdownPath, fieldLine, 1, "error",

fmt.Sprintf("invalid workflow: %v", schemaErr), schemaErr)

// Write the invalid YAML to a .invalid.yml file for inspection

invalidFile := strings.TrimSuffix(lockFile, ".lock.yml") + ".invalid.yml"

if writeErr := os.WriteFile(invalidFile, []byte(yamlContent), 0644); writeErr == nil {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Invalid workflow YAML written to: "+console.ToRelativePath(invalidFile)))

}

return "", nil, nil, formattedErr

}

// Validate container images used in MCP configurations

log.Print("Validating container images")

if err := c.validateContainerImages(workflowData); err != nil {

// Treat container image validation failures as warnings, not errors

// This is because validation may fail due to auth issues locally (e.g., private registries)

fmt.Fprintln(os.Stderr, formatCompilerMessage(markdownPath, "warning", fmt.Sprintf("container image validation failed: %v", err)))

c.IncrementWarningCount()

}

// Validate runtime packages (npx, uv)

log.Print("Validating runtime packages")

if err := c.validateRuntimePackages(workflowData); err != nil {

return "", nil, nil, formatCompilerError(markdownPath, "error", fmt.Sprintf("runtime package validation failed: %v", err), err)

}

// Validate firewall configuration (log-level enum)

log.Print("Validating firewall configuration")

if err := c.validateFirewallConfig(workflowData); err != nil {

return "", nil, nil, formatCompilerError(markdownPath, "error", fmt.Sprintf("firewall configuration validation failed: %v", err), err)

}

// Validate repository features (discussions, issues)

log.Print("Validating repository features")

if err := c.validateRepositoryFeatures(workflowData); err != nil {

return "", nil, nil, formatCompilerError(markdownPath, "error", fmt.Sprintf("repository feature validation failed: %v", err), err)

}

} else if c.verbose {

fmt.Fprintln(os.Stderr, console.FormatWarningMessage("Schema validation available but skipped (use SetSkipValidation(false) to enable)"))

c.IncrementWarningCount()

}

return yamlContent, bodySecrets, bodyActions, nil

}

// writeWorkflowOutput writes the compiled workflow to the lock file

// and handles console output formatting.

func (c *Compiler) writeWorkflowOutput(lockFile, yamlContent string, markdownPath string) error {

// Write to lock file (unless noEmit is enabled)

if c.noEmit {

log.Print("Validation completed - no lock file generated (--no-emit enabled)")

} else {

log.Printf("Writing output to: %s", lockFile)

// Check if content has actually changed

contentUnchanged := false

if existingContent, err := os.ReadFile(lockFile); err == nil {

if normalizeHeredocDelimiters(string(existingContent)) == normalizeHeredocDelimiters(yamlContent) {

// Content is identical (modulo random heredoc tokens) - skip write to preserve timestamp

contentUnchanged = true

log.Print("Lock file content unchanged - skipping write to preserve timestamp")

}

// Only write if content has changed

if !contentUnchanged {

if err := os.WriteFile(lockFile, []byte(yamlContent), 0644); err != nil {

return formatCompilerError(lockFile, "error", fmt.Sprintf("failed to write lock file: %v", err), err)

}

log.Print("Lock file written successfully")

}

// Validate file size after writing

if lockFileInfo, err := os.Stat(lockFile); err == nil {

if lockFileInfo.Size() > MaxLockFileSize {

lockSize := console.FormatFileSize(lockFileInfo.Size())

maxSize := console.FormatFileSize(MaxLockFileSize)

warningMsg := fmt.Sprintf("Generated lock file size (%s) exceeds recommended maximum size (%s)", lockSize, maxSize)

fmt.Fprintln(os.Stderr, console.FormatWarningMessage(warningMsg))

}

// Display success message with file size if we generated a lock file (unless quiet mode)

if !c.quiet {

if c.noEmit {

fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(console.ToRelativePath(markdownPath)))

} else {

// Get the size of the generated lock file for display

if lockFileInfo, err := os.Stat(lockFile); err == nil {

lockSize := console.FormatFileSize(lockFileInfo.Size())

fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(fmt.Sprintf("%s (%s)", console.ToRelativePath(markdownPath), lockSize)))

} else {

// Fallback to original display if we can't get file info

fmt.Fprintln(os.Stderr, console.FormatSuccessMessage(console.ToRelativePath(markdownPath)))

}

return nil

}

// CompileWorkflowData compiles pre-parsed workflow content into GitHub Actions YAML.

// Unlike CompileWorkflow, this accepts already-parsed frontmatter and markdown content

// rather than reading from disk. This is useful for testing and programmatic workflow generation.

// The compilation process includes:

// - Validating workflow configuration and features

// - Checking permissions and tool configurations

// - Generating GitHub Actions YAML structure

// - Writing the compiled workflow to a .lock.yml file

// This function avoids re-parsing when workflow data has already been extracted,

// making it efficient for scenarios where the same workflow is compiled multiple times

// or when workflow data comes from a non-file source.

func (c *Compiler) CompileWorkflowData(workflowData *WorkflowData, markdownPath string) error {

// Store markdownPath for use in dynamic tool generation and prompt generation

c.markdownPath = markdownPath

// Track compilation time for performance monitoring

startTime := time.Now()

defer func() {

log.Printf("Compilation completed in %v", time.Since(startTime))

}()

// Reset the step order tracker for this compilation

c.stepOrderTracker = NewStepOrderTracker()

// Reset schedule friendly formats for this compilation

c.scheduleFriendlyFormats = nil

// Reset the artifact manager for this compilation

if c.artifactManager == nil {

c.artifactManager = NewArtifactManager()

} else {

c.artifactManager.Reset()

}

// Generate lock file name

lockFile := stringutil.MarkdownToLockFile(markdownPath)

// Sanitize the lock file path to prevent path traversal attacks

lockFile = filepath.Clean(lockFile)

log.Printf("Starting compilation: %s -> %s", markdownPath, lockFile)

// Read the existing lock file to extract the previous gh-aw-manifest for safe update

// enforcement.

// Priority (highest to lowest):

// 1. Pre-cached manifest supplied by the caller (e.g. MCP server collected at startup

// before any agent interaction, making it tamper-proof without requiring git access).

// 2. Content from the last git commit (HEAD) – prevents a local agent from modifying

// the .lock.yml file on disk to forge an approved manifest.

// 3. Filesystem read – fallback for first-time compilations or non-git environments.

var oldManifest *GHAWManifest

if cached, ok := c.priorManifests[lockFile]; ok {

oldManifest = cached

secretCount := 0

if cached != nil {

secretCount = len(cached.Secrets)

}

log.Printf("Using pre-cached gh-aw-manifest for %s: %d secret(s)", lockFile, secretCount)

} else if committedContent, readErr := gitutil.ReadFileFromHEAD(lockFile); readErr == nil {

if m, parseErr := ExtractGHAWManifestFromLockFile(committedContent); parseErr == nil {

oldManifest = m

if oldManifest != nil {

log.Printf("Loaded committed gh-aw-manifest from HEAD: %d secret(s)", len(oldManifest.Secrets))

}

} else {

log.Printf("Failed to parse committed gh-aw-manifest: %v. Safe update enforcement will proceed without baseline comparison (all secrets will be considered new).", parseErr)

}

} else {

log.Printf("Lock file %s not found in HEAD commit (%v); falling back to filesystem read.", lockFile, readErr)

if existingContent, fsErr := os.ReadFile(lockFile); fsErr == nil {

if m, parseErr := ExtractGHAWManifestFromLockFile(string(existingContent)); parseErr == nil {

oldManifest = m

if oldManifest != nil {

log.Printf("Loaded gh-aw-manifest from filesystem: %d secret(s)", len(oldManifest.Secrets))

}

} else {

log.Printf("Failed to parse filesystem gh-aw-manifest: %v. Safe update enforcement will treat as empty manifest.", parseErr)

}

} else {

log.Printf("Lock file %s not found on filesystem either (new workflow or not yet written). Safe update enforcement will treat as empty manifest.", lockFile)

}

// Validate workflow data

if err := c.validateWorkflowData(workflowData, markdownPath); err != nil {

return err

}

// Note: Markdown content size is now handled by splitting into multiple steps in generatePrompt

log.Printf("Workflow: %s, Tools: %d", workflowData.Name, len(workflowData.Tools))

// Note: compute-text functionality is now inlined directly in the task job

// instead of using a shared action file

// Generate and validate YAML (also embeds the new gh-aw-manifest in the header).

// Returns the collected body secrets and action refs to avoid duplicate scans for

// safe update enforcement.

yamlContent, bodySecrets, bodyActions, err := c.generateAndValidateYAML(workflowData, markdownPath, lockFile)

if err != nil {

return err

}

// Enforce safe update mode: emit a warning prompt (not a hard error) when unapproved

// secrets or action changes are detected. body* vars contain data collected from the

// workflow body only (not the header) to avoid matching the gh-aw-manifest JSON comment.

// Emitting a warning instead of failing allows compilation to succeed so that the lock

// file is written and the agent receives the actionable guidance embedded in the warning.

if c.effectiveSafeUpdate(workflowData) {

if enforceErr := EnforceSafeUpdate(oldManifest, bodySecrets, bodyActions); enforceErr != nil {

warningMsg := buildSafeUpdateWarningPrompt(enforceErr.Error())

c.AddSafeUpdateWarning(warningMsg)

fmt.Fprintln(os.Stderr, formatCompilerMessage(markdownPath, "warning", enforceErr.Error()))

c.IncrementWarningCount()

}

// Write output

return c.writeWorkflowOutput(lockFile, yamlContent, markdownPath)

}

// ParseWorkflowFile parses a markdown workflow file and extracts all necessary data

// extractTopLevelYAMLSection extracts a top-level YAML section from the frontmatter map

// This ensures we only extract keys at the root level, avoiding nested keys with the same name

// parseOnSection parses the "on" section from frontmatter to extract command triggers, reactions, and other events

// generateYAML generates the complete GitHub Actions YAML content

// isActivationJobNeeded determines if the activation job is required

// generateMainJobSteps generates the steps section for the main job

// The original JavaScript code will use the pattern as-is with "g" flags

// validateMarkdownSizeForGitHubActions is no longer used - content is now split into multiple steps

// to handle GitHub Actions script size limits automatically

// func (c *Compiler) validateMarkdownSizeForGitHubActions(content string) error { ... }

// splitContentIntoChunks splits markdown content into chunks that fit within GitHub Actions script size limits

// generatePostSteps generates the post-steps section that runs after AI execution

// generateEngineExecutionSteps uses the new GetExecutionSteps interface method

// generateAgentVersionCapture generates a step that captures the agent version if the engine supports it

// generateCreateAwInfo generates a step that creates aw_info.json with agentic run metadata

// generateOutputCollectionStep generates a step that reads the output file and sets it as a GitHub Actions output

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

compiler.go

Latest commit

History

compiler.go

File metadata and controls