Instead of completely unmarshaling the YAML files, you should only compose them. In YAML terms, this means constructing a node graph without processing it further (usually, the node graph would then be processed into native types). go-yaml provides the type yaml.Node for that, but it is only available in v3 of the package (gopkg.in/yaml.v3).
Once both files are available as nodes, you implement the merge operation directly on them, for example:
package main
import (
"errors"
"gopkg.in/yaml.v3"
"os"
)
// input1 is the first sample document: a top-level mapping whose key "c"
// holds a nested mapping. The indentation inside the raw string is
// significant YAML syntax (spaces only, never tabs).
var input1 = []byte(`
a: b
c:
  d: e
`)

// input2 is the second sample document. It shares the nested mapping key "c"
// with input1 and adds the top-level key "h".
var input2 = []byte(`
c:
  f: g
h: i
`)
// nodesEqual reports whether l and r represent the same YAML value. It is
// used to match mapping keys.
//
// Alias nodes are resolved to the node they refer to before comparing. Nodes
// of different kinds are never equal. Scalars are equal when their Value
// strings are equal (tags and styles are not compared), sequences and
// documents when their children are pairwise equal in order, and mappings
// when they hold the same key/value pairs in any order.
//
// NOTE(review): a node graph in which an alias refers to one of its own
// ancestors is not guarded against and would recurse without bound.
func nodesEqual(l, r *yaml.Node) bool {
	l, r = resolveAlias(l), resolveAlias(r)
	if l.Kind != r.Kind {
		return false
	}
	switch l.Kind {
	case yaml.ScalarNode:
		return l.Value == r.Value
	case yaml.SequenceNode, yaml.DocumentNode:
		if len(l.Content) != len(r.Content) {
			return false
		}
		for i := range l.Content {
			if !nodesEqual(l.Content[i], r.Content[i]) {
				return false
			}
		}
		return true
	case yaml.MappingNode:
		return mappingsEqual(l, r)
	}
	// Unresolvable aliases and nodes of unknown kind compare unequal.
	return false
}

// resolveAlias follows n.Alias while n is an alias node with a target and
// returns the node reached; any other node is returned unchanged.
func resolveAlias(n *yaml.Node) *yaml.Node {
	for n.Kind == yaml.AliasNode && n.Alias != nil {
		n = n.Alias
	}
	return n
}

// mappingsEqual reports whether the mapping nodes l and r contain the same
// key/value pairs, ignoring the order in which the pairs appear.
func mappingsEqual(l, r *yaml.Node) bool {
	if len(l.Content) != len(r.Content) {
		return false
	}
	// Content stores pairs flat: key at even index, value right after it.
	for i := 0; i+1 < len(l.Content); i += 2 {
		matched := false
		for j := 0; j+1 < len(r.Content); j += 2 {
			if nodesEqual(l.Content[i], r.Content[j]) {
				matched = nodesEqual(l.Content[i+1], r.Content[j+1])
				break
			}
		}
		if !matched {
			return false
		}
	}
	return true
}
// recursiveMerge merges the node graph from into the node graph into,
// modifying into in place. from itself is not modified, but nodes appended to
// into are shared with from rather than copied.
//
// Both nodes must be of the same kind:
//   - Mapping nodes are merged key by key. A key present in both has its
//     values merged recursively; a key present only in from is appended to
//     into together with its value.
//   - Sequence nodes are concatenated: the items of from are appended after
//     the items of into.
//   - Document nodes are merged by merging their root nodes.
//
// An error is returned if the kinds differ, if a document has no root node,
// or if the nodes are of any other kind (for example two scalars stored under
// the same key). Errors from nested mappings are prefixed with "at key <key>: ".
func recursiveMerge(from, into *yaml.Node) error {
	if from.Kind != into.Kind {
		return errors.New("cannot merge nodes of different kinds")
	}
	switch from.Kind {
	case yaml.MappingNode:
		// Content stores pairs flat: key at even index, value right after it.
		for i := 0; i+1 < len(from.Content); i += 2 {
			key, value := from.Content[i], from.Content[i+1]
			found := false
			for j := 0; j+1 < len(into.Content); j += 2 {
				if !nodesEqual(key, into.Content[j]) {
					continue
				}
				found = true
				if err := recursiveMerge(value, into.Content[j+1]); err != nil {
					return errors.New("at key " + key.Value + ": " + err.Error())
				}
				break
			}
			if !found {
				into.Content = append(into.Content, key, value)
			}
		}
	case yaml.SequenceNode:
		into.Content = append(into.Content, from.Content...)
	case yaml.DocumentNode:
		if len(from.Content) == 0 || len(into.Content) == 0 {
			return errors.New("cannot merge empty documents")
		}
		// Propagate the result; dropping it would report success for a
		// merge that failed below the document root.
		return recursiveMerge(from.Content[0], into.Content[0])
	default:
		return errors.New("can only merge mapping and sequence nodes")
	}
	return nil
}
// main parses input1 and input2 into node graphs, merges the first into the
// second, and writes the merged document to standard output as YAML. Any
// parse, merge, or encoding error aborts the program with a panic.
func main() {
	var v1, v2 yaml.Node
	if err := yaml.Unmarshal(input1, &v1); err != nil {
		panic(err)
	}
	if err := yaml.Unmarshal(input2, &v2); err != nil {
		panic(err)
	}
	if err := recursiveMerge(&v1, &v2); err != nil {
		panic(err)
	}
	e := yaml.NewEncoder(os.Stdout)
	if err := e.Encode(&v2); err != nil {
		panic(err)
	}
	if err := e.Close(); err != nil {
		panic(err)
	}
}
This code outputs
c:
    f: g
    d: e
h: i
a: b
As you can see, it merges values both at the top level and inside c:. Theoretically, you also need to implement nodesEqual for sequence and mapping nodes, because those can be keys as well, but this feature is rarely used, so you can leave it out if you don't need it. This code also merges sequences by simply concatenating them into a single sequence; this may or may not be what you want. More code is required if you also want to process alias nodes properly.