Skip to content

Commit d9480ee

Browse files
smira and shanduur committed
fix: resolve SideroLink Wireguard endpoint on reconnect
It's not recommended to use a DNS name for the Wireguard endpoint, as the in-kernel Wireguard implementation works only with IP addresses, so either way DNS resolution happens outside of any Wireguard networking operations. Previously, the name was resolved at the moment the Wireguard config was applied to the Linux kernel, but a SideroLink reconnect would not trigger Wireguard reconfiguration, as there is no change to the spec if a hostname is used (even if it now resolves to a different IP). With this change, on each SideroLink reconnect attempt the name is resolved to an IP address, so the Wireguard config actually triggers a change/reconfiguration if the DNS name now resolves to a new IP. Co-authored-by: Mateusz Urbanek <mateusz.urbanek@siderolabs.com> Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com> (cherry picked from commit 308c750)
1 parent e16c2d5 commit d9480ee

File tree

4 files changed

+140
-101
lines changed

4 files changed

+140
-101
lines changed

internal/app/machined/pkg/controllers/siderolink/config_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ type ConfigSuite struct {
2929
}
3030

3131
func TestConfigSuite(t *testing.T) {
32+
t.Parallel()
33+
3234
suite.Run(t, &ConfigSuite{
3335
DefaultSuite: ctest.DefaultSuite{
3436
AfterSetup: func(suite *ctest.DefaultSuite) {

internal/app/machined/pkg/controllers/siderolink/manager.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"crypto/tls"
1111
"errors"
1212
"fmt"
13+
"net"
1314
"net/netip"
1415
"os"
1516
"time"
@@ -212,9 +213,17 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
212213
return errors.New("host returned no endpoints")
213214
}
214215

216+
// in case the endpoint is a hostname, resolve it to an IP address each time we reconnect
217+
// if the IP behind the DNS name changes, it will trigger a change in the LinkSpec, and
218+
// it will update the Wireguard peer endpoint accordingly
219+
resolvedEndpoint, err := net.ResolveUDPAddr("udp", ep)
220+
if err != nil {
221+
return fmt.Errorf("error resolving endpoint %q: %w", ep, err)
222+
}
223+
215224
logger.Info(
216225
"configuring siderolink connection",
217-
zap.String("peer_endpoint", ep),
226+
zap.String("peer_endpoint", resolvedEndpoint.String()),
218227
zap.String("next_peer_endpoint", ctrl.pd.PeekNextEndpoint()),
219228
)
220229

@@ -244,7 +253,7 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
244253
Peers: []network.WireguardPeer{
245254
{
246255
PublicKey: ctrl.pd.ServerPublicKey,
247-
Endpoint: ep,
256+
Endpoint: resolvedEndpoint.String(),
248257
AllowedIPs: []netip.Prefix{
249258
netip.PrefixFrom(serverAddress, serverAddress.BitLen()),
250259
},

internal/app/machined/pkg/controllers/siderolink/manager_test.go

Lines changed: 125 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ import (
1010
"net"
1111
"net/netip"
1212
"testing"
13-
"time"
1413

14+
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
1515
"github.com/cosi-project/runtime/pkg/state"
1616
"github.com/siderolabs/go-pointer"
1717
"github.com/siderolabs/go-procfs/procfs"
18-
"github.com/siderolabs/go-retry/retry"
1918
pb "github.com/siderolabs/siderolink/api/siderolink"
19+
"github.com/stretchr/testify/assert"
2020
"github.com/stretchr/testify/suite"
2121
"google.golang.org/grpc"
2222

@@ -33,31 +33,13 @@ import (
3333
)
3434

3535
func TestManagerSuite(t *testing.T) {
36+
t.Parallel()
37+
3638
if fipsmode.Strict() {
3739
t.Skip("skipping test in strict FIPS mode")
3840
}
3941

40-
var m ManagerSuite
41-
42-
m.AfterSetup = func(suite *ctest.DefaultSuite) {
43-
lis, err := (&net.ListenConfig{}).Listen(suite.Ctx(), "tcp", "localhost:0")
44-
suite.Require().NoError(err)
45-
46-
m.s = grpc.NewServer()
47-
pb.RegisterProvisionServiceServer(m.s, mockServer{})
48-
49-
go func() {
50-
suite.Require().NoError(m.s.Serve(lis))
51-
}()
52-
53-
cmdline := procfs.NewCmdline(fmt.Sprintf("%s=%s", constants.KernelParamSideroLink, lis.Addr().String()))
54-
configController := siderolinkctrl.ConfigController{Cmdline: cmdline}
55-
56-
suite.Require().NoError(suite.Runtime().RegisterController(&siderolinkctrl.ManagerController{}))
57-
suite.Require().NoError(suite.Runtime().RegisterController(&configController))
58-
}
59-
60-
suite.Run(t, &m)
42+
suite.Run(t, &ManagerSuite{})
6143
}
6244

6345
type ManagerSuite struct {
@@ -68,116 +50,160 @@ type ManagerSuite struct {
6850

6951
type mockServer struct {
7052
pb.UnimplementedProvisionServiceServer
53+
54+
suite *ManagerSuite
55+
endpoints []string
7156
}
7257

7358
const (
74-
mockServerEndpoint = "127.0.0.11:51820"
59+
mockNodeUUID = "71233efd-7a07-43f8-b6ba-da90fae0e88b"
60+
mockUniqueToken = "random-token"
61+
mockServerEndpoint1 = "127.0.0.11:51820"
62+
mockServerEndpoint2 = "localhost:51821"
7563
mockServerAddress = "fdae:41e4:649b:9303:b6db:d99c:215e:dfc4"
7664
mockServerPublicKey = "2aq/V91QyrHAoH24RK0bldukgo2rWk+wqE5Eg6TArCM="
7765
mockNodeAddressPrefix = "fdae:41e4:649b:9303:2a07:9c7:5b08:aef7/64"
7866
)
7967

80-
func (srv mockServer) Provision(_ context.Context, _ *pb.ProvisionRequest) (*pb.ProvisionResponse, error) {
68+
func (srv mockServer) Provision(_ context.Context, req *pb.ProvisionRequest) (*pb.ProvisionResponse, error) {
69+
srv.suite.Assert().Equal(mockNodeUUID, req.GetNodeUuid())
70+
srv.suite.Assert().Empty(req.GetJoinToken())
71+
srv.suite.Assert().False(req.GetWireguardOverGrpc())
72+
srv.suite.Assert().Equal(mockUniqueToken, req.GetNodeUniqueToken())
73+
8174
return &pb.ProvisionResponse{
82-
ServerEndpoint: pb.MakeEndpoints(mockServerEndpoint),
75+
ServerEndpoint: pb.MakeEndpoints(srv.endpoints...),
8376
ServerAddress: mockServerAddress,
8477
ServerPublicKey: mockServerPublicKey,
8578
NodeAddressPrefix: mockNodeAddressPrefix,
8679
}, nil
8780
}
8881

89-
func (suite *ManagerSuite) TestReconcile() {
82+
func (suite *ManagerSuite) initialSetup(endpoints ...string) {
83+
lis, err := (&net.ListenConfig{}).Listen(suite.Ctx(), "tcp", "localhost:0")
84+
suite.Require().NoError(err)
85+
86+
suite.s = grpc.NewServer()
87+
pb.RegisterProvisionServiceServer(suite.s, mockServer{
88+
suite: suite,
89+
endpoints: endpoints,
90+
})
91+
92+
suite.T().Cleanup(suite.s.Stop)
93+
94+
go func() {
95+
suite.Require().NoError(suite.s.Serve(lis))
96+
}()
97+
98+
cmdline := procfs.NewCmdline(fmt.Sprintf("%s=%s", constants.KernelParamSideroLink, lis.Addr().String()))
99+
configController := siderolinkctrl.ConfigController{Cmdline: cmdline}
100+
101+
suite.Require().NoError(suite.Runtime().RegisterController(&siderolinkctrl.ManagerController{}))
102+
suite.Require().NoError(suite.Runtime().RegisterController(&configController))
103+
90104
networkStatus := network.NewStatus(network.NamespaceName, network.StatusID)
91105
networkStatus.TypedSpec().AddressReady = true
92-
93-
suite.Require().NoError(suite.State().Create(suite.Ctx(), networkStatus))
106+
suite.Create(networkStatus)
94107

95108
systemInformation := hardware.NewSystemInformation(hardware.SystemInformationID)
96-
systemInformation.TypedSpec().UUID = "71233efd-7a07-43f8-b6ba-da90fae0e88b"
97-
98-
suite.Require().NoError(suite.State().Create(suite.Ctx(), systemInformation))
109+
systemInformation.TypedSpec().UUID = mockNodeUUID
110+
suite.Create(systemInformation)
99111

100112
uniqToken := runtime.NewUniqueMachineToken()
101-
uniqToken.TypedSpec().Token = "random-token"
113+
uniqToken.TypedSpec().Token = mockUniqueToken
114+
suite.Create(uniqToken)
115+
}
102116

103-
suite.Require().NoError(suite.State().Create(suite.Ctx(), uniqToken))
117+
func (suite *ManagerSuite) TestReconcile() {
118+
suite.initialSetup(mockServerEndpoint1)
104119

105120
nodeAddress := netip.MustParsePrefix(mockNodeAddressPrefix)
106121

107-
addressSpec := network.NewAddressSpec(network.ConfigNamespaceName, network.LayeredID(network.ConfigOperator, network.AddressID(constants.SideroLinkName, nodeAddress)))
108-
linkSpec := network.NewLinkSpec(network.ConfigNamespaceName, network.LayeredID(network.ConfigOperator, network.LinkID(constants.SideroLinkName)))
109-
110-
suite.AssertWithin(10*time.Second, 100*time.Millisecond, func() error {
111-
addressResource, err := ctest.Get[*network.AddressSpec](suite, addressSpec.Metadata())
112-
if err != nil {
113-
if state.IsNotFoundError(err) {
114-
return retry.ExpectedError(err)
115-
}
116-
117-
return err
118-
}
119-
120-
address := addressResource.TypedSpec()
121-
122-
suite.Assert().Equal(nodeAddress, address.Address)
123-
suite.Assert().Equal(network.ConfigOperator, address.ConfigLayer)
124-
suite.Assert().Equal(nethelpers.FamilyInet6, address.Family)
125-
suite.Assert().Equal(constants.SideroLinkName, address.LinkName)
126-
127-
linkResource, err := ctest.Get[*network.LinkSpec](suite, linkSpec.Metadata())
128-
if err != nil {
129-
if state.IsNotFoundError(err) {
130-
return retry.ExpectedError(err)
131-
}
132-
133-
return err
134-
}
135-
136-
link := linkResource.TypedSpec()
137-
138-
suite.Assert().Equal("wireguard", link.Kind)
139-
suite.Assert().Equal(network.ConfigOperator, link.ConfigLayer)
140-
suite.Assert().NotEmpty(link.Wireguard.PrivateKey)
141-
suite.Assert().Len(link.Wireguard.Peers, 1)
142-
suite.Assert().Equal(mockServerEndpoint, link.Wireguard.Peers[0].Endpoint)
143-
suite.Assert().Equal(mockServerPublicKey, link.Wireguard.Peers[0].PublicKey)
144-
suite.Assert().Equal(
145-
[]netip.Prefix{
146-
netip.PrefixFrom(
147-
netip.MustParseAddr(mockServerAddress),
148-
128,
149-
),
150-
}, link.Wireguard.Peers[0].AllowedIPs,
151-
)
152-
suite.Assert().Equal(
153-
constants.SideroLinkDefaultPeerKeepalive,
154-
link.Wireguard.Peers[0].PersistentKeepaliveInterval,
155-
)
156-
157-
return nil
158-
})
122+
ctest.AssertResource(suite,
123+
network.LayeredID(network.ConfigOperator, network.AddressID(constants.SideroLinkName, nodeAddress)),
124+
func(r *network.AddressSpec, asrt *assert.Assertions) {
125+
address := r.TypedSpec()
126+
127+
asrt.Equal(nodeAddress, address.Address)
128+
asrt.Equal(network.ConfigOperator, address.ConfigLayer)
129+
asrt.Equal(nethelpers.FamilyInet6, address.Family)
130+
asrt.Equal(constants.SideroLinkName, address.LinkName)
131+
},
132+
rtestutils.WithNamespace(network.ConfigNamespaceName),
133+
)
134+
135+
ctest.AssertResource(suite,
136+
network.LayeredID(network.ConfigOperator, network.LinkID(constants.SideroLinkName)),
137+
func(r *network.LinkSpec, asrt *assert.Assertions) {
138+
link := r.TypedSpec()
139+
140+
asrt.Equal("wireguard", link.Kind)
141+
asrt.Equal(network.ConfigOperator, link.ConfigLayer)
142+
asrt.NotEmpty(link.Wireguard.PrivateKey)
143+
asrt.Len(link.Wireguard.Peers, 1)
144+
asrt.Equal(mockServerEndpoint1, link.Wireguard.Peers[0].Endpoint)
145+
asrt.Equal(mockServerPublicKey, link.Wireguard.Peers[0].PublicKey)
146+
asrt.Equal(
147+
[]netip.Prefix{
148+
netip.PrefixFrom(
149+
netip.MustParseAddr(mockServerAddress),
150+
128,
151+
),
152+
}, link.Wireguard.Peers[0].AllowedIPs,
153+
)
154+
asrt.Equal(
155+
constants.SideroLinkDefaultPeerKeepalive,
156+
link.Wireguard.Peers[0].PersistentKeepaliveInterval,
157+
)
158+
},
159+
rtestutils.WithNamespace(network.ConfigNamespaceName),
160+
)
159161

160162
// remove config
161163
configPtr := siderolink.NewConfig(config.NamespaceName, siderolink.ConfigID).Metadata()
162164
destroyErr := suite.State().Destroy(suite.Ctx(), configPtr,
163165
state.WithDestroyOwner(pointer.To(siderolinkctrl.ConfigController{}).Name()))
164166
suite.Require().NoError(destroyErr)
165167

166-
suite.AssertWithin(10*time.Second, 100*time.Millisecond, func() error {
167-
_, err := ctest.Get[*network.LinkSpec](suite, linkSpec.Metadata())
168-
if err == nil {
169-
return retry.ExpectedErrorf("link resource still exists")
170-
}
168+
ctest.AssertNoResource[*network.LinkSpec](suite,
169+
network.LayeredID(network.ConfigOperator, network.LinkID(constants.SideroLinkName)),
170+
rtestutils.WithNamespace(network.ConfigNamespaceName),
171+
)
171172

172-
suite.Assert().Truef(state.IsNotFoundError(err), "unexpected error: %v", err)
173+
ctest.AssertNoResource[*network.AddressSpec](suite,
174+
network.LayeredID(network.ConfigOperator, network.AddressID(constants.SideroLinkName, nodeAddress)),
175+
rtestutils.WithNamespace(network.ConfigNamespaceName),
176+
)
177+
}
173178

174-
_, err = ctest.Get[*network.AddressSpec](suite, addressSpec.Metadata())
175-
if err == nil {
176-
return retry.ExpectedErrorf("address resource still exists")
177-
}
179+
func (suite *ManagerSuite) TestMultipleEndpoints() {
180+
suite.initialSetup(mockServerEndpoint1, mockServerEndpoint2)
178181

179-
suite.Assert().Truef(state.IsNotFoundError(err), "unexpected error: %v", err)
182+
ctest.AssertResource(suite,
183+
network.LayeredID(network.ConfigOperator, network.LinkID(constants.SideroLinkName)),
184+
func(r *network.LinkSpec, asrt *assert.Assertions) {
185+
link := r.TypedSpec()
180186

181-
return nil
182-
})
187+
asrt.Len(link.Wireguard.Peers, 1)
188+
// Talos should pick the first endpoint from the list.
189+
asrt.Equal(mockServerEndpoint1, link.Wireguard.Peers[0].Endpoint)
190+
},
191+
rtestutils.WithNamespace(network.ConfigNamespaceName),
192+
)
193+
}
194+
195+
func (suite *ManagerSuite) TestResolveEndpoints() {
196+
suite.initialSetup(mockServerEndpoint2)
197+
198+
ctest.AssertResource(suite,
199+
network.LayeredID(network.ConfigOperator, network.LinkID(constants.SideroLinkName)),
200+
func(r *network.LinkSpec, asrt *assert.Assertions) {
201+
link := r.TypedSpec()
202+
203+
asrt.Len(link.Wireguard.Peers, 1)
204+
// Talos should resolve the hostname to an IP address.
205+
asrt.Equal("127.0.0.1:51821", link.Wireguard.Peers[0].Endpoint)
206+
},
207+
rtestutils.WithNamespace(network.ConfigNamespaceName),
208+
)
183209
}

internal/app/machined/pkg/controllers/siderolink/status_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ type StatusSuite struct {
2727
}
2828

2929
func TestStatusSuite(t *testing.T) {
30+
t.Parallel()
31+
3032
suite.Run(t, &StatusSuite{
3133
DefaultSuite: ctest.DefaultSuite{
3234
Timeout: 3 * time.Second,

0 commit comments

Comments
 (0)