Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1084,6 +1084,7 @@ if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT)
${nvmeof_monitor_grpc_hdrs}
ceph_nvmeof_monitor_client.cc
nvmeof/NVMeofGwClient.cc
nvmeof/NVMeofGwUtils.cc
nvmeof/NVMeofGwMonitorGroupClient.cc
nvmeof/NVMeofGwMonitorClient.cc)
add_executable(ceph-nvmeof-monitor-client ${ceph_nvmeof_monitor_client_srcs})
Expand Down
3 changes: 2 additions & 1 deletion src/include/ceph_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overl
DEFINE_CEPH_FEATURE(49, 2, SERVER_SQUID);
DEFINE_CEPH_FEATURE_RETIRED(50, 1, MON_METADATA, MIMIC, OCTOPUS)
DEFINE_CEPH_FEATURE(50, 2, SERVER_TENTACLE);
DEFINE_CEPH_FEATURE_RETIRED(51, 1, OSD_BITWISE_HOBJ_SORT, MIMIC, OCTOPUS)
DEFINE_CEPH_FEATURE(51, 2, NVMEOF_BEACON_DIFF)
// available
DEFINE_CEPH_FEATURE_RETIRED(52, 1, OSD_PROXY_WRITE_FEATURES, MIMIC, OCTOPUS)
// available
Expand Down Expand Up @@ -258,6 +258,7 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client-
CEPH_FEATUREMASK_SERVER_REEF | \
CEPH_FEATUREMASK_SERVER_SQUID | \
CEPH_FEATUREMASK_SERVER_TENTACLE | \
CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF | \
0ULL)

#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
Expand Down
36 changes: 27 additions & 9 deletions src/messages/MNVMeofGwBeacon.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@
#include "mon/MonCommand.h"
#include "mon/NVMeofGwMap.h"
#include "include/types.h"
#include "mon/NVMeofGwBeaconConstants.h"

class MNVMeofGwBeacon final : public PaxosServiceMessage {
private:
static constexpr int HEAD_VERSION = 1;
static constexpr int COMPAT_VERSION = 1;

protected:
std::string gw_id;
Expand All @@ -36,10 +34,13 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage {
gw_availability_t availability; // in absence of beacon heartbeat messages it becomes inavailable
epoch_t last_osd_epoch;
epoch_t last_gwmap_epoch;
uint64_t sequence = 0; // sequence number for each beacon message
uint64_t affected_features = 0;

public:
MNVMeofGwBeacon()
: PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION}
: PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, BEACON_VERSION_ENHANCED,
BEACON_VERSION_ENHANCED}, sequence(0)
{
set_priority(CEPH_MSG_PRIO_HIGH);
}
Expand All @@ -50,11 +51,17 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage {
const BeaconSubsystems& subsystems_,
const gw_availability_t& availability_,
const epoch_t& last_osd_epoch_,
const epoch_t& last_gwmap_epoch_
)
: PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION},
const epoch_t& last_gwmap_epoch_,
uint64_t sequence_ = 0, // default sequence for backward compatibility
uint64_t features = 0)
: PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON,
0,
features ? BEACON_VERSION_ENHANCED : BEACON_VERSION_LEGACY,
features ? BEACON_VERSION_ENHANCED : BEACON_VERSION_LEGACY},
gw_id(gw_id_), gw_pool(gw_pool_), gw_group(gw_group_), subsystems(subsystems_),
availability(availability_), last_osd_epoch(last_osd_epoch_), last_gwmap_epoch(last_gwmap_epoch_)
availability(availability_), last_osd_epoch(last_osd_epoch_),
last_gwmap_epoch(last_gwmap_epoch_), sequence(sequence_),
affected_features(features)
{
set_priority(CEPH_MSG_PRIO_HIGH);
}
Expand All @@ -78,6 +85,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage {
const epoch_t& get_last_osd_epoch() const { return last_osd_epoch; }
const epoch_t& get_last_gwmap_epoch() const { return last_gwmap_epoch; }
const BeaconSubsystems& get_subsystems() const { return subsystems; };
uint64_t get_sequence() const { return sequence; }

private:
~MNVMeofGwBeacon() final {}
Expand All @@ -92,10 +100,14 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage {
encode(gw_id, payload);
encode(gw_pool, payload);
encode(gw_group, payload);
encode(subsystems, payload);
encode(subsystems, payload, affected_features);
encode((uint32_t)availability, payload);
encode(last_osd_epoch, payload);
encode(last_gwmap_epoch, payload);
// Only encode sequence for enhanced beacons (version >= 2)
if (get_header().version >= 2) {
encode(sequence, payload);
}
}

void decode_payload() override {
Expand All @@ -112,6 +124,12 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage {
availability = static_cast<gw_availability_t>(tmp);
decode(last_osd_epoch, p);
decode(last_gwmap_epoch, p);
// Only decode sequence for enhanced beacons (version >= 2)
if (get_header().version >= 2 && !p.end()) {
decode(sequence, p);
} else {
sequence = 0; // Legacy beacons don't have sequence field
}
}

private:
Expand Down
6 changes: 6 additions & 0 deletions src/mon/MonClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,12 @@ class MonClient : public Dispatcher,
return std::forward<Callback>(cb)(monmap, std::forward<Args>(args)...);
}

mon_feature_t get_monmap_required_features() {
return with_monmap([](const auto &monmap) {
return monmap.get_required_features();
});
}

void register_config_callback(md_config_t::config_callback fn);
void register_config_notify_callback(std::function<void(void)> f) {
config_notify_cb = f;
Expand Down
20 changes: 20 additions & 0 deletions src/mon/Monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,9 @@ CompatSet Monitor::get_supported_features()
compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_REEF);
compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_SQUID);
compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_TENTACLE);

// Release-independent features
compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_NVMEOF_BEACON_DIFF);
return compat;
}

Expand Down Expand Up @@ -2573,6 +2576,16 @@ void Monitor::apply_monmap_to_compatset_features()
new_features.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_TENTACLE);
}


// Release-independent features
if (monmap_features.contains_all(ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) {
ceph_assert(ceph::features::mon::get_persistent().contains_all(
ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF));
// this feature should only ever be set if the quorum supports it.
ceph_assert(HAVE_FEATURE(quorum_con_features, NVMEOF_BEACON_DIFF));
new_features.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_NVMEOF_BEACON_DIFF);
}

dout(5) << __func__ << dendl;
_apply_compatset_features(new_features);
}
Expand Down Expand Up @@ -2635,6 +2648,13 @@ void Monitor::calc_quorum_requirements()
required_features |= CEPH_FEATUREMASK_SERVER_NAUTILUS |
CEPH_FEATUREMASK_CEPHX_V2;
}

// Release-independent features
if (monmap->get_required_features().contains_all(
ceph::features::mon::FEATURE_NVMEOF_BEACON_DIFF)) {
required_features |= CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF;
}

dout(10) << __func__ << " required_features " << required_features << dendl;
}

Expand Down
3 changes: 3 additions & 0 deletions src/mon/Monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,9 @@ class Monitor : public Dispatcher,
#define CEPH_MON_FEATURE_INCOMPAT_REEF CompatSet::Feature(15, "reef ondisk layout")
#define CEPH_MON_FEATURE_INCOMPAT_SQUID CompatSet::Feature(16, "squid ondisk layout")
#define CEPH_MON_FEATURE_INCOMPAT_TENTACLE CompatSet::Feature(17, "tentacle ondisk layout")

// Release-independent features
#define CEPH_MON_FEATURE_INCOMPAT_NVMEOF_BEACON_DIFF CompatSet::Feature(32, "nvmeof beacon diff")
// make sure you add your feature to Monitor::get_supported_features


Expand Down
24 changes: 24 additions & 0 deletions src/mon/NVMeofGwBeaconConstants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2025 IBM, Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*/

#ifndef CEPH_NVMEOFGWBEACONCONSTANTS_H
#define CEPH_NVMEOFGWBEACONCONSTANTS_H

// This header contains version constants used across multiple files
// to avoid duplication and maintain consistency.

// Beacon version constants
#define BEACON_VERSION_LEGACY 1 // Legacy beacon format (no diff support)
#define BEACON_VERSION_ENHANCED 2 // Enhanced beacon format (with diff support)

#endif /* CEPH_NVMEOFGWBEACONCONSTANTS_H */
83 changes: 72 additions & 11 deletions src/mon/NVMeofGwMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ void NVMeofGwMap::to_gmap(
}

auto gw_state = NvmeGwClientState(
gw_created.ana_grp_id, epoch, availability);
gw_created.ana_grp_id, epoch, availability, gw_created.beacon_sequence,
gw_created.beacon_sequence_ooo);
for (const auto& sub: gw_created.subsystems) {
gw_state.subsystems.insert({
sub.nqn,
Expand Down Expand Up @@ -82,10 +83,10 @@ void NVMeofGwMap::remove_grp_id(
}

int NVMeofGwMap::cfg_add_gw(
const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
const NvmeGwId &gw_id, const NvmeGroupKey& group_key, uint64_t features)
{
std::set<NvmeAnaGrpId> allocated;
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
if (HAVE_FEATURE(features, NVMEOFHAMAP)) {
auto gw_epoch_it = gw_epoch.find(group_key);
if (gw_epoch_it == gw_epoch.end()) {
gw_epoch[group_key] = epoch;
Expand Down Expand Up @@ -176,11 +177,16 @@ int NVMeofGwMap::cfg_delete_gw(
for (auto& gws_states: created_gws[group_key]) {
if (gws_states.first == gw_id) {
auto& state = gws_states.second;
if (state.availability == gw_availability_t::GW_AVAILABLE) {
/*prevent failover because blocklisting right now cause IO errors */
dout(4) << "Delete GW: set skip-failovers for group " << gw_id
<< " group " << group_key << dendl;
skip_failovers_for_group(group_key, 5);
}
state.availability = gw_availability_t::GW_DELETING;
dout(4) << " Deleting GW :"<< gw_id << " in state "
<< state.availability << " Resulting GW availability: "
<< state.availability << dendl;
state.subsystems.clear();//ignore subsystems of this GW
utime_t now = ceph_clock_now();
mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
return 0;
Expand Down Expand Up @@ -360,10 +366,16 @@ void NVMeofGwMap::track_deleting_gws(const NvmeGroupKey& group_key,
}
}

void NVMeofGwMap::skip_failovers_for_group(const NvmeGroupKey& group_key)
void NVMeofGwMap::skip_failovers_for_group(const NvmeGroupKey& group_key,
int interval_sec)
{
const auto skip_failovers = g_conf().get_val<std::chrono::seconds>
("mon_nvmeofgw_skip_failovers_interval");
std::chrono::seconds skip_failovers;
if (interval_sec == 0) {
skip_failovers = g_conf().get_val<std::chrono::seconds>
("mon_nvmeofgw_skip_failovers_interval");
} else {
skip_failovers = std::chrono::seconds(interval_sec);
}
for (auto& gw_created: created_gws[group_key]) {
gw_created.second.allow_failovers_ts = std::chrono::system_clock::now()
+ skip_failovers;
Expand Down Expand Up @@ -408,6 +420,7 @@ int NVMeofGwMap::process_gw_map_gw_down(
auto& st = gw_state->second;
st.set_unavailable_state();
st.set_last_gw_down_ts();
st.reset_beacon_sequence();
for (auto& state_itr: created_gws[group_key][gw_id].sm_state) {
fsm_handle_gw_down(
gw_id, group_key, state_itr.second,
Expand Down Expand Up @@ -1063,14 +1076,23 @@ int NVMeofGwMap::blocklist_gw(
// find_already_created_gw(gw_id, group_key);
NvmeGwMonState& gw_map = created_gws[group_key][gw_id];
NvmeNonceVector nonces;

NvmeAnaNonceMap nonce_map;
for (const auto& sub: gw_map.subsystems) { // recreate nonce map from subsystems
for (const auto& ns: sub.namespaces) {
auto& nonce_vec = nonce_map[ns.anagrpid-1]; //Converting ana groups to offsets
if (std::find(nonce_vec.begin(), nonce_vec.end(), ns.nonce) == nonce_vec.end())
nonce_vec.push_back(ns.nonce);
}
}
for (auto& state_itr: gw_map.sm_state) {
// to make blocklist on all clusters of the failing GW
nonces.insert(nonces.end(), gw_map.nonce_map[state_itr.first].begin(),
gw_map.nonce_map[state_itr.first].end());
nonces.insert(nonces.end(), nonce_map[state_itr.first].begin(),
nonce_map[state_itr.first].end());
}

gw_map.subsystems.clear();
if (nonces.size() > 0) {
NvmeNonceVector &nonce_vector = gw_map.nonce_map[grpid];;
NvmeNonceVector &nonce_vector = nonces;
std::string str = "[";
entity_addrvec_t addr_vect;

Expand Down Expand Up @@ -1144,6 +1166,45 @@ void NVMeofGwMap::validate_gw_map(const NvmeGroupKey& group_key)
}
}

bool NVMeofGwMap::put_gw_beacon_sequence_number(const NvmeGwId &gw_id,
int gw_version, const NvmeGroupKey& group_key,
uint64_t beacon_sequence, uint64_t& old_sequence)
{
bool rc = true;
NvmeGwMonState& gw_map = created_gws[group_key][gw_id];

if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) ||
(gw_version > 0) ) {
uint64_t seq_number = gw_map.beacon_sequence;
if ((beacon_sequence != seq_number+1) &&
!(beacon_sequence == 0 && seq_number == 0 )) {// new GW startup
rc = false;
old_sequence = seq_number;
dout(4) << "Warning: GW " << gw_id
<< " sent beacon sequence out of order, expected "
<< seq_number +1 << " received " << beacon_sequence << dendl;
gw_map.beacon_sequence_ooo = true;
} else {
gw_map.beacon_sequence = beacon_sequence;
}
}
return rc;
}

bool NVMeofGwMap::set_gw_beacon_sequence_number(const NvmeGwId &gw_id,
int gw_version, const NvmeGroupKey& group_key, uint64_t beacon_sequence)
{
NvmeGwMonState& gw_map = created_gws[group_key][gw_id];
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF) ||
(gw_version > 0)) {
gw_map.beacon_sequence = beacon_sequence;
gw_map.beacon_sequence_ooo = false;
dout(10) << gw_id << " set beacon_sequence " << beacon_sequence << dendl;
}
return true;
}


void NVMeofGwMap::update_active_timers(bool &propose_pending)
{
const auto now = std::chrono::system_clock::now();
Expand Down
13 changes: 10 additions & 3 deletions src/mon/NVMeofGwMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ class NVMeofGwMap
void to_gmap(std::map<NvmeGroupKey, NvmeGwMonClientStates>& Gmap) const;
void track_deleting_gws(const NvmeGroupKey& group_key,
const BeaconSubsystems& subs, bool &propose_pending);
int cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
int cfg_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
void check_all_gws_in_deleting_state(const NvmeGwId &gw_id,
const NvmeGroupKey& group_key);
int cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
uint64_t features);
int cfg_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
void process_gw_map_ka(
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
epoch_t& last_osd_epoch, bool &propose_pending);
Expand All @@ -92,7 +93,13 @@ class NVMeofGwMap
const NvmeGroupKey& group_key, bool &propose_pending);
void set_addr_vect(const NvmeGwId &gw_id,
const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
void skip_failovers_for_group(const NvmeGroupKey& group_key);
void skip_failovers_for_group(const NvmeGroupKey& group_key,
int interval_sec = 0);
bool put_gw_beacon_sequence_number(const NvmeGwId &gw_id, int gw_version,
const NvmeGroupKey& group_key, uint64_t beacon_sequence,
uint64_t& old_sequence);
bool set_gw_beacon_sequence_number(const NvmeGwId &gw_id, int gw_version,
const NvmeGroupKey& group_key, uint64_t beacon_sequence);
private:
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
int do_erase_gw_id(const NvmeGwId &gw_id,
Expand Down
Loading
Loading