Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions doc/cephfs/mds-journaling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,12 @@ The targeted size of a log segment in terms of number of events is controlled b

.. confval:: mds_log_events_per_segment

The frequency of major segments (noted by the journaling of the latest ``ESubtreeMap``) is controlled by:
The number of minor MDS log segments since the last major segment is controlled by:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MDS log segments since the last major


.. confval:: mds_log_major_segment_event_ratio
.. confval:: mds_log_minor_segments_per_major_segment

When ``mds_log_events_per_segment * mds_log_major_segment_event_ratio``
non-``ESubtreeMap`` events are logged, the MDS will journal a new
``ESubtreeMap``. This is necessary to allow the journal to shrink in size
during the trimming of expired segments.
This controls how often the MDS trims expired log segments (the higher the value, the less
often the MDS updates the journal expiry position for trimming).

The target maximum number of segments is controlled by:

Expand Down
19 changes: 9 additions & 10 deletions src/common/options/mds.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -586,16 +586,6 @@ options:
min: 1
services:
- mds
- name: mds_log_major_segment_event_ratio
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change broke the docs

type: uint
level: advanced
desc: multiple of mds_log_events_per_segment between major segments
default: 12
services:
- mds
min: 1
see_also:
- mds_log_events_per_segment
# segment size for mds log, default to default file_layout_t
- name: mds_log_segment_size
type: size
Expand Down Expand Up @@ -1741,3 +1731,12 @@ options:
- mds
flags:
- runtime
- name: mds_log_minor_segments_per_major_segment
type: uint
level: advanced
desc: number of minor segments per major segment.
long_desc: The number of minor MDS log segments since the last major segment after which a major segment is started/logged.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MDS log segments since the last

default: 16
services:
- mds
min: 8
13 changes: 6 additions & 7 deletions src/mds/Beacon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,16 +320,15 @@ void Beacon::notify_health(MDSRank const *mds)
// Detect MDS_HEALTH_TRIM condition
// Indicates MDS is not trimming promptly
{
const auto log_max_segments = mds->mdlog->get_max_segments();
const auto log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
if (mds->mdlog->get_num_segments() > (size_t)(log_max_segments * log_warn_factor)) {
if (mds->mdlog->is_trim_slow()) {
auto num_segments = mds->mdlog->get_num_segments();
auto max_segments = mds->mdlog->get_max_segments();
CachedStackStringStream css;
*css << "Behind on trimming (" << mds->mdlog->get_num_segments()
<< "/" << log_max_segments << ")";
*css << "Behind on trimming (" << num_segments << "/" << max_segments << ")";

MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv());
m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments());
m.metadata["max_segments"] = stringify(log_max_segments);
m.metadata["num_segments"] = stringify(num_segments);
m.metadata["max_segments"] = stringify(max_segments);
health.metrics.push_back(m);
}
}
Expand Down
28 changes: 19 additions & 9 deletions src/mds/MDLog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ MDLog::MDLog(MDSRank* m)
event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold");
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
pause = g_conf().get_val<bool>("mds_log_pause");
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments");
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this);
}

Expand Down Expand Up @@ -357,14 +358,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c)
ceph_assert(!mds_is_shutting_down);

event_seq++;
events_since_last_major_segment++;

if (auto sb = dynamic_cast<SegmentBoundary*>(le); sb) {
auto ls = _start_new_segment(sb);
if (sb->is_major_segment_boundary()) {
major_segments.insert(ls->seq);
logger->set(l_mdl_segmjr, major_segments.size());
events_since_last_major_segment = 0;
minor_segments_since_last_major_segment = 0;
} else {
++minor_segments_since_last_major_segment;
}
}

Expand Down Expand Up @@ -403,7 +405,7 @@ void MDLog::_segment_upkeep()
uint64_t period = journaler->get_layout_period();
auto ls = get_current_segment();
// start a new segment?
if (events_since_last_major_segment > events_per_segment*major_segment_event_ratio) {
if (minor_segments_since_last_major_segment > minor_segments_per_major_segment) {
dout(10) << __func__ << ": starting new major segment, current " << *ls << dendl;
auto sle = mds->mdcache->create_subtree_map();
_submit_entry(sle, NULL);
Expand Down Expand Up @@ -656,6 +658,10 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq)
}
}

// Returns true when the number of journal segments currently held exceeds
// max_segments scaled by log_warn_factor (the product is truncated to an
// integral count before the comparison).
bool MDLog::is_trim_slow() const {
  const auto warn_threshold = static_cast<size_t>(max_segments * log_warn_factor);
  return segments.size() > warn_threshold;
}

void MDLog::log_trim_upkeep(void) {
dout(10) << dendl;

Expand Down Expand Up @@ -1474,7 +1480,6 @@ void MDLog::_replay_thread()
}
le->set_start_off(pos);

events_since_last_major_segment++;
if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) {
auto seq = sb->get_seq();
if (seq > 0) {
Expand All @@ -1487,7 +1492,9 @@ void MDLog::_replay_thread()
if (sb->is_major_segment_boundary()) {
major_segments.insert(event_seq);
logger->set(l_mdl_segmjr, major_segments.size());
events_since_last_major_segment = 0;
minor_segments_since_last_major_segment = 0;
} else {
++minor_segments_since_last_major_segment;
}
} else {
event_seq++;
Expand Down Expand Up @@ -1618,9 +1625,6 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
if (changed.count("mds_log_events_per_segment")) {
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
}
if (changed.count("mds_log_major_segment_event_ratio")) {
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
}
if (changed.count("mds_log_max_events")) {
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
}
Expand All @@ -1642,4 +1646,10 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
if (changed.count("mds_log_trim_decay_rate")){
log_trim_counter = DecayCounter(g_conf().get_val<double>("mds_log_trim_decay_rate"));
}
if (changed.count("mds_log_warn_factor")) {
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
}
if (changed.count("mds_log_minor_segments_per_major_segment")) {
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
}
}
8 changes: 6 additions & 2 deletions src/mds/MDLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ class MDLog {
// replay state
std::map<inodeno_t, std::set<inodeno_t>> pending_exports;

// beacon needs me too
bool is_trim_slow() const;

protected:
struct PendingEvent {
PendingEvent(LogEvent *e, Context* c, bool f=false) : le(e), fin(c), flush(f) {}
Expand Down Expand Up @@ -302,17 +305,18 @@ class MDLog {
bool debug_subtrees;
std::atomic_uint64_t event_large_threshold; // accessed by submit thread
uint64_t events_per_segment;
uint64_t major_segment_event_ratio;
int64_t max_events;
uint64_t max_segments;
uint64_t minor_segments_per_major_segment;
bool pause;
bool skip_corrupt_events;
bool skip_unbounded_events;

std::set<uint64_t> major_segments;
std::set<LogSegment*> expired_segments;
std::set<LogSegment*> expiring_segments;
uint64_t events_since_last_major_segment = 0;
uint64_t minor_segments_since_last_major_segment = 0;
double log_warn_factor;

// log trimming decay counter
DecayCounter log_trim_counter;
Expand Down