Skip to content

Commit 4f8acb4

Browse files
authored
Make CodepointWidthDetector::GetWidth faster (#3727)
This is a subset of #3578 which I think is harmless and the first step towards making things right. References #3546 #3578 ## Detailed Description of the Pull Request / Additional comments For more robust Unicode support, `CodepointWidthDetector` should provide concrete width information rather than a simple boolean of `IsWide`. Currently only `IsWide` is widely used and optimized using quick lookup table and fallback cache. This PR moves those optimization into `GetWidth`. ## Validation Steps Performed API remains unchanged. Things are not broken.
1 parent 9421553 commit 4f8acb4

2 files changed

Lines changed: 68 additions & 52 deletions

File tree

src/types/CodepointWidthDetector.cpp

Lines changed: 66 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -319,29 +319,47 @@ CodepointWidthDetector::CodepointWidthDetector() noexcept :
319319
}
320320

321321
// Routine Description:
322-
// - returns the width type of codepoint by searching the map generated from the unicode spec
322+
// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache.
323323
// Arguments:
324324
// - glyph - the utf16 encoded codepoint to search for
325325
// Return Value:
326326
// - the width type of the codepoint
327327
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const
328328
{
329-
if (glyph.empty())
329+
THROW_HR_IF(E_INVALIDARG, glyph.empty());
330+
if (glyph.size() == 1)
330331
{
331-
return CodepointWidth::Invalid;
332-
}
333-
334-
const auto codepoint = _extractCodepoint(glyph);
335-
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
332+
// We first attempt to look at our custom quick lookup table of char width preferences.
333+
const auto width = GetQuickCharWidth(glyph.front());
336334

337-
// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
338-
// We must check its bounds to make sure that our hit was a true hit.
339-
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
335+
// If it's invalid, the quick width had no opinion, so go to the lookup table.
336+
if (width == CodepointWidth::Invalid)
337+
{
338+
return _lookupGlyphWidthWithCache(glyph);
339+
}
340+
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
341+
// If not, go to the lookup table.
342+
else if (width == CodepointWidth::Ambiguous)
343+
{
344+
if (_pfnFallbackMethod)
345+
{
346+
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
347+
}
348+
else
349+
{
350+
return _lookupGlyphWidthWithCache(glyph);
351+
}
352+
}
353+
// Otherwise, return Width as it is.
354+
else
355+
{
356+
return width;
357+
}
358+
}
359+
else
340360
{
341-
return it->width;
361+
return _lookupGlyphWidthWithCache(glyph);
342362
}
343-
344-
return CodepointWidth::Narrow;
345363
}
346364

347365
// Routine Description:
@@ -369,74 +387,71 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
369387
// - true if codepoint is wide
370388
bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const
371389
{
372-
THROW_HR_IF(E_INVALIDARG, glyph.empty());
373-
if (glyph.size() == 1)
374-
{
375-
// We first attempt to look at our custom quick lookup table of char width preferences.
376-
const auto width = GetQuickCharWidth(glyph.front());
390+
return GetWidth(glyph) == CodepointWidth::Wide;
391+
}
377392

378-
// If it's invalid, the quick width had no opinion, so go to the lookup table.
379-
if (width == CodepointWidth::Invalid)
380-
{
381-
return _lookupIsWide(glyph);
382-
}
383-
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
384-
// If not, go to the lookup table.
385-
else if (width == CodepointWidth::Ambiguous)
386-
{
387-
if (_pfnFallbackMethod)
388-
{
389-
return _checkFallbackViaCache(glyph);
390-
}
391-
else
392-
{
393-
return _lookupIsWide(glyph);
394-
}
395-
}
396-
// Otherwise, return Wide as True and Narrow as False.
397-
else
398-
{
399-
return width == CodepointWidth::Wide;
400-
}
393+
// Routine Description:
394+
// - returns the width type of codepoint by searching the map generated from the unicode spec
395+
// Arguments:
396+
// - glyph - the utf16 encoded codepoint to search for
397+
// Return Value:
398+
// - the width type of the codepoint
399+
CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const
400+
{
401+
if (glyph.empty())
402+
{
403+
return CodepointWidth::Invalid;
401404
}
402-
else
405+
406+
const auto codepoint = _extractCodepoint(glyph);
407+
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
408+
409+
// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
410+
// We must check its bounds to make sure that our hit was a true hit.
411+
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
403412
{
404-
return _lookupIsWide(glyph);
413+
return it->width;
405414
}
415+
416+
return CodepointWidth::Narrow;
406417
}
407418

408419
// Routine Description:
409-
// - checks if codepoint is wide using fallback methods.
420+
// - returns the width type of codepoint using fallback methods.
410421
// Arguments:
411422
// - glyph - the utf16 encoded codepoint to check width of
412423
// Return Value:
413-
// - true if codepoint is wide or if it can't be confirmed to be narrow
414-
bool CodepointWidthDetector::_lookupIsWide(const std::wstring_view glyph) const noexcept
424+
// - the width type of the codepoint
425+
CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept
415426
{
416427
try
417428
{
418429
// Use our generated table to try to lookup the width based on the Unicode standard.
419-
const CodepointWidth width = GetWidth(glyph);
430+
const CodepointWidth width = _lookupGlyphWidth(glyph);
420431

421432
// If it's ambiguous, then ask the font if we can.
422433
if (width == CodepointWidth::Ambiguous)
423434
{
424435
if (_pfnFallbackMethod)
425436
{
426-
return _checkFallbackViaCache(glyph);
437+
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
438+
}
439+
else
440+
{
441+
return CodepointWidth::Ambiguous;
427442
}
428443
}
429-
// If it's not ambiguous, it should say wide or narrow. Turn that into True = Wide or False = Narrow.
444+
// If it's not ambiguous, it should say wide or narrow.
430445
else
431446
{
432-
return width == CodepointWidth::Wide;
447+
return width;
433448
}
434449
}
435450
CATCH_LOG();
436451

437452
// If we got this far, we couldn't figure it out.
438453
// It's better to be too wide than too narrow.
439-
return true;
454+
return CodepointWidth::Wide;
440455
}
441456

442457
// Routine Description:

src/types/inc/CodepointWidthDetector.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class CodepointWidthDetector final
4141
#endif
4242

4343
private:
44-
bool _lookupIsWide(const std::wstring_view glyph) const noexcept;
44+
CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const;
45+
CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept;
4546
bool _checkFallbackViaCache(const std::wstring_view glyph) const;
4647
static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;
4748

0 commit comments

Comments
 (0)