Skip to content

/std:c++latest makes headers much slower to include, up to 10 times #3599

@Kojoley

Description

@Kojoley

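Times (in seconds) are those reported by the compiler front end via the /Bt flag. Only headers whose slowdown exceeds both 5 ms and 1% are shown, and each value is the minimum of 10 runs. /permissive- was used with every /std mode to exclude preprocessor/parser differences: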

default c++17 c++20 c++latest slowdown header
0.121 0.275 1.248 1.308 981% <chrono>
0.033 0.037 0.182 0.186 464% <cmath>
0.166 0.193 0.291 0.537 223% <queue>
0.137 0.162 0.210 0.438 220% <stack>
..... 0.581 1.394 1.474 154% <filesystem>
0.079 0.124 0.169 0.174 120% <numeric>
0.053 0.061 0.111 0.114 115% <utility>
0.081 0.092 0.169 0.172 112% <array>
0.062 0.070 0.124 0.128 106% <tuple>
0.056 0.064 0.114 0.114 104% <typeindex>
0.143 0.159 0.263 0.287 101% <algorithm>
..... 0.397 0.559 0.758 91% <execution>
0.149 0.214 0.269 0.284 91% <functional>
0.147 0.159 0.249 0.259 76% <memory>
0.171 0.209 0.281 0.291 70% <stdexcept>
0.172 0.210 0.280 0.292 70% <bitset>
0.188 0.227 0.302 0.312 66% <string>
0.199 0.239 0.320 0.330 66% <system_error>
0.391 0.450 0.611 0.645 65% <regex>
0.109 0.120 0.174 0.178 63% <iterator>
0.192 0.204 0.303 0.312 62% <thread>
0.138 0.160 0.209 0.223 62% <list>
0.318 0.378 0.498 0.510 60% <sstream>
0.406 0.458 0.618 0.646 59% <random>
0.148 0.165 0.226 0.235 59% <valarray>
0.259 0.298 0.397 0.411 59% <streambuf>
0.287 0.332 0.442 0.455 59% <ostream>
0.282 0.325 0.438 0.447 59% <ios>
0.531 0.604 0.801 0.841 58% <future>
0.292 0.339 0.449 0.461 58% <iostream>
0.134 0.158 0.205 0.211 57% <deque>
0.135 0.157 0.205 0.212 57% <forward_list>
0.271 0.304 0.414 0.425 57% <mutex>
0.298 0.342 0.454 0.465 56% <locale>
0.140 0.166 0.212 0.218 56% <vector>
0.307 0.353 0.465 0.478 56% <strstream>
0.301 0.342 0.459 0.468 55% <fstream>
0.279 0.310 0.423 0.433 55% <shared_mutex>
0.304 0.349 0.456 0.470 55% <codecvt>
0.279 0.307 0.418 0.431 54% <condition_variable>
0.307 0.371 0.464 0.471 53% <iomanip>
..... 0.166 0.246 0.252 52% <charconv>
0.145 0.171 0.210 0.220 52% <set>
0.334 0.383 0.494 0.505 51% <complex>
0.137 0.153 0.200 0.206 50% <scoped_allocator>
0.175 0.199 0.254 0.263 50% <unordered_map>
0.308 0.355 0.452 0.460 49% <istream>
0.176 0.198 0.255 0.262 49% <unordered_set>
0.148 0.165 0.212 0.218 47% <map>
..... 0.150 0.201 0.215 43% <any>
..... 0.342 0.473 0.490 43% <memory_resource>
..... 0.149 0.206 0.213 43% <optional>
..... 0.206 0.279 0.293 42% <string_view>
..... 0.213 0.273 0.277 30% <variant>
0.072 0.080 0.090 0.090 25% <atomic>
0.053 0.062 0.066 0.066 25% <new>
0.048 0.055 0.058 0.059 23% <type_traits>
0.054 0.062 0.065 0.065 20% <exception>
0.056 0.063 0.067 0.067 20% <typeinfo>
0.052 0.059 0.062 0.062 19% <ratio>
..... ..... 0.385 0.418 9% <ranges>
..... ..... 0.826 0.867 5% <format>
..... ..... 0.229 0.237 3% <stop_token>
..... ..... 0.514 0.527 3% <syncstream>
..... ..... 0.221 0.226 2% <barrier>
>cl
Microsoft (R) C/C++ Optimizing Compiler Version 19.35.32216.1 for x64

Repro:

import subprocess
import json
import os
import sys
import colorama
from colorama import Fore, Back, Style
from collections import defaultdict
colorama.init()


def parse_msvc_wall(output):
    """Extract the compiler front-end time from ``cl /Bt`` output.

    Finds the first ``c1xx.dll)=`` marker in *output* and parses the number
    that follows it, up to the ``s`` unit suffix.

    Args:
        output: Raw bytes captured from the compiler.

    Returns:
        The reported time as a float, or ``None`` when the marker is absent.

    Raises:
        ValueError: If the text following the marker is not a number.
    """
    _, marker, tail = output.partition(b'c1xx.dll)=')
    if not marker:
        return None
    # Cut at the unit suffix when it is there. If it is missing, keep the
    # whole tail rather than slicing to index -1, which would silently drop
    # the last digit of the number.
    number, _, _ = tail.partition(b's')
    return float(number)


def msvc_get_parsing_time(*fnames, std=None):
    """Run ``cl`` over an empty source with headers force-included and time it.

    Args:
        *fnames: Header names written with their delimiters, e.g.
            ``'<vector>'``. A single list or tuple of names is accepted too.
        std: Suffix for ``/std:c++<std>`` (e.g. ``'17'``, ``'latest'``), or
            ``None`` to pass no ``/std`` flag.

    Returns:
        The time parsed from the compiler output by ``parse_msvc_wall``, or
        ``None`` if the compiler reported C1083 or printed no timing line.

    Raises:
        subprocess.CalledProcessError: If ``cl`` fails for a reason other
            than C1083 and no timing could be parsed from its stderr.
    """
    # ``fnames`` itself is always a tuple, so it is the first element that
    # has to be inspected to support ``msvc_get_parsing_time([a, b])``.
    if len(fnames) == 1 and isinstance(fnames[0], (list, tuple)):
        fnames = fnames[0]

    # /Zs: syntax check only, /TP: treat the input as C++, /w: no warnings,
    # /Bt: print per-stage timings. '.empty.tmp' is the (empty) input file.
    cmd = ['cl', '/nologo', '/Bt', '/Zs', '/TP', '/w', '.empty.tmp']

    if std is not None:
        cmd.append(f'/std:c++{std}')

    # Strip the surrounding delimiters: /FI takes the bare header name.
    cmd += [f'/FI{fn[1:-1]}' for fn in fnames]

    try:
        output = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True).stdout
    except subprocess.CalledProcessError as e:
        # C1083 is "cannot open include file": the header is not shipped.
        if b'C1083' in e.stdout:
            return None
        wall = parse_msvc_wall(e.stderr)
        if wall is not None:
            return wall
        print(f'stderr={e.stderr}')
        print(f'stdout={e.stdout}')
        raise

    return parse_msvc_wall(output)


# Standard library header names, pasted from https://eel.is/c++draft/headers.
# The paste carries stray link-marker lines, which are filtered out below.
headers = {
    name
    for name in '''
<algorithm>
<flat_set>
<mutex>
<stdexcept>
<any>
<format>
<new>
<stdfloat>
<array>
<forward_list>
<numbers>
<stop_token>
<atomic>
<fstream>
<numeric>
<streambuf>
<barrier>
<functional>
<optional>
<string>
<bit>
<future>
<ostream>
<string_view>
<bitset>
<generator>
<print>
<strstream>
<charconv>
<initializer_list>
<queue>
<syncstream>
<chrono>
<iomanip>
<random>
<system_error>
<codecvt>
<ios>
<ranges>
<thread>
🔗
<compare>
<iosfwd>
<ratio>
<tuple>
🔗
<complex>
<iostream>
<regex>
<type_traits>
🔗
<concepts>
<istream>
<scoped_allocator>
<typeindex>
🔗
<condition_variable>
<iterator>
<semaphore>
<typeinfo>
🔗
<coroutine>
<latch>
<set>
<unordered_map>
🔗
<deque>
<limits>
<shared_mutex>
<unordered_set>
🔗
<exception>
<list>
<source_location>
<utility>
🔗
<execution>
<locale>
<span>
<valarray>
🔗
<expected>
<map>
<spanstream>
<variant>
🔗
<filesystem>
<mdspan>
<sstream>
<vector>
🔗
<flat_map>
<memory>
<stack>
<version>
🔗
<memory_resource>
<stacktrace>
<cassert>
<cfenv>
<climits>
<csetjmp>
<cstddef>
<cstdlib>
<cuchar>
🔗
<cctype>
<cfloat>
<clocale>
<csignal>
<cstdint>
<cstring>
<cwchar>
🔗
<cerrno>
<cinttypes>
<cmath>
<cstdarg>
<cstdio>
<ctime>
<cwctype>
'''.strip().splitlines()
    if name != '🔗'
}
# For a quick experiment, narrow the run to a couple of headers instead:
#headers = ['<string_view>', '<chrono>']

# Width of the longest header name; sizes the last column of the table rule.
max_header_name_len = len(max(headers, key=len))


def time_to_color(t):
    """Pick the colorama ``(foreground, style)`` pair for a timing value.

    The value is matched against ascending upper bounds; the first bound it
    is strictly below decides the pair. Anything at or above the last bound
    (0.300) gets dim magenta.
    """
    bands = (
        (0.010, (Fore.BLACK, Style.BRIGHT)),
        (0.020, (Fore.WHITE, Style.DIM)),
        (0.040, ('', '')),  # leave the terminal's default colors untouched
        (0.060, (Fore.CYAN, Style.BRIGHT)),
        (0.080, (Fore.WHITE, Style.BRIGHT)),
        (0.100, (Fore.YELLOW, Style.BRIGHT)),
        (0.150, (Fore.RED, Style.BRIGHT)),
        (0.200, (Fore.RED, Style.DIM)),
        (0.300, (Fore.MAGENTA, Style.BRIGHT)),
    )
    for upper_bound, colors in bands:
        if t < upper_bound:
            return colors
    return Fore.MAGENTA, Style.DIM


# Appended after every colored table cell so that its color and style do not
# carry over into the text that follows.
reset_colors = Style.RESET_ALL + Fore.RESET

# /std: modes to benchmark, in table-column order. None means no /std flag is
# passed at all; that column is labelled "default".
stds = [None, '17', '20', 'latest']


def ttc(time):
    """Return the color and style escape codes for *time* as one string."""
    return ''.join(time_to_color(time))


def info(timings, expected_count=len(stds), reverse=False, delim=' ', fill=' .....'):
    """Format one table row of timings, padded to a fixed number of cells.

    Each timing becomes a colored, right-aligned cell with three decimals.
    Missing cells (when fewer than *expected_count* timings are given) are
    rendered as *fill*. Padding goes after the timings by default, or before
    them when *reverse* is true. Cells are joined with *delim*.
    """
    cells = []
    for seconds in timings:
        cells.append(f'{ttc(seconds)}{seconds:>6.3f}{reset_colors}')

    padding = [fill for _ in range(expected_count - len(timings))]

    if reverse:
        row = padding + cells
    else:
        row = cells + padding
    return delim.join(row)


def print_slowdown(all_timings, considered_disabled=0.030, min_diff=0.005, min_diff_rel=0.01):
    """Print a Markdown table of headers ordered by relative slowdown.

    Args:
        all_timings: Mapping of header name to its list of timings, one per
            entry of ``stds`` and in the same order.
        considered_disabled: Leading timings below this value are dropped
            before the comparison.
        min_diff: Minimum absolute difference between the last and the first
            remaining timing for a header to be listed.
        min_diff_rel: Minimum relative slowdown for a header to be listed.
    """
    titles = ['default' if not std else f'c++{std}' for std in stds]
    print(f'{" | ".join(titles)} | slowdown | header')

    # One right-aligned rule per timing column plus the slowdown column.
    numeric_rule = '-' * 6 + ':|'
    print(numeric_rule * (len(stds) + 1) + '-' * max_header_name_len)

    rows = []
    for header, timings in all_timings.items():
        # Skip the leading run of timings below the threshold.
        start = 0
        while start < len(timings) and timings[start] < considered_disabled:
            start += 1
        kept = timings[start:]
        if not kept:
            continue

        baseline, final = kept[0], kept[-1]
        slowdown = final / baseline - 1
        if not abs(final - baseline) > min_diff:
            continue
        if not abs(slowdown) > min_diff_rel:
            continue
        rows.append((slowdown, header, kept))

    rows.sort(reverse=True)
    for slowdown, header, kept in rows:
        print(f'{info(kept, reverse=True, delim=" |")} | {slowdown:>5.0%} | `{header}`')


def get_timings():
    """Time every header in ``headers`` under every mode in ``stds``.

    Each header/mode pair is measured up to 10 times and the minimum is
    kept. If a measurement returns ``None``, NaN is recorded for that
    attempt and the remaining attempts for that pair are skipped. A progress
    row is redrawn in place after each mode finishes.

    Returns:
        A ``defaultdict`` mapping header name to a list of timings, one per
        entry of ``stds`` and in the same order.
    """
    # Create (or truncate) the empty source file that the compiler is run on.
    with open('.empty.tmp', 'w'):
        pass

    all_timings = defaultdict(list)
    print('Timing standard library headers:')
    # Sorted so the run order and the key order of the result do not depend
    # on set iteration order, which varies between interpreter runs.
    for header in sorted(headers):
        # The progress row has no newline, so flush to make it show up now.
        print(f'{info([])} {header}', end='', flush=True)

        for std in stds:
            samples = []
            for _ in range(10):
                elapsed = msvc_get_parsing_time(header, std=std)
                if elapsed is None:
                    samples.append(float('nan'))
                    break
                samples.append(elapsed)

            all_timings[header].append(min(samples))
            print(f'\r{info(all_timings[header])} {header}', end='', flush=True)
        print(f'\r{info(all_timings[header])} {header}')
    return all_timings


if __name__ == "__main__":
    # Results are cached on disk; pass -r to measure again and overwrite them.
    cache_path = 'bench_syshdrs.json'
    remeasure = '-r' in sys.argv or not os.path.exists(cache_path)
    if remeasure:
        timings = get_timings()
        with open(cache_path, 'w+') as cache_file:
            json.dump(timings, cache_file)
        # Blank lines between the progress output and the summary table.
        print('\n' * 3)
    else:
        with open(cache_path) as cache_file:
            timings = json.load(cache_file)
    print_slowdown(timings)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions