|
| 1 | +import inspect |
| 2 | +import warnings |
| 3 | +from functools import lru_cache, wraps |
| 4 | +from importlib import import_module |
| 5 | +from importlib.metadata import entry_points |
| 6 | + |
| 7 | +from sklearn._config import get_config |
| 8 | + |
# Name of the entry point group that is scanned to discover engine specs.
SKLEARN_ENGINES_ENTRY_POINT = "sklearn_engines"
| 10 | + |
| 11 | + |
class EngineSpec:
    """Lightweight description of where an engine class can be found.

    Only names are stored on the instance; nothing is imported until
    ``get_engine_class`` is called.

    Parameters
    ----------
    name : str
        Name under which the engine was declared.

    provider_name : str
        Name of the provider the engine belongs to.

    module_name : str
        Dotted path of the module to import.

    engine_qualname : str
        Dotted attribute path, relative to the module, of the engine.
    """

    __slots__ = ["name", "provider_name", "module_name", "engine_qualname"]

    def __init__(self, name, provider_name, module_name, engine_qualname):
        self.name, self.provider_name = name, provider_name
        self.module_name, self.engine_qualname = module_name, engine_qualname

    def get_engine_class(self):
        """Import ``module_name`` and resolve ``engine_qualname`` inside it.

        The qualname is walked one dotted component at a time, so nested
        attributes such as ``Outer.Inner`` are supported.
        """
        target = import_module(self.module_name)
        for part in self.engine_qualname.split("."):
            target = getattr(target, part)
        return target
| 26 | + |
| 27 | + |
def _parse_entry_point(entry_point):
    """Turn a ``module:qualname`` entry point into an ``EngineSpec``.

    The provider name is the first dotted component of the module path.
    A value that does not contain exactly one ``:`` makes the unpacking
    below raise ``ValueError``.
    """
    module_name, engine_qualname = entry_point.value.split(":")
    # str.split always returns at least one item, so [0] is safe.
    provider_name = module_name.split(".", 1)[0]
    return EngineSpec(
        name=entry_point.name,
        provider_name=provider_name,
        module_name=module_name,
        engine_qualname=engine_qualname,
    )
| 32 | + |
| 33 | + |
@lru_cache
def _parse_entry_points(provider_names=None):
    """Collect the engine specs declared through entry points.

    Parameters
    ----------
    provider_names : tuple of str or None, default=None
        When given, only specs whose provider name is in this collection
        are kept. The result is memoized with ``lru_cache``, so the value
        has to be hashable.

    Returns
    -------
    specs : list of EngineSpec
        The specs that could be parsed (and that match ``provider_names``
        when it is given).

    Raises
    ------
    RuntimeError
        If ``provider_names`` is given and at least one of the requested
        providers has no matching spec.
    """
    discovered = entry_points()
    # Older importlib.metadata returns a mapping keyed by group instead of
    # an object exposing ``select``.
    if hasattr(discovered, "select"):
        candidates = discovered.select(group=SKLEARN_ENGINES_ENTRY_POINT)
    else:
        candidates = discovered.get(SKLEARN_ENGINES_ENTRY_POINT, ())

    filtering = provider_names is not None
    specs = []
    for entry_point in candidates:
        try:
            parsed = _parse_entry_point(entry_point)
            # Keep everything when no filter was requested, otherwise only
            # the specs of the requested providers.
            if not filtering or parsed.provider_name in provider_names:
                specs.append(parsed)
        except Exception as e:
            # A broken third-party package installed next to scikit-learn
            # must not make discovery fail: warn and move on.
            warnings.warn(
                f"Invalid {SKLEARN_ENGINES_ENTRY_POINT} entry point"
                f" {entry_point.name} with value {entry_point.value}: {e}"
            )

    if not filtering:
        return specs

    missing_providers = set(provider_names) - {s.provider_name for s in specs}
    if missing_providers:
        raise RuntimeError(
            "Could not find any provider for the"
            f" {SKLEARN_ENGINES_ENTRY_POINT} entry point with name(s):"
            f" {', '.join(repr(p) for p in sorted(missing_providers))}"
        )
    return specs
| 67 | + |
| 68 | + |
def list_engine_provider_names():
    """Return the sorted, de-duplicated names of the engine providers.

    This function only inspects the entry point metadata and should not
    trigger any module import.
    """
    provider_names = set()
    for spec in _parse_entry_points():
        provider_names.add(spec.provider_name)
    return sorted(provider_names)
| 75 | + |
| 76 | + |
def _get_engine_classes(engine_name, provider_names, engine_specs, default):
    """Yield ``(provider, engine_class)`` candidates for ``engine_name``.

    Parameters
    ----------
    engine_name : str
        Name of the engine to look for.

    provider_names : iterable of str or class
        Enabled providers, in priority order. A string is looked up among
        ``engine_specs``; a class is treated as an ad-hoc engine and is
        used directly when its ``engine_name`` attribute matches.

    engine_specs : iterable
        Objects exposing ``name``, ``provider_name`` and
        ``get_engine_class()``.

    default : class
        Engine class yielded last, under the provider name ``"default"``.

    Yields
    ------
    provider : str or class
        The provider name, or the class itself for an ad-hoc provider.

    engine_class : class
        The matching engine class.
    """
    # Keep only the first spec declared by each provider for this engine.
    specs_by_provider = {}
    for spec in engine_specs:
        if spec.name == engine_name:
            specs_by_provider.setdefault(spec.provider_name, spec)

    for provider_name in provider_names:
        if inspect.isclass(provider_name):
            # The provider name is actually a ready-to-go engine class.
            # Instead of a made up string to name this ad-hoc provider
            # we use the class itself. This mirrors what the user used
            # when they set the config (ad-hoc class or string naming
            # a provider).
            engine_class = provider_name
            if getattr(engine_class, "engine_name", None) == engine_name:
                yield engine_class, engine_class
            # An ad-hoc class is never a key of the name-based mapping:
            # skip the lookup, which would be pointless and would raise
            # TypeError for a class that is not hashable.
            continue

        spec = specs_by_provider.get(provider_name)
        if spec is not None:
            yield spec.provider_name, spec.get_engine_class()

    yield "default", default
| 101 | + |
| 102 | + |
def get_engine_classes(engine_name, default, verbose=False):
    """Iterate over every engine class able to provide `engine_name`.

    Candidates come from two places: entry point definitions whose provider
    is listed in the `engine_provider` configuration, and ad-hoc engine
    classes placed directly in that configuration. The default engine is
    always yielded last.

    Parameters
    ----------
    engine_name : str
        The name of the algorithm for which to find engine classes.

    default : class
        The default engine class to use if no other provider is found.

    verbose : bool, default=False
        If True, print the name of the engine classes that are tried.

    Yields
    ------
    provider : str or class
        The "name" of each matching provider. The "name" corresponds to the
        entry in the `engine_provider` configuration. It can be a string or
        a class for programmatically registered ad-hoc providers.

    engine_class : class
        The engine class that implements the algorithm for the given
        provider.
    """
    enabled_providers = get_config()["engine_provider"]

    # Nothing configured: the default engine is the only candidate.
    if not enabled_providers:
        yield "default", default
        return

    # Only string entries name entry point providers; classes are ad-hoc
    # engines. A tuple is passed because the parsing helper is memoized.
    named_providers = tuple(
        name for name in enabled_providers if not inspect.isclass(name)
    )
    specs = _parse_entry_points(provider_names=named_providers)

    candidates = _get_engine_classes(
        engine_name=engine_name,
        provider_names=enabled_providers,
        engine_specs=specs,
        default=default,
    )
    for provider, engine_class in candidates:
        if verbose:
            print(
                f"trying engine {engine_class.__module__}.{engine_class.__qualname__}."
            )
        yield provider, engine_class
| 153 | + |
| 154 | + |
def convert_attributes(method):
    """Convert estimator attributes after calling the decorated method.

    The attributes of an estimator can be stored in "engine native" types
    (default) or "scikit-learn native" types. This decorator will call the
    engine's conversion function when needed. Use this decorator on methods
    that set estimator attributes.

    Parameters
    ----------
    method : callable
        The method to wrap. It is called as ``method(self, *args, **kwargs)``
        and ``self`` is expected to expose ``_engine_class`` and
        ``_default_engine``.

    Returns
    -------
    wrapper : callable
        The wrapped method. It returns whatever ``method`` returns.
    """

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        result = method(self, *args, **kwargs)

        # The configuration is read after the call so that the conversion
        # follows the setting in effect once the attributes are set.
        if get_config()["engine_attributes"] != "sklearn_types":
            return result

        engine = self._engine_class
        # Iterate over a snapshot: setattr() writes to the very dict that
        # vars(self) exposes, and a live view would raise RuntimeError if
        # a setter or the converter added or removed an attribute.
        for name, value in list(vars(self).items()):
            # All attributes are passed to the engine, which can
            # either convert the value (engine specific types) or
            # return it as is (native Python types)
            setattr(self, name, engine.convert_to_sklearn_types(name, value))

        # No matter which engine was used to fit, after the attribute
        # conversion to the sklearn native types the default engine
        # is used.
        self._engine_class = self._default_engine
        self._engine_provider = "default"
        return result

    return wrapper
0 commit comments