Merge e249f1e into ada7908

SaschaCowley · web-flow · commit 6c9a566a0879 · 2024-12-18T06:02:38.000Z
diff --git a/nvdaHelper/local/wasapi.cpp b/nvdaHelper/local/wasapi.cpp
@@ -1,7 +1,7 @@
 /*
 This file is a part of the NVDA project.
 URL: http://www.nvda-project.org/
-Copyright 2023 James Teh.
+Copyright 2023-2024 NV Access Limited, James Teh.
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License version 2.0, as published by
     the Free Software Foundation.
@@ -45,6 +45,7 @@ const IID IID_IMMNotificationClient = __uuidof(IMMNotificationClient);
 const IID IID_IAudioStreamVolume = __uuidof(IAudioStreamVolume);
 const IID IID_IAudioSessionManager2 = __uuidof(IAudioSessionManager2);
 const IID IID_IAudioSessionControl2 = __uuidof(IAudioSessionControl2);
+const IID IID_IMMEndpoint = __uuidof(IMMEndpoint);
 
 /**
  * C++ RAII class to manage the lifecycle of a standard Windows HANDLE closed
@@ -167,9 +168,9 @@ class WasapiPlayer {
 
 	/**
 	 * Constructor.
-	 * Specify an empty (not null) deviceName to use the default device.
+	 * Specify an empty (not null) endpointId to use the default device.
 	 */
-	WasapiPlayer(wchar_t* deviceName, WAVEFORMATEX format,
+	WasapiPlayer(wchar_t* endpointId, WAVEFORMATEX format,
 		ChunkCompletedCallback callback);
 
 	/**
@@ -229,7 +230,7 @@ class WasapiPlayer {
 	CComPtr<IAudioClock> clock;
 	// The maximum number of frames that will fit in the buffer.
 	UINT32 bufferFrames;
-	std::wstring deviceName;
+	std::wstring endpointId;
 	WAVEFORMATEX format;
 	ChunkCompletedCallback callback;
 	PlayState playState = PlayState::stopped;
@@ -246,9 +247,9 @@ class WasapiPlayer {
 	bool isUsingPreferredDevice = false;
 };
 
-WasapiPlayer::WasapiPlayer(wchar_t* deviceName, WAVEFORMATEX format,
+WasapiPlayer::WasapiPlayer(wchar_t* endpointId, WAVEFORMATEX format,
 	ChunkCompletedCallback callback)
-: deviceName(deviceName), format(format), callback(callback) {
+: endpointId(endpointId), format(format), callback(callback) {
 	wakeEvent = CreateEvent(nullptr, false, false, nullptr);
 }
 
@@ -266,7 +267,7 @@ HRESULT WasapiPlayer::open(bool force) {
 	}
 	CComPtr<IMMDevice> device;
 	isUsingPreferredDevice = false;
-	if (deviceName.empty()) {
+	if (endpointId.empty()) {
 		hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device);
 	} else {
 		hr = getPreferredDevice(device);
@@ -491,48 +492,47 @@ HRESULT WasapiPlayer::getPreferredDevice(CComPtr<IMMDevice>& preferredDevice) {
 	if (FAILED(hr)) {
 		return hr;
 	}
-	CComPtr<IMMDeviceCollection> devices;
-	hr = enumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &devices);
+	CComPtr<IMMDevice> device;
+	hr = enumerator->GetDevice(endpointId.c_str(), &device);
 	if (FAILED(hr)) {
 		return hr;
 	}
-	UINT count = 0;
-	devices->GetCount(&count);
-	for (UINT d = 0; d < count; ++d) {
-		CComPtr<IMMDevice> device;
-		hr = devices->Item(d, &device);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		CComPtr<IPropertyStore> props;
-		hr = device->OpenPropertyStore(STGM_READ, &props);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		PROPVARIANT val;
-		hr = props->GetValue(PKEY_Device_FriendlyName, &val);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		// WinMM device names are truncated to MAXPNAMELEN characters, including the
-		// null terminator.
-		constexpr size_t MAX_CHARS = MAXPNAMELEN - 1;
-		if (wcsncmp(val.pwszVal, deviceName.c_str(), MAX_CHARS) == 0) {
-			PropVariantClear(&val);
-			preferredDevice = std::move(device);
-			return S_OK;
-		}
-		PropVariantClear(&val);
+
+	// We only want to use the device if it is plugged in and enabled.
+	DWORD state;
+	hr = device->GetState(&state);
+	if (FAILED(hr)) {
+		return hr;
+	} else if (state != DEVICE_STATE_ACTIVE) {
+		return E_NOTFOUND;
 	}
-	return E_NOTFOUND;
+
+	// We only want to use the device if it is an output device.
+	IMMEndpoint* endpoint;
+	hr = device->QueryInterface(IID_IMMEndpoint, (void**)&endpoint);
+	if (FAILED(hr)) {
+		return hr;
+	}
+	EDataFlow dataFlow;
+	hr = endpoint->GetDataFlow(&dataFlow);
+	if (FAILED(hr)) {
+		return hr;
+	} else if(dataFlow != eRender) {
+		return E_NOTFOUND;
+	}
+	preferredDevice = std::move(device);
+	endpoint->Release();
+	device.Release();
+	enumerator.Release();
+	return S_OK;
 }
 
 bool WasapiPlayer::didPreferredDeviceBecomeAvailable() {
 	if (
 		// We're already using the preferred device.
 		isUsingPreferredDevice ||
 		// A preferred device was not specified.
-		deviceName.empty() ||
+		endpointId.empty() ||
 		// A device hasn't recently changed state.
 		deviceStateChangeCount == notificationClient->getDeviceStateChangeCount()
 	) {
@@ -673,7 +673,7 @@ HRESULT WasapiPlayer::disableCommunicationDucking(IMMDevice* device) {
  */
 class SilencePlayer {
 	public:
-	SilencePlayer(wchar_t* deviceName);
+	SilencePlayer(wchar_t* endpointId);
 	HRESULT init();
 	// Play silence for the specified duration.
 	void playFor(DWORD ms, float volume);
@@ -698,8 +698,8 @@ class SilencePlayer {
 	std::vector<INT16> whiteNoiseData;
 };
 
-SilencePlayer::SilencePlayer(wchar_t* deviceName):
-player(deviceName, getFormat(), nullptr),
+SilencePlayer::SilencePlayer(wchar_t* endpointId):
+player(endpointId, getFormat(), nullptr),
 whiteNoiseData(
 	SILENCE_BYTES  / (
 		sizeof(INT16) / sizeof(unsigned char)
@@ -791,10 +791,10 @@ void SilencePlayer::terminate() {
  * WasapiPlayer or SilencePlayer, with the exception of wasPlay_startup.
  */
 
-WasapiPlayer* wasPlay_create(wchar_t* deviceName, WAVEFORMATEX format,
+WasapiPlayer* wasPlay_create(wchar_t* endpointId, WAVEFORMATEX format,
 	WasapiPlayer::ChunkCompletedCallback callback
 ) {
-	return new WasapiPlayer(deviceName, format, callback);
+	return new WasapiPlayer(endpointId, format, callback);
 }
 
 void wasPlay_destroy(WasapiPlayer* player) {
@@ -855,9 +855,9 @@ HRESULT wasPlay_startup() {
 
 SilencePlayer* silence = nullptr;
 
-HRESULT wasSilence_init(wchar_t* deviceName) {
+HRESULT wasSilence_init(wchar_t* endpointId) {
 	assert(!silence);
-	silence = new SilencePlayer(deviceName);
+	silence = new SilencePlayer(endpointId);
 	return silence->init();
 }
 
diff --git a/source/config/configSpec.py b/source/config/configSpec.py
@@ -41,7 +41,6 @@
 	includeCLDR = boolean(default=True)
 	symbolDictionaries = string_list(default=list("cldr"))
 	beepSpeechModePitch = integer(default=10000,min=50,max=11025)
-	outputDevice = string(default=default)
 	autoLanguageSwitching = boolean(default=true)
 	autoDialectSwitching = boolean(default=false)
 	delayedCharacterDescriptions = boolean(default=false)
@@ -55,6 +54,7 @@
 
 # Audio settings
 [audio]
+	outputDevice = string(default=default)
 	audioDuckingMode = integer(default=0)
 	soundVolumeFollowsVoice = boolean(default=false)
 	soundVolume = integer(default=100, min=0, max=100)
diff --git a/source/config/profileUpgradeSteps.py b/source/config/profileUpgradeSteps.py
@@ -414,3 +414,58 @@ def upgradeConfigFrom_12_to_13(profile: ConfigObj) -> None:
 	log.debug(
 		f"Handled cldr value of {setting!r}. List is now: {profile['speech']['symbolDictionaries']}",
 	)
+
+
+def upgradeConfigFrom_13_to_14(profile: ConfigObj) -> None:
+	"""Replace [speech][outputDevice] (a friendly name) with [audio][outputDevice] (an endpoint ID), and delete the former."""
+	try:
+		friendlyName = profile["speech"]["outputDevice"]
+	except KeyError:
+		log.debug("Output device not present in config. Taking no action.")
+		return
+	if friendlyName == "default":
+		log.debug("Output device is set to default. Not writing a new value to config.")
+	elif endpointId := _friendlyNameToEndpointId(friendlyName):
+		log.debug(
+			f"Best match for device with {friendlyName=} has {endpointId=}. Writing new value to config."
+		)
+		profile.setdefault("audio", {})["outputDevice"] = endpointId  # The profile may not have an [audio] section yet.
+	else:
+		log.debug(
+			f"Could not find an audio output device with {friendlyName=}. Not writing a new value to config."
+		)
+	log.debug("Deleting old config value.")  # Reached for "default", a match, and no match alike.
+	del profile["speech"]["outputDevice"]
+
+
+def _friendlyNameToEndpointId(friendlyName: str) -> str | None:
+	"""Find the endpoint ID of the audio output device that best matches a friendly name.
+
+	Since friendly names are not unique, there may be many devices on one system with the same friendly name.
+	As the order of devices in an IMMDeviceCollection is arbitrary, we cannot assume that the first device with a matching friendly name is the device the user wants.
+	We also can't guarantee that the device the user has selected is active, so we need to retrieve devices by state, in order from most to least preferable.
+	It is probably a safe bet that the device the user wants to use is either active or unplugged.
+	Thus, the preference order for states is:
+	1. ACTIVE - The audio adapter that connects to the endpoint device is present and enabled.
+	   In addition, if the endpoint device plugs into a jack on the adapter, then the endpoint device is plugged in.
+	2. UNPLUGGED - The audio adapter that contains the jack for the endpoint device is present and enabled, but the endpoint device is not plugged into the jack.
+	3. DISABLED - The user has disabled the device in the Windows multimedia control panel.
+	4. NOTPRESENT - The audio adapter that connects to the endpoint device has been removed from the system, or the user has disabled the adapter device in Device Manager.
+	Within a state, if there is more than one device with the selected friendly name, we use the first one.
+
+	:param friendlyName: Friendly name of the device to search for.
+	:return: Endpoint ID string of the best match device, or `None` if no device with a matching friendly name is available.
+	"""
+	from nvwave import _getOutputDevices
+	from pycaw.constants import DEVICE_STATE
+
+	states = (DEVICE_STATE.ACTIVE, DEVICE_STATE.UNPLUGGED, DEVICE_STATE.DISABLED, DEVICE_STATE.NOTPRESENT)
+	for state in states:
+		try:
+			return next(
+				device for device in _getOutputDevices(stateMask=state) if device.friendlyName == friendlyName
+			).id
+		except StopIteration:
+			# Proceed to the next device state.
+			continue
+	return None
diff --git a/source/gui/settingsDialogs.py b/source/gui/settingsDialogs.py
@@ -3041,17 +3041,15 @@ def makeSettings(self, settingsSizer: wx.BoxSizer) -> None:
 		# Translators: This is the label for the select output device combo in NVDA audio settings.
 		# Examples of an output device are default soundcard, usb headphones, etc.
 		deviceListLabelText = _("Audio output &device:")
-		# The Windows Core Audio device enumeration does not have the concept of an ID for the default output device, so we have to insert something ourselves instead.
-		# Translators: Value to show when choosing to use the default audio output device.
-		deviceNames = (_("Default output device"), *nvwave.getOutputDeviceNames())
+		self._deviceIds, deviceNames = zip(*nvwave._getOutputDevices(includeDefault=True))
 		self.deviceList = sHelper.addLabeledControl(deviceListLabelText, wx.Choice, choices=deviceNames)
 		self.bindHelpEvent("SelectSynthesizerOutputDevice", self.deviceList)
 		selectedOutputDevice = config.conf["speech"]["outputDevice"]
-		if selectedOutputDevice == "default":
+		if selectedOutputDevice == config.conf.getConfigValidation(("speech", "outputDevice")).default:
 			selection = 0
 		else:
 			try:
-				selection = deviceNames.index(selectedOutputDevice)
+				selection = self._deviceIds.index(selectedOutputDevice)
 			except ValueError:
 				selection = 0
 		self.deviceList.SetSelection(selection)
@@ -3176,10 +3174,8 @@ def _appendSoundSplitModesList(self, settingsSizerHelper: guiHelper.BoxSizerHelp
 		self.soundSplitModesList.Select(0)
 
 	def onSave(self):
-		# We already use "default" as the key in the config spec, so use it here as an alternative to Microsoft Sound Mapper.
-		selectedOutputDevice = (
-			"default" if self.deviceList.GetSelection() == 0 else self.deviceList.GetStringSelection()
-		)
+		selectedOutputDevice = self._deviceIds[self.deviceList.GetSelection()]
+		log.info(f"{selectedOutputDevice=}")
 		if config.conf["speech"]["outputDevice"] != selectedOutputDevice:
 			# Synthesizer must be reload if output device changes
 			config.conf["speech"]["outputDevice"] = selectedOutputDevice
diff --git a/source/nvwave.py b/source/nvwave.py
@@ -1,10 +1,10 @@
 # A part of NonVisual Desktop Access (NVDA)
-# Copyright (C) 2007-2023 NV Access Limited, Aleksey Sadovoy, Cyrille Bougot, Peter Vágner, Babbage B.V.,
+# Copyright (C) 2007-2024 NV Access Limited, Aleksey Sadovoy, Cyrille Bougot, Peter Vágner, Babbage B.V.,
 # Leonard de Ruijter, James Teh
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
 
-"""Provides a simple Python interface to playing audio using the Windows multimedia waveOut functions, as well as other useful utilities."""
+"""Provides a simple Python interface to playing audio using the Windows Audio Session API (WASAPI), as well as other useful utilities."""
 
 from collections.abc import Generator
 import threading
@@ -100,19 +100,40 @@ class AudioPurpose(Enum):
 	SOUNDS = auto()
 
 
-def _getOutputDevices() -> Generator[tuple[str, str]]:
-	"""Generator, yielding device ID and device Name in device ID order.
+class _AudioOutputDevice(typing.NamedTuple):  # Pairs an audio output device's identifier with its display name.
+	id: str  # Identifier string of the device.
+	friendlyName: str  # Human-readable name of the device.
+
+
+def _getOutputDevices(
+	*,
+	includeDefault: bool = False,
+	stateMask: DEVICE_STATE = DEVICE_STATE.ACTIVE,
+) -> Generator[_AudioOutputDevice]:
+	"""Generator, yielding the ID and friendly name of each audio output (render) device.
 	..note: Depending on number of devices being fetched, this may take some time (~3ms)
+
+	:param includeDefault: Whether to first yield a value representing the system default output device, defaults to False.
+		..note: The ID of this device is the config spec default for [audio][outputDevice]; it is **not** a valid mmdevice endpoint ID string, and is for internal use only.
+			The friendly name is **not** generated by the operating system, and it is highly unlikely that it will match any real output device.
+	:param stateMask: What device states to include in the resultant generator, defaults to DEVICE_STATE.ACTIVE.
+	:return: Generator of :class:`_AudioOutputDevice` containing the audio output devices on the system whose state matches ``stateMask``.
 	"""
+	if includeDefault:
+		yield _AudioOutputDevice(
+			id=typing.cast(str, config.conf.getConfigValidation(("audio", "outputDevice")).default),
+			# Translators: Value to show when choosing to use the default audio output device.
+			friendlyName=_("Default output device"),
+		)
 	endpointCollection = AudioUtilities.GetDeviceEnumerator().EnumAudioEndpoints(
 		EDataFlow.eRender.value,
-		DEVICE_STATE.ACTIVE.value,
+		stateMask.value,
 	)
 	for i in range(endpointCollection.GetCount()):
 		device = AudioUtilities.CreateDevice(endpointCollection.Item(i))
 		# This should never be None, but just to be sure
 		if device is not None:
-			yield device.id, device.FriendlyName
+			yield _AudioOutputDevice(device.id, device.FriendlyName)
 		else:
 			continue
 
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
@@ -146,6 +146,8 @@ As the NVDA update check URL is now configurable directly within NVDA, no replac
   * `gui.settingsDialogs.AdvancedPanelControls.wasapiComboBox` has been removed.
   * The `WASAPI` key has been removed from the `audio` section of the config spec.
   * The output from `nvwave.outputDeviceNameToID`, and input to `nvwave.outputDeviceIDToName` are now string identifiers.
+  * The configuration key `config.conf["speech"]["outputDevice"]` has been removed.
+    It has been replaced by `config.conf["audio"]["outputDevice"]`, which stores a Windows core audio endpoint device ID. (#17547)
 * In `NVDAObjects.window.scintilla.ScintillaTextInfo`, if no text is selected, the `collapse` method is overriden to expand to line if the `end` parameter is set to `True` (#17431, @nvdaes)
 * The following symbols have been removed with no replacement: `languageHandler.getLanguageCliArgs`, `__main__.quitGroup` and `__main__.installGroup` . (#17486, @CyrilleB79)
 * Prefix matching on command line flags, e.g. using `--di` for `--disable-addons` is no longer supported. (#11644, @CyrilleB79)