Skip to content

Commit 119fd87

Browse files
authored
fix(file search): apply filters before from/size parameters (#741)
1 parent de226a8 commit 119fd87

7 files changed

Lines changed: 167 additions & 139 deletions

File tree

docs/content.en/docs/release-notes/_index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Information about release notes of Coco Server is provided here.
1717
- feat: voice input support in both search and chat modes #732
1818

1919
### 🐛 Bug fix
20+
- fix(file search): apply filters before from/size parameters #741
2021

2122
### ✈️ Improvements
2223
- refactor: prioritize stat(2) when checking if a file is dir #737

src-tauri/Cargo.lock

Lines changed: 15 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ async-recursion = "1.1.1"
103103
zip = "4.0.0"
104104
url = "2.5.2"
105105
camino = "1.1.10"
106+
tokio-stream = { version = "0.1.17", features = ["io-util"] }
107+
cfg-if = "1.0.1"
106108

107109
[target."cfg(target_os = \"macos\")".dependencies]
108110
tauri-nspanel = { git = "https://github.com/ahkohd/tauri-nspanel", branch = "v2" }

src-tauri/src/extension/built_in/application/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ pub(crate) const PLUGIN_JSON_FILE: &str = r#"
4545
"type": "group",
4646
"enabled": true
4747
}
48-
"#;
48+
"#;

src-tauri/src/extension/built_in/file_search.rs

Lines changed: 84 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,20 @@ use crate::common::{
77
};
88
use crate::extension::OnOpened;
99
use async_trait::async_trait;
10+
use futures::stream::Stream;
11+
use futures::stream::StreamExt;
1012
use hostname;
1113
use serde::{Deserialize, Serialize};
1214
use serde_json::Value;
13-
use std::io::BufRead;
14-
use std::io::BufReader;
15+
use std::os::fd::OwnedFd;
1516
use std::path::Path;
1617
use std::sync::LazyLock;
1718
use tauri_plugin_store::StoreExt;
19+
use tokio::io::AsyncBufReadExt;
20+
use tokio::io::BufReader;
21+
use tokio::process::Child;
1822
use tokio::process::Command;
23+
use tokio_stream::wrappers::LinesStream;
1924

2025
pub(crate) const EXTENSION_ID: &str = "File Search";
2126

@@ -221,26 +226,19 @@ impl FileSearchExtensionSearchSource {
221226
FileSearchExtensionSearchSource { base_score }
222227
}
223228

224-
fn build_mdfind_query(&self, query_string: &str, config: &FileSearchConfig) -> Vec<String> {
229+
/// Return an array containing the `mdfind` command and its arguments.
230+
fn build_mdfind_query(query_string: &str, config: &FileSearchConfig) -> Vec<String> {
225231
let mut args = vec!["mdfind".to_string()];
226232

227-
// Build the query string with file type filters
228-
let mut query_parts = Vec::new();
229-
230-
// Add search criteria based on search mode
231233
match config.search_by {
232234
SearchBy::Name => {
233-
query_parts.push(format!("kMDItemFSName == '*{}*'", query_string));
235+
args.push(format!("kMDItemFSName == '*{}*'", query_string));
234236
}
235237
SearchBy::NameAndContents => {
236-
query_parts.push(format!("kMDItemTextContent == '{}'", query_string));
238+
args.push(format!("kMDItemTextContent == '{}'", query_string));
237239
}
238240
}
239241

240-
// Combine all query parts
241-
let final_query = query_parts.join(" && ");
242-
args.push(final_query);
243-
244242
// Add search paths using -onlyin
245243
for path in &config.search_paths {
246244
if Path::new(path).exists() {
@@ -251,89 +249,81 @@ impl FileSearchExtensionSearchSource {
251249
args
252250
}
253251

254-
async fn execute_mdfind_static(args: &[String], limit: usize) -> Result<Vec<String>, String> {
252+
/// Spawn the `mdfind` child process and return an async iterator over its output,
253+
/// allowing us to collect the results asynchronously.
254+
///
255+
/// # Return value:
256+
///
257+
/// * impl Stream: an async iterator that will yield the matched files
258+
/// * Child: The handle to the mdfind process, we need to kill it once we
259+
/// collect all the results to avoid zombie processes.
260+
fn execute_mdfind_query(
261+
query_string: &str,
262+
from: usize,
263+
size: usize,
264+
config: &FileSearchConfig,
265+
) -> Result<(impl Stream<Item = std::io::Result<String>>, Child), String> {
266+
let args = Self::build_mdfind_query(query_string, &config);
255267
let (rx, tx) = std::io::pipe().unwrap();
256-
let mut buffered_rx = BufReader::new(rx);
268+
let rx_owned = OwnedFd::from(rx);
269+
let async_rx = tokio::net::unix::pipe::Receiver::from_owned_fd(rx_owned).unwrap();
270+
let buffered_rx = BufReader::new(async_rx);
271+
let lines = LinesStream::new(buffered_rx.lines());
257272

258-
let mut mdfind_child_process = Command::new(&args[0])
273+
let child = Command::new(&args[0])
259274
.args(&args[1..])
260275
.stdout(tx)
261276
.stderr(std::process::Stdio::null())
262277
.spawn()
263-
.map_err(|e| format!("Failed to execute mdfind: {}", e))?;
264-
265-
let handle = tokio::task::spawn_blocking(move || {
266-
let mut file_paths = Vec::with_capacity(limit);
267-
let mut line_buffer = String::new();
268-
269-
loop {
270-
if file_paths.len() >= limit {
271-
break;
272-
}
273-
274-
let n_read = buffered_rx.read_line(&mut line_buffer).unwrap();
275-
276-
// EOF
277-
if n_read == 0 {
278-
break;
279-
}
280-
281-
// read_line() will read the tailing new-line char, trim it
282-
let trimmed = line_buffer.trim_end();
283-
file_paths.push(trimmed.to_string());
284-
line_buffer.clear();
285-
}
286-
287-
file_paths
288-
});
289-
290-
let file_paths = handle.await.map_err(|e| format!("{:?}", e))?;
291-
292-
mdfind_child_process.kill().await.unwrap();
278+
.map_err(|e| format!("Failed to spawn mdfind: {}", e))?;
279+
let config_clone = config.clone();
280+
let iter = lines
281+
.filter(move |res_path| {
282+
std::future::ready({
283+
match res_path {
284+
Ok(path) => !Self::should_be_filtered_out(&config_clone, path),
285+
Err(_) => {
286+
// Don't filter out Err() values
287+
true
288+
}
289+
}
290+
})
291+
})
292+
.skip(from)
293+
.take(size);
293294

294-
Ok(file_paths)
295+
Ok((iter, child))
295296
}
296297

297-
fn apply_exclude_path_and_file_type_filters(
298-
results: Vec<String>,
299-
config: &FileSearchConfig,
300-
) -> Vec<String> {
301-
let mut filtered_results = Vec::new();
302-
303-
for path in results {
304-
// Check if path should be excluded
305-
let is_excluded = config
306-
.exclude_paths
307-
.iter()
308-
.any(|exclude_path| path.starts_with(exclude_path));
309-
310-
if is_excluded {
311-
continue;
312-
}
298+
/// If `file_path` should be removed from the search results given the filter
299+
/// conditions specified in `config`.
300+
fn should_be_filtered_out(config: &FileSearchConfig, file_path: &str) -> bool {
301+
let is_excluded = config
302+
.exclude_paths
303+
.iter()
304+
.any(|exclude_path| file_path.starts_with(exclude_path));
313305

314-
// Check file type filter
315-
let matches_file_type = if config.file_types.is_empty() {
316-
true
317-
} else {
318-
let path_obj = camino::Utf8Path::new(&path);
319-
if let Some(extension) = path_obj.extension() {
320-
config
321-
.file_types
322-
.iter()
323-
.any(|file_type| file_type == extension)
324-
} else {
325-
false
326-
}
327-
};
306+
if is_excluded {
307+
return true;
308+
}
328309

329-
if !matches_file_type {
330-
continue;
310+
let matches_file_type = if config.file_types.is_empty() {
311+
true
312+
} else {
313+
let path_obj = camino::Utf8Path::new(&file_path);
314+
if let Some(extension) = path_obj.extension() {
315+
config
316+
.file_types
317+
.iter()
318+
.any(|file_type| file_type == extension)
319+
} else {
320+
// `config.file_types` is not empty, then the search results
321+
// should have extensions.
322+
false
331323
}
324+
};
332325

333-
filtered_results.push(path);
334-
}
335-
336-
filtered_results
326+
!matches_file_type
337327
}
338328
}
339329

@@ -388,19 +378,17 @@ impl SearchSource for FileSearchExtensionSearchSource {
388378
// Execute search in a blocking task
389379
let query_source = self.get_type();
390380
let base_score = self.base_score;
391-
let mdfind_args = self.build_mdfind_query(query_string, &config);
392381

393-
let search_results = Self::execute_mdfind_static(&mdfind_args, from + size)
394-
.await
395-
.map_err(SearchError::InternalError)?;
396-
397-
// Filter out excluded paths
398-
let filtered_results =
399-
Self::apply_exclude_path_and_file_type_filters(search_results, &config);
382+
let (mut iter, mut mdfind_child_process) =
383+
Self::execute_mdfind_query(&query_string, from, size, &config)
384+
.map_err(SearchError::InternalError)?;
400385

401386
// Convert results to documents
402387
let mut hits: Vec<(Document, f64)> = Vec::new();
403-
for file_path in filtered_results.into_iter().skip(from).take(size) {
388+
while let Some(res_file_path) = iter.next().await {
389+
let file_path =
390+
res_file_path.map_err(|io_err| SearchError::InternalError(io_err.to_string()))?;
391+
404392
let file_type = get_file_type(&file_path).await;
405393
let icon = type_to_icon(file_type);
406394
let file_path_of_type_path = camino::Utf8Path::new(&file_path);
@@ -442,6 +430,10 @@ impl SearchSource for FileSearchExtensionSearchSource {
442430

443431
hits.push((doc, base_score));
444432
}
433+
mdfind_child_process
434+
.kill()
435+
.await
436+
.map_err(|e| SearchError::InternalError(format!("{:?}", e)))?;
445437

446438
let total_hits = hits.len();
447439
Ok(QueryResponse {

0 commit comments

Comments
 (0)