Skip to content

Commit de3c78a

Browse files
authored
feat: file search for Linux/GNOME (#884)
This commit implements the file search extension for Linux with the GNOME desktop environment by employing Tracker, the engine that powers GNOME's desktop search. It also fixes an edge-case bug in which the search path and exclude path configuration entries did not work correctly. For example, say I set the search path to ["~/Documents"] and I have a file named "Documents_foobarbuzz" under my home directory: this file is not in the specified search path, but Coco would still return it because we verified path membership by checking the string prefix. Claude Code found this when I asked it to write unit tests. Thanks to both the tests and Claude Code.
1 parent eafa704 commit de3c78a

13 files changed

Lines changed: 754 additions & 110 deletions

File tree

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
if: startsWith(matrix.platform, 'ubuntu-22.04')
105105
run: |
106106
sudo apt-get update
107-
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
107+
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils libtracker-sparql-3.0-dev
108108
109109
- name: Add Rust build target
110110
working-directory: src-tauri

.github/workflows/rust_code_check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
if: startsWith(matrix.platform, 'ubuntu-latest')
3131
run: |
3232
sudo apt-get update
33-
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
33+
sudo apt-get install -y libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils libtracker-sparql-3.0-dev
3434
3535
- name: Add pizza engine as a dependency
3636
working-directory: src-tauri

docs/content.en/docs/release-notes/_index.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ Information about release notes of Coco App is provided here.
2222
- feat: impl extension settings 'hide_before_open' #862
2323
- feat: index both en/zh_CN app names and show app name in chosen language #875
2424
- feat: support context menu in debug mode #882
25+
- feat: file search for Linux/GNOME #884
26+
2527

2628
### 🐛 Bug fix
2729

src-tauri/Cargo.lock

Lines changed: 32 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ tauri-plugin-prevent-default = "1"
113113
[target."cfg(target_os = \"macos\")".dependencies]
114114
tauri-nspanel = { git = "https://github.com/ahkohd/tauri-nspanel", branch = "v2" }
115115

116+
117+
[target."cfg(target_os = \"linux\")".dependencies]
118+
gio = "0.20.12"
119+
tracker-rs = "0.6.1"
120+
116121
[target."cfg(any(target_os = \"macos\", windows, target_os = \"linux\"))".dependencies]
117122
tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }
118123

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
//! File search powered by GNOME's Tracker engine.
2+
3+
use super::super::EXTENSION_ID;
4+
use super::super::config::FileSearchConfig;
5+
use super::should_be_filtered_out;
6+
use crate::common::document::DataSourceReference;
7+
use crate::extension::LOCAL_QUERY_SOURCE_TYPE;
8+
use crate::util::file::sync_get_file_icon;
9+
use crate::{
10+
common::document::{Document, OnOpened},
11+
extension::built_in::file_search::config::SearchBy,
12+
};
13+
use gio::Cancellable;
14+
use tracker::{SparqlConnection, SparqlCursor, prelude::SparqlCursorExtManual};
15+
16+
/// Bus name of the Tracker service that the SPARQL connection is opened
/// against.
const SERVICE_NAME: &str = "org.freedesktop.Tracker3.Miner.Files";

/// Fallback score for hits that carry no rank.
///
/// Tracker only returns scores for full-text search; every hit that arrives
/// without one is reported with this constant instead.
const SCORE: f64 = 1.0;
22+
23+
/// Helper function to return different SPARQL queries depending on the different configurations.
24+
fn query_sparql(query_string: &str, config: &FileSearchConfig) -> String {
25+
match config.search_by {
26+
SearchBy::Name => {
27+
// Cannot use the inverted index as that searches for all the attributes,
28+
// but we only want to search the filename.
29+
format!(
30+
"SELECT nie:url(?file_item) WHERE {{ ?file_item nfo:fileName ?fileName . FILTER(regex(?fileName, '{query_string}', 'i')) }}"
31+
)
32+
}
33+
SearchBy::NameAndContents => {
34+
// Full-text search against all attributes
35+
// OR
36+
// filename search
37+
format!(
38+
"SELECT nie:url(?file_item) fts:rank(?file_item) WHERE {{ {{ ?file_item fts:match '{query_string}' }} UNION {{ ?file_item nfo:fileName ?fileName . FILTER(regex(?fileName, '{query_string}', 'i')) }} }} ORDER BY DESC fts:rank(?file_item)"
39+
)
40+
}
41+
}
42+
}
43+
44+
/// Sanitizes user input before it gets embedded in a SPARQL query.
///
/// Every single quote, line feed and backslash is swapped for one space
/// character, because Tracker errors out when it encounters them.
///
/// Neither the full set of problematic characters nor a way to escape them is
/// known, hence the replacement.
fn query_string_cleanup(old: &str) -> String {
    const UNSUPPORTED_CHAR: [char; 3] = ['\'', '\n', '\\'];

    old.chars()
        .map(|ch| {
            if UNSUPPORTED_CHAR.contains(&ch) {
                ' '
            } else {
                ch
            }
        })
        .collect()
}
65+
66+
struct Query {
67+
conn: SparqlConnection,
68+
cursor: SparqlCursor,
69+
}
70+
71+
impl Query {
72+
fn new(query_string: &str, config: &FileSearchConfig) -> Result<Self, String> {
73+
let query_string = query_string_cleanup(query_string);
74+
let sparql = query_sparql(&query_string, config);
75+
let conn =
76+
SparqlConnection::bus_new(SERVICE_NAME, None, None).map_err(|e| e.to_string())?;
77+
let cursor = conn
78+
.query(&sparql, Cancellable::NONE)
79+
.map_err(|e| e.to_string())?;
80+
81+
Ok(Self { conn, cursor })
82+
}
83+
}
84+
85+
impl Drop for Query {
86+
fn drop(&mut self) {
87+
self.cursor.close();
88+
self.conn.close();
89+
}
90+
}
91+
92+
impl Iterator for Query {
93+
/// It yields a tuple `(file path, score)`
94+
type Item = Result<(String, f64), String>;
95+
96+
fn next(&mut self) -> Option<Self::Item> {
97+
loop {
98+
let has_next = match self
99+
.cursor
100+
.next(Cancellable::NONE)
101+
.map_err(|e| e.to_string())
102+
{
103+
Ok(has_next) => has_next,
104+
Err(err_str) => return Some(Err(err_str)),
105+
};
106+
107+
if !has_next {
108+
return None;
109+
}
110+
111+
// The first column is the URL
112+
let file_url_column = self.cursor.string(0);
113+
// It could be None (or NULL ptr if you use C), I have no clue why.
114+
let opt_str = file_url_column.as_ref().map(|gstr| gstr.as_str());
115+
116+
match opt_str {
117+
Some(url) => {
118+
// The returned URL has a prefix that we need to trim
119+
const PREFIX: &str = "file://";
120+
const PREFIX_LEN: usize = PREFIX.len();
121+
122+
let file_path = url[PREFIX_LEN..].to_string();
123+
assert!(!file_path.is_empty());
124+
assert_ne!(file_path, "/", "file search should not hit the root path");
125+
126+
let score = {
127+
// The second column is the score, this column may not
128+
// exist. We use SCORE if the real value is absent.
129+
let score_column = self.cursor.string(1);
130+
let opt_score_str = score_column.as_ref().map(|g_str| g_str.as_str());
131+
let opt_score = opt_score_str.map(|str| {
132+
str.parse::<f64>()
133+
.expect("score should be valid for type f64")
134+
});
135+
136+
opt_score.unwrap_or(SCORE)
137+
};
138+
139+
return Some(Ok((file_path, score)));
140+
}
141+
None => {
142+
// another try
143+
continue;
144+
}
145+
}
146+
}
147+
}
148+
}
149+
150+
pub(crate) async fn hits(
151+
query_string: &str,
152+
from: usize,
153+
size: usize,
154+
config: &FileSearchConfig,
155+
) -> Result<Vec<(Document, f64)>, String> {
156+
// Special cases that will make querying faster.
157+
if query_string.is_empty() || size == 0 || config.search_paths.is_empty() {
158+
return Ok(Vec::new());
159+
}
160+
161+
let mut result_hits = Vec::with_capacity(size);
162+
163+
let need_to_skip = {
164+
if matches!(config.search_by, SearchBy::Name) {
165+
// We don't use full-text search in this case, the returned documents
166+
// won't be scored, the query hits won't be sorted, so processing the
167+
// from parameter is meaningless.
168+
false
169+
} else {
170+
from > 0
171+
}
172+
};
173+
let mut num_skipped = 0;
174+
let should_skip = from;
175+
176+
let query = Query::new(query_string, config)?;
177+
for res_entry in query {
178+
let (file_path, score) = res_entry?;
179+
180+
// This should be called before processing the `from` parameter.
181+
if should_be_filtered_out(config, &file_path, true, true, true) {
182+
continue;
183+
}
184+
185+
// Process the `from` parameter.
186+
if need_to_skip && num_skipped < should_skip {
187+
// Skip this
188+
num_skipped += 1;
189+
continue;
190+
}
191+
192+
let icon = sync_get_file_icon(&file_path);
193+
let file_path_of_type_path = camino::Utf8Path::new(&file_path);
194+
let r#where = file_path_of_type_path
195+
.parent()
196+
.unwrap_or_else(|| {
197+
panic!(
198+
"expect path [{}] to have a parent, but it does not",
199+
file_path
200+
);
201+
})
202+
.to_string();
203+
204+
let file_name = file_path_of_type_path.file_name().unwrap_or_else(|| {
205+
panic!(
206+
"expect path [{}] to have a file name, but it does not",
207+
file_path
208+
);
209+
});
210+
let on_opened = OnOpened::Document {
211+
url: file_path.to_string(),
212+
};
213+
214+
let doc = Document {
215+
id: file_path.to_string(),
216+
title: Some(file_name.to_string()),
217+
source: Some(DataSourceReference {
218+
r#type: Some(LOCAL_QUERY_SOURCE_TYPE.into()),
219+
name: Some(EXTENSION_ID.into()),
220+
id: Some(EXTENSION_ID.into()),
221+
icon: Some(String::from("font_Filesearch")),
222+
}),
223+
category: Some(r#where),
224+
on_opened: Some(on_opened),
225+
url: Some(file_path),
226+
icon: Some(icon.to_string()),
227+
..Default::default()
228+
};
229+
230+
result_hits.push((doc, score));
231+
232+
// Collected enough documents, return
233+
if result_hits.len() >= size {
234+
break;
235+
}
236+
}
237+
238+
Ok(result_hits)
239+
}
240+
241+
/// Unit tests for `query_string_cleanup`.
///
/// Each unsupported character is replaced by exactly one space, so the
/// expected strings keep the character count of their inputs.
#[cfg(test)]
mod tests {
    use super::*;

    /// Input without unsupported characters is returned unchanged.
    #[test]
    fn test_query_string_cleanup_basic() {
        assert_eq!(query_string_cleanup("test"), "test");
        assert_eq!(query_string_cleanup("hello world"), "hello world");
        assert_eq!(query_string_cleanup("file.txt"), "file.txt");
    }

    /// Each of the three unsupported characters becomes a space.
    #[test]
    fn test_query_string_cleanup_unsupported_chars() {
        assert_eq!(query_string_cleanup("test'file"), "test file");
        assert_eq!(query_string_cleanup("test\nfile"), "test file");
        assert_eq!(query_string_cleanup("test\\file"), "test file");
    }

    /// Several unsupported characters in one input are all replaced.
    #[test]
    fn test_query_string_cleanup_multiple_unsupported() {
        assert_eq!(query_string_cleanup("test'file\nname"), "test file name");
        assert_eq!(query_string_cleanup("test\'file"), "test file");
        // Two adjacent unsupported characters give two spaces.
        assert_eq!(query_string_cleanup("\n'test"), "  test");
    }

    /// Empty input and inputs made only of unsupported characters.
    #[test]
    fn test_query_string_cleanup_edge_cases() {
        assert_eq!(query_string_cleanup(""), "");
        assert_eq!(query_string_cleanup("'"), " ");
        assert_eq!(query_string_cleanup("\n"), " ");
        assert_eq!(query_string_cleanup("\\"), " ");
        // Space + three unsupported characters + space: five spaces in total.
        assert_eq!(query_string_cleanup(" '\n\\ "), "     ");
    }

    /// Unsupported characters mixed into ordinary text.
    #[test]
    fn test_query_string_cleanup_mixed_content() {
        assert_eq!(
            query_string_cleanup("document's content\nwith\\backslash"),
            "document s content with backslash"
        );
        assert_eq!(
            query_string_cleanup("path/to'file\nextension\\test"),
            "path/to file extension test"
        );
    }
}

0 commit comments

Comments
 (0)