|
| 1 | +//! File system powered by GNOME's Tracker engine. |
| 2 | +
|
| 3 | +use super::super::EXTENSION_ID; |
| 4 | +use super::super::config::FileSearchConfig; |
| 5 | +use super::should_be_filtered_out; |
| 6 | +use crate::common::document::DataSourceReference; |
| 7 | +use crate::extension::LOCAL_QUERY_SOURCE_TYPE; |
| 8 | +use crate::util::file::sync_get_file_icon; |
| 9 | +use crate::{ |
| 10 | + common::document::{Document, OnOpened}, |
| 11 | + extension::built_in::file_search::config::SearchBy, |
| 12 | +}; |
| 13 | +use gio::Cancellable; |
| 14 | +use tracker::{SparqlConnection, SparqlCursor, prelude::SparqlCursorExtManual}; |
| 15 | + |
| 16 | +/// The service that we will connect to. |
| 17 | +const SERVICE_NAME: &str = "org.freedesktop.Tracker3.Miner.Files"; |
| 18 | + |
| 19 | +/// Tracker won't return scores when we are not using full-text search. In that |
| 20 | +/// case, we use this score. |
| 21 | +const SCORE: f64 = 1.0; |
| 22 | + |
| 23 | +/// Helper function to return different SPARQL queries depending on the different configurations. |
| 24 | +fn query_sparql(query_string: &str, config: &FileSearchConfig) -> String { |
| 25 | + match config.search_by { |
| 26 | + SearchBy::Name => { |
| 27 | + // Cannot use the inverted index as that searches for all the attributes, |
| 28 | + // but we only want to search the filename. |
| 29 | + format!( |
| 30 | + "SELECT nie:url(?file_item) WHERE {{ ?file_item nfo:fileName ?fileName . FILTER(regex(?fileName, '{query_string}', 'i')) }}" |
| 31 | + ) |
| 32 | + } |
| 33 | + SearchBy::NameAndContents => { |
| 34 | + // Full-text search against all attributes |
| 35 | + // OR |
| 36 | + // filename search |
| 37 | + format!( |
| 38 | + "SELECT nie:url(?file_item) fts:rank(?file_item) WHERE {{ {{ ?file_item fts:match '{query_string}' }} UNION {{ ?file_item nfo:fileName ?fileName . FILTER(regex(?fileName, '{query_string}', 'i')) }} }} ORDER BY DESC fts:rank(?file_item)" |
| 39 | + ) |
| 40 | + } |
| 41 | + } |
| 42 | +} |
| 43 | + |
/// Helper function to replace unsupported characters with whitespace.
///
/// The cleaned string ends up between single quotes in the SPARQL statement.
/// The SPARQL grammar does not allow a single quote, a backslash, a line feed
/// or a carriage return to appear unescaped in such a literal, and Tracker
/// will error out if it encounters them.
///
/// No escaping is attempted: every such character is replaced with one space,
/// so the result always has as many characters as the input.
fn query_string_cleanup(old: &str) -> String {
    const UNSUPPORTED_CHAR: [char; 4] = ['\'', '\n', '\r', '\\'];

    old.chars()
        .map(|ch| {
            if UNSUPPORTED_CHAR.contains(&ch) {
                ' '
            } else {
                ch
            }
        })
        .collect()
}
| 65 | + |
| 66 | +struct Query { |
| 67 | + conn: SparqlConnection, |
| 68 | + cursor: SparqlCursor, |
| 69 | +} |
| 70 | + |
| 71 | +impl Query { |
| 72 | + fn new(query_string: &str, config: &FileSearchConfig) -> Result<Self, String> { |
| 73 | + let query_string = query_string_cleanup(query_string); |
| 74 | + let sparql = query_sparql(&query_string, config); |
| 75 | + let conn = |
| 76 | + SparqlConnection::bus_new(SERVICE_NAME, None, None).map_err(|e| e.to_string())?; |
| 77 | + let cursor = conn |
| 78 | + .query(&sparql, Cancellable::NONE) |
| 79 | + .map_err(|e| e.to_string())?; |
| 80 | + |
| 81 | + Ok(Self { conn, cursor }) |
| 82 | + } |
| 83 | +} |
| 84 | + |
| 85 | +impl Drop for Query { |
| 86 | + fn drop(&mut self) { |
| 87 | + self.cursor.close(); |
| 88 | + self.conn.close(); |
| 89 | + } |
| 90 | +} |
| 91 | + |
| 92 | +impl Iterator for Query { |
| 93 | + /// It yields a tuple `(file path, score)` |
| 94 | + type Item = Result<(String, f64), String>; |
| 95 | + |
| 96 | + fn next(&mut self) -> Option<Self::Item> { |
| 97 | + loop { |
| 98 | + let has_next = match self |
| 99 | + .cursor |
| 100 | + .next(Cancellable::NONE) |
| 101 | + .map_err(|e| e.to_string()) |
| 102 | + { |
| 103 | + Ok(has_next) => has_next, |
| 104 | + Err(err_str) => return Some(Err(err_str)), |
| 105 | + }; |
| 106 | + |
| 107 | + if !has_next { |
| 108 | + return None; |
| 109 | + } |
| 110 | + |
| 111 | + // The first column is the URL |
| 112 | + let file_url_column = self.cursor.string(0); |
| 113 | + // It could be None (or NULL ptr if you use C), I have no clue why. |
| 114 | + let opt_str = file_url_column.as_ref().map(|gstr| gstr.as_str()); |
| 115 | + |
| 116 | + match opt_str { |
| 117 | + Some(url) => { |
| 118 | + // The returned URL has a prefix that we need to trim |
| 119 | + const PREFIX: &str = "file://"; |
| 120 | + const PREFIX_LEN: usize = PREFIX.len(); |
| 121 | + |
| 122 | + let file_path = url[PREFIX_LEN..].to_string(); |
| 123 | + assert!(!file_path.is_empty()); |
| 124 | + assert_ne!(file_path, "/", "file search should not hit the root path"); |
| 125 | + |
| 126 | + let score = { |
| 127 | + // The second column is the score, this column may not |
| 128 | + // exist. We use SCORE if the real value is absent. |
| 129 | + let score_column = self.cursor.string(1); |
| 130 | + let opt_score_str = score_column.as_ref().map(|g_str| g_str.as_str()); |
| 131 | + let opt_score = opt_score_str.map(|str| { |
| 132 | + str.parse::<f64>() |
| 133 | + .expect("score should be valid for type f64") |
| 134 | + }); |
| 135 | + |
| 136 | + opt_score.unwrap_or(SCORE) |
| 137 | + }; |
| 138 | + |
| 139 | + return Some(Ok((file_path, score))); |
| 140 | + } |
| 141 | + None => { |
| 142 | + // another try |
| 143 | + continue; |
| 144 | + } |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | +} |
| 149 | + |
| 150 | +pub(crate) async fn hits( |
| 151 | + query_string: &str, |
| 152 | + from: usize, |
| 153 | + size: usize, |
| 154 | + config: &FileSearchConfig, |
| 155 | +) -> Result<Vec<(Document, f64)>, String> { |
| 156 | + // Special cases that will make querying faster. |
| 157 | + if query_string.is_empty() || size == 0 || config.search_paths.is_empty() { |
| 158 | + return Ok(Vec::new()); |
| 159 | + } |
| 160 | + |
| 161 | + let mut result_hits = Vec::with_capacity(size); |
| 162 | + |
| 163 | + let need_to_skip = { |
| 164 | + if matches!(config.search_by, SearchBy::Name) { |
| 165 | + // We don't use full-text search in this case, the returned documents |
| 166 | + // won't be scored, the query hits won't be sorted, so processing the |
| 167 | + // from parameter is meaningless. |
| 168 | + false |
| 169 | + } else { |
| 170 | + from > 0 |
| 171 | + } |
| 172 | + }; |
| 173 | + let mut num_skipped = 0; |
| 174 | + let should_skip = from; |
| 175 | + |
| 176 | + let query = Query::new(query_string, config)?; |
| 177 | + for res_entry in query { |
| 178 | + let (file_path, score) = res_entry?; |
| 179 | + |
| 180 | + // This should be called before processing the `from` parameter. |
| 181 | + if should_be_filtered_out(config, &file_path, true, true, true) { |
| 182 | + continue; |
| 183 | + } |
| 184 | + |
| 185 | + // Process the `from` parameter. |
| 186 | + if need_to_skip && num_skipped < should_skip { |
| 187 | + // Skip this |
| 188 | + num_skipped += 1; |
| 189 | + continue; |
| 190 | + } |
| 191 | + |
| 192 | + let icon = sync_get_file_icon(&file_path); |
| 193 | + let file_path_of_type_path = camino::Utf8Path::new(&file_path); |
| 194 | + let r#where = file_path_of_type_path |
| 195 | + .parent() |
| 196 | + .unwrap_or_else(|| { |
| 197 | + panic!( |
| 198 | + "expect path [{}] to have a parent, but it does not", |
| 199 | + file_path |
| 200 | + ); |
| 201 | + }) |
| 202 | + .to_string(); |
| 203 | + |
| 204 | + let file_name = file_path_of_type_path.file_name().unwrap_or_else(|| { |
| 205 | + panic!( |
| 206 | + "expect path [{}] to have a file name, but it does not", |
| 207 | + file_path |
| 208 | + ); |
| 209 | + }); |
| 210 | + let on_opened = OnOpened::Document { |
| 211 | + url: file_path.to_string(), |
| 212 | + }; |
| 213 | + |
| 214 | + let doc = Document { |
| 215 | + id: file_path.to_string(), |
| 216 | + title: Some(file_name.to_string()), |
| 217 | + source: Some(DataSourceReference { |
| 218 | + r#type: Some(LOCAL_QUERY_SOURCE_TYPE.into()), |
| 219 | + name: Some(EXTENSION_ID.into()), |
| 220 | + id: Some(EXTENSION_ID.into()), |
| 221 | + icon: Some(String::from("font_Filesearch")), |
| 222 | + }), |
| 223 | + category: Some(r#where), |
| 224 | + on_opened: Some(on_opened), |
| 225 | + url: Some(file_path), |
| 226 | + icon: Some(icon.to_string()), |
| 227 | + ..Default::default() |
| 228 | + }; |
| 229 | + |
| 230 | + result_hits.push((doc, score)); |
| 231 | + |
| 232 | + // Collected enough documents, return |
| 233 | + if result_hits.len() >= size { |
| 234 | + break; |
| 235 | + } |
| 236 | + } |
| 237 | + |
| 238 | + Ok(result_hits) |
| 239 | +} |
| 240 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Input without unsupported characters is returned unchanged.
    #[test]
    fn test_query_string_cleanup_basic() {
        assert_eq!(query_string_cleanup("test"), "test");
        assert_eq!(query_string_cleanup("hello world"), "hello world");
        assert_eq!(query_string_cleanup("file.txt"), "file.txt");
    }

    /// Each unsupported character is replaced with exactly one space.
    #[test]
    fn test_query_string_cleanup_unsupported_chars() {
        assert_eq!(query_string_cleanup("test'file"), "test file");
        assert_eq!(query_string_cleanup("test\nfile"), "test file");
        assert_eq!(query_string_cleanup("test\\file"), "test file");
    }

    /// Several unsupported characters in one input are all replaced.
    #[test]
    fn test_query_string_cleanup_multiple_unsupported() {
        assert_eq!(query_string_cleanup("test'file\nname"), "test file name");
        assert_eq!(query_string_cleanup("test\'file"), "test file");
        // Two replaced characters give two leading spaces. The count is
        // spelled out so it cannot be lost to whitespace normalization.
        assert_eq!(
            query_string_cleanup("\n'test"),
            format!("{}test", " ".repeat(2))
        );
    }

    /// Empty input and inputs made only of unsupported characters.
    #[test]
    fn test_query_string_cleanup_edge_cases() {
        assert_eq!(query_string_cleanup(""), "");
        assert_eq!(query_string_cleanup("'"), " ");
        assert_eq!(query_string_cleanup("\n"), " ");
        assert_eq!(query_string_cleanup("\\"), " ");
        // Space, quote, line feed, backslash, space: five characters in, five
        // spaces out.
        assert_eq!(query_string_cleanup(" '\n\\ "), " ".repeat(5));
    }

    /// Unsupported characters mixed into ordinary text.
    #[test]
    fn test_query_string_cleanup_mixed_content() {
        assert_eq!(
            query_string_cleanup("document's content\nwith\\backslash"),
            "document s content with backslash"
        );
        assert_eq!(
            query_string_cleanup("path/to'file\nextension\\test"),
            "path/to file extension test"
        );
    }
}
0 commit comments