Skip to content

Commit 62563dc

Browse files
committed
feat: 新增中华医学服务,重构翻译接口
服务层:
- 新增中华医学(Yiigle)文献搜索与翻译服务
- 重构 ScrapeService.translate() 接口,改为接收 (searchResult, libraryID),返回 Zotero.Item[]
- 新增基于 string-similarity 的搜索结果过滤
- 集中化条目后处理逻辑,globalItemFix 支持保留分类集合
- 暂时禁用 PubScholar 服务

工具层:
- requestDocument 新增 cookieSandbox 参数支持
- TaskRunner 新增 runningTask 属性追踪当前运行任务
- 对外暴露 requestDocument API

配置:
- Zotero 最低版本要求提升至 7.999
- 注释掉进度窗口 beforeunload 时清空 cookie 的逻辑
1 parent 9e1288d commit 62563dc

File tree

10 files changed

+266
-114
lines changed

10 files changed

+266
-114
lines changed

addon/chrome/content/progress.xhtml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -382,9 +382,9 @@ href="chrome://zotero-platform/content/zotero.css" type="text/css"?>
382382
});
383383
}
384384

385-
window.addEventListener("beforeunload", (e) => {
386-
Zotero.Jasminum.data.myCookieSandbox._CNKIHomeCookieBox = null;
387-
});
385+
// window.addEventListener("beforeunload", (e) => {
386+
// Zotero.Jasminum.data.myCookieSandbox._CNKIHomeCookieBox = null;
387+
// });
388388

389389
// 模拟数据
390390
const tasks = [

addon/manifest.json

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,20 @@
11
{
2-
"manifest_version": 2,
3-
"name": "__addonName__",
4-
"version": "__buildVersion__",
5-
"description": "__description__",
6-
"homepage_url": "__homepage__",
7-
"author": "__author__",
8-
"icons": {
9-
"48": "chrome/content/icons/icon@0.5x.png",
10-
"96": "chrome/content/icons/icon.png"
11-
},
12-
"applications": {
13-
"zotero": {
14-
"id": "__addonID__",
15-
"update_url": "__updateURL__",
16-
"strict_min_version": "6.999",
17-
"strict_max_version": "8.*.*"
2+
"manifest_version": 2,
3+
"name": "__addonName__",
4+
"version": "__buildVersion__",
5+
"description": "__description__",
6+
"homepage_url": "__homepage__",
7+
"author": "__author__",
8+
"icons": {
9+
"48": "chrome/content/icons/icon@0.5x.png",
10+
"96": "chrome/content/icons/icon.png"
11+
},
12+
"applications": {
13+
"zotero": {
14+
"id": "__addonID__",
15+
"update_url": "__updateURL__",
16+
"strict_min_version": "7.999",
17+
"strict_max_version": "8.*.*"
18+
}
1819
}
19-
}
2020
}

src/addon.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ import { VirtualizedTableHelper } from "zotero-plugin-toolkit";
55
import { MyCookieSandbox } from "./utils/cookiebox";
66
import { getOutlineFromPDF } from "./modules/outline/outline";
77
import { TaskRunner } from "./utils/task";
8+
import { requestDocument } from "./utils/http";
89

910
class Addon {
1011
public data: {
@@ -53,7 +54,7 @@ class Addon {
5354
isImportingAttachments: false,
5455
};
5556
this.hooks = hooks;
56-
this.api = { getOutlineFromPDF };
57+
this.api = { getOutlineFromPDF, requestDocument };
5758
this.taskRunner = new TaskRunner();
5859
}
5960
}

src/modules/services/cnki.ts

Lines changed: 13 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -367,6 +367,7 @@ export class CNKI implements ScrapeService {
367367
return {
368368
source: "CNKI",
369369
title: title,
370+
articleTitle: dt.innerText("td.name a"),
370371
url: url,
371372
date: Zotero.Date.strToISO(dt.innerText("td.date")) || "",
372373
netFirst: dt.innerText("td.name > b.marktip"),
@@ -381,12 +382,11 @@ export class CNKI implements ScrapeService {
381382
}
382383

383384
async translate(
384-
task: ScraperTask,
385+
searchResult: ScrapeSearchResult,
386+
libraryID: number,
385387
saveAttachments: false,
386-
): Promise<Zotero.Item | null> {
387-
let item: Zotero.Item | null = null;
388+
): Promise<Zotero.Item[]> {
388389
let translatedItems: Zotero.Item[] = [];
389-
const searchResult = task.searchResults![task.resultIndex!];
390390
let isWebTranslated = true;
391391
try {
392392
const doc = await requestDocument(searchResult.url, {
@@ -396,8 +396,9 @@ export class CNKI implements ScrapeService {
396396
Referer: "https://kns.cnki.net/kns8s/AdvSearch",
397397
"Accept-Language": "zh-CN,en-US;q=0.7,en;q=0.3",
398398
"User-Agent":
399-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0",
399+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:147.0) Gecko/20100101 Firefox/147.0",
400400
},
401+
cookieSandbox: await addon.data.myCookieSandbox.getCNKIHomeCookieBox(),
401402
});
402403
ztoolkit.log(`Document title: ${doc.title}`);
403404
if (doc.title != "知网节超时验证" && doc.title != "captcha") {
@@ -408,15 +409,15 @@ export class CNKI implements ScrapeService {
408409
translator.setTranslator("5c95b67b-41c5-4f55-b71a-48d5d7183063");
409410
translator.setDocument(doc);
410411
translatedItems = await translator.translate({
411-
libraryID: task.item.libraryID,
412+
libraryID: libraryID,
412413
saveAttachments: saveAttachments,
413414
});
414415
} else {
415416
isWebTranslated = false;
416417
}
417418
} catch (e) {
418419
ztoolkit.log(`CNKI web translation failed: ${e}`);
419-
task.addMsg(`CNKI web translation failed: ${e}`);
420+
addon.taskRunner.runningTask?.addMsg(`CNKI web translation failed: ${e}`);
420421
isWebTranslated = false;
421422
}
422423

@@ -427,36 +428,23 @@ export class CNKI implements ScrapeService {
427428
const refworksText = await getRefworksText(searchResult);
428429
if (!refworksText) {
429430
ztoolkit.log("CNKI reference text is null.");
430-
task.addMsg("CNKI reference text is null.");
431-
return null;
431+
addon.taskRunner.runningTask?.addMsg("CNKI reference text is null.");
432+
return [];
432433
}
433434
ztoolkit.log("Formated Refworks text: ", refworksText);
434435
const translate = new Zotero.Translate.Import();
435436
translate.setTranslator("7b6b135a-ed39-4d90-8e38-65516671c5bc");
436437
translate.setString(refworksText);
437438
translatedItems = await translate.translate({
438-
libraryID: task.item.libraryID,
439+
libraryID: libraryID,
439440
saveAttachments: false,
440441
});
441442
} catch (e) {
442443
ztoolkit.log(`CNKI refwork translation failed: ${e}`);
443-
task.addMsg(`CNKI refwork translation failed: ${e}`);
444+
throw `CNKI refwork translation failed: ${e}`;
444445
}
445446
}
446-
447-
if (translatedItems.length > 1) {
448-
ztoolkit.log("Wired and Additional Items Appear.");
449-
task.addMsg("Wired! More than one item after tranlsation.");
450-
return null;
451-
} else if (translatedItems.length == 1) {
452-
item = translatedItems[0];
453-
task.item.getCollections().forEach((cid) => item!.addToCollection(cid));
454-
return updateItem(item, searchResult);
455-
} else {
456-
ztoolkit.log("CNKI service translated item is null.");
457-
task.addMsg("CNKI service translated item is null.");
458-
return null;
459-
}
447+
return translatedItems;
460448
}
461449

462450
// CNKI webpage item or snapshot item.

src/modules/services/index.ts

Lines changed: 116 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,14 @@ import { getPref } from "../../utils/prefs";
44
import { ScraperTask } from "../../utils/task";
55
import { isChineseTopAttachment, isChinsesSnapshot } from "../../utils/detect";
66
import { CNKI } from "./cnki";
7-
import { PubScholar } from "./pubscholar";
7+
// import { PubScholar } from "./pubscholar";
8+
import { Yiigle } from "./yiigle";
9+
import { compareTwoStrings } from "string-similarity";
810

911
const cnki = new CNKI();
10-
const pubscholar = new PubScholar();
12+
// const pubscholar = new PubScholar();
13+
const yiigle = new Yiigle();
14+
1115
async function getSearchOption(
1216
item: Zotero.Item,
1317
): Promise<SearchOption | null> {
@@ -58,15 +62,40 @@ export async function metaSearch(
5862
task.addMsg(`Found ${cnkiSearchResult.length} results from CNKI`);
5963
scrapeSearchResults = scrapeSearchResults.concat(cnkiSearchResult);
6064
}
61-
const pubscholarSearchResult = await pubscholar.search(searchOption);
62-
ztoolkit.log("pubscholar results", pubscholarSearchResult);
63-
if (pubscholarSearchResult) {
64-
task.addMsg(
65-
`Found ${pubscholarSearchResult.length} results from PubScholar`,
66-
);
67-
scrapeSearchResults =
68-
scrapeSearchResults.concat(pubscholarSearchResult);
65+
// const pubscholarSearchResult = await pubscholar.search(searchOption);
66+
// ztoolkit.log("pubscholar results", pubscholarSearchResult);
67+
// if (pubscholarSearchResult) {
68+
// task.addMsg(
69+
// `Found ${pubscholarSearchResult.length} results from PubScholar`,
70+
// );
71+
// scrapeSearchResults = scrapeSearchResults.concat(
72+
// pubscholarSearchResult,
73+
// );
74+
// }
75+
const yiigleSearchResult = await yiigle.search(searchOption);
76+
ztoolkit.log("yiigle results", yiigleSearchResult);
77+
if (yiigleSearchResult) {
78+
task.addMsg(`Found ${yiigleSearchResult.length} results from Yiigle`);
79+
scrapeSearchResults = scrapeSearchResults.concat(yiigleSearchResult);
6980
}
81+
82+
// Filter search results
83+
const filteredResults1 = scrapeSearchResults.filter((result) => {
84+
return (result.articleTitle as string).includes(searchOption.title);
85+
});
86+
87+
const filteredResults2 = scrapeSearchResults.filter((result) => {
88+
const score = compareTwoStrings(
89+
searchOption.title,
90+
result.articleTitle as string,
91+
);
92+
ztoolkit.log(`Similarity score for "${result.articleTitle}": ${score}`);
93+
return (
94+
!(result.articleTitle as string).includes(searchOption.title) &&
95+
score > 0.85
96+
);
97+
});
98+
scrapeSearchResults = filteredResults1.concat(filteredResults2);
7099
} else {
71100
task.addMsg("Filename parsing error");
72101
task.status = "fail";
@@ -87,82 +116,110 @@ export async function metaSearch(
87116
}
88117

89118
export async function metaTranslate(task: ScraperTask): Promise<void> {
90-
if (task.searchResults) {
119+
if (task.searchResults.length === 0) {
120+
task.addMsg("No search results found.");
121+
task.status = "fail";
122+
}
123+
124+
try {
125+
const resultIndex = task.resultIndex || 0; // default is 0
126+
task.resultIndex = resultIndex;
127+
const searchResult = task.searchResults[resultIndex];
128+
const libraryID = task.item.libraryID;
129+
ztoolkit.log(`start translate for search result: ${searchResult.title}`);
130+
let translatedItems: Zotero.Item[] = [];
91131
try {
92-
const resultIndex = task.resultIndex || 0; // default is 0
93-
task.resultIndex = resultIndex;
94-
const result = task.searchResults[resultIndex];
95-
ztoolkit.log(`start translate for search result: ${result.title}`);
96-
let newItem: Zotero.Item | null | undefined = null;
97-
switch (result.source) {
132+
switch (searchResult.source) {
98133
case "CNKI":
99134
ztoolkit.log("translated by CNKI");
100-
newItem = await cnki.translate(task, false);
135+
translatedItems = await cnki.translate(
136+
searchResult,
137+
libraryID,
138+
false,
139+
);
101140
break;
102-
case "PubScholar":
103-
ztoolkit.log("translated by PubScholar");
104-
newItem = await pubscholar.translate(task, false);
141+
// case "PubScholar":
142+
// ztoolkit.log("translated by PubScholar");
143+
// newItem = await pubscholar.translate(task, false);
144+
// break;
145+
case "中华医学":
146+
ztoolkit.log("translated by Yiigle");
147+
translatedItems = await yiigle.translate(
148+
searchResult,
149+
libraryID,
150+
false,
151+
);
105152
break;
106153
default:
107154
break;
108155
}
109-
ztoolkit.log(newItem);
156+
ztoolkit.log(translatedItems);
157+
} catch (e) {
158+
ztoolkit.log(`Translation error: ${e}`);
159+
task.addMsg(`Translation error: ${e}`);
160+
}
110161

111-
if (newItem) {
112-
// if (addon.data.env != "development")
113-
newItem = await globalItemFix(newItem);
114-
if (task.type == "attachment") {
115-
task.item.parentID = newItem.id;
116-
} else if (task.type == "snapshot") {
117-
if (task.item.isTopLevelItem()) {
118-
ztoolkit.log("Translate snapshot item for webpage item");
119-
const tmpJSON = newItem.toJSON();
120-
task.item.fromJSON(tmpJSON);
121-
await newItem.eraseTx();
122-
} else {
123-
ztoolkit.log("Translate snapshot attachment item");
124-
const oldParentItem = task.item.parentItem!;
125-
const collectionIDs = oldParentItem.getCollections();
126-
task.item.parentID = newItem.id;
127-
// When parent item is erased, the attachment item will be erased. Set new parent item before the old parent will be earsed.
128-
await task.item.saveTx();
129-
await oldParentItem.eraseTx();
130-
newItem.setCollections(collectionIDs);
131-
await newItem.saveTx();
132-
}
162+
if (translatedItems.length === 1) {
163+
// if (addon.data.env != "development")
164+
const translatedItem = await globalItemFix(task.item, translatedItems[0]);
165+
if (task.type == "attachment") {
166+
task.item.parentID = translatedItem.id;
167+
} else if (task.type == "snapshot") {
168+
if (task.item.isTopLevelItem()) {
169+
ztoolkit.log("Translate snapshot item for webpage item");
170+
const tmpJSON = translatedItem.toJSON();
171+
task.item.fromJSON(tmpJSON);
172+
await translatedItem.eraseTx();
173+
} else {
174+
ztoolkit.log("Translate snapshot attachment item");
175+
const oldParentItem = task.item.parentItem!;
176+
const collectionIDs = oldParentItem.getCollections();
177+
task.item.parentID = translatedItem.id;
178+
// When parent item is erased, the attachment item will be erased. Set new parent item before the old parent will be earsed.
179+
await task.item.saveTx();
180+
await oldParentItem.eraseTx();
181+
translatedItem.setCollections(collectionIDs);
182+
await translatedItem.saveTx();
133183
}
134-
await task.item.saveTx();
135-
task.status = "success";
136-
} else {
137-
task.addMsg("Translation error");
138-
task.status = "fail";
139184
}
140-
} catch (e) {
141-
task.addMsg(`ERROR: ${e}`);
185+
await task.item.saveTx();
186+
task.status = "success";
187+
} else if (translatedItems.length > 1) {
188+
task.addMsg(
189+
`Multiple items (${translatedItems.length}) translated, please check details.`,
190+
);
191+
task.status = "fail";
192+
} else {
193+
task.addMsg("Translation error");
142194
task.status = "fail";
143195
}
144-
} else {
145-
task.addMsg("No search results found.");
196+
} catch (e) {
197+
task.addMsg(`ERROR: ${e}`);
146198
task.status = "fail";
147199
}
148200
}
149201

150202
// Need to update data in item returned by translator.
151-
async function globalItemFix(item: Zotero.Item): Promise<Zotero.Item> {
203+
async function globalItemFix(
204+
oldItem: Zotero.Item,
205+
newItem: Zotero.Item,
206+
): Promise<Zotero.Item> {
152207
if (Zotero.Prefs.get("extensions.zotero.automaticTags", true)) {
153208
// Keyword tag type is automatic.
154209
ztoolkit.log("update auto tags");
155-
item.setTags(
156-
item.getTags().map((t: { tag: string; type?: number }) => ({
210+
newItem.setTags(
211+
newItem.getTags().map((t: { tag: string; type?: number }) => ({
157212
tag: t.tag,
158213
type: 1,
159214
})),
160215
);
161216
} else {
162217
// Remove automatic tags
163218
ztoolkit.log("remove all tags");
164-
item.removeAllTags();
219+
newItem.removeAllTags();
165220
}
166-
await item.saveTx();
167-
return item;
221+
// Preserve collections
222+
oldItem.getCollections().forEach((cid) => newItem!.addToCollection(cid));
223+
await newItem.saveTx();
224+
return newItem;
168225
}

0 commit comments

Comments
 (0)