Skip to content

Commit 2248625

Browse files
committed
fix: 优化知网抓取逻辑
1. 优化知网网页 cookies 的更新逻辑:增加对 cookies 的有效性验证,当 cookies 信息缺失时会重新获取。 2. 调整知网搜索查询参数,已在本机测试通过。
1 parent afc199a commit 2248625

File tree

2 files changed

+27
-14
lines changed

2 files changed

+27
-14
lines changed

src/modules/services/cnki.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ function createSearchPostOptions(searchOption: SearchOption) {
3131
.filter((i) => i.length > 4);
3232
searchExp =
3333
"(TI %= " +
34-
titleParts.map((_i) => `'${_i}'`).join(" % ") +
35-
" OR SU %= " +
36-
titleParts.join("+") +
34+
`'${searchOption.title}'` +
35+
// " OR SU %= " +
36+
// `${titleParts.join("+")}` +
37+
// " OR TI %=" + // 保留全部的标题主题题作为标题,排除最后一个,因这个可能是作者
38+
// titleParts.map((_i) => `'${_i}'`).join(" % ") +
3739
")";
3840
} else {
3941
searchExp = `TI %= '${searchOption.title}'`;

src/utils/cookiebox.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -229,13 +229,22 @@ export class MyCookieSandbox {
229229
MyCookieSandbox.COOKIE_EXPIRE_MS;
230230

231231
// If cookie exists and not expired, return directly
232-
if (this._CNKIHomeCookieBox != null && !isExpired) {
232+
// Valid cookie has more than 1 cookie item.
233+
if (
234+
this._CNKIHomeCookieBox != null &&
235+
!isExpired &&
236+
Object.keys(this._CNKIHomeCookieBox._cookies).length > 1
237+
) {
233238
return this._CNKIHomeCookieBox;
234239
}
235240

236241
// Cookie expired or missing, reset for re-initialization
237-
if (isExpired && this._CNKIHomeCookieBox != null) {
238-
ztoolkit.log("CNKI Home cookie expired, re-initializing...");
242+
if (
243+
isExpired ||
244+
this._CNKIHomeCookieBox === null ||
245+
Object.keys(this._CNKIHomeCookieBox._cookies).length <= 1
246+
) {
247+
ztoolkit.log("CNKI Home cookie expired or invalid, re-initializing...");
239248
this._CNKIHomeCookieBox = null;
240249
this._initPromise = null;
241250
}
@@ -252,14 +261,16 @@ export class MyCookieSandbox {
252261
}
253262
await this._initPromise;
254263
// 保险起见,再次检查是否成功获取到 cookieSandbox
255-
if (this._CNKIHomeCookieBox == null) {
256-
ztoolkit.log("homeCookieBox 还是为空,又开始初始化...");
257-
this._CNKIHomeCookieBox = await this.getCookieBoxFromUrl(
258-
"https://kns.cnki.net/kns8s/defaultresult/index?crossids=YSTT4HG0%2CLSTPFY1C%2CJUP3MUPD%2CMPMFIG1A%2CWQ0UVIAA%2CBLZOG7CK%2CPWFIRAGL%2CEMRPGLPA%2CNLBO1Z6R%2CNN3FJMUV&korder=SU&kw=%E7%A7%91%E7%A0%94%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB",
259-
"请等待知网网页正常打开后,再点击下方按钮关闭",
260-
);
261-
this._cnkiHomeCookieLastUpdateTime = Date.now();
262-
}
264+
// if (
265+
// this._CNKIHomeCookieBox == null
266+
// ) {
267+
// ztoolkit.log("homeCookieBox 还是为空,又开始初始化...");
268+
// this._CNKIHomeCookieBox = await this.getCookieBoxFromUrl(
269+
// "https://kns.cnki.net/kns8s/defaultresult/index?crossids=YSTT4HG0%2CLSTPFY1C%2CJUP3MUPD%2CMPMFIG1A%2CWQ0UVIAA%2CBLZOG7CK%2CPWFIRAGL%2CEMRPGLPA%2CNLBO1Z6R%2CNN3FJMUV&korder=SU&kw=%E7%A7%91%E7%A0%94%E8%AE%BA%E6%96%87%E9%98%85%E8%AF%BB",
270+
// "请等待知网网页正常打开后,再点击下方按钮关闭",
271+
// );
272+
// this._cnkiHomeCookieLastUpdateTime = Date.now();
273+
// }
263274
return this._CNKIHomeCookieBox!;
264275
}
265276

0 commit comments

Comments
 (0)