Skip to content

Commit 881a9d8

Browse files
committed
fix(route/qidian): anticrawl issue
1 parent f85a159 commit 881a9d8

File tree

5 files changed

+82
-115
lines changed

5 files changed

+82
-115
lines changed

lib/routes/qidian/author.tsx

Lines changed: 21 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import { load } from 'cheerio';
2-
import { renderToString } from 'hono/jsx/dom/server';
32

43
import type { Route } from '@/types';
54
import got from '@/utils/got';
6-
import { parseDate, parseRelativeDate } from '@/utils/parse-date';
7-
import timezone from '@/utils/timezone';
5+
6+
const headers = {
7+
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
8+
};
89

910
export const route: Route = {
1011
path: '/author/:id',
@@ -14,7 +15,7 @@ export const route: Route = {
1415
features: {
1516
requireConfig: false,
1617
requirePuppeteer: false,
17-
antiCrawler: false,
18+
antiCrawler: true,
1819
supportBT: false,
1920
supportPodcast: false,
2021
supportScihub: false,
@@ -31,42 +32,28 @@ export const route: Route = {
3132

3233
async function handler(ctx) {
3334
const id = ctx.req.param('id');
34-
const rootUrl = 'https://my.qidian.com';
35-
const currentUrl = `${rootUrl}/author/${id}/`;
36-
const response = await got(currentUrl);
35+
const currentUrl = `https://my.qidian.com/author/${id}/`;
36+
37+
// Reason: PC site (my.qidian.com) returns anti-bot JS challenge; mobile site has SSR data
38+
const response = await got(`https://m.qidian.com/author/${id}/`, { headers });
3739
const $ = load(response.data);
38-
const authorName = $('.header-msg h1').contents().first().text().trim();
39-
const items = $('.author-work .author-item')
40-
.toArray()
41-
.map((item) => {
42-
item = $(item);
43-
const messageItem = item.find('.author-item-msg');
44-
const updatedDate = messageItem.find('.author-item-update span').text().replace('·', '').trim();
40+
const { pageContext } = JSON.parse($('#vite-plugin-ssr_pageContext').text());
41+
const pageData = pageContext.pageProps.pageData;
4542

46-
return {
47-
title: messageItem.find('.author-item-title').text().trim(),
48-
author: authorName,
49-
category: messageItem.find('.author-item-exp a').first().text().trim(),
50-
description: renderDescription(messageItem.find('.author-item-update a').attr('title'), item.find('a img').attr('src')),
51-
pubDate: timezone(/(|)/.test(updatedDate) ? parseRelativeDate(updatedDate) : parseDate(updatedDate, 'YYYY-MM-DD HH:mm'), +8),
52-
link: messageItem.find('.author-item-update a').attr('href'),
53-
};
54-
});
43+
const authorName = pageData.info.name;
44+
45+
const items = (pageData.allBook || []).map((book) => ({
46+
title: book.bName,
47+
author: authorName,
48+
category: book.cat,
49+
description: book.desc,
50+
link: `https://book.qidian.com/info/${book.bid}/`,
51+
}));
5552

5653
return {
5754
title: `${authorName} - 起点中文网`,
58-
description: $('.header-msg-desc').text().trim(),
55+
description: pageData.info.desc,
5956
link: currentUrl,
6057
item: items,
6158
};
6259
}
63-
64-
const renderDescription = (description?: string, image?: string, author?: string) => renderToString(<QidianDescription description={description} image={image} author={author} />);
65-
66-
const QidianDescription = ({ description, image, author }: { description?: string; image?: string; author?: string }) => (
67-
<>
68-
<p>{description}</p>
69-
{image ? <img src={image} /> : null}
70-
{author ?? null}
71-
</>
72-
);

lib/routes/qidian/chapter.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export const route: Route = {
1414
features: {
1515
requireConfig: false,
1616
requirePuppeteer: false,
17-
antiCrawler: false,
17+
antiCrawler: true,
1818
supportBT: false,
1919
supportPodcast: false,
2020
supportScihub: false,

lib/routes/qidian/forum.ts

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ import { load } from 'cheerio';
22

33
import type { Route } from '@/types';
44
import got from '@/utils/got';
5-
import { parseRelativeDate } from '@/utils/parse-date';
5+
6+
const headers = {
7+
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
8+
};
69

710
export const route: Route = {
811
path: '/forum/:id',
@@ -12,7 +15,7 @@ export const route: Route = {
1215
features: {
1316
requireConfig: false,
1417
requirePuppeteer: false,
15-
antiCrawler: false,
18+
antiCrawler: true,
1619
supportBT: false,
1720
supportPodcast: false,
1821
supportScihub: false,
@@ -30,34 +33,26 @@ export const route: Route = {
3033
async function handler(ctx) {
3134
const id = ctx.req.param('id');
3235

33-
const url = `https://forum.qidian.com/NewForum/List.aspx?BookId=${id}`;
36+
// Reason: forum.qidian.com redirects and PC site has anti-bot JS challenge;
37+
// mobile book page embeds forum posts via seoBookCirclePost
38+
const response = await got(`https://m.qidian.com/book/${id}.html`, { headers });
39+
const $ = load(response.data);
40+
const { pageContext } = JSON.parse($('#vite-plugin-ssr_pageContext').text());
41+
const pageData = pageContext.pageProps.pageData;
3442

35-
const forum_response = await got(url, {
36-
headers: {
37-
Referer: `https://book.qidian.com/info/${id}`,
38-
},
39-
});
40-
41-
const $ = load(forum_response.data);
42-
const name = $('.main-header>h1').text();
43-
const cover_url = $('img.forum_book').attr('src');
44-
const list = $('li.post-wrap>.post');
45-
46-
const items = [];
47-
for (const el of list) {
48-
const title = $(el).children().eq(1).find('a');
49-
items.push({
50-
title: title.text(),
51-
link: `https:${title.attr('href')}`,
52-
description: $(el).text(),
53-
pubDate: parseRelativeDate($(el).find('.post-info>span').text()),
54-
});
55-
}
43+
const bookName = pageData.bookInfo?.bookName || '';
44+
const posts = pageData.seoBookCirclePost?.bookCirclePostList || [];
45+
46+
const items = posts.map((post) => ({
47+
title: post.title,
48+
link: `https://book.qidian.com/info/${id}/`,
49+
description: post.circleReviewDesc,
50+
author: post.userName,
51+
}));
5652

5753
return {
58-
title: `起点 《${name}》讨论区`,
59-
link: url,
60-
image: cover_url,
54+
title: `起点 《${bookName}》讨论区`,
55+
link: `https://book.qidian.com/info/${id}/`,
6156
item: items,
6257
};
6358
}

lib/routes/qidian/free-next.ts

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ import { load } from 'cheerio';
33
import type { Route } from '@/types';
44
import got from '@/utils/got';
55

6+
const headers = {
7+
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
8+
};
9+
610
export const route: Route = {
711
path: '/free-next/:type?',
812
categories: ['reading'],
@@ -11,7 +15,7 @@ export const route: Route = {
1115
features: {
1216
requireConfig: false,
1317
requirePuppeteer: false,
14-
antiCrawler: false,
18+
antiCrawler: true,
1519
supportBT: false,
1620
supportPodcast: false,
1721
supportScihub: false,
@@ -31,37 +35,27 @@ export const route: Route = {
3135
async function handler(ctx) {
3236
const type = ctx.req.param('type');
3337

34-
let link, title;
35-
if (type === 'mm') {
36-
link = 'https://www.qidian.com/mm/free';
37-
title = '起点女生网';
38-
} else {
39-
link = 'https://www.qidian.com/free';
40-
title = '起点中文网';
41-
}
38+
const isMM = type === 'mm';
39+
const link = isMM ? 'https://www.qidian.com/mm/free' : 'https://www.qidian.com/free';
40+
const title = isMM ? '起点女生网' : '起点中文网';
4241

43-
const response = await got(link);
42+
// Reason: PC site (www.qidian.com) returns anti-bot JS challenge; mobile site has SSR data
43+
const response = await got('https://m.qidian.com/free', { headers });
4444
const $ = load(response.data);
45+
const { pageContext } = JSON.parse($('#vite-plugin-ssr_pageContext').text());
46+
const pageData = pageContext.pageProps.pageData;
4547

46-
const list = $('div.other-rec-wrap li');
47-
const out = list.toArray().map((item) => {
48-
item = $(item);
49-
50-
const img = `<img src="https:${item.find('.img-box img').attr('src')}">`;
51-
const rank = `<p>评分:${item.find('.img-box span').text()}</p>`;
52-
53-
return {
54-
title: item.find('.book-info h4 a').text(),
55-
description: img + rank + item.find('p.intro').html(),
56-
link: 'https:' + item.find('.book-info h4 a').attr('href'),
57-
author: item.find('p.author a').text(),
58-
};
59-
});
48+
const items = (pageData.nxtFree || []).map((book) => ({
49+
title: book.bName,
50+
link: `https://book.qidian.com/info/${book.bid}/`,
51+
author: book.bAuth,
52+
description: `评分:${book.score === -1 ? '暂无' : book.score}`,
53+
}));
6054

6155
return {
6256
title,
6357
description: `限时免费下期预告-${title}`,
6458
link,
65-
item: out,
59+
item: items,
6660
};
6761
}

lib/routes/qidian/free.ts

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ import { load } from 'cheerio';
22

33
import type { Route } from '@/types';
44
import got from '@/utils/got';
5-
import { parseRelativeDate } from '@/utils/parse-date';
5+
6+
const headers = {
7+
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
8+
};
69

710
export const route: Route = {
811
path: '/free/:type?',
@@ -12,7 +15,7 @@ export const route: Route = {
1215
features: {
1316
requireConfig: false,
1417
requirePuppeteer: false,
15-
antiCrawler: false,
18+
antiCrawler: true,
1619
supportBT: false,
1720
supportPodcast: false,
1821
supportScihub: false,
@@ -32,39 +35,27 @@ export const route: Route = {
3235
async function handler(ctx) {
3336
const type = ctx.req.param('type');
3437

35-
let link, title;
36-
if (type === 'mm') {
37-
link = 'https://www.qidian.com/mm/free';
38-
title = '起点女生网';
39-
} else {
40-
link = 'https://www.qidian.com/free';
41-
title = '起点中文网';
42-
}
38+
const isMM = type === 'mm';
39+
const link = isMM ? 'https://www.qidian.com/mm/free' : 'https://www.qidian.com/free';
40+
const title = isMM ? '起点女生网' : '起点中文网';
4341

44-
const response = await got(link);
42+
// Reason: PC site (www.qidian.com) returns anti-bot JS challenge; mobile site has SSR data
43+
const response = await got('https://m.qidian.com/free', { headers });
4544
const $ = load(response.data);
45+
const { pageContext } = JSON.parse($('#vite-plugin-ssr_pageContext').text());
46+
const pageData = pageContext.pageProps.pageData;
4647

47-
const list = $('#limit-list li');
48-
const out = list.toArray().map((item) => {
49-
item = $(item);
50-
51-
const img = `<img src="https:${item.find('.book-img-box img').attr('src')}">`;
52-
const rank = `<p>评分:${item.find('.score').text()}</p>`;
53-
const update = `<a href=https:${item.find('p.update > a').attr('href')}>${item.find('p.update > a').text()}</a>`;
54-
55-
return {
56-
title: item.find('.book-mid-info h4 a').text(),
57-
description: img + rank + update + '<br>' + item.find('p.intro').html(),
58-
pubDate: parseRelativeDate(item.find('p.update span').text()),
59-
link: 'https:' + item.find('.book-mid-info h4 a').attr('href'),
60-
author: item.find('p.author a.name').text(),
61-
};
62-
});
48+
const items = (pageData.curFree || []).map((book) => ({
49+
title: book.bName,
50+
link: `https://book.qidian.com/info/${book.bid}/`,
51+
author: book.bAuth,
52+
description: `评分:${book.score === -1 ? '暂无' : book.score}`,
53+
}));
6354

6455
return {
6556
title,
6657
description: `限时免费-${title}`,
6758
link,
68-
item: out,
59+
item: items,
6960
};
7061
}

0 commit comments

Comments
 (0)