Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type ConfigEnvKeys =
| 'BITBUCKET_USERNAME'
| 'BITBUCKET_PASSWORD'
| 'BTBYR_HOST'
| 'BAIDU_COOKIE'
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A comes before I and T

| 'BTBYR_COOKIE'
| 'BUPT_PORTAL_COOKIE'
| 'CAIXIN_COOKIE'
Expand Down Expand Up @@ -348,6 +349,9 @@ export type Config = {
};

// Route-specific Configurations
baidu: {
cookie?: string;
};
bilibili: {
cookies: Record<string, string | undefined>;
dmImgList?: string;
Expand Down Expand Up @@ -763,7 +767,7 @@ const calculateValue = () => {
type: envs.CACHE_TYPE || (envs.CACHE_TYPE === '' ? '' : 'memory'), // 缓存类型,支持 'memory' 和 'redis',设为空可以禁止缓存
requestTimeout: toInt(envs.CACHE_REQUEST_TIMEOUT, 60),
routeExpire: toInt(envs.CACHE_EXPIRE, 5 * 60), // 路由缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 1 * 60 * 60), // 不变内容缓存时间,单位为秒
contentExpire: toInt(envs.CACHE_CONTENT_EXPIRE, 60 * 60), // 不变内容缓存时间,单位为秒
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not make any changes to the default value.

},
memory: {
max: toInt(envs.MEMORY_MAX, Math.pow(2, 8)), // The maximum number of items that remain in the cache. This must be a positive finite integer.
Expand Down Expand Up @@ -843,6 +847,9 @@ const calculateValue = () => {
},

// Route-specific Configurations
baidu: {
cookie: envs.BAIDU_COOKIE,
},
bilibili: {
cookies: bilibili_cookies,
dmImgList: envs.BILIBILI_DM_IMG_LIST,
Expand Down
122 changes: 122 additions & 0 deletions lib/routes/baidu/tieba/common.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import { config } from '@/config';
import ConfigNotFoundError from '@/errors/types/config-not-found';
import cache from '@/utils/cache';

/**
 * Convert a raw cookie string into the cookie objects handed to Puppeteer's `page.setCookie`.
 *
 * Segments are separated by `;`. Each segment is split on its FIRST `=` only, so a value that
 * itself contains `=` is kept intact. A segment with no `=` yields a cookie with that name and
 * an empty value. Blank segments are discarded.
 *
 * @param cookieStr - Cookie string in `name=value; other=value` form.
 * @returns One object per non-empty segment, each scoped to the `.tieba.baidu.com` domain.
 */
export function parseBaiduCookies(cookieStr: string): Array<{ name: string; value: string; domain: string }> {
    const domain = '.tieba.baidu.com';
    const parsed: Array<{ name: string; value: string; domain: string }> = [];

    for (const rawSegment of cookieStr.split(';')) {
        const segment = rawSegment.trim();
        if (segment.length === 0) {
            continue;
        }

        const separatorAt = segment.indexOf('=');
        const hasSeparator = separatorAt !== -1;

        parsed.push({
            name: hasSeparator ? segment.slice(0, separatorAt).trim() : segment,
            value: hasSeparator ? segment.slice(separatorAt + 1).trim() : '',
            domain,
        });
    }

    return parsed;
}
Comment on lines +9 to +23
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not reinvent the wheel.


/**
 * Throw if the given HTML contains Baidu's security-verification marker text.
 *
 * @param html - Page HTML to inspect.
 * @throws Error when the marker text `安全验证` appears anywhere in `html`.
 */
export function checkSecurityVerification(html: string): void {
    // '百度安全验证' contains '安全验证', so one substring test covers both phrasings.
    if (html.includes('安全验证')) {
        throw new Error('Baidu security verification required. The cookie may be expired or invalid. Please update your BAIDU_COOKIE.');
    }
}

/**
* Fetch the HTML of a Tieba page through Puppeteer, going through the cache.
*
* Requires `config.baidu.cookie`; the cookie string is parsed with `parseBaiduCookies` and set on
* the page before navigating. Inside the cache callback the page is loaded up to `retries` times,
* one attempt after another, pausing 1000 ms x (attempt number) after each failed attempt. The
* value returned by `cache.tryGet` is then passed to `checkSecurityVerification` before it is returned.
*
* NOTE(review): the verification check runs on the value returned by `cache.tryGet`, so a
* verification page has presumably already been cached by the time the check throws — confirm
* whether that is intended.
*
* @param url - Page URL to open.
* @param cacheKey - Key passed to `cache.tryGet`.
* @param options - Optional settings.
* @param options.waitForSelector - Selector awaited after navigation; a failure of this wait is ignored. Defaults to a list of thread-card selectors.
* @param options.timeout - Timeout passed to `page.waitForSelector` (default 3000).
* @param options.retries - Maximum number of load attempts (default 3).
* @returns The page HTML as a string.
* @throws ConfigNotFoundError when `config.baidu.cookie` is not set.
* @throws Error the last load error once every attempt has failed, or whatever `checkSecurityVerification` throws.
*/
export async function getTiebaPageContent(
url: string,
cacheKey: string,
options: {
waitForSelector?: string;
timeout?: number;
retries?: number;
} = {}
): Promise<string> {
const cookie = config.baidu.cookie;

if (!cookie) {
throw new ConfigNotFoundError('Baidu Tieba RSS is disabled due to the lack of <a href="https://docs.rsshub.app/deploy/config#baidu">BAIDU_COOKIE</a>');
}

const { getPuppeteerPage } = await import('@/utils/puppeteer');
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No await import

const { waitForSelector = '.thread-card-wrapper, .virtual-list-item, .thread-content-box, .thread-card', timeout = 3000, retries = 3 } = options;

const data = await cache.tryGet(
cacheKey,
async () => {
let lastError: Error | undefined;

/* eslint-disable no-await-in-loop -- Intentional sequential retry logic */
for (let attempt = 0; attempt < retries; attempt++) {
const { page, destroy } = await getPuppeteerPage(url, { noGoto: true });

try {
// Set the cookies before visiting the page, which saves one navigation
const cookies = parseBaiduCookies(cookie);
await page.setCookie(...cookies);

// Visit the target page, using a more lenient wait condition
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The puppeteer session will be killed

setTimeout(async () => {
await browser.close();
}, 30000);
before this timeout


// Wait for the page to settle
await new Promise((resolve) => setTimeout(resolve, 2000));

// Wait for the content to load dynamically
try {
await page.waitForSelector(waitForSelector, { timeout });
} catch {
// On timeout, carry on with whatever has rendered
}

return await page.content();
} catch (error) {
lastError = error as Error;
// On the final attempt, rethrow the error
if (attempt === retries - 1) {
throw lastError;
}
// Pause, then retry
await new Promise((resolve) => setTimeout(resolve, 1000 * (attempt + 1)));
} finally {
await destroy();
}
}
/* eslint-enable no-await-in-loop */

throw lastError || new Error('Failed to fetch page content');
},
config.cache.routeExpire,
false
);

const html = data as string;
checkSecurityVerification(html);
return html;
}

/**
 * Turn a possibly-relative href into an absolute URL.
 *
 * - Empty input yields an empty string.
 * - `http://` / `https://` URLs (any letter case) are returned untouched.
 * - Protocol-relative hrefs (`//host/path`) get the scheme of `base` (falling back to `https:`).
 * - Anything else is treated as a path and appended to `base`, with exactly one `/` between them.
 *
 * @param href - The link to normalize.
 * @param base - Origin used for relative links; trailing slashes are ignored.
 * @returns The absolute URL, or `''` when `href` is empty.
 */
export function normalizeUrl(href: string, base: string = 'https://tieba.baidu.com'): string {
    if (!href) {
        return '';
    }

    // Require the full "http(s)://" prefix so a relative path that merely begins with "http" is not mistaken for absolute.
    if (/^https?:\/\//i.test(href)) {
        return href;
    }

    // Protocol-relative reference: keep its own host and only borrow the scheme from base.
    if (href.startsWith('//')) {
        const scheme = /^[a-z][a-z\d+.-]*:/i.exec(base)?.[0] ?? 'https:';
        return `${scheme}${href}`;
    }

    // Drop trailing slashes from base so the join never yields "//".
    const origin = base.replace(/\/+$/, '');
    const path = href.startsWith('/') ? href : `/${href}`;
    return `${origin}${path}`;
}
120 changes: 68 additions & 52 deletions lib/routes/baidu/tieba/forum.tsx
Original file line number Diff line number Diff line change
@@ -1,85 +1,101 @@
import { load } from 'cheerio';
import { raw } from 'hono/html';
import { renderToString } from 'hono/jsx/dom/server';

import type { Route } from '@/types';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

import { getTiebaPageContent, normalizeUrl } from './common';
import { parseRelativeTime, parseThreads } from './utils';

export const route: Route = {
path: ['/tieba/forum/good/:kw/:cid?/:sortBy?', '/tieba/forum/:kw/:sortBy?'],
categories: ['bbs'],
example: '/baidu/tieba/forum/good/女图',
parameters: { kw: '吧名', cid: '精品分类,默认为 `0`(全部分类),如果不传 `cid` 则获取全部分类', sortBy: '排序方式:`created`, `replied`。默认为 `created`' },
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
requireConfig: [
{
name: 'BAIDU_COOKIE',
optional: false,
description: '百度 cookie 值,用于需要登录的贴吧页面',
},
],
requirePuppeteer: true,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: '精品帖子',
maintainers: ['u3u'],
maintainers: ['u3u', 'FlanChanXwO'],
handler,
};

async function handler(ctx) {
// sortBy: created, replied
const { kw, cid = '0', sortBy = 'created' } = ctx.req.param();
const sortParam = sortBy === 'replied' ? '&sc=67108864' : '';

// PC端:https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}
// 移动端接口:https://tieba.baidu.com/mo/q/m?kw=${encodeURIComponent(kw)}&lp=5024&forum_recommend=1&lm=0&cid=0&has_url_param=1&pn=0&is_ajax=1
const params = { kw: encodeURIComponent(kw) };
ctx.req.path.includes('good') && (params.tab = 'good');
cid && (params.cid = cid);
const { data } = await got(`https://tieba.baidu.com/f`, {
headers: {
Referer: 'https://tieba.baidu.com/',
},
searchParams: params,
});
// 固定抓取3页,约30条帖子
const maxPages = 3;

// 并发获取所有页面
const pagePromises = [];
for (let pageNum = 0; pageNum < maxPages; pageNum++) {
const pageUrl = `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}&pn=${pageNum * 50}${cid === '0' ? '' : `&cid=${cid}`}${ctx.req.path.includes('good') ? '&tab=good' : ''}${pageNum === 0 ? '' : '&ie=utf-8'}${sortParam}`;

const promise = getTiebaPageContent(pageUrl, `tieba:forum:${kw}:${cid}:${sortBy}:page${pageNum}`, { waitForSelector: '.thread-card-wrapper', timeout: 3000 });
pagePromises.push(promise);
}

const threadListHTML = load(data)('code[id="pagelet_html_frs-list/pagelet/thread_list"]')
.contents()
.filter((e) => e.nodeType === '8');
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No page turn. Extract content from the first page only.

// 等待所有页面获取完成
const pageResults = await Promise.all(pagePromises);

const $ = load(threadListHTML.prevObject[0].data);
const list = $('#thread_list > .j_thread_list[data-field]')
.toArray()
.map((element) => {
const item = $(element);
const { id, author_name } = item.data('field');
const time = sortBy === 'created' ? item.find('.is_show_create_time').text().trim() : item.find('.threadlist_reply_date').text().trim();
const title = item.find('a.j_th_tit').text().trim();
const details = item.find('.threadlist_abs').text().trim();
const medias = item
.find('.threadlist_media img')
.toArray()
.map((element) => {
const item = $(element);
return `<img src="${item.attr('bpic')}">`;
})
.join('');
// 解析所有页面数据并去重
const threadMap = new Map();
for (const html of pageResults) {
if (html && html.length > 0) {
const $ = load(html);
const threads = parseThreads($);
for (const thread of threads) {
// 使用帖子ID去重,只保留第一次出现的
if (!threadMap.has(thread.id)) {
threadMap.set(thread.id, thread);
}
}
}
}

return {
title,
description: renderToString(
<>
<p>{details}</p>
<p>{raw(medias)}</p>
<p>作者:{author_name}</p>
</>
),
pubDate: timezone(parseDate(time, ['HH:mm', 'M-D', 'YYYY-MM'], true), +8),
link: `https://tieba.baidu.com/p/${id}`,
};
});
const allThreads = [...threadMap.values()];

if (allThreads.length === 0) {
throw new Error('No threads found. The cookie may be expired or invalid. Please check your BAIDU_COOKIE.');
}

const list = allThreads.map((thread) => {
const parsedDate = parseRelativeTime(thread.time);
return {
title: thread.title,
link: normalizeUrl(thread.link) || `https://tieba.baidu.com/p/${thread.id}`,
pubDate: parsedDate ? timezone(parsedDate, +8) : undefined,
author: thread.author,
description: renderToString(
<>
{thread.content ? <p>{thread.content}</p> : null}
{thread.images && thread.images.length > 0 ? (
<div>
{thread.images.map((img) => (
<img src={img} alt="" style={{ maxWidth: '100%', margin: '5px 0' }} />
))}
</div>
) : null}
</>
),
};
});

return {
title: `${kw}吧`,
description: load(data)('meta[name="description"]').attr('content'),
link: `https://tieba.baidu.com/f?kw=${encodeURIComponent(kw)}`,
item: list,
};
Expand Down
Loading
Loading