Skip to content

Commit 57cb70e

Browse files
authored
feat: Add new source: freebuf (#260)
* feat: Add new source: freebuf
* fix: 修复freebuf新闻id为空字符串的问题,现在使用截取文章url的方式获取文章id
1 parent fcdf837 commit 57cb70e

6 files changed

Lines changed: 204 additions & 1 deletion

File tree

public/icons/freebuf.png

2.43 KB
Loading

server/glob.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ declare module 'glob:./sources/{*.ts,**/index.ts}' {
1111
export const douban: typeof import('./sources/douban')
1212
export const douyin: typeof import('./sources/douyin')
1313
export const fastbull: typeof import('./sources/fastbull')
14+
export const freebuf: typeof import('./sources/freebuf')
1415
export const gelonghui: typeof import('./sources/gelonghui')
1516
export const ghxi: typeof import('./sources/ghxi')
1617
export const github: typeof import('./sources/github')

server/sources/freebuf.ts

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
import * as cheerio from "cheerio"
2+
3+
// Engagement counters scraped for a single article card.
4+
interface ArticleStats {
5+
views: number // parsed by extractStats from the span of the link containing "围观"; 0 when missing or not numeric
6+
collections: number // parsed by extractStats from the span of the link containing "收藏"; 0 when missing or not numeric
7+
}
8+
9+
// Author details scraped from an article card (filled in by extractAuthor).
10+
interface AuthorInfo {
11+
name: string // trimmed text of the last span inside the author link; "" when no author link exists
12+
avatar?: string // src attribute of the ".ant-avatar img" inside the author link, when such an image exists
13+
profileUrl?: string // href of the author link, passed through formatUrl
14+
}
15+
16+
// Intermediate record for one scraped article card, built before mapping to the source's returned items.
17+
interface ArticleData {
18+
title: string // trimmed text of the card's ".title" element
19+
url: string // href of the title link, passed through formatUrl
20+
description: string // trimmed text of ".item-right .text-line-2"
21+
publishTime: string // raw text scraped from the card; it is not parsed into a date in this file
22+
author: AuthorInfo
23+
stats: ArticleStats
24+
album?: string // text of ".from-column span"; left undefined when empty
25+
image?: string // src of ".img-view img"; left undefined when empty
26+
category?: string // label inferred from the URL path by extractCategory; left undefined when empty
27+
}
28+
29+
// Helper: returns the trimmed text of the first descendant of `$element`
// matching `selector`. The result is always a string; it is "" when the
// trimmed text is empty.
function safeExtract($element: cheerio.Cheerio<any>, selector: string): string {
  const firstMatch = $element.find(selector).first()
  // trim() always yields a string, so no extra empty-string fallback is required.
  return firstMatch.text().trim()
}
34+
35+
// Helper: reads `attribute` from the first descendant of `$element` matching
// `selector`. Returns "" when the attribute is missing or empty.
function safeExtractAttribute($element: cheerio.Cheerio<any>, selector: string, attribute: string): string {
  const value = $element.find(selector).first().attr(attribute)
  return value ? value : ""
}
39+
40+
// Helper: turns an href scraped from the page into an absolute URL.
//   - undefined / empty / whitespace-only input  -> ""
//   - absolute http(s) URLs                      -> returned unchanged
//   - protocol-relative URLs ("//host/path")     -> prefixed with the base URL's scheme
//                                                   ("https:" if the base has none)
//   - anything else                              -> joined to `baseUrl` with exactly one "/"
//
// `baseUrl` defaults to the FreeBuf origin.
function formatUrl(url: string | undefined, baseUrl: string = "https://www.freebuf.com"): string {
  const href = url?.trim()
  if (!href) return ""

  // Require the full "http://" or "https://" prefix; a bare startsWith("http")
  // would also treat relative paths such as "httpbin/page" as absolute.
  if (/^https?:\/\//i.test(href)) return href

  // Protocol-relative links must not be appended to the base host.
  if (href.startsWith("//")) {
    const scheme = /^[a-z][a-z\d+.-]*:/i.exec(baseUrl)?.[0] ?? "https:"
    return `${scheme}${href}`
  }

  // Normalise the slashes on both sides so the join never yields "host//path" or "hostpath".
  const base = baseUrl.replace(/\/+$/, "")
  const path = href.replace(/^\/+/, "")
  return `${base}/${path}`
}
45+
46+
// Helper: reads the view ("围观") and collection ("收藏") counters from an
// article card. A counter is 0 when its link is missing or its text does not
// start with a number.
function extractStats($article: cheerio.Cheerio<any>): ArticleStats {
  // The number is taken from the first <span> inside the <a> whose text contains `label`.
  const readCount = (label: string): number => {
    const link = $article.find(`a:contains("${label}")`)
    if (!link.length) return 0
    const rawText = link.find("span").first().text()
    return Number.parseInt(rawText) || 0
  }

  const views = readCount("围观")
  const collections = readCount("收藏")
  return { views, collections }
}
66+
67+
// Helper: builds the author record from the first link inside ".item-bottom".
// When no such link exists the result is just `{ name: "" }`.
function extractAuthor($article: cheerio.Cheerio<any>): AuthorInfo {
  const link = $article.find(".item-bottom a").first()
  if (!link.length) return { name: "" }

  const info: AuthorInfo = {
    // The author's name is the text of the last span inside the link.
    name: link.find("span").last().text().trim(),
    profileUrl: formatUrl(link.attr("href")),
  }

  // The avatar is only set when the link actually contains an avatar image.
  const avatarNode = link.find(".ant-avatar img")
  if (avatarNode.length) info.avatar = avatarNode.attr("src")

  return info
}
84+
85+
// Helper: infers a category label from the article URL path.
// Returns "" when the href is missing or matches none of the known sections.
function extractCategory($article: cheerio.Cheerio<any>): string {
  const href = $article.find(".title-left .title").parent().attr("href") || ""

  // Checked in order; the first path fragment found in the href wins.
  const labelsByPath: ReadonlyArray<readonly [string, string]> = [
    ["/articles/web/", "Web安全"],
    ["/articles/database/", "数据安全"],
    ["/articles/network/", "网络安全"],
    ["/articles/mobile/", "移动安全"],
    ["/articles/cloud/", "云安全"],
  ]

  for (const [fragment, label] of labelsByPath) {
    if (href.includes(fragment)) return label
  }
  return ""
}
97+
98+
// Derives the news id from a FreeBuf article URL: the first run of digits in
// the last path segment, e.g. ".../articles/web/460614.html" -> "460614".
// Returns "" when that segment contains no digits.
function extractIdFromUrl(url: string): string {
  // Drop the query string and fragment first: they may contain "/" or digits
  // that would otherwise be mistaken for the last path segment.
  const path = url.split(/[?#]/, 1)[0] ?? ""
  // Ignore trailing slashes so ".../460614/" still resolves to "460614".
  const trimmed = path.replace(/\/+$/, "")
  const lastSegment = trimmed.slice(trimmed.lastIndexOf("/") + 1) // "460614.html"
  // Keep only the digits, discarding ".html" and any other suffix.
  return lastSegment.match(/\d+/)?.[0] ?? ""
}
106+
107+
// FreeBuf source: fetches the FreeBuf home page, scrapes every ".article-item"
// card and returns one news item per card that has both a title and a link.
export default defineSource(async () => {
  const baseUrl = "https://www.freebuf.com"
  // NOTE(review): browser-like headers are sent with the request; presumably the
  // site needs them to serve the regular HTML page — confirm before removing.
  const html = await myFetch<any>(baseUrl, {
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      "Referer": "https://www.freebuf.com/",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    },
  })
  const $ = cheerio.load(html)
  const articles: ArticleData[] = []

  // Walk over every article card on the page.
  $(".article-item").each((index: number, articleElement) => {
    const $article = $(articleElement)

    try {
      // Title text and link both come from the anchor wrapping ".title".
      const titleLink = $article.find(".title-left .title").parent()
      const title = titleLink.find(".title").text().trim()
      const url = formatUrl(titleLink.attr("href"), baseUrl)

      // Skip cards without a title or without a link: an item with no URL
      // cannot be opened and would end up with an empty id.
      if (!title || !url) return

      const article: ArticleData = {
        title,
        url,
        description: safeExtract($article, ".item-right .text-line-2"),
        publishTime: safeExtract($article, ".item-bottom span:last-child"),
        author: extractAuthor($article),
        stats: extractStats($article),
        // Empty strings are normalised to undefined for the optional fields.
        album: safeExtract($article, ".from-column span") || undefined,
        image: safeExtractAttribute($article, ".img-view img", "src") || undefined,
        category: extractCategory($article) || undefined,
      }

      articles.push(article)
    } catch (error) {
      // One malformed card must not abort the whole scrape; log it and move on.
      console.warn(`解析第${index + 1}篇文章时出错:`, error instanceof Error ? error.message : String(error))
    }
  })

  // Convert the intermediate records into the shape returned by this source.
  return articles.map(item => ({
    // Fall back to the URL itself when it contains no numeric id, so an item
    // never gets an empty id that would collide with other items.
    id: extractIdFromUrl(item.url) || item.url,
    title: item.title,
    url: item.url,
    extra: {
      hover: item.description,
      time: item.publishTime,
      author: item.author,
      stats: item.stats,
      album: item.album,
    },
  }))
})

shared/pinyin.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,6 @@
4444
"chongbuluo-hot": "chongbuluo-zuire",
4545
"douban": "douban-remendianying",
4646
"steam": "Steam-zaixianrenshu",
47-
"tencent-hot": "tengxunxinwen-zonghezaobao"
47+
"tencent-hot": "tengxunxinwen-zonghezaobao",
48+
"freebuf": "Freebuf-wangluoanquan"
4849
}

shared/pre-sources.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,14 @@ export const originSources = {
443443
},
444444
},
445445
},
446+
"freebuf": {
447+
name: "Freebuf",
448+
column: "china",
449+
title: "网络安全",
450+
color: "green",
451+
type: "hottest",
452+
home: "https://www.freebuf.com/",
453+
},
446454
} as const satisfies Record<string, OriginSource>
447455

448456
export function genSources() {

shared/sources.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,5 +509,14 @@
509509
"color": "blue",
510510
"interval": 1800000,
511511
"title": "综合早报"
512+
},
513+
"freebuf": {
514+
"title": "网络安全",
515+
"name": "Freebuf",
516+
"type": "hottest",
517+
"column": "china",
518+
"home": "https://www.freebuf.com/",
519+
"color": "green",
520+
"interval": 600000
512521
}
513522
}

0 commit comments

Comments
 (0)