diff --git a/package-lock.json b/package-lock.json index 7a55825..052824a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "shadow-reader", - "version": "0.6.2", + "version": "0.8.3", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "shadow-reader", - "version": "0.6.2", + "version": "0.8.3", "dependencies": { "axios": "^0.21.1", "chardet": "^1.3.0", diff --git a/package.json b/package.json index 57c1ce7..0666eaf 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "shadow-reader", "displayName": "shadow reader", "description": "摸鱼划水看书,十分隐蔽", - "version": "0.8.2", + "version": "0.8.3", "publisher": "rainbroadcast", "engines": { "vscode": "^1.54.0" @@ -31,10 +31,10 @@ }, "shadowReader.onlineBookURL": { "type": "string", - "default": "https://www.biqugee6.com", + "default": "https://www.biquge7.xyz", "enum": [ - "https://www.caimoge.net", - "https://www.biqugee6.com" + "https://www.caimoge.com", + "https://www.biquge7.xyz" ], "enumDescriptions": [ "采墨阁", diff --git a/src/const.ts b/src/const.ts index 84b1bd2..27dffe5 100644 --- a/src/const.ts +++ b/src/const.ts @@ -1,4 +1,4 @@ export const CrawelerDomains = new Map([ - ["biquURL", "https://www.biqugee6.com"], - ["caimoURL", "https://www.caimoge.net"], + ["biquURL", "https://www.biquge7.xyz"], + ["caimoURL", "https://www.caimoge.com"], ]); diff --git a/src/crawler/biqu.ts b/src/crawler/biqu.ts index 6a74a55..3487c74 100644 --- a/src/crawler/biqu.ts +++ b/src/crawler/biqu.ts @@ -1,75 +1,55 @@ import cheerioModule = require("cheerio"); import axios from "axios"; import iconv = require('iconv-lite'); -import https = require('https'); import { window } from "vscode"; import { Craweler } from "./interface"; - -const ignoreSSL = axios.create({ - httpsAgent: new https.Agent({ - rejectUnauthorized: false - }) -}); - -function sleep(delay: number) { - return new Promise(reslove => { - setTimeout(reslove, delay) - }) -} +import { CrawelerDomains } from "../const"; export class BiquCrawler implements Craweler { - private readonly baseURL = "https://www.biqugee6.com"; + private readonly baseURL = CrawelerDomains.get("biquURL"); private readonly defaultEncode = "utf-8"; + private readonly requestHeaders = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Accept-Language": "zh-CN,zh;q=0.9", + }; async searchBook(keyWord: string): Promise> { - let data: string = ""; - let self = this; - let count = 0; - const retryCount = 5; - let result; - while (count < retryCount) { - try { - const response = await ignoreSSL.get(self.baseURL + "/search.php", { - params: { keyword: keyWord } - }); - result = response; - if (response.data.indexOf("Verify") !== -1) { - count++; - await sleep(1000); - continue; - } - data = response.data; - break; - } catch (error: any) { - window.showErrorMessage(error.message); - throw error; - } - } - - if (count >= retryCount) { - let error_msg = "遭遇验证码次数过多,稍后再试吧"; - window.showErrorMessage(error_msg); - throw new Error(error_msg); + let data: string; + try { + const response = await axios.get(this.baseURL + "/search/", { + headers: this.requestHeaders, + params: { keyword: keyWord }, + responseType: "arraybuffer", + }); + data = iconv.decode(response.data, this.defaultEncode); + } catch (error: any) { + window.showErrorMessage(error.message); + throw error; } const $ = cheerioModule.load(data); let choices = new Map(); - $("a.result-game-item-title-link").each(function (_i, ele) { - choices.set($(ele).prop("title"), self.baseURL + $(ele).prop("href")); + $("a[href^='/']").each((_i, ele) => { + const title = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!title || !href || !/^\/\d+$/.test(href)) { + return; + } + if (!choices.has(title)) { + choices.set(title, this.baseURL + href); + } }); - if (choices.size == 0) { - console.log(result) - } return choices; - } async findChapterURL(url: string): Promise> { let data: string; - let self = this; try { - const response = await axios.get(url, {responseType: "arraybuffer"}); + const response = await axios.get(url, { + headers: this.requestHeaders, + responseType: "arraybuffer", + }); data = iconv.decode(response.data, this.defaultEncode); } catch (error: any) { window.showErrorMessage(error.message); @@ -78,9 +58,16 @@ export class BiquCrawler implements Craweler { const $ = cheerioModule.load(data); let choices = new Map(); - $("#list a").each(function (_i, ele) { - choices.set($(ele).text(), self.baseURL + $(ele).prop("href")); + $("a[href^='/']").each((_i, ele) => { + const title = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!title || !href || !title.startsWith("第")) { + return; + } + if (!choices.has(title)) { + choices.set(title, this.baseURL + href); + } }); return choices; } -} \ No newline at end of file +} diff --git a/src/crawler/caimo.ts b/src/crawler/caimo.ts index 16d7df3..e0125fc 100644 --- a/src/crawler/caimo.ts +++ b/src/crawler/caimo.ts @@ -3,41 +3,106 @@ import axios from "axios"; import iconv = require('iconv-lite'); import { window } from "vscode"; import { Craweler } from "./interface"; +import { CrawelerDomains } from "../const"; -const querystring = require('querystring'); - +type BookSearchResult = Map; export class CaimoCrawler implements Craweler { - private readonly baseURL = "https://www.caimoge.net"; + private readonly baseURL = CrawelerDomains.get("caimoURL"); private readonly defaultEncode = "utf-8"; + private readonly requestHeaders = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Accept-Language": "zh-CN,zh;q=0.9", + }; + + private async fetchHTML(url: string, params?: Record): Promise { + const response = await axios.get(url, { + headers: this.requestHeaders, + params, + responseType: "arraybuffer", + }); + return iconv.decode(response.data, this.defaultEncode); + } + + private parseBookLinks(data: string): BookSearchResult { + const $ = cheerioModule.load(data); + const choices = new Map(); + $("a[href^='/txt/']").each((_i, ele) => { + const title = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!title || !href) { + return; + } + if (!choices.has(title)) { + choices.set(title, this.baseURL + href); + } + }); + return choices; + } + + private async searchFromShelf(keyWord: string): Promise { + const seedPages = [ + "/", + "/shuku/", + "/rank/", + "/rank/weekvisit/", + "/rank/monthvisit/", + "/rank/goodnum/", + "/shuku/xuanhuan/1.html", + "/shuku/wuxia/1.html", + "/shuku/yanqing/1.html", + "/shuku/dushi/1.html", + "/shuku/lishi/1.html", + "/shuku/youxi/1.html", + "/shuku/kehuan/1.html", + "/shuku/tongren/1.html", + "/shuku/qita/1.html", + ]; + const normalizedKeyword = keyWord.trim().toLowerCase(); + const matches = new Map(); + + for (const page of seedPages) { + const data = await this.fetchHTML(this.baseURL + page); + const choices = this.parseBookLinks(data); + for (const [title, url] of choices) { + if (title.toLowerCase().includes(normalizedKeyword)) { + matches.set(title, url); + } + } + } + return matches; + } async searchBook(keyWord: string): Promise> { - let data: string; - let self = this; try { - const response = await axios.post(self.baseURL + "/search/", querystring.stringify({ searchkey: keyWord })); - data = response.data; + const data = await this.fetchHTML(this.baseURL + "/search/", { searchkey: keyWord }); + const choices = this.parseBookLinks(data); + if (choices.size > 0) { + return choices; + } + } catch (error: any) { + if (error?.response?.status && error.response.status !== 404) { + window.showWarningMessage(`站内搜索不可用,改用书库检索: ${error.message}`); + } + } + + try { + const choices = await this.searchFromShelf(keyWord); + if (choices.size === 0) { + window.showWarningMessage("当前书源未找到匹配书籍,可尝试更完整的书名"); + } + return choices; } catch (error: any) { window.showErrorMessage(error.message); throw error; } - - const $ = cheerioModule.load(data); - let choices = new Map(); - $("#sitembox h3>a").each(function (_i, ele) { - choices.set($(ele).text(), self.baseURL + $(ele).prop("href")); - }); - return choices; - } async findChapterURL(url: string): Promise> { let data: string; - let self = this; try { - const response = await axios.get(url, {responseType: "arraybuffer"}); - data = iconv.decode(response.data, this.defaultEncode); + data = await this.fetchHTML(url); } catch (error: any) { window.showErrorMessage(error.message); throw error; @@ -45,9 +110,16 @@ export class CaimoCrawler implements Craweler { const $ = cheerioModule.load(data); let choices = new Map(); - $("#readerlist ul a").each(function (_i, ele) { - choices.set($(ele).text(), self.baseURL + $(ele).prop("href")); + $("a[href^='/read/']").each((_i, ele) => { + const title = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!title || !href || !title.startsWith("第")) { + return; + } + if (!choices.has(title)) { + choices.set(title, this.baseURL + href); + } }); return choices; } -} \ No newline at end of file +} diff --git a/src/menu.ts b/src/menu.ts index c614970..570ec42 100644 --- a/src/menu.ts +++ b/src/menu.ts @@ -94,35 +94,51 @@ async function newBookMenu(context: ExtensionContext) { window.showErrorMessage("onlineBookURL未配置"); return; } - window.showInputBox({ + const bookFuzzyName = await window.showInputBox({ value: "", prompt: "要搜索的书名" - }).then(async bookFuzzyName => { - if (bookFuzzyName) { - let crawler: Craweler = newOnlineCraweler(); - let bookDict = await crawler.searchBook(bookFuzzyName); - window.showQuickPick(Array.from(bookDict.keys()), {matchOnDescription: true}).then(async value => { - if (value) { - let bookURL = bookDict.get(value); - let chapterURLDict = await crawler.findChapterURL(bookURL); - window.showQuickPick(Array.from(chapterURLDict.keys()), {matchOnDescription: true}).then( startChapter => { - if(startChapter) { - let bookLibraryDictString = context.globalState.get(bookLibraryKey, "{}"); - let bookLibraryDict = JSON.parse(bookLibraryDictString); - bookLibraryDict[value] = bookURL; - context.globalState.update(bookLibraryKey, JSON.stringify(bookLibraryDict)); - context.globalState.update(bookURL, { - kind: BookKind.online, - readedCount: 0, - sectionPath: chapterURLDict.get(startChapter), - }); - window.showInformationMessage("添加成功"); - } - }); - } - }); - } }); + if (!bookFuzzyName) { + return; + } + + try { + let crawler: Craweler = newOnlineCraweler(); + let bookDict = await crawler.searchBook(bookFuzzyName); + if (bookDict.size === 0) { + return; + } + + const value = await window.showQuickPick(Array.from(bookDict.keys()), { matchOnDescription: true }); + if (!value) { + return; + } + + let bookURL = bookDict.get(value); + let chapterURLDict = await crawler.findChapterURL(bookURL); + if (chapterURLDict.size === 0) { + window.showWarningMessage("找到书籍了,但没拿到章节目录"); + return; + } + + const startChapter = await window.showQuickPick(Array.from(chapterURLDict.keys()), { matchOnDescription: true }); + if (!startChapter) { + return; + } + + let bookLibraryDictString = context.globalState.get(bookLibraryKey, "{}"); + let bookLibraryDict = JSON.parse(bookLibraryDictString); + bookLibraryDict[value] = bookURL; + await context.globalState.update(bookLibraryKey, JSON.stringify(bookLibraryDict)); + await context.globalState.update(bookURL, { + kind: BookKind.online, + readedCount: 0, + sectionPath: chapterURLDict.get(startChapter), + }); + window.showInformationMessage("添加成功"); + } catch (error: any) { + window.showErrorMessage(error.message); + } break; @@ -153,4 +169,4 @@ export function showSearchKeywordBox(context: ExtensionContext) { } } ); -} \ No newline at end of file +} diff --git a/src/parse/biqu.ts b/src/parse/biqu.ts index 1dd8b32..b63ea2f 100644 --- a/src/parse/biqu.ts +++ b/src/parse/biqu.ts @@ -31,7 +31,13 @@ export class BiquWebParser implements Parser { this.currentPageURL = pageURL; let data: string; try { - const response = await axios.get(pageURL, { responseType: "arraybuffer" }); + const response = await axios.get(pageURL, { + headers: { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Accept-Language": "zh-CN,zh;q=0.9", + }, + responseType: "arraybuffer", + }); data = iconv.decode(response.data, this.defaultEncode); } catch (e: any) { window.showErrorMessage(e.message); @@ -39,25 +45,26 @@ export class BiquWebParser implements Parser { } const $ = cheerioModule.load(data); - let html = $("#content").html(); + let html = $(".text").html(); if (!html) { window.showErrorMessage("爬不到内容啦"); return; } - this.cacheText = html.replace(/

.*<\/p>/g, '').replace(/

/g, '\n').trim(); + this.cacheText = html.replace(//g, "").replace(/ /g, "").replace(/

/g, '\n').trim(); this.title = $("h1").text(); - $(".bottem1>a").each((i, ele) => { - switch (i) { - case 1: - this.prevPageURL = `${this.baseURL}${$(ele).prop("href")}`; - break; - - case 3: - this.nextPageURL = `${this.baseURL}${$(ele).prop("href")}`; - break; - - default: - break; + this.prevPageURL = this.indexPageURL; + this.nextPageURL = this.indexPageURL; + $("a[href^='/']").each((_i, ele) => { + const text = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!href) { + return; + } + if (text.includes("上一章")) { + this.prevPageURL = `${this.baseURL}${href}`; + } + if (text.includes("下一章")) { + this.nextPageURL = `${this.baseURL}${href}`; } }); } @@ -125,4 +132,4 @@ export class BiquWebParser implements Parser { sectionPath: this.currentPageURL, }; }; -} \ No newline at end of file +} diff --git a/src/parse/caimo.ts b/src/parse/caimo.ts index 2a7e3c4..511ebc9 100644 --- a/src/parse/caimo.ts +++ b/src/parse/caimo.ts @@ -31,7 +31,13 @@ export class CaimoWebParser implements Parser { this.currentPageURL = pageURL; let data: string; try { - const response = await axios.get(pageURL, { responseType: "arraybuffer" }); + const response = await axios.get(pageURL, { + headers: { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "Accept-Language": "zh-CN,zh;q=0.9", + }, + responseType: "arraybuffer", + }); data = iconv.decode(response.data, this.defaultEncode); } catch (e: any) { window.showErrorMessage(e.message); @@ -40,17 +46,31 @@ export class CaimoWebParser implements Parser { const $ = cheerioModule.load(data); - let html = $("#content").html(); + let html = $("#chaptercontent #booktxt").html() || $(".content #booktxt").html(); if (!html) { window.showErrorMessage("爬不到内容啦"); return; } this.cacheText = html.replace(/

/g, '').replace(/<\/p>/g, '\n').replace(/

.*<\/div>/g, '').trim(); - this.title = $(".title em").text(); - - this.prevPageURL = this.baseURL + $("#prev_url").prop("href"); - this.nextPageURL = this.baseURL + $("#next_url").prop("href"); + this.title = $(".title em").text() || $("h1").text(); + + const navLinks = $("a[href^='/read/']"); + this.prevPageURL = this.currentPageURL; + this.nextPageURL = this.indexPageURL; + navLinks.each((_i, ele) => { + const text = $(ele).text().trim(); + const href = $(ele).prop("href"); + if (!href) { + return; + } + if (text.includes("上一页")) { + this.prevPageURL = this.baseURL + href; + } + if (text.includes("下一页")) { + this.nextPageURL = this.baseURL + href; + } + }); } async getCacheText(start: number, pageSize: number): Promise { @@ -116,4 +136,4 @@ export class CaimoWebParser implements Parser { sectionPath: this.currentPageURL, }; }; -} \ No newline at end of file +}