收藏/历史抓取 + URL 发现 + 用户名检测 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
Dette scriptet burde ikke installeres direkte. Det er et bibliotek for andre script å inkludere med det nye metadirektivet // @require https://update.sleazyfork.org/scripts/581107/1842606/JM%20Shelf%20-%20Scraper.js
// ==UserScript==
// @name JM Shelf - Scraper
// @namespace jmshelf-lib
// @version 1.0.0
// @author Kesdi
// @description 收藏/历史抓取 + URL 发现 + 用户名检测 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
// @license MIT
// ==/UserScript==
//
// 此文件是 GreasyFork 库(library),不直接安装。
// 请安装主脚本: JM Shelf 给杂鱼的个性化推荐
//
// ═══ [9] SCRAPER ═══ — 收藏+历史抓取 + URL发现
// ============================================================
// 用户名检测
/**
 * Detects the current user's name from profile links on the page.
 *
 * Every anchor whose href contains "/user/" is inspected and the path segment
 * directly after "/user/" is taken as a candidate. Candidates equal to "user"
 * or 30+ characters long are discarded.
 *
 * @returns {string} The first distinct candidate in document order, or '' when
 *   no candidate is found.
 */
function detectUsername() {
  const MAX_NAME_LENGTH = 30;
  const candidates = new Set();
  for (const link of document.querySelectorAll('a[href*="/user/"]')) {
    const href = link.getAttribute('href') || '';
    // Stop the capture at '?' and '#' as well as '/': an href such as
    // "/user/alice?tab=favorite" must yield "alice", not "alice?tab=favorite".
    const match = href.match(/\/user\/([^/?#]+)/);
    if (match && match[1] !== 'user' && match[1].length < MAX_NAME_LENGTH) {
      candidates.add(match[1]);
    }
  }
  const unique = [...candidates];
  LOG.info(`检测到用户: ${JSON.stringify(unique)}`);
  if (unique.length > 0) return unique[0];
  LOG.warn('未检测到用户名 — 请确认已登录18comic并在主页');
  return '';
}
/**
 * Reports whether the current page looks like a logged-in session.
 *
 * The check is purely textual: the page counts as logged in unless the body
 * text contains the site's login/register prompt. A missing body or empty
 * text therefore also counts as logged in.
 *
 * @returns {boolean} false when the login/register prompt is present.
 */
function isLoggedIn() {
  const LOGIN_PROMPT = '會員登錄/註冊';
  const pageText = document.body?.textContent || '';
  return pageText.indexOf(LOGIN_PROMPT) === -1;
}
// URL发现
/**
 * Finds the URL of the user's favorites page.
 *
 * A URL already stored in State wins. Otherwise a fixed list of candidate
 * paths is requested one by one through the shared fetcher; the first response
 * that contains "/album/" and does not contain "login-modal" is stored in
 * State and returned.
 *
 * @param {string} username - Name interpolated into the candidate paths.
 * @returns {Promise<string>} The favorites URL, or '' when no candidate matched.
 */
async function discoverFavoritesUrl(username) {
  const savedUrl = State.getFavoritesUrl();
  if (savedUrl) {
    LOG.info(`已保存收藏URL: ${savedUrl}`);
    return savedUrl;
  }

  LOG.info(`探测收藏页... 用户=${username}`);
  const userRoot = `/user/${username}`;
  const candidatePaths = [
    `${userRoot}/favorite/albums`,
    `${userRoot}/favorites`,
    `${userRoot}/favorite`,
    `${userRoot}/bookmark`,
    `${userRoot}?tab=favorite`,
    `${userRoot}?tab=album`,
    '/bookmark',
  ];

  for (const candidate of candidatePaths) {
    try {
      const probeUrl = `https://18comic.vip${candidate}`;
      const body = await fetcher.enqueue(probeUrl, null, 10);
      if (body === null) continue;

      const listsAlbums = body.includes('/album/');
      const showsLogin = body.includes('login-modal');
      LOG.info(` ${candidate}: len=${body.length} albums=${listsAlbums} login=${showsLogin}`);
      if (!listsAlbums || showsLogin) continue;

      State.saveFavoritesUrl(probeUrl);
      LOG.info(`✅ 收藏页: ${probeUrl}`);
      return probeUrl;
    } catch (err) {
      // A failing candidate is only logged; the next one is still tried.
      LOG.info(` ${candidate}: ${err.message}`);
    }
  }

  LOG.info('HTTP探测未发现收藏页 (预期, 将用构造URL)');
  return '';
}
// Favorites scraping (fetch + DOMParser; compares pages 1 and 2 of each folder)
/**
 * Collects album ids from the user's favorites, including every folder linked
 * from the "#folder_list" element of the first page.
 *
 * For each folder, pages 1 and 2 are fetched and an id is kept only when it
 * appears on both. If that yields nothing, every album id found on page 1 of
 * baseUrl is used instead. Ids that are not greater than 100 are ignored.
 * NOTE(review): the reason for the page-1/page-2 intersection is not visible
 * here — presumably it filters out unrelated album links; confirm.
 *
 * @param {string} baseUrl - Favorites URL without a page parameter.
 * @param {number} maxPages - NOTE(review): accepted but never read.
 * @param {(info: {message: string, progress: number}) => void} [onProgress]
 *   Called once, after all pages have been processed.
 * @returns {Promise<Array<{id: string, title: string}>>} One entry per id, title always ''.
 */
async function scrapeFavorites(baseUrl, maxPages, onProgress) {
  const SITE_ORIGIN = 'https://18comic.vip';

  // Fetches a page with cookies; a network error or non-OK status yields ''.
  const fetchHtml = async (url) => {
    try {
      const response = await fetch(url, { credentials: 'include' });
      if (!response.ok) return '';
      return await response.text();
    } catch (err) {
      return '';
    }
  };

  const withPage = (url, page) => `${url}${url.includes('?') ? '&' : '?'}page=${page}`;
  const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');

  // Lists the numeric ids (> 100) of all album links in the markup, in document order.
  const albumIdsIn = (html) => {
    const ids = [];
    parseHtml(html).querySelectorAll('a[href*="/album/"]').forEach((anchor) => {
      const found = (anchor.getAttribute('href') || '').match(/\/album\/(\d+)/);
      if (found && Number.parseInt(found[1], 10) > 100) ids.push(found[1]);
    });
    return ids;
  };

  // Step 1: discover folder URLs from the first page.
  const firstPageUrl = withPage(baseUrl, 1);
  const firstPageHtml = await fetchHtml(firstPageUrl);
  const folderUrls = [baseUrl];
  const folderList = parseHtml(firstPageHtml).querySelector('#folder_list');
  if (folderList) {
    folderList.querySelectorAll('a[href*="favorite/albums?folder="]').forEach((anchor) => {
      const href = anchor.getAttribute('href');
      if (!href) return;
      const folderUrl = SITE_ORIGIN + href.replace(/[?&]page=\d+/, '');
      if (!folderUrls.includes(folderUrl)) folderUrls.push(folderUrl);
    });
  }

  // Step 2: per folder, keep ids that show up on both page 1 and page 2.
  const confirmedIds = new Set();
  for (const folderUrl of folderUrls) {
    const pageOneIds = new Set(albumIdsIn(await fetchHtml(withPage(folderUrl, 1))));
    const pageTwoIds = albumIdsIn(await fetchHtml(withPage(folderUrl, 2)));
    for (const id of pageTwoIds) {
      if (pageOneIds.has(id)) confirmedIds.add(id);
    }
  }

  // Step 3: fall back to everything on the first page when nothing was confirmed.
  if (confirmedIds.size === 0) {
    for (const id of albumIdsIn(await fetchHtml(firstPageUrl))) confirmedIds.add(id);
  }

  const albums = Array.from(confirmedIds, (id) => ({ id, title: '' }));
  if (onProgress) {
    onProgress({ message: `收藏: ${folderUrls.length}个文件夹, ${albums.length}本`, progress: 100 });
  }
  LOG.info(`收藏抓取: ${folderUrls.length}个文件夹, ${albums.length}本`);
  return albums;
}
// 全量扫描主流程
/**
 * Runs the full scan: reading history first, then favorites.
 *
 * Steps, in order:
 *  1. If the current page path contains "/favorite/" or "/bookmark", its URL
 *     (without the page parameter) is stored as the favorites URL.
 *  2. The watchlist page is fetched, at most 25 parsed entries are kept as
 *     history, and ids not yet present are appended to the viewed-albums
 *     list in State with the current timestamp.
 *  3. The favorites URL is discovered (or constructed from the username),
 *     favorites are scraped, enriched with detail data and saved to State.
 *
 * A failure in step 2 is logged and does not abort the scan.
 *
 * @param {string} username - Site user name used to build user URLs.
 * @param {(info: {phase: string, progress: number, message: string}) => void} [onProgress]
 *   Optional progress listener.
 * @returns {Promise<{favorites: Array<object>, history: Array<object>}>}
 */
async function initialScan(username, onProgress) {
  const report = (info) => {
    if (onProgress) onProgress(info);
  };

  // Drops "page=N" while keeping the query string well-formed: when another
  // parameter follows, the leading '?' or '&' is kept so "?page=2&folder=3"
  // becomes "?folder=3" instead of the malformed "&folder=3".
  const stripPageParam = (url) =>
    url.replace(/([?&])page=\d+(&?)/, (whole, lead, trail) => (trail ? lead : ''));

  LOG.info('开始扫描...');
  report({ phase: 'discover', progress: 0, message: '探测收藏/历史URL...' });

  const currentPath = location.pathname;
  if (currentPath.includes('/favorite/') || currentPath.includes('/bookmark')) {
    const curUrl = stripPageParam(location.href);
    State.saveFavoritesUrl(curUrl);
    LOG.info(`已在收藏页: ${curUrl}`);
  }

  const histUrl = `https://18comic.vip/user/${username}/favorite/watchlist`;
  let history = [];
  try {
    report({ phase: 'history', progress: 0, message: '提取历史记录...' });
    const histHtml = await fetcher.enqueue(histUrl, null, 5);
    if (histHtml) {
      const items = Parser.parseListing(histHtml);
      history = items.slice(0, 25).map((it) => ({ id: it.id, title: it.title || '', tags: it.tags || [] }));
      report({ phase: 'history', progress: 100, message: `历史: ${history.length} 条 (赛后补标签)` });
      LOG.info(`历史: ${history.length} 条 [${history.map(h=>h.id).join(',')}] (赛后从候选池补标签)`);

      // Merge history into the viewed list; entries may be plain ids or
      // objects carrying an id.
      const viewed = State.getViewedAlbums();
      const knownIds = new Set(viewed.map((v) => String(v.id || v)));
      const now = Date.now();
      let merged = 0;
      for (const entry of history) {
        const sid = String(entry.id);
        if (knownIds.has(sid)) continue;
        // Remember the id so a duplicate within the same listing is not added twice.
        knownIds.add(sid);
        viewed.push({ id: sid, viewedAt: now });
        merged++;
      }
      if (merged > 0) {
        State.saveViewedAlbums(viewed);
        LOG.info(`历史→浏览合并: +${merged}条`);
      }
    }
  } catch (e) {
    LOG.warn('历史抓取失败', e.message);
  }

  let favUrl = await discoverFavoritesUrl(username);
  if (!favUrl && username) {
    favUrl = `https://18comic.vip/user/${username}/favorite/albums`;
    LOG.info(`构造收藏URL: ${favUrl}`);
  }

  let favorites = [];
  if (favUrl) {
    report({ phase: 'favorites', progress: 0, message: 'iframe提取收藏...' });
    favorites = await scrapeFavorites(favUrl, 30, (info) => {
      report({ phase: 'favorites', progress: info.progress, message: info.message });
    });
    LOG.info(`收藏: ${favorites.length} 本`);
    if (favorites.length > 0) {
      report({ phase: 'favorites', progress: 90, message: `丰富 ${favorites.length} 收藏标签...` });
      // Detail enrichment is mapped onto the last 10% of the favorites phase.
      favorites = await enrichAlbumsWithDetails(favorites, (i, total) => {
        report({ phase: 'favorites', progress: 90 + Math.round((i / total) * 10), message: `收藏标签 ${i}/${total}` });
      });
      State.saveFavorites(favorites);
      const ft2 = favorites.reduce((s, a) => s + (a.tags || []).length, 0);
      LOG.info(`收藏详情: ${favorites.length}本 | 总标签:${ft2} | ID:${favorites.map(a=>a.id).slice(0,10).join(',')}${favorites.length>10?'...':''}`);
    }
  } else {
    LOG.warn('⚠️ 无收藏数据');
  }

  return { favorites, history };
}
/**
 * Fetches a paginated listing and returns the parsed items of all pages.
 *
 * The first page determines the page count via Parser.parsePagination. When
 * that reports at most one page but the first page holds at least 15 items,
 * up to 10 pages are probed instead. Iteration stops at the first page that
 * parses to zero items. A page whose request throws is logged and skipped; a
 * page with an empty response is skipped silently.
 *
 * @param {string} baseUrl - Listing URL without a page parameter.
 * @param {(page: number, totalPages: number) => void} [onPageProgress]
 *   Optional listener invoked after each processed page.
 * @returns {Promise<Array<object>>} Items in page order; empty when the first
 *   page cannot be fetched.
 */
async function scrapeAllPages(baseUrl, onPageProgress) {
  const FULL_PAGE_ITEM_COUNT = 15;
  const GUESSED_PAGE_COUNT = 10;

  const allItems = [];
  const firstHtml = await fetcher.enqueue(baseUrl, null, 10);
  if (!firstHtml) return allItems;

  const items = Parser.parseListing(firstHtml);
  allItems.push(...items);

  const pagination = Parser.parsePagination(firstHtml);
  let totalPages = pagination.totalPages || 1;
  if (totalPages <= 1 && items.length >= FULL_PAGE_ITEM_COUNT) {
    // Pagination info is missing although the page looks full: probe ahead.
    totalPages = GUESSED_PAGE_COUNT;
  }

  // The listener is optional, like the other progress callbacks in this file.
  onPageProgress?.(1, totalPages);

  const sep = baseUrl.includes('?') ? '&' : '?';
  for (let page = 2; page <= totalPages; page++) {
    const url = `${baseUrl}${sep}page=${page}`;
    try {
      const html = await fetcher.enqueue(url, null, 5);
      if (html) {
        const pageItems = Parser.parseListing(html);
        if (pageItems.length === 0) break;
        allItems.push(...pageItems);
      }
    } catch (e) {
      LOG.warn(`第 ${page} 页抓取失败: ${e.message}`);
    }
    onPageProgress?.(page, totalPages);
  }
  return allItems;
}
/**
 * Fills in tags, authors, type tags and title for albums by fetching their
 * detail pages.
 *
 * Albums that already carry at least CONFIG.TAG_ENRICH_THRESHOLD tags are
 * passed through without a request. Album objects are updated in place and
 * returned in input order. A failed request is logged and leaves the album
 * unchanged.
 *
 * @param {Array<{id: (string|number), title?: string, tags?: string[]}>} albums
 * @param {(done: number, total: number) => void} [onProgress]
 *   Optional listener invoked once per album, including albums that were
 *   skipped, so the count always ends at total.
 * @returns {Promise<Array<object>>} The same album objects, in input order.
 */
async function enrichAlbumsWithDetails(albums, onProgress) {
  const enriched = [];
  for (let i = 0; i < albums.length; i++) {
    const album = albums[i];
    const hasEnoughTags = album.tags && album.tags.length >= CONFIG.TAG_ENRICH_THRESHOLD;
    if (!hasEnoughTags) {
      try {
        const html = await fetcher.enqueue(`https://18comic.vip/album/${album.id}/`, null, 3);
        if (html) {
          const detail = Parser.parseDetail(html);
          album.tags = detail.tags || [];
          album.authors = detail.authors || [];
          album.typeTags = detail.typeTags || [];
          album.title = detail.title || album.title;
        }
      } catch (e) {
        LOG.warn(`详情页获取失败 #${album.id}: ${e.message}`);
      }
    }
    enriched.push(album);
    // Report skipped albums too; otherwise progress stalls and may never
    // reach total when the last albums need no request.
    if (onProgress) onProgress(i + 1, albums.length);
  }
  return enriched;
}