HTML 解析器 (OWL/列表/详情/分页) — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
ეს სკრიპტი არ უნდა იყოს პირდაპირ დაინსტალირებული. ეს ბიბლიოთეკაა, სხვა სკრიპტებისთვის უნდა ჩართეთ მეტა-დირექტივაში // @require https://update.sleazyfork.org/scripts/581105/1842604/JM%20Shelf%20-%20Parser.js.
// ==UserScript==
// @name JM Shelf - Parser
// @namespace jmshelf-lib
// @version 1.0.0
// @author Kesdi
// @description HTML 解析器 (OWL/列表/详情/分页) — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
// @license MIT
// ==/UserScript==
//
// 此文件是 GreasyFork 库(library),不直接安装。
// 请安装主脚本: JM Shelf 给杂鱼的个性化推荐
//
// ═══ [7] PARSER ═══
// ============================================================
/**
 * HTML parsers for album listing, detail and pagination markup.
 *
 * The object is built inside an IIFE so the private helpers and constants
 * below do not add any names to the scope shared with the requiring script;
 * only `Parser` is exposed.
 *
 * Relies on names provided elsewhere: `normalizeTags`, `parseViewCount`, and
 * the browser globals `DOMParser`, `NodeFilter` and `Node`.
 */
const Parser = (() => {
  // NOTE(review): the three type-label lists below differ slightly
  // ('其他類' is absent from the owl list, '一般向韓漫' from the detail list).
  // They are preserved exactly as they were; confirm whether they should be unified.
  const OWL_TYPE_PATTERNS = ['單本', '同人', '韓漫', '短篇', '漢化', '3D', '青年漫', '長篇', '一般向韓漫'];
  const LIST_TYPE_PATTERNS = ['單本', '同人', '韓漫', '短篇', '其他類', '漢化', '3D', '青年漫', '長篇', '一般向韓漫'];
  const DETAIL_TYPE_PATTERNS = ['單本', '同人', '韓漫', '短篇', '其他類', '漢化', '3D', '青年漫', '長篇'];

  // Values that show up in author (main_tag=2) links but are not author names.
  const OWL_NON_AUTHORS = new Set(['N/A', '濫交', '滥交', 'NTR', '完結', '連載', '中文', '漢化', '全彩', '無修正']);
  const DETAIL_NON_AUTHORS = new Set([
    'N/A', '濫交', '滥交', 'NTR', '完結', '完', '連載', '中文', '漢化', '全彩', '無修正', '巨乳', '蘿莉',
    '触手', '純愛', '劇情', '短篇', '長篇', '單本', '同人', '韓漫', '3D', 'CG',
  ]);

  // Links whose href contains one of these fragments, or whose text is one of
  // the sort labels, are treated as part of the category/sort bar.
  const CATEGORY_PATHS = [
    '/albums/hanman', '/albums/doujin', '/albums/single', '/albums/another',
    '/albums/short', '/albums/meiman', '/albums/hanmansfw',
  ];
  const SORT_LABELS = new Set(['最新的', '最收藏', '最多收藏']);

  // Page size used to derive a page count from the "N 搜尋結果" total.
  const ITEMS_PER_PAGE = 80;

  /**
   * Extract and decode the `search_query` value of a link.
   * Returns null when the parameter is absent or is not valid percent-encoding,
   * so that one malformed link cannot abort parsing of the whole page.
   */
  function searchTagFromHref(href) {
    const found = href.match(/search_query=([^&]+)/);
    if (!found) return null;
    try {
      return decodeURIComponent(found[1]);
    } catch (err) {
      if (err instanceof URIError) return null;
      throw err;
    }
  }

  /** Return the entries of `patterns` that occur as substrings of `text`, in list order. */
  function matchTypeTags(text, patterns) {
    return patterns.filter((pattern) => text.includes(pattern));
  }

  /** Title of an album link: its trimmed text, else the alt/title of an <img> inside it. */
  function linkTitle(link) {
    const text = link.textContent.trim();
    if (text) return text;
    const img = link.querySelector('img');
    return img ? (img.getAttribute('alt') || img.getAttribute('title') || '') : '';
  }

  /**
   * Concatenate the text nodes that follow `startNode` in document order, up to
   * (not including) `stopNode`; runs to the end of the body when `stopNode` is
   * not encountered.
   *
   * Only text nodes are appended. Element nodes are visited purely to detect
   * `stopNode`: appending an element's textContent would pull in text located
   * after the boundary whenever a wrapper element contains `stopNode`.
   */
  function collectTextBetween(doc, startNode, stopNode) {
    let text = '';
    try {
      const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT);
      // Start at the album link instead of re-walking the document from the top.
      walker.currentNode = startNode;
      for (let node = walker.nextNode(); node && node !== stopNode; node = walker.nextNode()) {
        if (node.nodeType === Node.TEXT_NODE) text += ` ${node.textContent}`;
      }
    } catch (err) {
      // Best effort: type labels are optional metadata, so a failed walk
      // yields whatever text was gathered rather than failing the whole parse.
    }
    return text;
  }

  return {
    /**
     * Parse an album listing page.
     * Uses the owl-carousel parser when the document has more than three
     * `.owl-item` elements, otherwise the flat list parser.
     *
     * @param {string} html - Markup of the listing page.
     * @returns {Array<{id: string, title: string, tags: *, authors: string[], typeTags: *, views: number, likes: number}>}
     *   `tags` and `typeTags` are whatever `normalizeTags` returns.
     */
    parseListing(html) {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      const owlItems = doc.querySelectorAll('.owl-item');
      if (owlItems.length > 3) {
        return this._parseOwl(owlItems);
      }
      return this._parseList(doc);
    },

    /**
     * Parse owl-carousel layout: each `.owl-item` holds one album card, and the
     * card's tag/author links are read from inside that item.
     *
     * @param {Iterable<Element>} owlItems - The `.owl-item` elements.
     * @returns {Array<object>} One record per distinct album id, in input order.
     */
    _parseOwl(owlItems) {
      const results = [];
      const seen = new Set();
      for (const item of owlItems) {
        const albumLink = item.querySelector('a[href*="/album/"]');
        if (!albumLink) continue;
        const href = albumLink.getAttribute('href') || '';
        // The id may be followed by "/", "?", "#" or the end of the href; the
        // other parsers in this object accept ids without a trailing slash too.
        const idMatch = href.match(/\/album\/(\d+)(?:[/?#]|$)/);
        if (!idMatch || seen.has(idMatch[1])) continue;
        const id = idMatch[1];
        seen.add(id);

        // Title: .video-title text when present, else the image's title/alt.
        const videoTitle = item.querySelector('.video-title');
        const img = item.querySelector('img');
        let title = '';
        if (videoTitle) {
          title = videoTitle.textContent.trim();
        } else if (img) {
          title = img.getAttribute('title') || img.getAttribute('alt') || '';
        }

        // Links carrying main_tag=2 are author links; the rest are tags.
        const tags = [];
        const authors = [];
        for (const tagLink of item.querySelectorAll('a[href*="search_query"]')) {
          const tagHref = tagLink.getAttribute('href') || '';
          const tagName = searchTagFromHref(tagHref);
          if (tagName === null) continue;
          if (tagHref.includes('main_tag=2')) {
            if (!OWL_NON_AUTHORS.has(tagName) && !authors.includes(tagName)) authors.push(tagName);
          } else if (!tags.includes(tagName)) {
            tags.push(tagName);
          }
        }

        const cardText = item.textContent || '';
        // View count is read from the first "<number>K" occurrence in the card text.
        const viewMatch = cardText.match(/(\d+\.?\d*)K/);
        const views = viewMatch ? Math.round(parseFloat(viewMatch[1]) * 1000) : 0;

        results.push({
          id,
          title: title.substring(0, 200),
          tags: normalizeTags(tags),
          authors,
          typeTags: normalizeTags(matchTypeTags(cardText, OWL_TYPE_PATTERNS)),
          views,
          // No separate like count is parsed from a card; views are reused.
          likes: views,
        });
      }
      return results;
    },

    /**
     * Parse list-style layout by scanning all links in document order.
     * Every link after an album link, up to the next link to a *different*
     * album, is attributed to that album (tags, authors, numeric counters).
     *
     * @param {Document} doc - Parsed listing document.
     * @returns {Array<object>} One record per distinct album id, in document order.
     */
    _parseList(doc) {
      // Classify every link once, in document order.
      const linkNodes = [];
      for (const link of doc.querySelectorAll('a[href]')) {
        const href = link.getAttribute('href') || '';
        const albumMatch = href.match(/\/album\/(\d+)/);
        linkNodes.push({
          node: link,
          href,
          albumId: albumMatch ? albumMatch[1] : null,
          isAlbum: Boolean(albumMatch),
          searchTag: searchTagFromHref(href),
          isAuthor: href.includes('main_tag=2'),
          isBookmark: href.includes('login-modal') || href.includes('bookmark'),
        });
      }

      // Index of the last category/sort-bar link; album links at or before it
      // are ignored. -1 (not 0) when there is no bar, so that an album link
      // at index 0 is still accepted.
      let categoryBarEnd = -1;
      linkNodes.forEach(({ node, href }, i) => {
        const isCategoryLink = CATEGORY_PATHS.some((path) => href.includes(path));
        if (isCategoryLink || SORT_LABELS.has(node.textContent.trim())) categoryBarEnd = i;
      });

      const albumEntries = [];
      linkNodes.forEach((ln, index) => {
        if (ln.isAlbum && index > categoryBarEnd) {
          albumEntries.push({ index, id: ln.albumId, node: ln.node });
        }
      });

      const results = [];
      const seen = new Set();
      for (let ei = 0; ei < albumEntries.length; ei++) {
        const entry = albumEntries[ei];
        if (seen.has(entry.id)) continue;
        seen.add(entry.id);

        // Consecutive links to the same album (e.g. thumbnail link followed by
        // title link) form one group; the range ends at the first link to a
        // different album, so links after the second anchor are not lost.
        let next = ei + 1;
        while (next < albumEntries.length && albumEntries[next].id === entry.id) next++;
        const endIdx = next < albumEntries.length ? albumEntries[next].index : linkNodes.length;

        // First non-empty title among the group's links.
        let title = '';
        for (let k = ei; k < next && !title; k++) title = linkTitle(albumEntries[k].node);

        const tags = [];
        const authors = [];
        let views = 0;
        let likes = 0;
        for (let j = entry.index + 1; j < endIdx; j++) {
          const ln = linkNodes[j];
          if (ln.searchTag) {
            if (ln.isAuthor) {
              if (ln.searchTag !== 'N/A' && !authors.includes(ln.searchTag)) authors.push(ln.searchTag);
            } else if (!tags.includes(ln.searchTag)) {
              tags.push(ln.searchTag);
            }
          }
          if (!ln.isAlbum && !ln.isBookmark && !ln.searchTag) {
            // A link whose whole text is a number: the first one is taken as
            // the view count, the next one as the like count.
            const counter = ln.node.textContent.trim().match(/^([\d,]+[KMB]?)$/);
            if (counter && views === 0) views = parseViewCount(counter[1]);
            else if (counter && likes === 0) likes = parseViewCount(counter[1]);
          }
        }
        if (likes === 0) likes = views;

        const stopNode = endIdx < linkNodes.length ? linkNodes[endIdx].node : null;
        const rangeText = collectTextBetween(doc, entry.node, stopNode);

        results.push({
          id: entry.id,
          title: title.substring(0, 200),
          tags: normalizeTags(tags),
          authors,
          typeTags: normalizeTags(matchTypeTags(rangeText, LIST_TYPE_PATTERNS)),
          views,
          // Emit the like count parsed above (falls back to views when absent).
          likes,
        });
      }
      return results;
    },

    /**
     * Parse an album detail page.
     * Tags come from `span[itemprop="genre"][data-type="tags"]`, authors from
     * `span[itemprop="author"][data-type="author"]`.
     *
     * @param {string} html - Markup of the detail page.
     * @returns {{tags: *, authors: string[], typeTags: *, related: string[], desc: string, title: string}}
     *   `related` holds every distinct album id linked anywhere in the document.
     */
    parseDetail(html) {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      const result = { tags: [], authors: [], typeTags: [], related: [], desc: '', title: '' };

      const h1 = doc.querySelector('h1');
      if (h1) result.title = h1.textContent.trim();

      // Description: text of the parent of the first <h2> starting with "敘述:".
      for (const h2 of doc.querySelectorAll('h2')) {
        if (h2.textContent.trim().startsWith('敘述:')) {
          result.desc = (h2.parentElement?.textContent || '').replace('敘述:', '').trim().substring(0, 500);
          break;
        }
      }

      const tagSet = new Set();
      for (const span of doc.querySelectorAll('span[itemprop="genre"][data-type="tags"]')) {
        for (const link of span.querySelectorAll('a')) {
          const tagName = searchTagFromHref(link.getAttribute('href') || '');
          if (tagName !== null) tagSet.add(tagName);
        }
      }

      const authorSet = new Set();
      for (const span of doc.querySelectorAll('span[itemprop="author"][data-type="author"]')) {
        for (const link of span.querySelectorAll('a[href*="search_query"]')) {
          const authorName = searchTagFromHref(link.getAttribute('href') || '');
          if (authorName === null) continue;
          // Names shorter than two characters, names containing a space and
          // known non-author words are rejected.
          const plausible = authorName.length >= 2 && !authorName.includes(' ');
          if (plausible && !DETAIL_NON_AUTHORS.has(authorName)) authorSet.add(authorName);
        }
      }
      result.authors = [...authorSet];

      // Type labels are searched only in the first 5000 characters of body text.
      const mainText = (doc.body?.textContent || '').substring(0, 5000);

      const relatedSeen = new Set();
      for (const link of doc.querySelectorAll('a[href*="/album/"]')) {
        const match = (link.getAttribute('href') || '').match(/\/album\/(\d+)/);
        if (match) relatedSeen.add(match[1]);
      }
      result.related = [...relatedSeen];

      result.tags = normalizeTags([...tagSet]);
      result.typeTags = normalizeTags(matchTypeTags(mainText, DETAIL_TYPE_PATTERNS));
      return result;
    },

    /**
     * Extract pagination info.
     * The page count is the larger of (a) the highest `page=N` found in any
     * link and (b) the "N 搜索/搜尋結果" total divided by the page size, rounded up.
     *
     * @param {string} html - Markup of a listing/search page.
     * @returns {{totalPages: number}} Always at least 1.
     */
    parsePagination(html) {
      const doc = new DOMParser().parseFromString(html, 'text/html');
      let maxPage = 1;
      for (const link of doc.querySelectorAll('a[href*="page="]')) {
        // \b keeps parameters such as "per_page=" or "perpage=" from being
        // mistaken for the page number.
        const match = (link.getAttribute('href') || '').match(/\bpage=(\d+)/);
        if (match) maxPage = Math.max(maxPage, parseInt(match[1], 10));
      }

      const bodyText = doc.body?.textContent || '';
      const totalMatch = bodyText.match(/(\d+[\d,]*)\s*搜[索尋]結果/);
      if (totalMatch) {
        const totalItems = parseInt(totalMatch[1].replace(/,/g, ''), 10);
        maxPage = Math.max(maxPage, Math.ceil(totalItems / ITEMS_PER_PAGE));
      }
      return { totalPages: maxPage };
    },
  };
})();