JM Shelf - Candidate Pool

8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。

此腳本不應該直接安裝,它是一個供其他腳本使用的函式庫。欲使用本函式庫,請在腳本 metadata 寫上: // @require https://update.sleazyfork.org/scripts/581108/1842607/JM%20Shelf%20-%20Candidate%20Pool.js

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey 或 Violentmonkey,之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey 或 Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         JM Shelf - Candidate Pool
// @namespace    jmshelf-lib
// @version      1.0.0
// @author       Kesdi
// @description  8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
// @license      MIT
// ==/UserScript==
// 
// 此文件是 GreasyFork 库(library),不直接安装。
// 请安装主脚本: JM Shelf 给杂鱼的个性化推荐
//

// ═══ [10] CANDIDATE POOL ═══ - Multi-channel recall
  // ============================================================
  /**
   * Candidate pool: recalls album ids through several channels (tag search,
   * /albums listing, tag pairs, author search, type listings, related albums,
   * random pages, ranking lists), then merges, filters and de-duplicates them.
   *
   * This object relies on names supplied by the host script and not defined
   * here: State, CONFIG, fetcher, Parser, ProfileManager, LOG, normalizeTag,
   * getBlacklist, TAG_NORMALIZE, _dedupCheck and _random.
   */
  const CandidatePool = {
    /**
     * Runs every recall channel, enriches sparsely tagged albums, then merges
     * and de-duplicates the result into a ranked candidate list.
     *
     * Side effects: mutates and persists the album cache, persists per-tag
     * frequency estimates, and persists the first CONFIG.CANDIDATE_POOL_MAX
     * candidate ids.
     *
     * @param {object} profile - User profile; `profile.types` is read as a
     *   map of type name to weight, and the whole object is handed to
     *   ProfileManager.
     * @param {Array<{id: *}>} topFavAlbums - Favourite albums; the first 8 are
     *   used to look up related albums.
     * @param {function(object): void} [onProgress] - Optional callback that
     *   receives `{ phase, progress, message }`.
     * @returns {Promise<Array<object>|undefined>} Every surviving candidate
     *   (not truncated), sorted by channel count and then views; `undefined`
     *   when `fetcher._scanFailed` is set, in which case nothing is persisted
     *   by the merge step.
     */
    async build(profile, topFavAlbums, onProgress) {
      const SITE = 'https://18comic.vip';
      const ENRICH_LIMIT = 30;
      const channels = {};
      const albumCache = State.getAlbumCache();

      // Progress reporting is optional; without a callback this is a no-op.
      const report = (progress, message, phase = 'candidates') => {
        if (onProgress) onProgress({ phase, progress, message });
      };

      // Order-preserving union that tolerates a missing list on either side.
      // Cached records and parsed details do not always carry every field, and
      // an unguarded spread would throw into the surrounding best-effort catch.
      const union = (first, second) => [...new Set([...(first || []), ...(second || [])])];

      const cacheIfAbsent = (item) => {
        if (!albumCache[item.id]) albumCache[item.id] = item;
      };

      // Merge tags/authors of a freshly parsed record into an existing cache entry.
      const mergeTagsAndAuthors = (target, extra) => {
        target.tags = union(target.tags, extra.tags);
        target.authors = union(target.authors, extra.authors);
      };

      report(0, '构建候选池...');

      // Helper: walk the result pages of one search URL, collect the ids, and
      // inject the searched tag into items that came back with fewer than two
      // tags. The third argument of fetcher.enqueue is forwarded unchanged
      // (presumably a priority - confirm against the fetcher implementation).
      const fetchTagResults = async (baseUrl, injectTag, pages, maxItems) => {
        const ids = [];
        let globalEstimate = 0;
        const separator = baseUrl.includes('?') ? '&' : '?';
        for (let pg = 1; pg <= pages; pg++) {
          const url = `${baseUrl}${separator}page=${pg}`;
          try {
            const html = await fetcher.enqueue(url, null, pg === 1 ? 3 : 2);
            if (!html) continue;
            const items = Parser.parseListing(html).slice(0, maxItems);
            for (const item of items) {
              if (injectTag && item.tags.length < 2 && !item.tags.includes(injectTag)) item.tags.push(injectTag);
              ids.push(item.id);
              cacheIfAbsent(item);
            }
            if (pg === 1 && injectTag) {
              // Estimate the tag's global size as total pages times 40 per page.
              const pag = Parser.parsePagination(html);
              globalEstimate = (pag.totalPages || 1) * 40;
            }
            // A page less than half full is treated as the last useful page.
            if (items.length < maxItems / 2) break;
          } catch (e) {
            // Best effort: a failed page must not abort the whole scan.
          }
        }
        if (injectTag && globalEstimate > 0) {
          const tagFreq = State.getTagFreq();
          tagFreq[normalizeTag(injectTag)] = globalEstimate;
          State.saveTagFreq(tagFreq);
        }
        return ids;
      };

      // Channel 1: tag search
      const topTags = ProfileManager.getTopTags(profile, CONFIG.TOP_TAGS_COUNT);
      report(2, `通道①: 标签召回×${CONFIG.SEARCH_PAGES}页 (${topTags.length}个标签)...`);
      channels['tag'] = [];
      for (let i = 0; i < topTags.length; i++) {
        const tag = topTags[i];
        const baseUrl = `${SITE}/search/photos?search_query=${encodeURIComponent(tag)}&main_tag=0`;
        const ids = await fetchTagResults(baseUrl, tag, CONFIG.SEARCH_PAGES, 80);
        channels['tag'].push(...ids);
        report(2 + Math.round((i / topTags.length) * 16), `① ${tag} (${ids.length}本)${ids.length===0?' ⚠':''}`);
      }

      // Channel 2: /albums listing (also used to top up tags of cached albums)
      report(18, `通道②: /albums标签补充×${CONFIG.ALBUMS_PAGES}页...`);
      channels['albums'] = [];
      for (let pg = 1; pg <= CONFIG.ALBUMS_PAGES; pg++) {
        try {
          const html = await fetcher.enqueue(`${SITE}/albums?page=${pg}&o=mr`, null, 2);
          if (html) {
            for (const item of Parser.parseListing(html).slice(0, 80)) {
              channels['albums'].push(item.id);
              const existing = albumCache[item.id];
              // Fresh listing fields win, but tags are the union of both records.
              albumCache[item.id] = existing
                ? { ...existing, ...item, tags: union(existing.tags, item.tags) }
                : item;
            }
          }
        } catch (e) {
          // Best effort: skip this page.
        }
        report(18 + Math.round((pg / CONFIG.ALBUMS_PAGES) * 10), `② /albums p${pg}`);
      }

      // Channel 3: tag pairs (at most the first 8 tags, capped at 12 searches)
      report(28, '通道③: 标签组合召回...');
      channels['combo'] = [];
      const comboTop = topTags.slice(0, CONFIG.TAG_COMBO_TOP);
      const comboWidth = Math.min(comboTop.length, 8);
      let comboCount = 0;
      for (let i = 0; i < comboWidth; i++) {
        for (let j = i + 1; j < comboWidth; j++) {
          if (comboCount >= 12) break;
          const tagA = comboTop[i];
          const tagB = comboTop[j];
          const baseUrl = `${SITE}/search/photos?search_query=${encodeURIComponent(tagA + ' ' + tagB)}&main_tag=0`;
          try {
            const html = await fetcher.enqueue(baseUrl, null, 2);
            if (html) {
              for (const item of Parser.parseListing(html).slice(0, 30)) {
                // Sparse results inherit both searched tags.
                if (item.tags.length < 3) {
                  if (!item.tags.includes(tagA)) item.tags.push(tagA);
                  if (!item.tags.includes(tagB)) item.tags.push(tagB);
                }
                channels['combo'].push(item.id);
                cacheIfAbsent(item);
              }
            }
          } catch (e) {
            // Best effort: skip this pair.
          }
          comboCount++;
        }
      }

      // Channel 4: author search (first 10 authors)
      const topAuthors = ProfileManager.getTopAuthors(profile, CONFIG.TOP_AUTHORS_MIN_WORKS);
      const auCount = Math.min(topAuthors.length, 10);
      report(33, `通道④: 作者召回(${auCount})...`);
      channels['author'] = [];
      for (let i = 0; i < auCount; i++) {
        const author = topAuthors[i];
        try {
          const html = await fetcher.enqueue(`${SITE}/search/photos?search_query=${encodeURIComponent(author)}&main_tag=2`, null, 4);
          if (html) {
            for (const item of Parser.parseListing(html).slice(0, 40)) {
              channels['author'].push(item.id);
              cacheIfAbsent(item);
            }
          }
        } catch (e) {
          // Best effort: skip this author.
        }
        report(33 + Math.round((i / auCount) * 4), `④ ${author}`);
      }

      // Channel 5: listings of the user's four highest-weighted types
      report(37, '通道⑤: 类型浏览...');
      channels['type'] = [];
      const typePaths = new Map([
        ['韓漫', '/albums/hanman'],
        ['同人', '/albums/doujin'],
        ['單本', '/albums/single'],
        ['短篇', '/albums/short'],
      ]);
      const typePrefs = Object.entries(profile.types).sort((a, b) => b[1] - a[1]).slice(0, 4);
      // Unmapped types all fall back to /albums; de-duplicate so the same
      // listing is not fetched several times.
      const typeUrls = [...new Set(typePrefs.map(([type]) => `${typePaths.get(type) || '/albums'}?o=mr`))];
      for (const tUrl of typeUrls) {
        try {
          const html = await fetcher.enqueue(`${SITE}${tUrl}`, null, 2);
          if (html) {
            for (const item of Parser.parseListing(html).slice(0, 80)) {
              channels['type'].push(item.id);
              cacheIfAbsent(item);
            }
          }
        } catch (e) {
          // Best effort: skip this listing.
        }
      }

      // Channel 6: albums related to the user's top favourites
      report(40, '通道⑥: 关联漫画...');
      channels['related'] = [];
      for (const fav of topFavAlbums.slice(0, 8)) {
        try {
          const html = await fetcher.enqueue(`${SITE}/album/${fav.id}/`, null, 2);
          if (html) {
            const detail = Parser.parseDetail(html);
            for (const rid of detail.related || []) channels['related'].push(rid);
            // The detail page is also used to enrich the favourite's own record.
            if (albumCache[fav.id]) mergeTagsAndAuthors(albumCache[fav.id], detail);
          }
        } catch (e) {
          // Best effort: skip this favourite.
        }
      }

      // Channel 7: exploration through random /albums pages
      report(43, '通道⑦: 随机探索...');
      channels['discover'] = [];
      const maxPage = 200;
      for (let i = 0; i < CONFIG.EXPLORE_PAGES; i++) {
        const randPage = Math.floor(_random() * maxPage) + 1;
        try {
          const html = await fetcher.enqueue(`${SITE}/albums?page=${randPage}&o=mr`, null, 1);
          if (html) {
            for (const item of Parser.parseListing(html).slice(0, 60)) {
              channels['discover'].push(item.id);
              cacheIfAbsent(item);
            }
          }
        } catch (e) {
          // Best effort: skip this page.
        }
        report(43 + Math.round((i / CONFIG.EXPLORE_PAGES) * 4), `⑦ 探索p${randPage}`);
      }

      // Channel 8: ranking lists; each entry can feed the explore and/or comp sub-pools
      report(47, `通道⑧: 排行矩阵×${CONFIG.RANKING_CHANNELS.length}...`);
      channels['rank'] = [];
      channels['rank_explore'] = [];
      channels['rank_comp'] = [];
      let rankIndex = 0;
      for (const rc of CONFIG.RANKING_CHANNELS) {
        const feedsExplore = rc.pool === 'explore' || rc.pool === 'both';
        const feedsComp = rc.pool === 'comp' || rc.pool === 'both';
        for (let pg = 1; pg <= rc.pages; pg++) {
          try {
            // URLs that already carry a page parameter are used verbatim.
            const pageUrl = rc.url + (rc.url.includes('page=') ? '' : (rc.url.includes('?') ? '&' : '?') + 'page=' + pg);
            const html = await fetcher.enqueue(`${SITE}${pageUrl}`, null, 2);
            if (html) {
              for (const item of Parser.parseListing(html).slice(0, 80)) {
                channels['rank'].push(item.id);
                if (feedsExplore) channels['rank_explore'].push(item.id);
                if (feedsComp) channels['rank_comp'].push(item.id);
                const existing = albumCache[item.id];
                if (existing) mergeTagsAndAuthors(existing, item);
                else albumCache[item.id] = item;
              }
            }
          } catch (e) {
            // Best effort: skip this ranking page.
          }
          report(47 + Math.round(((rankIndex + pg / rc.pages) / CONFIG.RANKING_CHANNELS.length) * 10), `⑧ ${rc.label} p${pg}/${rc.pages}`);
        }
        rankIndex++;
      }

      // Tag enrichment
      report(57, '补全标签数据...');

      if (fetcher._scanFailed) {
        report(0, '❌ 扫描失败: 请求超时', 'error');
        LOG.error('扫描终止: 部分请求超过10分钟重试上限');
        return;
      }

      // Fetch detail pages for sparsely tagged albums. The limit counts
      // successful enrichments, not attempts.
      let enrichedCount = 0;
      for (const [id, data] of Object.entries(albumCache)) {
        if (enrichedCount >= ENRICH_LIMIT) break;
        if ((data.tags || []).length > CONFIG.TAG_ENRICH_THRESHOLD) continue;
        try {
          const html = await fetcher.enqueue(`${SITE}/album/${id}/`, null, 1);
          if (html) {
            const detail = Parser.parseDetail(html);
            if ((detail.tags || []).length > 0) {
              mergeTagsAndAuthors(data, detail);
              data.typeTags = union(data.typeTags, detail.typeTags);
              enrichedCount++;
            }
          }
        } catch (e) {
          // Best effort: leave this album as it is.
        }
      }
      report(60, `补全${enrichedCount}个标签`);

      // Merge and de-duplicate.
      // channelMap stays a plain object on purpose: Object.keys() lists
      // integer-like ids in ascending numeric order, and that order decides
      // which of two albums with the same title survives.
      const channelMap = {};
      for (const [chName, ids] of Object.entries(channels)) {
        for (const id of ids) {
          if (!channelMap[id]) channelMap[id] = [];
          if (!channelMap[id].includes(chName)) channelMap[id].push(chName);
        }
      }

      const allIds = Object.keys(channelMap);
      const blacklist = getBlacklist();
      // Set lookup instead of a linear scan per candidate (same SameValueZero matching).
      const blacklistedAlbums = new Set(blacklist.albums);
      const candidates = [];
      const seenTitles = new Set();
      const mergeStats = { total: allIds.length, blAlbum: 0, typeKilled: 0, titleDup: 0, trieDup: 0, survived: 0 };

      // Normalise a title for duplicate detection: lower-case, drop bracketed
      // annotations, map characters through TAG_NORMALIZE, remove separators.
      // Full-width characters are written as escapes (U+FF08/U+FF09 brackets,
      // U+FF5E tilde, U+3000 space) so the patterns survive width folding of
      // the source text; an ASCII-folded "(.*?)" would match only empty strings.
      const normTitle = (t) => {
        const stripped = (t || '').toLowerCase()
          .replace(/\[.*?\]/g, '')
          .replace(/【.*?】/g, '')
          .replace(/\uFF08.*?\uFF09/g, '')
          .replace(/\(.*?\)/g, '');
        let mapped = '';
        for (const ch of stripped) {
          const replacement = TAG_NORMALIZE[ch];
          mapped += replacement !== undefined ? replacement : ch;
        }
        return mapped.replace(/[\s\-~\uFF5E\u3000]+/g, '');
      };

      const isHardBlacklisted = (label) => CONFIG.TYPE_HARD_BLACKLIST.includes(label);

      for (const id of allIds) {
        if (blacklistedAlbums.has(id)) { mergeStats.blAlbum++; continue; }

        const memberChannels = channelMap[id];
        const albumData = albumCache[id] || { id, title: '', tags: [], authors: [], typeTags: [], views: 0 };

        // Hard filter: blacklisted type/tag, or more than 20 chapters.
        const isTypeBlacklisted = (albumData.typeTags || []).some(isHardBlacklisted) ||
            (albumData.tags || []).some(isHardBlacklisted) ||
            (albumData.chapters || 0) > 20;
        if (isTypeBlacklisted) { mergeStats.typeKilled++; continue; }

        const nt = normTitle(albumData.title);
        if (nt && seenTitles.has(nt)) { mergeStats.titleDup++; continue; }
        if (nt && albumData.title) {
          const dc = _dedupCheck(albumData.title, id, false);
          if (dc.action === 'dup') { mergeStats.trieDup++; continue; }
        }
        if (nt) seenTitles.add(nt);

        mergeStats.survived++;
        if (!albumCache[id]) albumCache[id] = { ...albumData };
        albumCache[id].channels = memberChannels;
        albumCache[id].channelCount = memberChannels.length;
        candidates.push({ ...albumData, channelCount: memberChannels.length, channels: memberChannels });
      }

      LOG.info(`📊 合并去重: 原始${mergeStats.total} → -${mergeStats.blAlbum}黑名 -${mergeStats.typeKilled}类型 -${mergeStats.titleDup||0}标题 -${mergeStats.trieDup||0}AC去重 → 候选${mergeStats.survived}`);

      State.saveAlbumCache(albumCache);
      // Albums recalled by more channels rank first; views break ties.
      candidates.sort((a, b) => (b.channelCount||0) - (a.channelCount||0) || (b.views||0) - (a.views||0));
      State.saveCandidates(candidates.slice(0, CONFIG.CANDIDATE_POOL_MAX).map(c => c.id));

      report(65, `候选池: ${candidates.length}个`);

      return candidates;
    },

    /**
     * Adds one album to the persisted cache and, while the pool holds fewer
     * than CONFIG.CANDIDATE_POOL_MAX ids, to the persisted candidate id list.
     * Does nothing when the album id is already cached.
     *
     * @param {{id: *}} albumData - Album record; `id` is used as the cache key.
     * @returns {void}
     */
    addAlbum(albumData) {
      const cache = State.getAlbumCache();
      if (cache[albumData.id]) return;
      cache[albumData.id] = albumData;
      State.saveAlbumCache(cache);

      const candidates = State.getCandidates();
      if (!candidates.includes(albumData.id) && candidates.length < CONFIG.CANDIDATE_POOL_MAX) {
        candidates.push(albumData.id);
        State.saveCandidates(candidates);
      }
    },

    /**
     * Looks up one album in the persisted cache.
     *
     * @param {*} id - Album id used as the cache key.
     * @returns {object|null} The cached record, or null when it is absent.
     */
    getCachedAlbum(id) {
      return State.getAlbumCache()[id] || null;
    },
  };