JM Shelf - Candidate Pool

8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。

Ce script ne doit pas être installé directement. C'est une librairie destinée à être incluse dans d'autres scripts avec la méta-directive // @require https://update.sleazyfork.org/scripts/581108/1842607/JM%20Shelf%20-%20Candidate%20Pool.js

Vous devrez installer une extension telle que Tampermonkey, Greasemonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Violentmonkey pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey ou Userscripts pour installer ce script.

Vous devrez installer une extension telle que Tampermonkey pour installer ce script.

Vous devrez installer une extension de gestionnaire de script utilisateur pour installer ce script.

(J'ai déjà un gestionnaire de scripts utilisateur, laissez-moi l'installer !)

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension telle que Stylus pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

Vous devrez installer une extension du gestionnaire de style pour utilisateur pour installer ce style.

(J'ai déjà un gestionnaire de style utilisateur, laissez-moi l'installer !)

// ==UserScript==
// @name         JM Shelf - Candidate Pool
// @namespace    jmshelf-lib
// @version      1.0.0
// @author       Kesdi
// @description  8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
// @license      MIT
// ==/UserScript==
// 
// This file is a GreasyFork library and is not meant to be installed directly.
// Install the main script instead: JM Shelf 给杂鱼的个性化推荐
//

// ═══ [10] CANDIDATE POOL ═══ - Multi-channel recall
  // ============================================================
  /**
   * Multi-channel candidate recall for the recommender.
   *
   * `build` queries several listing/search/detail pages ("channels"), records which
   * channels surfaced each album id, fills the shared album cache, then filters,
   * de-duplicates and ranks the union. `addAlbum` / `getCachedAlbum` are small
   * accessors over the same persisted cache and candidate list.
   *
   * Relies on names defined elsewhere in the script: State, fetcher, Parser,
   * ProfileManager, CONFIG, LOG, normalizeTag, getBlacklist, TAG_NORMALIZE,
   * _dedupCheck and _random.
   */
  const CandidatePool = {
    /**
     * Runs every recall channel, merges the results and persists the candidate pool.
     *
     * @param {object} profile - User profile. `profile.types` (type name → weight) is read
     *   here; the whole object is handed to `ProfileManager.getTopTags` / `getTopAuthors`.
     * @param {Array<{id: (string|number)}>} topFavAlbums - Favourite albums; the first 8 are
     *   used as seeds for the "related" channel.
     * @param {function({phase: string, progress: number, message: string}): void} [onProgress]
     *   Optional progress callback.
     * @returns {Promise<Array<object>|undefined>} All surviving candidates, sorted by channel
     *   count then views (only the first `CONFIG.CANDIDATE_POOL_MAX` ids are persisted).
     *   Resolves to `undefined` when `fetcher._scanFailed` is set; nothing is saved then.
     */
    async build(profile, topFavAlbums, onProgress) {
      const SITE = 'https://18comic.vip';
      const MAX_COMBO_TAGS = 8;
      const MAX_COMBO_QUERIES = 12;
      const MAX_AUTHORS = 10;
      const MAX_RELATED_SEEDS = 8;
      const MAX_EXPLORE_PAGE = 200;
      const ENRICH_LIMIT = 30;
      const MAX_CHAPTERS = 20;

      const channels = {};
      const albumCache = State.getAlbumCache();
      const failures = {};

      const report = (progress, message, phase = 'candidates') => {
        if (onProgress) onProgress({ phase, progress, message });
      };

      // Union of two possibly-missing arrays, first-seen order preserved.
      const mergeUnique = (first, second) => [...new Set([...(first || []), ...(second || [])])];

      // Channels are best-effort: remember what failed instead of swallowing it silently.
      const noteFailure = (channel, err) => {
        const entry = failures[channel] || (failures[channel] = { count: 0, last: '' });
        entry.count++;
        entry.last = (err && err.message) || String(err);
      };

      const reportFailures = () => {
        const parts = Object.entries(failures).map(([ch, f]) => `${ch}×${f.count}: ${f.last}`);
        if (parts.length > 0) LOG.info(`⚠ 候选池: 部分请求失败已跳过 (${parts.join(', ')})`);
      };

      const cacheIfNew = (item) => {
        if (!albumCache[item.id]) albumCache[item.id] = item;
      };

      // Handler that records every listed id under `channel` and caches unseen albums.
      const collectInto = (channel) => (items) => {
        for (const item of items) {
          channels[channel].push(item.id);
          cacheIfNew(item);
        }
      };

      // Fetches one listing page and passes its first `limit` parsed items (and the raw
      // HTML) to `handle`. An empty response is skipped; any error is recorded, not thrown.
      const withListing = async (channel, url, priority, limit, handle) => {
        try {
          const html = await fetcher.enqueue(url, null, priority);
          if (!html) return;
          handle(Parser.parseListing(html).slice(0, limit), html);
        } catch (err) {
          noteFailure(channel, err);
        }
      };

      // Pages through a tag search, tagging sparsely-tagged results with the searched tag
      // and recording an estimated global frequency for that tag.
      const fetchTagResults = async (baseUrl, injectTag, pages, maxItems) => {
        const ids = [];
        let globalEstimate = 0;
        const separator = baseUrl.includes('?') ? '&' : '?';
        for (let pg = 1; pg <= pages; pg++) {
          let exhausted = false;
          await withListing('tag', `${baseUrl}${separator}page=${pg}`, pg === 1 ? 3 : 2, maxItems, (items, html) => {
            for (const item of items) {
              if (injectTag && item.tags.length < 2 && !item.tags.includes(injectTag)) item.tags.push(injectTag);
              ids.push(item.id);
              cacheIfNew(item);
            }
            if (pg === 1 && injectTag) {
              // 40 is presumably the site's items-per-page; verify against the listing parser.
              globalEstimate = (Parser.parsePagination(html).totalPages || 1) * 40;
            }
            // A page less than half full means there is nothing further to page through.
            exhausted = items.length < maxItems / 2;
          });
          if (exhausted) break;
        }
        if (injectTag && globalEstimate > 0) {
          const tagFreq = State.getTagFreq();
          tagFreq[normalizeTag(injectTag)] = globalEstimate;
          State.saveTagFreq(tagFreq);
        }
        return ids;
      };

      report(0, '构建候选池...');

      // Channel 1: search by each of the user's top tags.
      const topTags = ProfileManager.getTopTags(profile, CONFIG.TOP_TAGS_COUNT);
      report(2, `通道①: 标签召回×${CONFIG.SEARCH_PAGES}页 (${topTags.length}个标签)...`);
      channels.tag = [];
      for (let i = 0; i < topTags.length; i++) {
        const tag = topTags[i];
        const searchUrl = `${SITE}/search/photos?search_query=${encodeURIComponent(tag)}&main_tag=0`;
        const ids = await fetchTagResults(searchUrl, tag, CONFIG.SEARCH_PAGES, 80);
        channels.tag.push(...ids);
        report(2 + Math.round((i / topTags.length) * 16), `① ${tag} (${ids.length}本)${ids.length===0?' ⚠':''}`);
      }

      // Channel 2: browse /albums; listing data refreshes cached entries, tags are unioned.
      report(18, `通道②: /albums标签补充×${CONFIG.ALBUMS_PAGES}页...`);
      channels.albums = [];
      for (let pg = 1; pg <= CONFIG.ALBUMS_PAGES; pg++) {
        await withListing('albums', `${SITE}/albums?page=${pg}&o=mr`, 2, 80, (items) => {
          for (const item of items) {
            channels.albums.push(item.id);
            const existing = albumCache[item.id];
            albumCache[item.id] = existing
              ? { ...existing, ...item, tags: mergeUnique(existing.tags, item.tags) }
              : item;
          }
        });
        report(18 + Math.round((pg / CONFIG.ALBUMS_PAGES) * 10), `② /albums p${pg}`);
      }

      // Channel 3: search pairs of top tags (first 12 pairs among at most 8 tags).
      report(28, '通道③: 标签组合召回...');
      channels.combo = [];
      const comboTop = topTags.slice(0, CONFIG.TAG_COMBO_TOP);
      const comboWidth = Math.min(comboTop.length, MAX_COMBO_TAGS);
      const tagPairs = [];
      for (let i = 0; i < comboWidth; i++) {
        for (let j = i + 1; j < comboWidth; j++) tagPairs.push([comboTop[i], comboTop[j]]);
      }
      for (const [tagA, tagB] of tagPairs.slice(0, MAX_COMBO_QUERIES)) {
        const comboUrl = `${SITE}/search/photos?search_query=${encodeURIComponent(tagA + ' ' + tagB)}&main_tag=0`;
        await withListing('combo', comboUrl, 2, 30, (items) => {
          for (const item of items) {
            if (item.tags.length < 3) {
              if (!item.tags.includes(tagA)) item.tags.push(tagA);
              if (!item.tags.includes(tagB)) item.tags.push(tagB);
            }
            channels.combo.push(item.id);
            cacheIfNew(item);
          }
        });
      }

      // Channel 4: search by favourite authors.
      const topAuthors = ProfileManager.getTopAuthors(profile, CONFIG.TOP_AUTHORS_MIN_WORKS);
      const auCount = Math.min(topAuthors.length, MAX_AUTHORS);
      report(33, `通道④: 作者召回(${auCount})...`);
      channels.author = [];
      for (let i = 0; i < auCount; i++) {
        const author = topAuthors[i];
        const authorUrl = `${SITE}/search/photos?search_query=${encodeURIComponent(author)}&main_tag=2`;
        await withListing('author', authorUrl, 4, 40, collectInto('author'));
        report(33 + Math.round((i / auCount) * 4), `④ ${author}`);
      }

      // Channel 5: browse the listings of the user's four strongest types.
      report(37, '通道⑤: 类型浏览...');
      channels.type = [];
      const TYPE_PATHS = new Map([
        ['韓漫', '/albums/hanman'],
        ['同人', '/albums/doujin'],
        ['單本', '/albums/single'],
        ['短篇', '/albums/short'],
      ]);
      const typePrefs = Object.entries(profile.types || {}).sort((a, b) => b[1] - a[1]).slice(0, 4);
      // A Set so that several unmapped types do not fetch the generic /albums page repeatedly.
      const typeUrls = new Set();
      for (const [type] of typePrefs) typeUrls.add(`${TYPE_PATHS.get(type) || '/albums'}?o=mr`);
      for (const tUrl of typeUrls) {
        await withListing('type', `${SITE}${tUrl}`, 2, 80, collectInto('type'));
      }

      // Channel 6: albums related to the user's top favourites.
      report(40, '通道⑥: 关联漫画...');
      channels.related = [];
      for (const fav of (topFavAlbums || []).slice(0, MAX_RELATED_SEEDS)) {
        try {
          const html = await fetcher.enqueue(`${SITE}/album/${fav.id}/`, null, 2);
          if (html) {
            const detail = Parser.parseDetail(html);
            for (const rid of detail.related || []) channels.related.push(rid);
            const cached = albumCache[fav.id];
            if (cached) {
              cached.tags = mergeUnique(cached.tags, detail.tags);
              cached.authors = mergeUnique(cached.authors, detail.authors);
            }
          }
        } catch (err) {
          noteFailure('related', err);
        }
      }

      // Channel 7: random pages of the /albums listing for exploration.
      report(43, '通道⑦: 随机探索...');
      channels.discover = [];
      for (let i = 0; i < CONFIG.EXPLORE_PAGES; i++) {
        const randPage = Math.floor(_random() * MAX_EXPLORE_PAGE) + 1;
        await withListing('discover', `${SITE}/albums?page=${randPage}&o=mr`, 1, 60, collectInto('discover'));
        report(43 + Math.round((i / CONFIG.EXPLORE_PAGES) * 4), `⑦ 探索p${randPage}`);
      }

      // Channel 8: configured ranking pages; `rc.pool` routes ids into the sub-pools.
      report(47, `通道⑧: 排行矩阵×${CONFIG.RANKING_CHANNELS.length}...`);
      channels.rank = [];
      channels.rank_explore = [];
      channels.rank_comp = [];
      let rankIndex = 0;
      for (const rc of CONFIG.RANKING_CHANNELS) {
        const toExplore = rc.pool === 'explore' || rc.pool === 'both';
        const toComp = rc.pool === 'comp' || rc.pool === 'both';
        for (let pg = 1; pg <= rc.pages; pg++) {
          try {
            // A URL that already carries `page=` is used as-is for every iteration.
            const pageUrl = rc.url.includes('page=')
              ? rc.url
              : `${rc.url}${rc.url.includes('?') ? '&' : '?'}page=${pg}`;
            const html = await fetcher.enqueue(`${SITE}${pageUrl}`, null, 2);
            if (html) {
              for (const item of Parser.parseListing(html).slice(0, 80)) {
                channels.rank.push(item.id);
                if (toExplore) channels.rank_explore.push(item.id);
                if (toComp) channels.rank_comp.push(item.id);
                const existing = albumCache[item.id];
                if (!existing) {
                  albumCache[item.id] = item;
                } else {
                  existing.tags = mergeUnique(existing.tags, item.tags);
                  existing.authors = mergeUnique(existing.authors, item.authors);
                }
              }
            }
          } catch (err) {
            noteFailure('rank', err);
          }
          report(47 + Math.round(((rankIndex + pg / rc.pages) / CONFIG.RANKING_CHANNELS.length) * 10), `⑧ ${rc.label} p${pg}/${rc.pages}`);
        }
        rankIndex++;
      }

      // Tag enrichment from detail pages.
      report(57, '补全标签数据...');

      if (fetcher._scanFailed) {
        report(0, '❌ 扫描失败: 请求超时', 'error');
        LOG.error('扫描终止: 部分请求超过10分钟重试上限');
        reportFailures();
        // Deliberately resolves to undefined (not []) so existing callers can detect the abort.
        return undefined;
      }

      let enrichedCount = 0;
      for (const [id, data] of Object.entries(albumCache)) {
        if (enrichedCount >= ENRICH_LIMIT) break;
        const needsTags = (data.tags || []).length <= CONFIG.TAG_ENRICH_THRESHOLD;
        if (!needsTags) continue;
        try {
          const html = await fetcher.enqueue(`${SITE}/album/${id}/`, null, 1);
          if (html) {
            const detail = Parser.parseDetail(html);
            // Only a detail page that actually yielded tags counts against the budget.
            if ((detail.tags || []).length > 0) {
              data.tags = mergeUnique(data.tags, detail.tags);
              data.authors = mergeUnique(data.authors, detail.authors);
              data.typeTags = mergeUnique(data.typeTags, detail.typeTags);
              enrichedCount++;
            }
          }
        } catch (err) {
          noteFailure('enrich', err);
        }
      }
      report(60, `补全${enrichedCount}个标签`);

      // Merge: album id → names of the channels that surfaced it. A plain object is kept on
      // purpose: its key order decides which of two same-titled albums survives below.
      const channelMap = {};
      for (const [chName, ids] of Object.entries(channels)) {
        for (const id of new Set(ids)) {
          if (!channelMap[id]) channelMap[id] = [];
          channelMap[id].push(chName);
        }
      }

      const allIds = Object.keys(channelMap);
      const blacklist = getBlacklist();
      // Keys of channelMap are strings, so compare blacklist ids as strings too.
      const blockedAlbums = new Set(((blacklist && blacklist.albums) || []).map(String));
      const hardTypes = new Set(CONFIG.TYPE_HARD_BLACKLIST || []);
      const candidates = [];
      const seenTitles = new Set();
      const mergeStats = { total: allIds.length, blAlbum: 0, typeKilled: 0, titleDup: 0, trieDup: 0, survived: 0 };

      // Title key for duplicate detection: lower-cased, bracketed segments removed
      // ([..], U+3010/U+3011 lenticular, U+FF08/U+FF09 fullwidth and ASCII parentheses),
      // characters mapped through TAG_NORMALIZE, then whitespace, hyphens and tildes
      // (ASCII and fullwidth) dropped. Escapes keep the fullwidth code points intact.
      const normTitle = (t) => {
        const stripped = (t || '').toLowerCase()
          .replace(/\[.*?\]/g, '')
          .replace(/\u3010.*?\u3011/g, '')
          .replace(/\uFF08.*?\uFF09/g, '')
          .replace(/\(.*?\)/g, '');
        let mapped = '';
        for (const ch of stripped) {
          const v = TAG_NORMALIZE[ch];
          mapped += v !== undefined ? v : ch;
        }
        return mapped.replace(/[\s\-~\uFF5E\u3000]+/g, '').trim();
      };

      for (const id of allIds) {
        if (blockedAlbums.has(id)) { mergeStats.blAlbum++; continue; }
        const albumChannels = channelMap[id];
        const channelCount = albumChannels.length;
        const albumData = albumCache[id] || { id, title: '', tags: [], authors: [], typeTags: [], views: 0 };
        const isTypeBlacklisted = (albumData.typeTags || []).some((t) => hardTypes.has(t)) ||
            (albumData.tags || []).some((t) => hardTypes.has(t)) ||
            (albumData.chapters || 0) > MAX_CHAPTERS;
        if (isTypeBlacklisted) { mergeStats.typeKilled++; continue; }
        const nt = normTitle(albumData.title);
        if (nt && seenTitles.has(nt)) { mergeStats.titleDup++; continue; }
        if (nt && albumData.title) {
          const dc = _dedupCheck(albumData.title, id, false);
          if (dc.action === 'dup') { mergeStats.trieDup++; continue; }
        }
        if (nt) seenTitles.add(nt);
        mergeStats.survived++;
        if (!albumCache[id]) albumCache[id] = { ...albumData };
        albumCache[id].channels = albumChannels;
        albumCache[id].channelCount = channelCount;
        candidates.push({ ...albumData, channelCount, channels: albumChannels });
      }

      LOG.info(`📊 合并去重: 原始${mergeStats.total} → -${mergeStats.blAlbum}黑名 -${mergeStats.typeKilled}类型 -${mergeStats.titleDup||0}标题 -${mergeStats.trieDup||0}AC去重 → 候选${mergeStats.survived}`);
      reportFailures();

      State.saveAlbumCache(albumCache);
      candidates.sort((a, b) => (b.channelCount||0) - (a.channelCount||0) || (b.views||0) - (a.views||0));
      State.saveCandidates(candidates.slice(0, CONFIG.CANDIDATE_POOL_MAX).map(c => c.id));

      report(65, `候选池: ${candidates.length}个`);

      return candidates;
    },

    /**
     * Adds a single album to the persisted cache and, while there is room, to the
     * persisted candidate id list. Does nothing if the album id is already cached.
     *
     * @param {{id: (string|number)}} albumData - Album record to store.
     */
    addAlbum(albumData) {
      const cache = State.getAlbumCache();
      if (cache[albumData.id]) return;
      cache[albumData.id] = albumData;
      State.saveAlbumCache(cache);

      const candidates = State.getCandidates();
      if (!candidates.includes(albumData.id) && candidates.length < CONFIG.CANDIDATE_POOL_MAX) {
        candidates.push(albumData.id);
        State.saveCandidates(candidates);
      }
    },

    /**
     * Looks an album up in the persisted cache.
     *
     * @param {string|number} id - Album id.
     * @returns {object|null} The cached record, or null when absent.
     */
    getCachedAlbum(id) {
      return State.getAlbumCache()[id] || null;
    },
  };