JM Shelf - Candidate Pool

8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。

Este script no debería instalarse directamente. Es una biblioteca que utilizan otros scripts mediante la meta-directiva de inclusión // @require https://update.sleazyfork.org/scripts/581108/1842607/JM%20Shelf%20-%20Candidate%20Pool.js

Tendrás que instalar una extensión para tu navegador como Tampermonkey, Greasemonkey o Violentmonkey si quieres utilizar este script.

You will need to install an extension such as Tampermonkey to install this script.

Tendrás que instalar una extensión como Tampermonkey o Violentmonkey para instalar este script.

Necesitarás instalar una extensión como Tampermonkey o Userscripts para instalar este script.

Tendrás que instalar una extensión como Tampermonkey antes de poder instalar este script.

Necesitarás instalar una extensión para administrar scripts de usuario si quieres instalar este script.

(Ya tengo un administrador de scripts de usuario, déjame instalarlo)

Tendrás que instalar una extensión como Stylus antes de poder instalar este script.

Tendrás que instalar una extensión como Stylus antes de poder instalar este script.

Tendrás que instalar una extensión como Stylus antes de poder instalar este script.

Para poder instalar esto tendrás que instalar primero una extensión de estilos de usuario.

Para poder instalar esto tendrás que instalar primero una extensión de estilos de usuario.

Para poder instalar esto tendrás que instalar primero una extensión de estilos de usuario.

(Ya tengo un administrador de estilos de usuario, déjame instalarlo)

// ==UserScript==
// @name         JM Shelf - Candidate Pool
// @namespace    jmshelf-lib
// @version      1.0.0
// @author       Kesdi
// @description  8通道候选池构建与合并去重 — JM Shelf 推荐脚本的模块库,通过 @require 被主脚本引用。
// @license      MIT
// ==/UserScript==
// 
// 此文件是 GreasyFork 库(library),不直接安装。
// 请安装主脚本: JM Shelf 给杂鱼的个性化推荐
//

// ═══ [10] CANDIDATE POOL ═══ - Multi-channel recall
  // ============================================================
  /**
   * Multi-channel candidate recall: fetches listings from eight kinds of site
   * pages, records every album seen in the shared album cache, then merges,
   * filters and de-duplicates the collected ids into a ranked candidate list.
   *
   * Depends on names supplied by the host script: State, fetcher, Parser,
   * ProfileManager, CONFIG, LOG, normalizeTag, getBlacklist, TAG_NORMALIZE,
   * _dedupCheck and _random.
   */
  const CandidatePool = {
    /**
     * Builds the candidate pool and persists it through State.
     *
     * @param {object} profile - User profile. `profile.types` (type -> weight)
     *   is read directly; the object is also passed to ProfileManager.
     * @param {Array<{id: *}>} topFavAlbums - Favourite albums; the first 8 seed
     *   the "related" channel.
     * @param {Function} [onProgress] - Optional callback invoked with
     *   `{ phase, progress, message }`.
     * @returns {Promise<Array<object>|undefined>} Candidates sorted by channel
     *   count, then views (both descending). Resolves to `undefined` when
     *   `fetcher._scanFailed` is set after the recall phase.
     */
    async build(profile, topFavAlbums, onProgress) {
      const channels = {};
      const albumCache = State.getAlbumCache();

      // Progress reporting is optional; every call site goes through here.
      const report = (progress, message) => {
        if (onProgress) onProgress({ phase: 'candidates', progress, message });
      };

      // Null-safe, order-preserving union. Cached albums are not guaranteed to
      // carry tags/authors/typeTags arrays, and spreading `undefined` throws.
      const union = (a, b) => [...new Set([...(a || []), ...(b || [])])];

      // Stores a listing item only when the album is not cached yet.
      const cacheIfNew = (item) => {
        if (!albumCache[item.id]) albumCache[item.id] = item;
      };

      report(0, '构建候选池...');

      // Helper: fetch up to `pages` result pages for a search URL, tag sparse
      // items with the searched tag, and record a rough global frequency
      // estimate for that tag (total pages * 40).
      async function fetchTagResults(baseUrl, injectTag, pages, maxItems) {
        const ids = [];
        let globalEstimate = 0;
        for (let pg = 1; pg <= pages; pg++) {
          const url = baseUrl + (baseUrl.includes('?') ? '&' : '?') + `page=${pg}`;
          try {
            const html = await fetcher.enqueue(url, null, pg === 1 ? 3 : 2);
            if (!html) continue;
            const items = Parser.parseListing(html).slice(0, maxItems);
            for (const item of items) {
              if (injectTag && item.tags.length < 2 && !item.tags.includes(injectTag)) item.tags.push(injectTag);
              ids.push(item.id);
              cacheIfNew(item);
            }
            if (pg === 1 && injectTag) {
              const pag = Parser.parsePagination(html);
              globalEstimate = (pag.totalPages || 1) * 40;
            }
            // A page less than half full is treated as the end of the results.
            if (items.length < maxItems / 2) break;
          } catch (e) {
            // Best-effort: a failed page is skipped and the next one is tried.
          }
        }
        if (injectTag && globalEstimate > 0) {
          const tagFreq = State.getTagFreq();
          const nt = normalizeTag(injectTag);
          tagFreq[nt] = globalEstimate;
          State.saveTagFreq(tagFreq);
        }
        return ids;
      }

      // Channel 1: tag search
      const topTags = ProfileManager.getTopTags(profile, CONFIG.TOP_TAGS_COUNT);
      report(2, `通道①: 标签召回×${CONFIG.SEARCH_PAGES}页 (${topTags.length}个标签)...`);
      channels['tag'] = [];
      for (let i = 0; i < topTags.length; i++) {
        const tag = topTags[i];
        const baseUrl = `https://18comic.vip/search/photos?search_query=${encodeURIComponent(tag)}&main_tag=0`;
        const ids = await fetchTagResults(baseUrl, tag, CONFIG.SEARCH_PAGES, 80);
        channels['tag'].push(...ids);
        report(2 + Math.round((i / topTags.length) * 16), `① ${tag} (${ids.length}本)${ids.length===0?' ⚠':''}`);
      }

      // Channel 2: /albums browsing (also refreshes tags of cached albums)
      report(18, `通道②: /albums标签补充×${CONFIG.ALBUMS_PAGES}页...`);
      channels['albums'] = [];
      for (let pg = 1; pg <= CONFIG.ALBUMS_PAGES; pg++) {
        try {
          const html = await fetcher.enqueue(`https://18comic.vip/albums?page=${pg}&o=mr`, null, 2);
          if (!html) continue;
          const items = Parser.parseListing(html).slice(0, 80);
          for (const item of items) {
            channels['albums'].push(item.id);
            const existing = albumCache[item.id];
            if (existing) {
              // Listing fields win, but tags are the union of both sources.
              albumCache[item.id] = { ...existing, ...item, tags: union(existing.tags, item.tags) };
            } else {
              albumCache[item.id] = item;
            }
          }
        } catch (e) {
          // Best-effort: skip this page.
        }
        report(18 + Math.round((pg / CONFIG.ALBUMS_PAGES) * 10), `② /albums p${pg}`);
      }

      // Channel 3: tag pairs (at most 12 searches over the first 8 top tags)
      report(28, '通道③: 标签组合召回...');
      channels['combo'] = [];
      const comboTop = topTags.slice(0, CONFIG.TAG_COMBO_TOP);
      const comboLimit = Math.min(comboTop.length, 8);
      let comboCount = 0;
      for (let i = 0; i < comboLimit; i++) {
        for (let j = i + 1; j < comboLimit; j++) {
          if (comboCount >= 12) break;
          const tagA = comboTop[i];
          const tagB = comboTop[j];
          const baseUrl = `https://18comic.vip/search/photos?search_query=${encodeURIComponent(tagA + ' ' + tagB)}&main_tag=0`;
          try {
            const html = await fetcher.enqueue(baseUrl, null, 2);
            if (html) {
              const items = Parser.parseListing(html).slice(0, 30);
              for (const item of items) {
                if (item.tags.length < 3) {
                  if (!item.tags.includes(tagA)) item.tags.push(tagA);
                  if (!item.tags.includes(tagB)) item.tags.push(tagB);
                }
                channels['combo'].push(item.id);
                cacheIfNew(item);
              }
            }
          } catch (e) {
            // Best-effort: skip this pair.
          }
          // Failed searches count towards the budget as well.
          comboCount++;
        }
      }

      // Channel 4: author search
      const topAuthors = ProfileManager.getTopAuthors(profile, CONFIG.TOP_AUTHORS_MIN_WORKS);
      const auCount = Math.min(topAuthors.length, 10);
      report(33, `通道④: 作者召回(${auCount})...`);
      channels['author'] = [];
      for (let i = 0; i < auCount; i++) {
        const author = topAuthors[i];
        try {
          const html = await fetcher.enqueue(`https://18comic.vip/search/photos?search_query=${encodeURIComponent(author)}&main_tag=2`, null, 4);
          if (html) {
            const items = Parser.parseListing(html).slice(0, 40);
            for (const item of items) {
              channels['author'].push(item.id);
              cacheIfNew(item);
            }
          }
        } catch (e) {
          // Best-effort: skip this author.
        }
        report(33 + Math.round((i / auCount) * 4), `④ ${author}`);
      }

      // Channel 5: browse the listing pages of the 4 highest-weighted types
      report(37, '通道⑤: 类型浏览...');
      channels['type'] = [];
      const typePaths = new Map([
        ['韓漫', '/albums/hanman'],
        ['同人', '/albums/doujin'],
        ['單本', '/albums/single'],
        ['短篇', '/albums/short'],
      ]);
      const typePrefs = Object.entries(profile.types || {}).sort((a, b) => b[1] - a[1]).slice(0, 4);
      const typeUrls = typePrefs.map(([type]) => (typePaths.get(type) || '/albums') + '?o=mr');
      for (const tUrl of typeUrls) {
        try {
          const html = await fetcher.enqueue(`https://18comic.vip${tUrl}`, null, 2);
          if (html) {
            const items = Parser.parseListing(html).slice(0, 80);
            for (const item of items) {
              channels['type'].push(item.id);
              cacheIfNew(item);
            }
          }
        } catch (e) {
          // Best-effort: skip this type page.
        }
      }

      // Channel 6: albums related to the user's top favourites
      report(40, '通道⑥: 关联漫画...');
      channels['related'] = [];
      const topFavs = (topFavAlbums || []).slice(0, 8);
      for (let i = 0; i < topFavs.length; i++) {
        const fav = topFavs[i];
        try {
          const html = await fetcher.enqueue(`https://18comic.vip/album/${fav.id}/`, null, 2);
          if (html) {
            const detail = Parser.parseDetail(html);
            for (const rid of detail.related || []) channels['related'].push(rid);
            const cachedFav = albumCache[fav.id];
            if (cachedFav) {
              cachedFav.tags = union(cachedFav.tags, detail.tags);
              cachedFav.authors = union(cachedFav.authors, detail.authors);
            }
          }
        } catch (e) {
          // Best-effort: skip this favourite.
        }
      }

      // Channel 7: exploration via random /albums pages
      report(43, '通道⑦: 随机探索...');
      channels['discover'] = [];
      const maxPage = 200;
      for (let i = 0; i < CONFIG.EXPLORE_PAGES; i++) {
        const randPage = Math.floor(_random() * maxPage) + 1;
        try {
          const html = await fetcher.enqueue(`https://18comic.vip/albums?page=${randPage}&o=mr`, null, 1);
          if (html) {
            const items = Parser.parseListing(html).slice(0, 60);
            for (const item of items) {
              channels['discover'].push(item.id);
              cacheIfNew(item);
            }
          }
        } catch (e) {
          // Best-effort: skip this page.
        }
        report(43 + Math.round((i / CONFIG.EXPLORE_PAGES) * 4), `⑦ 探索p${randPage}`);
      }

      // Channel 8: ranking pages; each ranking also feeds the pool(s) named by rc.pool
      report(47, `通道⑧: 排行矩阵×${CONFIG.RANKING_CHANNELS.length}...`);
      channels['rank'] = [];
      channels['rank_explore'] = [];
      channels['rank_comp'] = [];
      for (const [rcIdx, rc] of CONFIG.RANKING_CHANNELS.entries()) {
        for (let pg = 1; pg <= rc.pages; pg++) {
          try {
            // A URL that already carries page= is used verbatim.
            const pageUrl = rc.url + (rc.url.includes('page=') ? '' : (rc.url.includes('?') ? '&' : '?') + 'page=' + pg);
            const html = await fetcher.enqueue(`https://18comic.vip${pageUrl}`, null, 2);
            if (html) {
              const items = Parser.parseListing(html).slice(0, 80);
              for (const item of items) {
                channels['rank'].push(item.id);
                if (rc.pool === 'explore' || rc.pool === 'both') channels['rank_explore'].push(item.id);
                if (rc.pool === 'comp' || rc.pool === 'both') channels['rank_comp'].push(item.id);
                const existing = albumCache[item.id];
                if (!existing) {
                  albumCache[item.id] = item;
                } else {
                  existing.tags = union(existing.tags, item.tags);
                  existing.authors = union(existing.authors, item.authors);
                }
              }
            }
          } catch (e) {
            // Best-effort: skip this ranking page.
          }
          report(47 + Math.round(((rcIdx + pg / rc.pages) / CONFIG.RANKING_CHANNELS.length) * 10), `⑧ ${rc.label} p${pg}/${rc.pages}`);
        }
      }

      // Tag enrichment
      report(57, '补全标签数据...');

      if (fetcher._scanFailed) {
        onProgress && onProgress({ phase: 'error', progress: 0, message: '❌ 扫描失败: 请求超时' });
        LOG.error('扫描终止: 部分请求超过10分钟重试上限');
        // Kept as a bare return (undefined) so existing callers see no change.
        return;
      }

      // Fetch detail pages for cached albums with too few tags, until 30
      // albums have actually gained tag data.
      const MAX_ENRICHED = 30;
      let enrichedCount = 0;
      for (const [id, data] of Object.entries(albumCache)) {
        if (enrichedCount >= MAX_ENRICHED) break;
        if ((data.tags || []).length > CONFIG.TAG_ENRICH_THRESHOLD) continue;
        try {
          const html = await fetcher.enqueue(`https://18comic.vip/album/${id}/`, null, 1);
          if (html) {
            const detail = Parser.parseDetail(html);
            if ((detail.tags || []).length > 0) {
              data.tags = union(data.tags, detail.tags);
              data.authors = union(data.authors, detail.authors);
              data.typeTags = union(data.typeTags, detail.typeTags);
              enrichedCount++;
            }
          }
        } catch (e) {
          // Best-effort: leave this album as it is.
        }
      }
      report(60, `补全${enrichedCount}个标签`);

      // Merge and de-duplicate: map every id to the distinct channels that produced it.
      const channelMap = {};
      for (const [chName, ids] of Object.entries(channels)) {
        for (const id of ids) {
          if (!channelMap[id]) channelMap[id] = [];
          if (!channelMap[id].includes(chName)) channelMap[id].push(chName);
        }
      }

      const allIds = Object.keys(channelMap);
      const blacklist = getBlacklist();
      // Built once so the per-candidate check is a constant-time lookup.
      const blockedAlbums = new Set((blacklist && blacklist.albums) || []);
      const candidates = [];
      const seenTitles = new Set();
      const mergeStats = { total: allIds.length, blAlbum: 0, typeKilled: 0, titleDup: 0, trieDup: 0, survived: 0 };

      // Title key for duplicate detection: lower-cased, bracketed segments
      // removed, characters mapped through TAG_NORMALIZE, separators stripped.
      // Full-width characters are written as \u escapes: in the previous
      // revision they had been folded to ASCII, leaving /(.*?)/ (a no-op)
      // and a duplicated "~" in the separator class.
      const normTitle = (t) => {
        const stripped = (t || '').toLowerCase()
          .replace(/\[.*?\]/g, '')
          .replace(/【.*?】/g, '')
          .replace(/\uFF08.*?\uFF09/g, '')
          .replace(/\(.*?\)/g, '');
        let mapped = '';
        for (const ch of stripped) {
          const v = TAG_NORMALIZE[ch];
          mapped += v !== undefined ? v : ch;
        }
        return mapped.replace(/[\s\-~\uFF5E\u3000]+/g, '').trim();
      };

      for (const id of allIds) {
        if (blockedAlbums.has(id)) { mergeStats.blAlbum++; continue; }
        const hitChannels = channelMap[id];
        const channelCount = hitChannels.length;
        // Ids seen only via "related" may have no cache entry yet.
        const albumData = albumCache[id] || { id, title: '', tags: [], authors: [], typeTags: [], views: 0 };
        const isTypeBlacklisted = (albumData.typeTags || []).some(t => CONFIG.TYPE_HARD_BLACKLIST.includes(t)) ||
            (albumData.tags || []).some(t => CONFIG.TYPE_HARD_BLACKLIST.includes(t)) ||
            (albumData.chapters || 0) > 20;
        if (isTypeBlacklisted) { mergeStats.typeKilled++; continue; }
        const nt = normTitle(albumData.title);
        if (nt && seenTitles.has(nt)) { mergeStats.titleDup++; continue; }
        if (nt && albumData.title) {
          const dc = _dedupCheck(albumData.title, id, false);
          if (dc.action === 'dup') { mergeStats.trieDup++; continue; }
        }
        if (nt) seenTitles.add(nt);
        mergeStats.survived++;
        if (!albumCache[id]) albumCache[id] = { ...albumData };
        albumCache[id].channels = hitChannels;
        albumCache[id].channelCount = channelCount;
        candidates.push({ ...albumData, channelCount, channels: hitChannels });
      }

      LOG.info(`📊 合并去重: 原始${mergeStats.total} → -${mergeStats.blAlbum}黑名 -${mergeStats.typeKilled}类型 -${mergeStats.titleDup||0}标题 -${mergeStats.trieDup||0}AC去重 → 候选${mergeStats.survived}`);

      State.saveAlbumCache(albumCache);
      candidates.sort((a, b) => (b.channelCount||0) - (a.channelCount||0) || (b.views||0) - (a.views||0));
      // Only the top CANDIDATE_POOL_MAX ids are persisted; the full list is returned.
      State.saveCandidates(candidates.slice(0, CONFIG.CANDIDATE_POOL_MAX).map(c => c.id));

      report(65, `候选池: ${candidates.length}个`);

      return candidates;
    },

    /**
     * Adds one album to the cache and, if there is room, to the stored
     * candidate id list. Does nothing when the album id is already cached.
     *
     * @param {{id: *}} albumData - Album record to store.
     */
    addAlbum(albumData) {
      const cache = State.getAlbumCache();
      if (cache[albumData.id]) return;
      cache[albumData.id] = albumData;
      State.saveAlbumCache(cache);

      const candidates = State.getCandidates();
      if (!candidates.includes(albumData.id) && candidates.length < CONFIG.CANDIDATE_POOL_MAX) {
        candidates.push(albumData.id);
        State.saveCandidates(candidates);
      }
    },

    /**
     * Looks an album up in the cache.
     *
     * @param {*} id - Album id.
     * @returns {object|null} The cached record, or `null` when absent.
     */
    getCachedAlbum(id) {
      return State.getAlbumCache()[id] || null;
    },
  };