Coomerfans Scraper

Scrape image and video URLs from coomerfans.com posts across all pages

이 스크립트를 설치하려면 Tampermonkey, Greasemonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램을 설치해야 합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Violentmonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey 또는 Userscripts와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 Tampermonkey와 같은 확장 프로그램이 필요합니다.

이 스크립트를 설치하려면 유저 스크립트 관리자 확장 프로그램이 필요합니다.

(이미 유저 스크립트 관리자가 설치되어 있습니다. 설치를 진행합니다!)

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 Stylus와 같은 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

이 스타일을 설치하려면 유저 스타일 관리자 확장 프로그램이 필요합니다.

(이미 유저 스타일 관리자가 설치되어 있습니다. 설치를 진행합니다!)

// ==UserScript==
// @name         Coomerfans Scraper
// @namespace    https://coomerfans.com/
// @version      1.0.0
// @description  Scrape image and video URLs from coomerfans.com posts across all pages
// @author       regginyggaf
// @match        *://coomerfans.com/u/*
// @match        *://www.coomerfans.com/u/*
// @grant        none
// @run-at       document-idle
// @license MIT
// ==/UserScript==

(function () {
  'use strict';

  // ─── Constants ────────────────────────────────────────────────────────────────
  const MAX_RETRIES = 3;              // attempts per URL before fetchWithRetry gives up
  const BASE_DELAY_MS = 1000;         // first retry backoff; doubled on each further attempt
  const SCRIPT_TAG = '[BF-SCRAPER]';  // prefix for every console.log line this script emits

  // ─── Utilities ────────────────────────────────────────────────────────────────

  const sleep = ms => new Promise(r => setTimeout(r, ms));

  // Console logger. A truthy pageLabel is appended to the script tag in
  // brackets; otherwise only the bare tag prefixes the message.
  function log(pageLabel, ...args) {
    console.log(pageLabel ? `${SCRIPT_TAG} [${pageLabel}]` : SCRIPT_TAG, ...args);
  }

  // Build the listing URL for a 1-based page number. Page 1 is the bare base
  // URL (any ?page= removed); later pages carry ?page=N.
  function getPageUrl(baseUrl, page) {
    const pageUrl = new URL(baseUrl);
    if (page > 1) {
      pageUrl.searchParams.set('page', page);
    } else {
      pageUrl.searchParams.delete('page');
    }
    return pageUrl.toString();
  }

  // Canonical listing URL: the current location with any ?page= stripped.
  function getBaseUrl() {
    const current = new URL(window.location.href);
    current.searchParams.delete('page');
    return current.toString();
  }

  // fetch() with exponential-backoff retries. A non-2xx status counts as a
  // failure. Throws the last error once MAX_RETRIES attempts are exhausted.
  async function fetchWithRetry(url, options = {}, attempt = 1) {
    for (let tryNum = attempt; ; tryNum += 1) {
      try {
        const res = await fetch(url, options);
        if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
        return res;
      } catch (err) {
        if (tryNum >= MAX_RETRIES) throw err;
        // Backoff: 1s, 2s, 4s, …
        const delay = BASE_DELAY_MS * Math.pow(2, tryNum - 1);
        log(null, `Fetch failed (attempt ${tryNum}): ${err.message}. Retrying in ${delay}ms…`);
        await sleep(delay);
      }
    }
  }

  // Fetch a URL and parse the response body as an HTML document.
  // Logs and returns null on any fetch failure instead of throwing.
  async function fetchDocument(url) {
    let html;
    try {
      const response = await fetchWithRetry(url);
      html = await response.text();
    } catch (err) {
      log(null, `Failed to fetch document: ${url} — ${err.message}`);
      return null;
    }
    // parseFromString with 'text/html' never throws; malformed markup yields
    // a best-effort document.
    return new DOMParser().parseFromString(html, 'text/html');
  }

  // ─── Page Counting ────────────────────────────────────────────────────────────

  /**
   * Determine how many listing pages contain posts, via an exponential probe
   * followed by a binary search. Assumes pagination is monotone: if page p
   * has posts, every page below p does too.
   *
   * Fixes vs. previous version: the probe issued a redundant HEAD request
   * before fetching each page (doubling server load — fetchDocument already
   * handles failures by returning null), and the progress log reported the
   * doubled bound instead of the page actually probed.
   *
   * @param {string} baseUrl Listing URL without a ?page= parameter.
   * @returns {Promise<number>} Highest page number that has posts (>= 1).
   */
  async function countTotalPages(baseUrl) {
    log(null, 'Probing for total pages…');

    // True when the listing at `page` loads and contains at least one post.
    async function hasPostsAtPage(page) {
      const doc = await fetchDocument(getPageUrl(baseUrl, page));
      return doc !== null && doc.querySelectorAll('div.post').length > 0;
    }

    // Exponential search: double `hi` until page hi+1 has no posts.
    // Invariant on exit: hasPostsAtPage(hi + 1) is false.
    let hi = 1;
    while (await hasPostsAtPage(hi + 1)) {
      log(null, `  Page ${hi + 1} has posts, probing further…`);
      hi = hi * 2;
    }

    // The last successful probe was page hi/2 + 1, so the answer lies in
    // [hi/2 + 1, hi]. (Note: page 1 itself is never probed; an entirely
    // empty listing still reports 1 page, which scrapePage handles.)
    let lo = Math.floor(hi / 2) + 1;
    if (lo >= hi) {
      log(null, `Total pages: ${hi}`);
      return hi;
    }

    // Binary search for the last page with posts. Invariant: lo has posts,
    // hi + 1 does not.
    while (lo < hi) {
      const mid = Math.floor((lo + hi + 1) / 2); // round up so lo always advances
      if (await hasPostsAtPage(mid)) {
        lo = mid;
      } else {
        hi = mid - 1;
      }
    }

    log(null, `Total pages found: ${lo}`);
    return lo;
  }

  // ─── Scraping Logic ───────────────────────────────────────────────────────────

  // scrapePhotos / scrapeVideos are passed as explicit parameters to avoid
  // any closure scoping issues across the call chain.

  /**
   * Fetch a single post page and extract media URLs from its .post-body.
   *
   * Fix vs. previous version: getAttribute('src') returns the raw attribute,
   * which may be a relative path (DOMParser documents do not resolve it).
   * Each src is now resolved against the post URL so the output contains
   * absolute, downloadable URLs; already-absolute URLs pass through.
   *
   * @param {string} postUrl   Absolute URL of the post page.
   * @param {string} pageLabel Label for log lines (e.g. "Page 2/9").
   * @param {number} postIndex 1-based index of the post on its listing page.
   * @returns {Promise<{imgs: string[], vids: string[], failed: string[]}>}
   *   `failed` holds postUrl when the page could not be loaded at all.
   */
  async function scrapePostPage(postUrl, pageLabel, postIndex) {
    log(pageLabel, `  Post ${postIndex}: fetching ${postUrl}`);
    const doc = await fetchDocument(postUrl);
    if (!doc) {
      log(pageLabel, `  Post ${postIndex}: could not load, skipping`);
      return { imgs: [], vids: [], failed: [postUrl] };
    }

    const body = doc.querySelector('div.post-body');
    if (!body) {
      log(pageLabel, `  Post ${postIndex}: no .post-body element found`);
      return { imgs: [], vids: [], failed: [] };
    }

    // Resolve a raw src attribute to an absolute URL; keep unparseable
    // values as-is rather than dropping them.
    function resolveSrc(raw) {
      try {
        return new URL(raw, postUrl).toString();
      } catch {
        return raw;
      }
    }

    const imgs = [];
    body.querySelectorAll('img').forEach(img => {
      const raw = img.getAttribute('src');
      if (raw) {
        const src = resolveSrc(raw);
        imgs.push(src);
        log(pageLabel, `  Post ${postIndex}: 🖼  image → ${src}`);
      }
    });

    const vids = [];
    body.querySelectorAll('video source').forEach(source => {
      const raw = source.getAttribute('src');
      if (raw) {
        const src = resolveSrc(raw);
        vids.push(src);
        log(pageLabel, `  Post ${postIndex}: 🎬  video → ${src}`);
      }
    });

    log(pageLabel, `  Post ${postIndex}: done (${imgs.length} img, ${vids.length} vid)`);
    return { imgs, vids, failed: [] };
  }

  // Scrape one listing page: load it, locate its posts, and scrape each post
  // sequentially. Returns aggregated { images, videos, failed } arrays; a
  // listing that fails to load contributes its own URL to `failed`.
  async function scrapePage(pageNum, totalPages, baseUrl) {
    const pageLabel = `Page ${pageNum}/${totalPages}`;
    const pageUrl = getPageUrl(baseUrl, pageNum);

    log(pageLabel, `Fetching listing page → ${pageUrl}`);
    const doc = await fetchDocument(pageUrl);
    if (doc === null) {
      log(pageLabel, 'Failed to load listing page, skipping');
      return { images: [], videos: [], failed: [pageUrl] };
    }

    const posts = doc.querySelectorAll('div.post');
    if (posts.length === 0) {
      log(pageLabel, 'No div.post elements found on this page');
      return { images: [], videos: [], failed: [] };
    }

    log(pageLabel, `Found ${posts.length} post(s)`);

    const images = [];
    const videos = [];
    const failed = [];

    // Posts are fetched one at a time, in listing order.
    let postNumber = 0;
    for (const post of posts) {
      postNumber += 1;
      const link = post.querySelector('a.view-post');
      if (!link || !link.href) {
        log(pageLabel, `  Post ${postNumber}: no a.view-post link, skipping`);
        continue;
      }
      const { imgs, vids, failed: postFailed } =
        await scrapePostPage(link.href, pageLabel, postNumber);
      images.push(...imgs);
      videos.push(...vids);
      failed.push(...postFailed);
    }

    log(pageLabel, `Page complete — ${images.length} images, ${videos.length} videos, ${failed.length} failures`);
    return { images, videos, failed };
  }

  // ─── File Download ─────────────────────────────────────────────────────────────

  // Generate and download .txt files (one URL per line) for the selected
  // media types, plus a failures file whenever anything failed to scrape.
  function triggerTxtDownload(images, videos, failed, scrapePhotos, scrapeVideos) {
    // Path structure: /u/<handle>/<id>/<username>
    const segments = window.location.pathname.split('/').filter(Boolean);
    const username = segments[3] ?? 'unknown';

    // Save `lines` as a text file by clicking a temporary blob-URL anchor.
    function downloadFile(filename, lines) {
      const content = lines.length ? lines.join('\n') : '(none)';
      const objectUrl = URL.createObjectURL(new Blob([content], { type: 'text/plain' }));
      const anchor = document.createElement('a');
      anchor.href = objectUrl;
      anchor.download = filename;
      document.body.appendChild(anchor);
      anchor.click();
      anchor.remove();
      URL.revokeObjectURL(objectUrl);
    }

    if (scrapePhotos) downloadFile(`${username}-photos.txt`, images);
    if (scrapeVideos) downloadFile(`${username}-videos.txt`, videos);
    if (failed.length > 0) downloadFile(`${username}-failed.txt`, failed);

    log(null, `Downloaded files for user: ${username}`);
  }

  // ─── UI ────────────────────────────────────────────────────────────────────────

  /**
   * Build the scraper toolbar: a Download button, Photos/Videos checkboxes,
   * and a status line. All scraping is driven from the button's click
   * handler; results are cached after the first full run so later clicks
   * only re-download files per the current checkbox selection.
   * Returns the wrapper element (caller attaches it to the DOM).
   */
  function buildUI() {
    const wrapper = document.createElement('div');
    wrapper.id = 'bf-scraper-bar';
    wrapper.style.cssText = [
      'display: flex',
      'align-items: center',
      'gap: 14px',
      'margin: 12px 0',
      'padding: 10px 14px',
      'background: #0f1117',
      'border: 1px solid #2a2d3a',
      'border-radius: 8px',
      'font-family: ui-monospace, "Cascadia Code", "Fira Code", monospace',
      'font-size: 13px',
    ].join(';');

    // ── Button ─────────────────────────────────────────────────────────────────
    const btn = document.createElement('button');
    btn.id = 'bf-scraper-btn';
    btn.textContent = 'Download URLs';
    btn.style.cssText = [
      'padding: 7px 18px',
      'background: #3b82f6',
      'color: #fff',
      'border: none',
      'border-radius: 5px',
      'font-family: inherit',
      'font-size: 13px',
      'font-weight: 700',
      'cursor: pointer',
      'letter-spacing: 0.03em',
      'transition: background 0.2s, opacity 0.2s',
      'flex-shrink: 0',
    ].join(';');

    // ── Checkboxes ─────────────────────────────────────────────────────────────
    // Builds a checked-by-default checkbox wrapped in a clickable label.
    function makeCheckbox(id, label) {
      const lbl = document.createElement('label');
      lbl.style.cssText = 'display:flex; align-items:center; gap:5px; color:#c9d1e0; cursor:pointer; flex-shrink:0; user-select:none;';
      const cb = document.createElement('input');
      cb.type = 'checkbox';
      cb.id = id;
      cb.checked = true;
      cb.style.cssText = 'cursor:pointer; width:14px; height:14px;';
      lbl.appendChild(cb);
      lbl.appendChild(document.createTextNode(label));
      return { lbl, cb };
    }

    const { lbl: photosLbl, cb: photosCb } = makeCheckbox('bf-cb-photos', 'Photos');
    const { lbl: videosLbl, cb: videosCb } = makeCheckbox('bf-cb-videos', 'Videos');

    // Disable the button when neither media type is selected.
    function syncButtonState() {
      const enabled = photosCb.checked || videosCb.checked;
      btn.disabled = !enabled;
      btn.style.opacity = enabled ? '1' : '0.5';
      btn.style.cursor = enabled ? 'pointer' : 'not-allowed';
    }

    photosCb.addEventListener('change', syncButtonState);
    videosCb.addEventListener('change', syncButtonState);

    // ── Status text ────────────────────────────────────────────────────────────
    const statusText = document.createElement('span');
    statusText.id = 'bf-scraper-status';
    statusText.style.cssText = 'color: #8b92a5; flex: 1;';
    statusText.textContent = 'Ready to scrape.';

    wrapper.appendChild(btn);
    wrapper.appendChild(photosLbl);
    wrapper.appendChild(videosLbl);
    wrapper.appendChild(statusText);

    // Results of the first completed scrape; null until one finishes.
    let cachedImages = null;
    let cachedVideos = null;
    let cachedFailed = null;

    // ── Click handler ──────────────────────────────────────────────────────────
    btn.addEventListener('click', async () => {
      const scrapePhotos = photosCb.checked;
      const scrapeVideos = videosCb.checked;

      // If we already have results, just re-download based on current checkboxes
      if (cachedImages !== null) {
        triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);
        return;
      }

      // Lock UI so a second click can't start a concurrent scrape.
      btn.disabled = true;
      btn.style.opacity = '0.5';
      btn.style.cursor = 'not-allowed';
      photosCb.disabled = true;
      videosCb.disabled = true;

      const allImages = [];
      const allVideos = [];
      const allFailed = [];

      try {
        statusText.style.color = '#f0c040';
        statusText.textContent = 'Counting pages…';
        btn.textContent = 'Scraping (0%)';

        const baseUrl = getBaseUrl();
        const totalPages = await countTotalPages(baseUrl);

        // Pages are scraped one at a time; progress shown on the button.
        for (let page = 1; page <= totalPages; page++) {
          const pct = Math.round(((page - 1) / totalPages) * 100);
          btn.textContent = `Scraping (${pct}%)`;
          statusText.textContent = `Scraping page ${page} of ${totalPages}…`;

          // Checkbox flags are applied at download time, not while scraping,
          // so cached results can serve either selection later.
          const result = await scrapePage(page, totalPages, baseUrl);
          allImages.push(...result.images);
          allVideos.push(...result.videos);
          allFailed.push(...result.failed);
        }

        btn.textContent = 'Scraping (100%)';
        statusText.textContent = 'Generating files…';

        log(null, '═══ SCRAPE COMPLETE ═══');
        log(null, `Total images : ${allImages.length}`);
        log(null, `Total videos : ${allVideos.length}`);
        log(null, `Failed URLs  : ${allFailed.length}`);
        console.log('\n── IMAGES ──\n' + (allImages.join('\n') || '(none)'));
        console.log('\n── VIDEOS ──\n' + (allVideos.join('\n') || '(none)'));
        console.log('\n── FAILED ──\n' + (allFailed.join('\n') || '(none)'));

        // Cache results for subsequent clicks
        cachedImages = allImages;
        cachedVideos = allVideos;
        cachedFailed = allFailed;

        triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);

        statusText.style.color = '#4ade80';
        statusText.textContent =
          `Done! ${allImages.length} image(s), ${allVideos.length} video(s).` +
          (allFailed.length ? ` (${allFailed.length} failed)` : '');

      } catch (err) {
        log(null, 'Fatal error during scraping:', err);
        statusText.style.color = '#f87171';
        statusText.textContent = `Error: ${err.message}`;
      }

      // Unlock the UI whether the scrape succeeded or failed.
      btn.textContent = 'Download URLs';
      photosCb.disabled = false;
      videosCb.disabled = false;
      syncButtonState();
    });

    return wrapper;
  }

  // ─── Bootstrap ────────────────────────────────────────────────────────────────

  // Insert the scraper toolbar just before the first post. Safe to call
  // repeatedly: it bails out if the bar exists or no post is rendered yet.
  function inject() {
    if (document.getElementById('bf-scraper-bar')) return; // already injected
    const firstPost = document.querySelector('div.post');
    if (!firstPost) return; // listing not rendered yet
    firstPost.insertAdjacentElement('beforebegin', buildUI());
    log(null, 'UI injected successfully.');
  }

  // Try to inject immediately (script runs at document-idle), then fall back
  // to watching DOM mutations in case the post list is rendered later.
  inject();

  if (!document.getElementById('bf-scraper-bar')) {
    const observer = new MutationObserver(() => {
      if (document.querySelector('div.post')) {
        inject();
        // Stop observing only once the bar actually made it into the DOM.
        if (document.getElementById('bf-scraper-bar')) observer.disconnect();
      }
    });
    observer.observe(document.body || document.documentElement, { childList: true, subtree: true });
  }

})();