Coomerfans Scraper

Scrape image and video URLs from coomerfans.com posts across all pages

To install this script, you will need a userscript manager extension such as Tampermonkey, Greasemonkey, or Violentmonkey.
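Once installed, visit a coomerfans.com/u/… profile page; a "Download URLs" toolbar appears above the first post. The generated .txt files are plain newline-separated URL lists, so they can be fed straight to a batch downloader (for example, wget -i <username>-photos.txt).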


// ==UserScript==
// @name         Coomerfans Scraper
// @namespace    https://coomerfans.com/
// @version      1.0.0
// @description  Scrape image and video URLs from coomerfans.com posts across all pages
// @author       regginyggaf
// @match        *://coomerfans.com/u/*
// @match        *://www.coomerfans.com/u/*
// @grant        none
// @run-at       document-idle
// @license      MIT
// ==/UserScript==
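// How it works: a small toolbar is injected above the first post; on click the
// script probes the total page count (exponential search, then binary search),
// fetches every listing page, follows each post link, collects <img> and
// <video><source> URLs from .post-body, and saves them as .txt files.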

(function () {
  'use strict';

  // ─── Constants ────────────────────────────────────────────────────────────────
  const MAX_RETRIES = 3;
  const BASE_DELAY_MS = 1000;
  const SCRIPT_TAG = '[BF-SCRAPER]';

  // ─── Utilities ────────────────────────────────────────────────────────────────

  const sleep = ms => new Promise(r => setTimeout(r, ms));

  function log(pageLabel, ...args) {
    const prefix = pageLabel ? `${SCRIPT_TAG} [${pageLabel}]` : SCRIPT_TAG;
    console.log(prefix, ...args);
  }

  function getPageUrl(baseUrl, page) {
    const url = new URL(baseUrl);
    if (page <= 1) {
      url.searchParams.delete('page');
    } else {
      url.searchParams.set('page', page);
    }
    return url.toString();
  }

  function getBaseUrl() {
    const url = new URL(window.location.href);
    url.searchParams.delete('page');
    return url.toString();
  }

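  // Retry with exponential backoff (1 s, then 2 s) across MAX_RETRIES attempts.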
  async function fetchWithRetry(url, options = {}, attempt = 1) {
    try {
      const res = await fetch(url, options);
      if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
      return res;
    } catch (err) {
      if (attempt >= MAX_RETRIES) throw err;
      const delay = BASE_DELAY_MS * Math.pow(2, attempt - 1);
      log(null, `Fetch failed (attempt ${attempt}): ${err.message}. Retrying in ${delay}ms…`);
      await sleep(delay);
      return fetchWithRetry(url, options, attempt + 1);
    }
  }

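  // Fetch a URL and parse the response into a detached HTML Document.
  // Returns null instead of throwing so callers can skip failed pages.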
  async function fetchDocument(url) {
    try {
      const res = await fetchWithRetry(url);
      const text = await res.text();
      return new DOMParser().parseFromString(text, 'text/html');
    } catch (err) {
      log(null, `Failed to fetch document: ${url} — ${err.message}`);
      return null;
    }
  }

  // ─── Page Counting ────────────────────────────────────────────────────────────

  async function countTotalPages(baseUrl) {
    log(null, 'Probing for total pages…');

    // A page "exists" if its listing renders at least one div.post.
    // fetchDocument already retries and returns null on failure, so no
    // separate probe or error handling is needed here.
    async function hasPostsAtPage(page) {
      const url = getPageUrl(baseUrl, page);
      const doc = await fetchDocument(url);
      return doc !== null && doc.querySelectorAll('div.post').length > 0;
    }

    // Exponential search: keep doubling the upper bound while the page just
    // past the previous bound still has posts. On exit, page hi + 1 is known
    // to be empty.
    let hi = 1;
    while (await hasPostsAtPage(hi + 1)) {
      log(null, `  Page ${hi + 1} has posts, probing further…`);
      hi = hi * 2;
    }

    // Binary search between hi/2+1 and hi to find the exact last page
    let lo = Math.floor(hi / 2) + 1;
    if (lo >= hi) {
      log(null, `Total pages: ${hi}`);
      return hi;
    }

    while (lo < hi) {
      const mid = Math.floor((lo + hi + 1) / 2);
      if (await hasPostsAtPage(mid)) {
        lo = mid;
      } else {
        hi = mid - 1;
      }
    }

    log(null, `Total pages found: ${lo}`);
    return lo;
  }

  // ─── Scraping Logic ───────────────────────────────────────────────────────────

  // Scraping always collects both images and videos; the Photos/Videos
  // checkboxes only decide which .txt files are written at download time,
  // so cached results can be re-filtered on later clicks.

  async function scrapePostPage(postUrl, pageLabel, postIndex) {
    log(pageLabel, `  Post ${postIndex}: fetching ${postUrl}`);
    const doc = await fetchDocument(postUrl);
    if (!doc) {
      log(pageLabel, `  Post ${postIndex}: could not load, skipping`);
      return { imgs: [], vids: [], failed: [postUrl] };
    }

    const body = doc.querySelector('div.post-body');
    if (!body) {
      log(pageLabel, `  Post ${postIndex}: no .post-body element found`);
      return { imgs: [], vids: [], failed: [] };
    }

    const imgs = [];
    body.querySelectorAll('img').forEach(img => {
      const src = img.getAttribute('src');
      if (src) { imgs.push(src); log(pageLabel, `  Post ${postIndex}: 🖼  image → ${src}`); }
    });
    const vids = [];
    body.querySelectorAll('video source').forEach(source => {
      const src = source.getAttribute('src');
      if (src) { vids.push(src); log(pageLabel, `  Post ${postIndex}: 🎬  video → ${src}`); }
    });

    log(pageLabel, `  Post ${postIndex}: done (${imgs.length} img, ${vids.length} vid)`);
    return { imgs, vids, failed: [] };
  }

  async function scrapePage(pageNum, totalPages, baseUrl) {
    const pageLabel = `Page ${pageNum}/${totalPages}`;
    const pageUrl = getPageUrl(baseUrl, pageNum);

    log(pageLabel, `Fetching listing page → ${pageUrl}`);
    const doc = await fetchDocument(pageUrl);
    if (!doc) {
      log(pageLabel, 'Failed to load listing page, skipping');
      return { images: [], videos: [], failed: [pageUrl] };
    }

    const posts = doc.querySelectorAll('div.post');
    if (!posts.length) {
      log(pageLabel, 'No div.post elements found on this page');
      return { images: [], videos: [], failed: [] };
    }

    log(pageLabel, `Found ${posts.length} post(s)`);

    const images = [];
    const videos = [];
    const failed = [];

    for (const [i, post] of posts.entries()) {
      const link = post.querySelector('a.view-post');
      if (!link || !link.href) {
        log(pageLabel, `  Post ${i + 1}: no a.view-post link, skipping`);
        continue;
      }
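      // link.href is already absolute here: documents produced by DOMParser
      // inherit the creating page's base URL, so relative paths resolve
      // against the current profile URL.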
      const result = await scrapePostPage(link.href, pageLabel, i + 1);
      images.push(...result.imgs);
      videos.push(...result.vids);
      failed.push(...result.failed);
    }

    log(pageLabel, `Page complete — ${images.length} images, ${videos.length} videos, ${failed.length} failures`);
    return { images, videos, failed };
  }

  // ─── File Download ─────────────────────────────────────────────────────────────

  function triggerTxtDownload(images, videos, failed, scrapePhotos, scrapeVideos) {
    const pathParts = window.location.pathname.split('/').filter(Boolean);
    // Path structure: /u/<handle>/<id>/<username>
    const username = pathParts[3] ?? 'unknown';

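    // Standard blob-download technique: build an in-memory text file, point a
    // temporary <a download> at it, click it, then clean up the object URL.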
    function downloadFile(filename, lines) {
      const blob = new Blob([lines.length ? lines.join('\n') : '(none)'], { type: 'text/plain' });
      const anchor = document.createElement('a');
      anchor.href = URL.createObjectURL(blob);
      anchor.download = filename;
      document.body.appendChild(anchor);
      anchor.click();
      document.body.removeChild(anchor);
      URL.revokeObjectURL(anchor.href);
    }

    if (scrapePhotos) downloadFile(`${username}-photos.txt`, images);
    if (scrapeVideos) downloadFile(`${username}-videos.txt`, videos);
    if (failed.length > 0) downloadFile(`${username}-failed.txt`, failed);

    log(null, `Downloaded files for user: ${username}`);
  }

  // ─── UI ────────────────────────────────────────────────────────────────────────

  function buildUI() {
    const wrapper = document.createElement('div');
    wrapper.id = 'bf-scraper-bar';
    wrapper.style.cssText = [
      'display: flex',
      'align-items: center',
      'gap: 14px',
      'margin: 12px 0',
      'padding: 10px 14px',
      'background: #0f1117',
      'border: 1px solid #2a2d3a',
      'border-radius: 8px',
      'font-family: ui-monospace, "Cascadia Code", "Fira Code", monospace',
      'font-size: 13px',
    ].join(';');

    // ── Button ─────────────────────────────────────────────────────────────────
    const btn = document.createElement('button');
    btn.id = 'bf-scraper-btn';
    btn.textContent = 'Download URLs';
    btn.style.cssText = [
      'padding: 7px 18px',
      'background: #3b82f6',
      'color: #fff',
      'border: none',
      'border-radius: 5px',
      'font-family: inherit',
      'font-size: 13px',
      'font-weight: 700',
      'cursor: pointer',
      'letter-spacing: 0.03em',
      'transition: background 0.2s, opacity 0.2s',
      'flex-shrink: 0',
    ].join(';');

    // ── Checkboxes ─────────────────────────────────────────────────────────────
    function makeCheckbox(id, label) {
      const lbl = document.createElement('label');
      lbl.style.cssText = 'display:flex; align-items:center; gap:5px; color:#c9d1e0; cursor:pointer; flex-shrink:0; user-select:none;';
      const cb = document.createElement('input');
      cb.type = 'checkbox';
      cb.id = id;
      cb.checked = true;
      cb.style.cssText = 'cursor:pointer; width:14px; height:14px;';
      lbl.appendChild(cb);
      lbl.appendChild(document.createTextNode(label));
      return { lbl, cb };
    }

    const { lbl: photosLbl, cb: photosCb } = makeCheckbox('bf-cb-photos', 'Photos');
    const { lbl: videosLbl, cb: videosCb } = makeCheckbox('bf-cb-videos', 'Videos');

    function syncButtonState() {
      const enabled = photosCb.checked || videosCb.checked;
      btn.disabled = !enabled;
      btn.style.opacity = enabled ? '1' : '0.5';
      btn.style.cursor = enabled ? 'pointer' : 'not-allowed';
    }

    photosCb.addEventListener('change', syncButtonState);
    videosCb.addEventListener('change', syncButtonState);

    // ── Status text ────────────────────────────────────────────────────────────
    const statusText = document.createElement('span');
    statusText.id = 'bf-scraper-status';
    statusText.style.cssText = 'color: #8b92a5; flex: 1;';
    statusText.textContent = 'Ready to scrape.';

    wrapper.appendChild(btn);
    wrapper.appendChild(photosLbl);
    wrapper.appendChild(videosLbl);
    wrapper.appendChild(statusText);

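    // Results of the first full scrape are cached so later clicks can
    // re-download with different checkbox selections without re-scraping.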
    let cachedImages = null;
    let cachedVideos = null;
    let cachedFailed = null;

    // ── Click handler ──────────────────────────────────────────────────────────
    btn.addEventListener('click', async () => {
      const scrapePhotos = photosCb.checked;
      const scrapeVideos = videosCb.checked;

      // If we already have results, just re-download based on current checkboxes
      if (cachedImages !== null) {
        triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);
        return;
      }

      // Lock UI
      btn.disabled = true;
      btn.style.opacity = '0.5';
      btn.style.cursor = 'not-allowed';
      photosCb.disabled = true;
      videosCb.disabled = true;

      const allImages = [];
      const allVideos = [];
      const allFailed = [];

      try {
        statusText.style.color = '#f0c040';
        statusText.textContent = 'Counting pages…';
        btn.textContent = 'Scraping (0%)';

        const baseUrl = getBaseUrl();
        const totalPages = await countTotalPages(baseUrl);

        for (let page = 1; page <= totalPages; page++) {
          const pct = Math.round(((page - 1) / totalPages) * 100);
          btn.textContent = `Scraping (${pct}%)`;
          statusText.textContent = `Scraping page ${page} of ${totalPages}…`;

          const result = await scrapePage(page, totalPages, baseUrl); // collects both media types
          allImages.push(...result.images);
          allVideos.push(...result.videos);
          allFailed.push(...result.failed);
        }

        btn.textContent = 'Scraping (100%)';
        statusText.textContent = 'Generating files…';

        log(null, '═══ SCRAPE COMPLETE ═══');
        log(null, `Total images : ${allImages.length}`);
        log(null, `Total videos : ${allVideos.length}`);
        log(null, `Failed URLs  : ${allFailed.length}`);
        console.log('\n── IMAGES ──\n' + (allImages.join('\n') || '(none)'));
        console.log('\n── VIDEOS ──\n' + (allVideos.join('\n') || '(none)'));
        console.log('\n── FAILED ──\n' + (allFailed.join('\n') || '(none)'));

        // Cache results for subsequent clicks
        cachedImages = allImages;
        cachedVideos = allVideos;
        cachedFailed = allFailed;

        triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);

        statusText.style.color = '#4ade80';
        statusText.textContent =
          `Done! ${allImages.length} image(s), ${allVideos.length} video(s).` +
          (allFailed.length ? ` (${allFailed.length} failed)` : '');

      } catch (err) {
        log(null, 'Fatal error during scraping:', err);
        statusText.style.color = '#f87171';
        statusText.textContent = `Error: ${err.message}`;
      }

      btn.textContent = 'Download URLs';
      photosCb.disabled = false;
      videosCb.disabled = false;
      syncButtonState();
    });

    return wrapper;
  }

  // ─── Bootstrap ────────────────────────────────────────────────────────────────

  function inject() {
    if (document.getElementById('bf-scraper-bar')) return;
    const firstPost = document.querySelector('div.post');
    if (!firstPost) return;
    const ui = buildUI();
    firstPost.insertAdjacentElement('beforebegin', ui);
    log(null, 'UI injected successfully.');
  }

  inject();

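  // If the post list renders after document-idle (e.g. via client-side
  // rendering), watch the DOM and inject the toolbar once a post appears.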
  if (!document.getElementById('bf-scraper-bar')) {
    const observer = new MutationObserver(() => {
      if (document.querySelector('div.post')) {
        inject();
        if (document.getElementById('bf-scraper-bar')) observer.disconnect();
      }
    });
    observer.observe(document.body || document.documentElement, { childList: true, subtree: true });
  }

})();