// Scrape image and video URLs from coomerfans.com posts across all pages
// ==UserScript==
// @name Coomerfans Scraper
// @namespace https://coomerfans.com/
// @version 1.0.0
// @description Scrape image and video URLs from coomerfans.com posts across all pages
// @author regginyggaf
// @match *://coomerfans.com/u/*
// @match *://www.coomerfans.com/u/*
// @grant none
// @run-at document-idle
// @license MIT
// ==/UserScript==
(function () {
'use strict';
// ─── Constants ────────────────────────────────────────────────────────────────
const MAX_RETRIES = 3;             // fetch attempts before giving up
const BASE_DELAY_MS = 1000;        // initial backoff delay; doubles per retry
const SCRIPT_TAG = '[BF-SCRAPER]';
// ─── Utilities ────────────────────────────────────────────────────────────────
/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
/**
 * Console logger for this script. Messages are prefixed with the script tag
 * plus, when given, a page label like "Page 2/7".
 * @param {?string} pageLabel optional page context; falsy for global messages
 * @param {...*} args forwarded to console.log
 */
function log(pageLabel, ...args) {
  console.log(pageLabel ? `${SCRIPT_TAG} [${pageLabel}]` : SCRIPT_TAG, ...args);
}
/**
 * Build the listing URL for a given page number. Page 1 (and anything below)
 * is represented by the bare base URL with no `page` query parameter; later
 * pages carry `?page=N`.
 * @param {string} baseUrl absolute listing URL
 * @param {number} page 1-based page number
 * @returns {string} the page-specific URL
 */
function getPageUrl(baseUrl, page) {
  const target = new URL(baseUrl);
  if (page > 1) {
    target.searchParams.set('page', page);
  } else {
    target.searchParams.delete('page');
  }
  return target.toString();
}
/**
 * Current page URL with any `page` query parameter stripped — i.e. the
 * canonical page-1 listing URL for this user.
 * @returns {string}
 */
function getBaseUrl() {
  const current = new URL(window.location.href);
  current.searchParams.delete('page');
  return current.toString();
}
/**
 * fetch() with exponential-backoff retries. A non-2xx response counts as a
 * failure. Gives up and rethrows after MAX_RETRIES total attempts.
 * @param {string} url
 * @param {RequestInit} [options]
 * @param {number} [attempt] 1-based attempt number (callers normally omit)
 * @returns {Promise<Response>} the first OK response
 * @throws the last fetch/HTTP error once all attempts are exhausted
 */
async function fetchWithRetry(url, options = {}, attempt = 1) {
  for (let tryNum = attempt; ; tryNum++) {
    try {
      const response = await fetch(url, options);
      if (!response.ok) throw new Error(`HTTP ${response.status} for ${url}`);
      return response;
    } catch (err) {
      if (tryNum >= MAX_RETRIES) throw err;
      // Backoff: 1s, 2s, 4s, …
      const delay = BASE_DELAY_MS * 2 ** (tryNum - 1);
      log(null, `Fetch failed (attempt ${tryNum}): ${err.message}. Retrying in ${delay}ms…`);
      await sleep(delay);
    }
  }
}
/**
 * Fetch a URL (with retries) and parse the body as an HTML document.
 * @param {string} url
 * @returns {Promise<?Document>} the parsed document, or null on any failure
 */
async function fetchDocument(url) {
  try {
    const response = await fetchWithRetry(url);
    const markup = await response.text();
    return new DOMParser().parseFromString(markup, 'text/html');
  } catch (err) {
    // Soft-fail: callers treat null as "page unavailable".
    log(null, `Failed to fetch document: ${url} — ${err.message}`);
    return null;
  }
}
// ─── Page Counting ────────────────────────────────────────────────────────────
/**
 * Determine how many listing pages exist by probing pages until one comes
 * back without any `div.post` elements. Uses exponential probing to bracket
 * the last page, then a binary search inside the bracket — O(log n) fetches.
 *
 * Fix vs. previous revision: `hasPostsAtPage` used to issue a bare HEAD
 * request (plain fetch, no retry) before fetchDocument performed a full GET
 * of the same URL. The HEAD doubled the traffic per probe and, having no
 * retry, let a single transient failure be misreported as "no posts",
 * silently truncating the page count. The probe now relies solely on
 * fetchDocument, which already retries and returns null on failure. The
 * probe log also now names the page that was actually verified (it
 * previously logged the doubled, unverified index).
 *
 * @param {string} baseUrl listing URL without a `page` parameter
 * @returns {Promise<number>} 1-based count of listing pages (always >= 1)
 */
async function countTotalPages(baseUrl) {
  log(null, 'Probing for total pages…');
  // True when the listing page at `page` contains at least one post.
  async function hasPostsAtPage(page) {
    const doc = await fetchDocument(getPageUrl(baseUrl, page));
    return doc !== null && doc.querySelectorAll('div.post').length > 0;
  }
  // Exponential search: double until we find a page that has no posts.
  // Invariant on exit: page floor(hi/2)+1 is known good, page hi+1 known bad.
  let hi = 1;
  while (await hasPostsAtPage(hi + 1)) {
    log(null, `  Page ${hi + 1} has posts, probing further…`);
    hi = hi * 2;
  }
  // Binary search between hi/2+1 and hi to find the exact last page.
  let lo = Math.floor(hi / 2) + 1;
  if (lo >= hi) {
    log(null, `Total pages: ${hi}`);
    return hi;
  }
  while (lo < hi) {
    // Upper midpoint so `lo = mid` always makes progress.
    const mid = Math.floor((lo + hi + 1) / 2);
    if (await hasPostsAtPage(mid)) {
      lo = mid;
    } else {
      hi = mid - 1;
    }
  }
  log(null, `Total pages found: ${lo}`);
  return lo;
}
// ─── Scraping Logic ───────────────────────────────────────────────────────────
// scrapePhotos / scrapeVideos are passed as explicit parameters to avoid
// any closure scoping issues across the call chain.
/**
 * Fetch one post page and collect the media URLs inside its `.post-body`.
 * @param {string} postUrl absolute URL of the post
 * @param {string} pageLabel listing-page label used for log prefixes
 * @param {number} postIndex 1-based index of the post on its listing page
 * @returns {Promise<{imgs: string[], vids: string[], failed: string[]}>}
 *   image srcs, video-source srcs, and (on load failure) the post URL itself
 */
async function scrapePostPage(postUrl, pageLabel, postIndex) {
  log(pageLabel, ` Post ${postIndex}: fetching ${postUrl}`);
  const doc = await fetchDocument(postUrl);
  if (!doc) {
    log(pageLabel, ` Post ${postIndex}: could not load, skipping`);
    return { imgs: [], vids: [], failed: [postUrl] };
  }
  const body = doc.querySelector('div.post-body');
  if (!body) {
    log(pageLabel, ` Post ${postIndex}: no .post-body element found`);
    return { imgs: [], vids: [], failed: [] };
  }
  const imgs = [];
  for (const img of body.querySelectorAll('img')) {
    const src = img.getAttribute('src');
    if (!src) continue;
    imgs.push(src);
    log(pageLabel, ` Post ${postIndex}: 🖼 image → ${src}`);
  }
  const vids = [];
  for (const sourceEl of body.querySelectorAll('video source')) {
    const src = sourceEl.getAttribute('src');
    if (!src) continue;
    vids.push(src);
    log(pageLabel, ` Post ${postIndex}: 🎬 video → ${src}`);
  }
  log(pageLabel, ` Post ${postIndex}: done (${imgs.length} img, ${vids.length} vid)`);
  return { imgs, vids, failed: [] };
}
/**
 * Scrape one listing page: fetch it, find each post's `a.view-post` link,
 * and scrape every post sequentially.
 * @param {number} pageNum 1-based listing page number
 * @param {number} totalPages total page count (for progress labels)
 * @param {string} baseUrl listing URL without a `page` parameter
 * @returns {Promise<{images: string[], videos: string[], failed: string[]}>}
 */
async function scrapePage(pageNum, totalPages, baseUrl) {
  const pageLabel = `Page ${pageNum}/${totalPages}`;
  const pageUrl = getPageUrl(baseUrl, pageNum);
  log(pageLabel, `Fetching listing page → ${pageUrl}`);
  const doc = await fetchDocument(pageUrl);
  if (!doc) {
    log(pageLabel, 'Failed to load listing page, skipping');
    return { images: [], videos: [], failed: [pageUrl] };
  }
  const posts = doc.querySelectorAll('div.post');
  if (posts.length === 0) {
    log(pageLabel, 'No div.post elements found on this page');
    return { images: [], videos: [], failed: [] };
  }
  log(pageLabel, `Found ${posts.length} post(s)`);
  const images = [];
  const videos = [];
  const failed = [];
  let index = 0;
  for (const post of posts) {
    index += 1;
    const link = post.querySelector('a.view-post');
    if (!link || !link.href) {
      log(pageLabel, ` Post ${index}: no a.view-post link, skipping`);
      continue;
    }
    // Posts are scraped one at a time to keep request pressure low.
    const { imgs, vids, failed: postFailed } = await scrapePostPage(link.href, pageLabel, index);
    images.push(...imgs);
    videos.push(...vids);
    failed.push(...postFailed);
  }
  log(pageLabel, `Page complete — ${images.length} images, ${videos.length} videos, ${failed.length} failures`);
  return { images, videos, failed };
}
// ─── File Download ─────────────────────────────────────────────────────────────
/**
 * Emit the scraped URL lists as downloadable .txt files. Photos/videos files
 * are gated by the checkbox flags; a failures file is produced whenever any
 * URL failed to scrape.
 * @param {string[]} images
 * @param {string[]} videos
 * @param {string[]} failed
 * @param {boolean} scrapePhotos emit the photos file
 * @param {boolean} scrapeVideos emit the videos file
 */
function triggerTxtDownload(images, videos, failed, scrapePhotos, scrapeVideos) {
  // Path structure: /u/<handle>/<id>/<username>
  const segments = window.location.pathname.split('/').filter(Boolean);
  const username = segments[3] ?? 'unknown';
  // Download `lines` as a plain-text file via a temporary object URL.
  const downloadFile = (filename, lines) => {
    const content = lines.length > 0 ? lines.join('\n') : '(none)';
    const objectUrl = URL.createObjectURL(new Blob([content], { type: 'text/plain' }));
    const anchor = document.createElement('a');
    anchor.href = objectUrl;
    anchor.download = filename;
    document.body.appendChild(anchor);
    anchor.click();
    anchor.remove();
    URL.revokeObjectURL(objectUrl);
  };
  if (scrapePhotos) downloadFile(`${username}-photos.txt`, images);
  if (scrapeVideos) downloadFile(`${username}-videos.txt`, videos);
  if (failed.length > 0) downloadFile(`${username}-failed.txt`, failed);
  log(null, `Downloaded files for user: ${username}`);
}
// ─── UI ────────────────────────────────────────────────────────────────────────
/**
 * Build the scraper toolbar: a "Download URLs" button, Photos/Videos
 * checkboxes, and a status line. Returns the root element; the caller is
 * responsible for attaching it to the DOM.
 *
 * The first successful click scrapes every page and caches the results;
 * subsequent clicks only re-download the text files for whichever
 * checkboxes are ticked at that moment, without re-fetching anything.
 */
function buildUI() {
const wrapper = document.createElement('div');
wrapper.id = 'bf-scraper-bar';
wrapper.style.cssText = [
'display: flex',
'align-items: center',
'gap: 14px',
'margin: 12px 0',
'padding: 10px 14px',
'background: #0f1117',
'border: 1px solid #2a2d3a',
'border-radius: 8px',
'font-family: ui-monospace, "Cascadia Code", "Fira Code", monospace',
'font-size: 13px',
].join(';');
// ── Button ─────────────────────────────────────────────────────────────────
const btn = document.createElement('button');
btn.id = 'bf-scraper-btn';
btn.textContent = 'Download URLs';
btn.style.cssText = [
'padding: 7px 18px',
'background: #3b82f6',
'color: #fff',
'border: none',
'border-radius: 5px',
'font-family: inherit',
'font-size: 13px',
'font-weight: 700',
'cursor: pointer',
'letter-spacing: 0.03em',
'transition: background 0.2s, opacity 0.2s',
'flex-shrink: 0',
].join(';');
// ── Checkboxes ─────────────────────────────────────────────────────────────
// Creates a labelled checkbox (checked by default); returns both elements.
function makeCheckbox(id, label) {
const lbl = document.createElement('label');
lbl.style.cssText = 'display:flex; align-items:center; gap:5px; color:#c9d1e0; cursor:pointer; flex-shrink:0; user-select:none;';
const cb = document.createElement('input');
cb.type = 'checkbox';
cb.id = id;
cb.checked = true;
cb.style.cssText = 'cursor:pointer; width:14px; height:14px;';
lbl.appendChild(cb);
lbl.appendChild(document.createTextNode(label));
return { lbl, cb };
}
const { lbl: photosLbl, cb: photosCb } = makeCheckbox('bf-cb-photos', 'Photos');
const { lbl: videosLbl, cb: videosCb } = makeCheckbox('bf-cb-videos', 'Videos');
// Disable the button whenever neither media type is selected.
function syncButtonState() {
const enabled = photosCb.checked || videosCb.checked;
btn.disabled = !enabled;
btn.style.opacity = enabled ? '1' : '0.5';
btn.style.cursor = enabled ? 'pointer' : 'not-allowed';
}
photosCb.addEventListener('change', syncButtonState);
videosCb.addEventListener('change', syncButtonState);
// ── Status text ────────────────────────────────────────────────────────────
const statusText = document.createElement('span');
statusText.id = 'bf-scraper-status';
statusText.style.cssText = 'color: #8b92a5; flex: 1;';
statusText.textContent = 'Ready to scrape.';
wrapper.appendChild(btn);
wrapper.appendChild(photosLbl);
wrapper.appendChild(videosLbl);
wrapper.appendChild(statusText);
// Results from the first successful scrape; null until one completes.
let cachedImages = null;
let cachedVideos = null;
let cachedFailed = null;
// ── Click handler ──────────────────────────────────────────────────────────
btn.addEventListener('click', async () => {
const scrapePhotos = photosCb.checked;
const scrapeVideos = videosCb.checked;
// If we already have results, just re-download based on current checkboxes
if (cachedImages !== null) {
triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);
return;
}
// Lock UI while the scrape is in flight
btn.disabled = true;
btn.style.opacity = '0.5';
btn.style.cursor = 'not-allowed';
photosCb.disabled = true;
videosCb.disabled = true;
const allImages = [];
const allVideos = [];
const allFailed = [];
try {
statusText.style.color = '#f0c040';
statusText.textContent = 'Counting pages…';
btn.textContent = 'Scraping (0%)';
const baseUrl = getBaseUrl();
const totalPages = await countTotalPages(baseUrl);
for (let page = 1; page <= totalPages; page++) {
// Progress counts pages completed before the current one.
const pct = Math.round(((page - 1) / totalPages) * 100);
btn.textContent = `Scraping (${pct}%)`;
statusText.textContent = `Scraping page ${page} of ${totalPages}…`;
const result = await scrapePage(page, totalPages, baseUrl); // always collects both media types; filtering happens at download time
allImages.push(...result.images);
allVideos.push(...result.videos);
allFailed.push(...result.failed);
}
btn.textContent = 'Scraping (100%)';
statusText.textContent = 'Generating files…';
log(null, '═══ SCRAPE COMPLETE ═══');
log(null, `Total images : ${allImages.length}`);
log(null, `Total videos : ${allVideos.length}`);
log(null, `Failed URLs : ${allFailed.length}`);
console.log('\n── IMAGES ──\n' + (allImages.join('\n') || '(none)'));
console.log('\n── VIDEOS ──\n' + (allVideos.join('\n') || '(none)'));
console.log('\n── FAILED ──\n' + (allFailed.join('\n') || '(none)'));
// Cache results for subsequent clicks
cachedImages = allImages;
cachedVideos = allVideos;
cachedFailed = allFailed;
triggerTxtDownload(cachedImages, cachedVideos, cachedFailed, scrapePhotos, scrapeVideos);
statusText.style.color = '#4ade80';
statusText.textContent =
`Done! ${allImages.length} image(s), ${allVideos.length} video(s).` +
(allFailed.length ? ` (${allFailed.length} failed)` : '');
} catch (err) {
log(null, 'Fatal error during scraping:', err);
statusText.style.color = '#f87171';
statusText.textContent = `Error: ${err.message}`;
}
// Unlock the UI whether the scrape succeeded or failed.
btn.textContent = 'Download URLs';
photosCb.disabled = false;
videosCb.disabled = false;
syncButtonState();
});
return wrapper;
}
// ─── Bootstrap ────────────────────────────────────────────────────────────────
/** Insert the scraper toolbar above the first post, at most once. */
function inject() {
  if (document.getElementById('bf-scraper-bar')) return; // already injected
  const firstPost = document.querySelector('div.post');
  if (!firstPost) return; // posts not rendered yet — caller may retry later
  firstPost.insertAdjacentElement('beforebegin', buildUI());
  log(null, 'UI injected successfully.');
}
// Try immediately; if the post list is rendered later by client-side code,
// watch the DOM and inject as soon as the first post appears.
inject();
if (!document.getElementById('bf-scraper-bar')) {
  const observer = new MutationObserver(() => {
    if (!document.querySelector('div.post')) return;
    inject();
    if (document.getElementById('bf-scraper-bar')) observer.disconnect();
  });
  observer.observe(document.body || document.documentElement, { childList: true, subtree: true });
}
})();