Red 40

e6* downloader without a memory leak

// ==UserScript==
// @name            Red 40
// @description     e6* downloader without a memory leak
// @author          1fz54ARh0m8g0KUYzqui
// @license         Unlicense
// @version         3.0
// @match           https://e621.net/*
// @match           https://e6ai.net/*
// @match           https://e926.net/*
// @grant           GM.getValue
// @grant           GM.setValue
// @grant           GM_download
// @grant           GM_log
// @run-at          document-end
// @namespace https://greasyfork.org/users/1518908
// ==/UserScript==
// Verified working on e621ng 25.09.17
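
// Note on the grants above: the script mixes the asynchronous GM.* value API
// with the legacy synchronous GM_* download/log API. Tampermonkey and
// Violentmonkey support both styles side by side.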

// A constructed object cannot be returned from an async IIFE, so these defaults
// are overwritten by the loader IIFE further down once the stored settings resolve.
// There is a brief race window before that happens.
const configuration = {
  maximumAttempts: 1,
  clientName: "",
  rememberHashes: false,
  userAgentCompliant: false,

  getHashes: async () => {
    try {
      // GM.getValue resolves asynchronously, and the value is stored as JSON text
      const retrieved = JSON.parse(await GM.getValue("hashes", "[]"));
      if (!(retrieved instanceof Array)) {
        return [];
      }

      return retrieved;
    } catch {
      return [];
    }
  },

  setHashes: async (hashArray) => {
    if (!(hashArray instanceof Array)) {
      throw new TypeError("Array required");
    }

    await GM.setValue("hashes", JSON.stringify(hashArray));
  }
};
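
// Illustrative round-trip of the helpers above (the hash is a made-up example):
//
//   await configuration.setHashes(["d41d8cd98f00b204e9800998ecf8427e"]);
//   const known = await configuration.getHashes();
//   // known -> ["d41d8cd98f00b204e9800998ecf8427e"]; malformed storage -> []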

// Page data
const currentPage = document.querySelector("nav.pagination.numbered")?.dataset.current | 0;
const finalPage = (document.querySelector("nav.pagination.numbered")?.dataset.total ?? currentPage) | 0;
const hasPosts = !!(document.querySelector("section.posts-container"));

// Locking system modified from
// https://medium.com/@chris_marois/asynchronous-locks-in-modern-javascript-8142c877baf
const metadata = {
  unlock: () => {},
  // Method shorthand, not an arrow function, so `this` is the metadata object
  lock() {
    this.promise = new Promise(resolve => this.unlock = resolve);
  },
  promise: Promise.resolve(),

  data: []
};
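
// How the lock is used below: readers await metadata.promise before touching
// metadata.data; the single writer calls metadata.lock() to swap in a pending
// promise, fills metadata.data, then calls metadata.unlock() to release waiters.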

// Coerce everything defensively, since the user can store invalid data
(async function() {
  const maximumAttempts = Math.max((await GM.getValue("maximumAttempts", 3)) | 0, 1);
  const clientName = String(await GM.getValue("clientName", "e129"));
  const rememberHashes = !!(await GM.getValue("rememberHashes", false));
  const userAgentCompliant = !!(await GM.getValue("userAgentCompliant", true));

  configuration.maximumAttempts = maximumAttempts;
  configuration.clientName = clientName;
  configuration.rememberHashes = rememberHashes;
  configuration.userAgentCompliant = (userAgentCompliant && clientName.length !== 0);
})();
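
// These four settings live in the script manager's value storage; Tampermonkey
// and Violentmonkey expose an editor for stored values (the exact UI varies),
// so e.g. raising maximumAttempts to 5 allows more retries per file.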

// A reusable retry helper proved slow and fragile, and with only two retryable
// functions the abstraction is not worth it. (FireMonkey also had issues with it.)
async function _download(url, currentAttempt) {
  url = String(url);
  currentAttempt = Math.max(currentAttempt|0, 1);

  // Guess the file name
  let fileName = new URL(url).pathname.split("/").pop();

  // If the file name is invalid, just go with no extension
  if (!/^[0-9A-Za-z.]+$/.test(fileName)) {
    fileName = `download@${Date.now()}`;
  }

  GM_log(`Downloading ${fileName}.`);

  const downloadPromise = new Promise((resolve, reject) => {
    GM_download({
      url: url,
      name: fileName,
      saveAs: false,
      conflictAction: "uniquify",
      onerror: (error) => reject(error),
      onload: (response) => resolve(response)
    });
  });

  try {
    await downloadPromise;
    GM_log(`Downloaded ${fileName}.`);
  } catch {
    GM_log(`Failed to download ${fileName}. (Attempt ${currentAttempt}/${configuration.maximumAttempts})`);
    if (currentAttempt >= configuration.maximumAttempts) {
      return;
    }

    // Exponential backoff
    await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(2, currentAttempt)));
    await _download(url, currentAttempt + 1);
  }
}
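
// Worked example of the backoff above, assuming maximumAttempts = 3: a
// persistently failing URL waits 2 s after attempt 1 and 4 s after attempt 2,
// then gives up without throwing.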

// Downloads every URL in the list sequentially via _download
async function _downloadList(urlList) {
  if (!(urlList instanceof Array)) {
    throw new TypeError("Array required");
  }

  for (const url of urlList) {
    await _download(url, 1);
  }
}

// Fetches url and parses the response as an HTML Document; resolves to
// undefined once every attempt has failed.
async function _fetch(url, currentAttempt) {
  url = String(url);
  currentAttempt = Math.max(currentAttempt | 0, 1);

  let response = await fetch(url);

  if (!response.ok) {
    GM_log(`Failed to fetch ${url} (Attempt ${currentAttempt}/${configuration.maximumAttempts})`);
    if (currentAttempt >= configuration.maximumAttempts) {
      return undefined;
    }

    // Drop the reference so the failed response can be collected during the backoff
    response = undefined;

    // Exponential backoff
    await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(2, currentAttempt)));
    return await _fetch(url, currentAttempt+1);
  }

  response = await response.text();

  // new DOMParser().parseFromString(...) binds as (new DOMParser()).parseFromString(...),
  // so the whole chain works on one line and the parser instance is never retained
  const parser = new DOMParser().parseFromString(response, "text/html");
  return parser;
}

// Collects post metadata from the current page through the final page into
// metadata.data; the lock ensures only one collection runs at a time.
async function _getMetadata() {
  if (!hasPosts) {
    return;
  }

  await metadata.promise;

  if (metadata.data.length !== 0) {
    return;
  }

  metadata.lock();

  const pageCount = (finalPage + 1) - currentPage;
  GM_log(`Fetching ${pageCount} ${pageCount === 1 ? "page" : "pages"}.`);

  let pagesSkipped = 0;

  // The other parameters are unknown, so only replace the page numbers
  // A URL is needed, so provide one if there is only one page
  const baseURL = new URL(document.querySelector("a.page.last")?.href ?? "https://example.com");
  const baseURLParameters = new URLSearchParams(baseURL.search);

  if (configuration.userAgentCompliant) {
    baseURLParameters.set("_client", String(configuration.clientName));
  }

  // Keep at most one fetched page's DOM alive at a time
  for (let i = currentPage; i <= finalPage; i++) {
    let parser;

    if (i === currentPage) {
      parser = document;
    } else {
      // This should never happen
      if (baseURL.hostname === "example.com") {
        pagesSkipped++;
        continue;
      }

      baseURLParameters.set("page", i);
      baseURL.search = baseURLParameters.toString();

      // _fetch resolves to a parsed Document, or undefined if every attempt failed
      parser = await _fetch(baseURL.toString());

      if (parser === undefined) {
        pagesSkipped++;
        continue;
      }
    }

    const pagePosts = parser.querySelector("section.posts-container")?.querySelectorAll("article");

    // A human-verification page may be returned instead; nothing can be done then.
    // Optional chaining yields undefined, not null, when the container is missing.
    if (pagePosts === undefined) {
      pagesSkipped++;
      continue;
    }

    for (const post of pagePosts) {
      // Tags are split by spaces, so split here
      const tags = post.dataset.tags.split(' ');

      const postData = {
        id: post.dataset.id | 0,
        extension: post.dataset.fileExt,
        md5: post.dataset.md5,
        tags: tags.map((e) => e.replaceAll('_', ' ')),
        url: post.dataset.fileUrl
      };

      // Race condition possible, oh well
      metadata.data.push(postData);
    }
  }

  metadata.unlock();

  if (pagesSkipped === 1) {
    GM_log("1 page skipped.");
  } else if (pagesSkipped !== 0) {
    GM_log(`${pagesSkipped} pages skipped.`);
  }
}
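
// Each entry pushed into metadata.data has this shape (values invented for
// illustration):
//
//   {
//     id: 123456,
//     extension: "png",
//     md5: "0123456789abcdef0123456789abcdef",
//     tags: ["tag one", "tag2"],   // underscores already replaced with spaces
//     url: "https://…"             // direct file URL from data-file-url
//   }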

const metadataExportButton = document.createElement("a");
metadataExportButton.onclick = async () => {
  if (metadataExportButton.disabled) {
    return;
  }

  metadataExportButton.disabled = true;

  let jsonData;

  if (hasPosts) {
    await _getMetadata();
    jsonData = JSON.stringify(metadata.data);
  } else {
    // This data could be cached instead of computed, but what are the benefits?
    // There must be at least 1 tag on a post
    const tagList = document.querySelector("section#tag-list").querySelectorAll("span.tag-list-name");
    const tagObject = {
      tags: []
    };

    tagList.forEach((e) => tagObject.tags.push(e.innerText.trim()));
    jsonData = JSON.stringify(tagObject);
  }

  const blob = new Blob([jsonData], {
    type: "application/json"
  });

  const a = document.createElement("a");

  a.setAttribute('download', `download@${Date.now()}.json`);
  a.setAttribute('href', window.URL.createObjectURL(blob));
  a.click();

  metadataExportButton.disabled = false;
};

const postDownloadButton = document.createElement("a");
postDownloadButton.onclick = async () => {
  if (postDownloadButton.disabled) {
    return;
  }

  postDownloadButton.disabled = true;

  await _getMetadata();
  const previousHashes = await configuration.getHashes();
  const futureHashes = [];

  // Reassigned below when the list is split into two halves
  let urlList1 = [];

  for (const e of metadata.data) {
    if (configuration.rememberHashes && previousHashes.includes(e.md5)) {
      continue;
    }

    urlList1.push(e.url);
    futureHashes.push(e.md5);
  }

  const fileCount = urlList1.length;

  // Split into odd- and even-indexed halves so two download queues run concurrently
  const urlList2 = urlList1.filter((e, i) => !!(i & 1));
  urlList1 = urlList1.filter((e, i) => !(i & 1));

  GM_log(`Downloading ${fileCount} files.`);
  postDownloadButton.innerText = `Downloading ${fileCount} files...`;

  await Promise.allSettled([_downloadList(urlList1), _downloadList(urlList2)]);
  GM_log(`Finished downloading ${fileCount} files.`);

  if (configuration.rememberHashes) {
    await configuration.setHashes(previousHashes.concat(futureHashes));
    GM_log("Saved hashes of the download list.");
  }

  postDownloadButton.disabled = false;
  postDownloadButton.innerText = "Download all posts";
};
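
// Note: both buttons are plain <a> elements, which have no native "disabled"
// property; the flag toggled above is an ad-hoc expando that only serves as a
// re-entrancy guard for the click handlers.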

const secondaryBar = document.querySelector("menu.nav-secondary.desktop");

if (hasPosts || document.querySelector("section#tag-list")) {
  const metadataExportItem = document.createElement("li");
  metadataExportItem.id = "subnav-metadata-export";
  metadataExportButton.id = "subnav-metadata-export-link";
  metadataExportButton.innerText = "Export metadata";
  metadataExportItem.appendChild(metadataExportButton);
  secondaryBar?.appendChild(metadataExportItem);
}

if (hasPosts) {
  const postDownloadItem = document.createElement("li");
  postDownloadItem.id = "subnav-post-download";
  postDownloadButton.id = "subnav-post-download-link";
  postDownloadButton.innerText = "Download all posts";
  postDownloadItem.appendChild(postDownloadButton);
  secondaryBar?.appendChild(postDownloadItem);
}