图片网页信息提取

将 Pixiv/Booru 图片页面中的信息以 JSON 或字符串形式导出。

当前为 2024-07-31 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴Greasemonkey 油猴子Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴Userscripts ,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name          Image Webinfo
// @name:zh-CN    图片网页信息提取
// @version       1.0.0
// @description   Extract JSON-/string-style info from Pixiv/Booru image pages.
// @description:zh-CN 将 Pixiv/Booru 图片页面中的信息以 JSON 或字符串形式导出。
// @supportURL    https://github.com/wklchris/image-webinfo
// @author        wklchris
// @match         https://danbooru.donmai.us/posts/*
// @match         https://www.pixiv.net/artworks/*
// @grant         none
// @license       MIT
// @namespace https://greasyfork.org/users/672201
// ==/UserScript==

(function() {
  'use strict';
  
  // --- Customization ---
  // Specify returned keys when copy string
  let pixiv_str_keys = [
    // 'artist',
    // 'illust_id',
    'artist_id',
    'post_date',
    'post_title'
  ];
  let booru_str_keys = [
    // 'artists',
    // 'copyrights',
    // 'characters',
    // 'general',
    'booru_id',
    'rating_level'
  ];
  
  // --- Main ---
  let artwork_data = {};
  let export_json_btn = document.createElement("button");
  export_json_btn.innerHTML = "Export WebInfo as JSON";
  let export_str_btn = document.createElement("button");
  export_str_btn.innerHTML = "Export WebInfo as String";

  function logMessage(msg) {
    console.log("[ImageWebInfo] " + msg);
  }
  
  let popMessage = 'Image webinfo exported to clipboard.';
  function copyToClipboard(s) {
    try {
      navigator.clipboard.writeText(s);
    } catch (err) {
      popMessage = 'Image webinfo export failed.';
      logMessage(popMessage);
    }
  }

  function attemptedQuerySelector(qs, callback) {
    // querySelector may not find the element when the website loads slowly.
    // Keep attempting until found or exceed the given maximum attempts.
    var attempts = 0, max_attempts = 20;
    var attemptCall = function() {
        var elem = document.querySelector(qs);
        if (elem) {
          console.log(`Found querySelector '${qs}' in ${attempts} attempts`);
          return callback(elem);
        } else {
            attempts++;
            if (attempts >= max_attempts) {
                console.warn(`Could not find: ${qs} within ${max_attempts} attempts`);
            } else {
                setTimeout(attemptCall, 4000*(attempts + 1)/max_attempts + 1000)
            }
        }
    };
    attemptCall();
  }
  
  // --- Pixiv posts ---

  if (location.hostname.includes("pixiv")) {
    function getPixivInfo(as_JSON=true) {
      let data = {};
      // Get user
      let user_tag = document.querySelector('aside div[aria-haspopup="true"] > div > a');
      data['artist'] = user_tag.childNodes[0].innerText 
      let split_user_url = user_tag.href.split('/');
      data['artist_id'] = split_user_url[split_user_url.length - 1];
      // Get post id
      let split_url = location.href.split('/');
      data['illust_id'] = split_url[split_url.length - 1];
      // Get upload date
      let timestr = document.getElementsByTagName('time')[0].innerText;
      let time_seps = {'year': '年', 'month': '月', 'day': '日'};
      let i_start = 0, i_end = -1, tmp_data = {};
      for (const [key, sep] of Object.entries(time_seps)) {
        i_start = i_end+1;
        i_end = timestr.search(sep);
        tmp_data[key] = timestr.substring(i_start, i_end);
      }
      data['post_date'] = `${tmp_data['year']}-${tmp_data['month'].padStart(2, 0)}-${tmp_data['day'].padStart(2, 0)}`;
      // Get post title
      data['post_title'] = document.querySelector('figcaption h1').innerText;

      let s = "";
      if (as_JSON == true) {
        s = JSON.stringify(data)
      } else {
        s = pixiv_str_keys.map((x, i) => data[x]).join("\t");
      }
      copyToClipboard(s);
      return data;
    }

    // Add buttons & their click functions
    let search_booru_btn = document.createElement("button");
    search_booru_btn.innerHTML = "Search artist on Booru";

    attemptedQuerySelector('aside section button[data-click-label]', (exist_dom) => {
      exist_dom.parentNode.insertBefore(export_json_btn, exist_dom.nextSibling);
      exist_dom.parentNode.insertBefore(export_str_btn, export_json_btn.nextSibling);
      exist_dom.parentNode.insertBefore(search_booru_btn, export_str_btn.nextSibling);
    });

    export_json_btn.onclick = function() {
      artwork_data = getPixivInfo(true);
      export_json_btn.style.background = 'lightgreen';
      export_str_btn.style.removeProperty("background");
    };
    export_str_btn.onclick = function() {
      artwork_data = getPixivInfo(false);
      export_str_btn.style.background = 'lightgreen';
      export_json_btn.style.removeProperty("background");
    };
    search_booru_btn.onclick = function() {
      if (!('artist_id' in artwork_data)) {
        artwork_data = getPixivInfo(false);
      }
      let _prefix = "https://danbooru.donmai.us/artists?commit=Search&search%5Border%5D=created_at&search%5Burl_matches%5D=https%3A%2F%2Fwww.pixiv.net%2Fusers%2F";
      let _url = _prefix + artwork_data["artist_id"];
      window.open(_url, "_blank");
    }
  }

  // --- Booru posts ---
  if (location.hostname.includes("booru")) {

    function getBooruInfo(as_JSON=true) {
      let data = {};
      let tag_list = document.getElementById("tag-list").querySelector("div");

      function query_tags(ul_class, sep=", ") {
        // Get tag list by ul_class name. Join by sep if multiple.
        let query_str = `ul.${ul_class} a.search-tag`;
        let _tags = tag_list.querySelectorAll(query_str);
        return Array.from(_tags).map((x, i) => x.innerText).join(sep)
      }
      data["artists"] = query_tags("artist-tag-list");
      data["copyrights"] = query_tags("copyright-tag-list");
      data["characters"] = query_tags("character-tag-list");
      data["general"] = query_tags("general-tag-list");

      // Get information (rating-levels, etc.)
      function query_by_id(dom_id, remove_str) {
        let _text = document.getElementById(dom_id).innerText;
        return _text.replace(remove_str, "").trim();
      }
      data["booru_id"] = query_by_id("post-info-id", "ID:");
      data["rating_level"] = query_by_id("post-info-rating", "Rating:");

      let s = "";
      if (as_JSON == true) {
        s = JSON.stringify(data)
      } else {
        s = booru_str_keys.map((x, i) => data[x]).join("\t");
      }
      copyToClipboard(s);
      return data;
    }

    // Add buttons
    attemptedQuerySelector('section[id="search-box"]', (exist_dom) => {
      exist_dom.parentNode.insertBefore(export_json_btn, exist_dom.nextSibling);
      exist_dom.parentNode.insertBefore(export_str_btn, export_json_btn.nextSibling);
    });

    export_json_btn.onclick = function() {
      artwork_data = getBooruInfo(true);
      export_json_btn.style.background = 'lightgreen';
      export_str_btn.style.removeProperty("background");
    };
    export_str_btn.onclick = function() {
      artwork_data = getBooruInfo(false);
      export_str_btn.style.background = 'lightgreen';
      export_json_btn.style.removeProperty("background");
    };
  }

})();