wnacgDownload

Enhanced download of wnacg

// ==UserScript==
// @name wnacgDownload
// @namespace Yr
// @version 3.0.3
// @description Enhanced download of wnacg
// @author yanagiragi
// @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.2.0/jszip.js
// @require https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.3.8/FileSaver.js
// @match http*://*.wnacg.com/photos-index-page-*.html
// @match http*://*.wnacg.org/photos-index-page-*.html
// @match http*://*.wnacg.com/photos-index-aid-*.html
// @match http*://*.wnacg.org/photos-index-aid-*.html
// @grant GM_xmlhttpRequest
// @grant GM_download
// @grant GM_openInTab
// ==/UserScript==
  17.  
  'use strict';

  // Global Defines

  // Status text shown while the download link is still being polled.
  const waitingStr = `排隊中`;
  // Status text shown once the download link has been opened.
  const downloadStr = `已下載`;
  // Delay in milliseconds between two availability checks.
  const timeout = 1000;
  // Number of consecutive successful checks required before the download is
  // started; 0 starts it on the first successful check.
  const successCountLimit = 0;
  // Milliseconds before the newly opened tab is closed; -1 keeps it open.
  const closeTabInterval = -1;
  // Milliseconds before the current window is closed; -1 keeps it open.
  const closeWindowInterval = -1;
  27.  
  28. // =====================================================
  29. // Utilities
  30. // =====================================================
  31.  
  32. // Modified from https://gist.github.com/WebReflection/df05641bd04954f6d366
  33. // with predefined object specific, for HTML entities only
  /**
   * Replaces the HTML entities for `&`, `<`, `>`, `'` and `"` (named and
   * decimal forms) with the percent-encoded form of the character each one
   * stands for, so the result can be placed in a URL without the decoded
   * character being read as URL syntax.
   *
   * The replacement values are exactly what the deprecated global `escape()`
   * returns for those five characters; they are spelled out so the function
   * does not depend on `escape()`.
   *
   * @param {string} s - Text that may contain HTML entities.
   * @returns {string} `s` with every recognised entity replaced; all other
   *   text is returned untouched.
   */
  function _Unescape (s) {
    const re = /&(?:amp|#38|lt|#60|gt|#62|apos|#39|quot|#34);/g;
    // Every alternative of `re` must have a key here, otherwise the match
    // would be replaced by the string "undefined".
    const percentEncoded = {
      '&amp;': '%26',
      '&#38;': '%26',
      '&lt;': '%3C',
      '&#60;': '%3C',
      '&gt;': '%3E',
      '&#62;': '%3E',
      '&apos;': '%27',
      '&#39;': '%27',
      '&quot;': '%22',
      '&#34;': '%22'
    };
    return s.replace(re, (entity) => percentEncoded[entity]);
  }
  53.  
  /**
   * Performs a GET request through `GM_xmlhttpRequest` with browser-like
   * headers and resolves with the response body text.
   *
   * @param {string} url - Address to request.
   * @returns {Promise<string>} Resolves with `responseText` once the request
   *   has loaded.
   * @throws {Error} (as a rejection) when the request errors, times out or is
   *   aborted; the object passed by the userscript manager is attached to the
   *   error as `details`.
   */
  function _Fetch (url) {
    return new Promise((resolve, reject) => {
      // Builds a handler that settles the promise for one failure kind, so
      // callers never wait forever on a request that did not load.
      const failWith = (reason) => (details) => {
        const failure = new Error(`_Fetch: ${reason} while requesting ${url}`);
        failure.details = details;
        reject(failure);
      };

      GM_xmlhttpRequest({
        method: "GET",
        url: url,
        headers: { // Without these headers the server answers 200 (and seldom 503) even if the service is not available
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0",
          "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
          "Accept-Language": "zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3",
          "Upgrade-Insecure-Requests": "1",
          "Pragma": "no-cache",
          "Cache-Control": "no-cache"
        },
        onload: (response) => resolve(response.responseText),
        onerror: failWith('network error'),
        ontimeout: failWith('timeout'),
        onabort: failWith('abort')
      });
    });
  }
  76.  
  /**
   * `Promise.all` with progress reporting.
   *
   * `progressCallback` is called once with 0 before anything settles and then
   * once per fulfilled task with the percentage (0-100) of tasks fulfilled so
   * far.
   *
   * @param {Iterable<Promise<*>|*>} promises - Tasks to wait for.
   * @param {function(number): void} progressCallback - Receives the
   *   completion percentage.
   * @returns {Promise<Array<*>>} Resolves with the results in input order;
   *   rejects with the first task failure, like `Promise.all`.
   */
  function _PromissAll (promises, progressCallback) {
    const tasks = Array.from(promises);
    let finished = 0;
    progressCallback(0);

    // Progress is reported from inside the chain handed to Promise.all, so a
    // rejected task is handled exactly once instead of also surfacing as an
    // unhandled rejection on a detached `.then()` chain.
    const tracked = tasks.map((task) => Promise.resolve(task).then((value) => {
      finished++;
      progressCallback((finished * 100) / tasks.length);
      return value;
    }));

    return Promise.all(tracked);
  }
  88.  
  89.  
  90. // =====================================================
  91. // Direct Download Methods
  92. // =====================================================
  93.  
  /**
   * Fetches a gallery page and extracts the link to its download page.
   *
   * @param {string} url - Address of the gallery page.
   * @returns {Promise<string>} Absolute download-page URL on `wnacg.org`,
   *   using the protocol of the current page.
   * @throws {Error} (as a rejection) when the page contains no download link.
   */
  async function ParseDownloadPageLink (url) {
    const html = await _Fetch(url);
    // `[^"]*` stops at the closing quote of the href; a greedy `.*` would run
    // on to the last quote of the line when more attributes follow.
    const match = html.match(/href="(\/download-index-aid-[^"]*)"/);
    if (!match) {
      throw new Error(`ParseDownloadPageLink: no download page link found in ${url}`);
    }
    return `${location.protocol}//wnacg.org${match[1]}`;
  }
  99.  
  /**
   * Fetches a download page and extracts the direct archive link from its
   * `down_btn ads` anchor.
   *
   * @param {string} target - Download-page URL; `wnacg.org` in it is replaced
   *   with the current host before fetching.
   * @returns {Promise<string>} Normalised absolute URL of the archive.
   * @throws {Error} (as a rejection) when the page contains no download link.
   */
  async function ParseDownloadLink (target) {
    const pageUrl = target.replace('wnacg.org', location.hostname);
    const html = await _Fetch(pageUrl);
    const matches = html.match(/down_btn ads" href="(.*?)">/);
    if (!matches) {
      throw new Error(`ParseDownloadLink: no download link found in ${pageUrl}`);
    }

    // Entities are percent-encoded rather than decoded so that the file name
    // carried in the link (used by the server to rename the download) survives.
    const href = _Unescape(matches[1]);

    // The href is prefixed with the current protocol unless it already carries
    // a scheme; leading slashes are dropped so that `//host/path` does not
    // turn into `https:////host/path`.
    const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(href);
    const rawLink = hasScheme ? href : `${location.protocol}//${href.replace(/^\/+/, '')}`;
    return new URL(rawLink).href;
  }
  106.  
  /**
   * Reads the category label of the current gallery page.
   *
   * @returns {string} The label text without its `分類:` prefix, spaces and
   *   slashes, URI-component encoded; an empty string when the label element
   *   is not present on the page.
   */
  function GetCategory () {
    const labelText = document.querySelector('.asTBcell.uwconn label')?.textContent ?? '';
    const category = labelText
      .replace(/分類:/, '')
      .replace(/ /g, '')
      .replace(/\//g, '');
    return encodeURIComponent(category);
  }
  112.  
  /**
   * Click handler of the direct-download button.
   *
   * Probes the archive URL with HEAD requests until it answers 200 or 403
   * `successCountLimit` times in a row, retrying every `timeout` milliseconds,
   * then opens the URL in a new tab. Retry count, last retry time and current
   * state are written to the status block.
   *
   * @param {Event} event - Click event; its default action is cancelled.
   * @returns {Promise<void>} Settles once the first probe has been issued; the
   *   retry loop continues through timers afterwards.
   */
  async function DirectDownload (event) {
    event.preventDefault();

    const btn = document.querySelector('#YrDownloadBtn');
    const block = document.querySelector('#YrDirectDownloadStatusBlock');
    const status = document.querySelector('#YrStatus');
    const retryCount = document.querySelector('#YrRetryCount');
    const lastRetry = document.querySelector('#YrLastRetry');
    // Drop the Google Analytics parameter: it breaks the server-side renaming.
    const url = btn.href.replace(/&_ga=.*/, '');

    let retries = 0;
    let successCount = 0;

    const Download = (onSuccessCallback, onFailCallback) => {
      // Shared failure path: any transport-level failure resets the streak of
      // successful probes and schedules another attempt.
      const failTransport = (reason) => {
        console.log(reason);
        successCount = 0;
        onFailCallback();
      };

      // TODO: should be refactored to call _Fetch later
      GM_xmlhttpRequest({
        method: "HEAD",
        url: url,
        headers: { // Without these headers the server answers 200 (and seldom 503) even if the service is not available
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
          "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
          "Accept-Language": "zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3",
          "Upgrade-Insecure-Requests": "1",
          "Pragma": "no-cache",
          "Cache-Control": "no-cache"
        },
        timeout: 1000 * 10,
        onerror: () => failTransport('onerror'),
        // A request that hits the 10 s limit fires ontimeout, not onerror;
        // without this handler the retry loop would stop silently.
        ontimeout: () => failTransport('ontimeout'),
        onabort: () => failTransport('onabort'),
        onload: (response) => {
          console.log(`successCount = ${successCount}, code = ${response.status}`);
          const isReachable = response.status === 200 || response.status === 403; // 403 means cloudflare middleware
          if (!isReachable) {
            successCount = 0;
            onFailCallback();
            return;
          }
          successCount += 1;
          if (successCount >= successCountLimit) {
            onSuccessCallback();
          } else {
            onFailCallback();
          }
        }
      });
    };

    const OnSuccess = () => {
      status.textContent = downloadStr;

      const openTab = async () => {
        console.log(url);
        // Open the download link in a new tab; combine with the browser
        // setting that downloads specific file types automatically.
        const tab = await GM_openInTab(url);

        // auto close tab
        if (closeTabInterval > 0) {
          await new Promise((resolve) => setTimeout(resolve, closeTabInterval));
          tab.close();
        }

        // Auto close window; requires the browser to allow scripts to close
        // windows, e.g. Firefox: dom.allow_scripts_to_close_windows
        if (closeWindowInterval > 0) {
          setTimeout(() => window.close(), closeWindowInterval);
        }
      };

      openTab().catch((error) => console.error('DirectDownload: failed to open download tab', error));
    };

    const OnFailed = () => {
      status.textContent = waitingStr;
      lastRetry.textContent = `${new Date().toLocaleTimeString()}`;
      retryCount.textContent = `${++retries}`;

      setTimeout(TryDownload, timeout);
    };

    const TryDownload = () => Download(OnSuccess, OnFailed);

    block.style.display = 'block';
    lastRetry.textContent = `${new Date().toLocaleTimeString()}`;
    retryCount.textContent = `${retries}`;
    status.textContent = waitingStr;

    TryDownload();
  }
  207.  
  208. // =====================================================
  209. // Download Image Methods
  210. // =====================================================
  211.  
  /**
   * Downloads one image through `GM_xmlhttpRequest` and returns its bytes as
   * a base64 string.
   *
   * @param {number} index - Position of the image, echoed in the result.
   * @param {string} url - Address of the image.
   * @returns {Promise<{index: number, base64: string, extension: string}>}
   *   `base64` has no `data:` prefix (JSZip expects the bare payload);
   *   `extension` is taken from the last path segment of `url`, ignoring any
   *   query string or fragment, and is `bin` when that segment has no dot.
   * @throws {Error} (as a rejection) when the request errors, times out, is
   *   aborted, or answers with an HTTP status of 400 or above.
   */
  function GetImageBase64 (index, url) {
    return new Promise((resolve, reject) => {
      const fileName = url.split(/[?#]/)[0].split('/').pop();
      const dotAt = fileName.lastIndexOf('.');
      const extension = dotAt >= 0 ? fileName.substring(dotAt + 1) : 'bin';

      const failWith = (reason) => () => {
        reject(new Error(`GetImageBase64: ${reason} for image ${index} (${url})`));
      };

      GM_xmlhttpRequest({
        method: "GET",
        url: url,
        // x-user-defined keeps every byte as one character, so the raw bytes
        // can be recovered from the low 8 bits of each char code.
        overrideMimeType: 'text/plain; charset=x-user-defined',
        onload: (response) => {
          if (response.status >= 400) {
            // An error page must not end up in the archive as an image.
            failWith(`HTTP ${response.status}`)();
            return;
          }

          const responseText = response.responseText;
          let binary = "";
          for (let i = 0; i < responseText.length; i++) {
            binary += String.fromCharCode(responseText.charCodeAt(i) & 255);
          }

          // Note there is no 'data:image/jpeg;base64,' prefix, due to JSZip
          const src = btoa(binary);

          console.log(`Downloaded: ${index}.${extension}, src=${url}`);
          resolve({ 'index': index, 'base64': src, 'extension': extension });
        },
        onerror: failWith('network error'),
        ontimeout: failWith('timeout'),
        onabort: failWith('abort')
      });
    });
  }
  236.  
  /**
   * Packs downloaded images into a zip archive and hands it to `saveAs`.
   *
   * Every image is stored as `<index>.<extension>` inside a folder named
   * after the title. Characters that are path separators or reserved in
   * common file systems (`\ / : * ? " < > |`) are replaced with `_` in the
   * folder and archive name, so a title cannot create nested folders.
   *
   * @param {string} title - Name for the folder and the `.zip` file.
   * @param {Array<{index: number, base64: string, extension: string}>} pics -
   *   Images to store.
   * @param {?function(Object): void} [progressCallback=null] - Receives the
   *   JSZip progress metadata (its `percent` field is logged here).
   * @returns {Promise<*>} Whatever `saveAs` returns.
   */
  async function Compress (title, pics, progressCallback = null) {
    console.log(`Start Compress`);
    const safeTitle = String(title).replace(/[\\/:*?"<>|]/g, '_').trim() || 'download';

    const zip = new JSZip();
    const folder = zip.folder(safeTitle);
    for (const pic of pics) {
      folder.file(`${pic.index}.${pic.extension}`, pic.base64, { base64: true });
    }

    const content = await zip.generateAsync({ type: "blob", streamFiles: true }, (metadata) => {
      progressCallback?.(metadata);
      console.log(`Compress Progress = ${metadata.percent.toFixed(2)} %`);
    });

    console.log(`All Done, Save to ${safeTitle}.zip`);
    return saveAs(content, `${safeTitle}.zip`);
  }
  251.  
  /**
   * Fetches one gallery index page and collects the links to its image pages.
   *
   * @param {string} url - Address of the gallery index page.
   * @returns {Promise<string[]>} `href` of the first anchor inside every
   *   `.gallary_item` element, in document order; items without an anchor
   *   are skipped.
   */
  async function FetchImageLinks (url) {
    const html = await _Fetch(url);
    const dom = new DOMParser().parseFromString(html, 'text/html');
    return Array.from(dom.querySelectorAll('.gallary_item'))
      .map((item) => item.querySelector('a'))
      .filter((anchor) => anchor !== null)
      .map((anchor) => anchor.href);
  }
  263.  
  /**
   * Fetches one image page and returns the address of the image it shows.
   *
   * @param {string} url - Address of the image page.
   * @returns {Promise<string>} `src` of the `#photo_body img` element.
   * @throws {Error} (as a rejection) naming `url` when the page has no such
   *   element.
   */
  async function FetchImageSrc (url) {
    const html = await _Fetch(url);
    const dom = new DOMParser().parseFromString(html, 'text/html');
    const img = dom.querySelector('#photo_body img');
    if (!img) {
      throw new Error(`FetchImageSrc: no image found in ${url}`);
    }
    return img.src;
  }
  270.  
  /**
   * Determines how many index pages the current gallery has.
   *
   * The count is the highest page number found among the paginator links and
   * the address of the current page. Taking the maximum does not depend on
   * where a particular link sits in the paginator, and it still counts the
   * current page when that page is the last one and has no link of its own.
   *
   * @returns {number} Number of index pages; 1 when no page number is found.
   */
  function GetPageCount () {
    const pagePattern = /photos-index-page-(\d+)-aid-/;
    const pageOf = (href) => {
      const match = pagePattern.exec(String(href));
      return match ? Number.parseInt(match[1], 10) : 0;
    };

    const linkedPages = [...document.querySelectorAll('.f_left.paginator a')]
      .map((anchor) => pageOf(anchor.href));

    return Math.max(1, pageOf(location.href), ...linkedPages);
  }
  280.  
  /**
   * Extracts the gallery id from the address of the current page.
   *
   * Two address formats are handled:
   *   https://wnacg.org/photos-index-aid-xxxxx.html
   *   https://wnacg.org/photos-index-page-1-aid-xxxxx.html
   *
   * @returns {string} The text between `-aid-` and the following `.html`.
   * @throws {Error} When the address does not contain that pattern.
   */
  function GetPageId () {
    const match = /-aid-(.*?)\.html/.exec(location.href);
    if (!match) {
      throw new Error(`GetPageId: no gallery id found in ${location.href}`);
    }
    return match[1];
  }
  288.  
  /**
   * Click handler of the image-download button.
   *
   * Walks every index page of the current gallery, resolves the image address
   * behind each thumbnail, downloads all images, packs them into a zip named
   * after the page title and saves it. Progress is written to the status
   * block.
   *
   * @param {Event} event - Click event; its default action is cancelled.
   * @returns {Promise<*>} Whatever `Compress` returns.
   * @throws {*} (as a rejection) The failure of any step, after it has been
   *   logged and shown in the status block.
   */
  async function DownloadImages (event) {
    event.preventDefault();

    const block = document.querySelector('#YrDownloadImageStatusBlock');
    const parsingPageId = block.querySelector('#YrParsingPageId');
    const downloadImageStatus = block.querySelector('#YrDownloadImageStatus');

    block.style.display = 'block';

    try {
      const pageCount = GetPageCount();
      const pageId = GetPageId();

      downloadImageStatus.textContent = `解析頁面中 ...`;

      const imageSrcs = [];
      for (let i = 1; i <= pageCount; ++i) {
        parsingPageId.textContent = `第 ${i} 頁`;
        // Stay on the mirror the user is browsing: the script runs on several
        // hosts and a hard-coded one may be unreachable from here.
        const url = `${location.origin}/photos-index-page-${i}-aid-${pageId}.html`;
        const links = await FetchImageLinks(url);
        const srcs = await Promise.all(links.map((link) => FetchImageSrc(link)));
        imageSrcs.push(...srcs);
        console.log(url, srcs); // for debug
      }

      parsingPageId.textContent = `已完成, ${pageCount} 頁`;

      const tasks = imageSrcs.map((src, idx) => GetImageBase64(idx, src));
      const images = await _PromissAll(tasks, (progress) => {
        // Math.round: the percentage round-trips through floating point and
        // truncation could show one image too few.
        const finished = Math.round(0.01 * progress * tasks.length);
        downloadImageStatus.textContent = `解析 ${finished} / ${tasks.length} 圖片中 ...`;
      });

      const title = document.querySelector('#bodywrap h2').textContent;
      return await Compress(title, images, (metadata) => {
        downloadImageStatus.textContent = `壓縮中 (${metadata.percent.toFixed(2)} %)`;
      });
    } catch (error) {
      // Surface the failure instead of leaving the last progress text on
      // screen, then let the rejection propagate.
      console.error(error);
      downloadImageStatus.textContent = `下載失敗: ${error?.message ?? error}`;
      throw error;
    }
  }
  328.  
  329. // =====================================================
  330. // General Setups
  331. // =====================================================
  332.  
  /**
   * Resolves the archive link of the current gallery and adds the
   * direct-download button plus its (initially hidden) status block to the
   * thumbnail cell.
   *
   * The category is inserted in square brackets directly after `?n=` in the
   * archive link.
   *
   * @returns {Promise<void>}
   * @throws {Error} (as a rejection) when the thumbnail cell is missing, or
   *   when resolving the archive link fails.
   */
  async function SetupDirectDownloadButton () {
    const category = GetCategory();
    const downloadPageLink = await ParseDownloadPageLink(location.href);
    const parsedLink = await ParseDownloadLink(downloadPageLink);
    const downloadLink = parsedLink.replace(/\?n=/, `?n=[${category}]`);

    console.log(`downloadPageLink = ${downloadPageLink}`); // for debug!
    console.log(`downloadLink = ${downloadLink}`); // for debug!

    // setup DOMs
    // The href is assigned through the DOM below instead of being spliced into
    // the markup, so no character of the URL can end the attribute early.
    const downloadZipBtnElement = `<a id="YrDownloadBtn" class="btn" style="width:130px;" target="_blank" rel="noreferrer noopener">直接下載 (原生壓縮)</a>`;
    const statusElement = `
  <div id="YrDirectDownloadStatusBlock" style="display: none;">
  <div>重試次數: <span id="YrRetryCount"></span></div>
  <div style="padding-bottom: 3px;">目前狀態: <span id="YrStatus" style="color: blueviolet; font-weight: bold; font-size: 1.5em;"></span></div>
  <div>最後重試時間: <span id="YrLastRetry"></span></div>
  </div>`;

    const root = document.querySelector('.asTBcell.uwthumb');
    if (!root) {
      throw new Error('SetupDirectDownloadButton: element ".asTBcell.uwthumb" not found');
    }
    root.insertAdjacentHTML('beforeend', downloadZipBtnElement);
    root.insertAdjacentHTML('beforeend', statusElement);

    const downloadZipBtn = document.querySelector('#YrDownloadBtn');
    downloadZipBtn.href = downloadLink;
    downloadZipBtn.addEventListener('click', DirectDownload);
  }
  357.  
  /**
   * Adds the image-download button plus its (initially hidden) status block
   * to the thumbnail cell and wires the button to `DownloadImages`.
   *
   * @returns {Promise<void>}
   */
  async function SetupDownloadImageButton () {
    // setup DOMs
    const downloadImageBtnElement = `<a id="YrDownloadImageBtn" class="btn" style="width:130px;" target="_blank" rel="noreferrer noopener" href=#>直接下載 (網站圖片)</a>`;
    const statusElement = `
  <div id="YrDownloadImageStatusBlock" style="display: none;">
  <div>解析頁面: <span id="YrParsingPageId" style="color: blueviolet; font-weight: bold;"></span></div>
  <div style="padding-bottom: 3px;">目前狀態: <span id="YrDownloadImageStatus" style="color: blueviolet; font-weight: bold;"></span></div>
  </div>`;

    const root = document.querySelector('.asTBcell.uwthumb');
    root.insertAdjacentHTML('beforeend', downloadImageBtnElement);
    root.insertAdjacentHTML('beforeend', statusElement);

    const downloadImageBtn = document.querySelector('#YrDownloadImageBtn');
    downloadImageBtn.addEventListener('click', DownloadImages);
  }
  373.  
  /**
   * Entry point: adds the direct-download button, then the image-download
   * button.
   *
   * The two setups are isolated from each other: the first one needs network
   * requests and may fail, which must not prevent the second button from
   * being added. They still run one after the other so the buttons keep
   * their order on the page.
   *
   * @returns {Promise<void>} Always fulfils; setup failures are logged.
   */
  async function Run () {
    try {
      await SetupDirectDownloadButton();
    } catch (error) {
      console.error('wnacgDownload: direct download button setup failed', error);
    }

    try {
      await SetupDownloadImageButton();
    } catch (error) {
      console.error('wnacgDownload: image download button setup failed', error);
    }
  }

  // Run() handles its own failures, so the returned promise cannot reject.
  Run();