// ==UserScript==
// @name Eza's Image Glutton
// @namespace https://inkbunny.net/ezalias
// @homepage https://greasyfork.org/en/users/4876-ezalias
// @author Ezalias
// @description Redirects to high-res images on gallery sites, skipping past descriptions and comments
// @license MIT
// @license Public domain / No rights reserved
// @include /^https*://www\.furaffinity\.net/(view|full)/.*/
// @include https://inkbunny.net/submissionview.php*
// @include https://inkbunny.net/s/*
// @include http://gelbooru.com/*page=post&s=view*
// @include http://youhate.us/*page=post&s=view*
// @include https://youhate.us/*page=post&s=view*
// @include http://www.gelbooru.com/*s=view*
// @include https://www.gelbooru.com/*s=view*
// @include http://gelbooru.com/*s=view*
// @include https://gelbooru.com/*s=view*
// @include http://danbooru.donmai.us/posts/*
// @include https://danbooru.donmai.us/posts/*
// @include http://*.tumblr.com/image/*
// @include https://*.tumblr.com/image/*
// @include /^http(s|)://e(621|926)\.net/post/show*//
// @include http://*.deviantart.com/art/*
// @include https://*.deviantart.com/art/*
// @include /^https?://w*\.*hentai-foundry\.com/pictures/user/.*/[0-9]*/.*/
// @include /^https*://www\.sofurry\.com/view/*//
// @include https://www.weasyl.com/*
// @include http://www.y-gallery.net/view/*
// @include http://rule34.paheal.net/post/view/*
// @include https://rule34.paheal.net/post/view/*
// @include https://rule34.xxx/index.php?page=post*
// @include http://rule34hentai.net/post/view/*
// @include /^https*://derpibooru.org/.*/
// @include http://*.booru.org/*s=view*
// @include http://mspabooru.com/*s=view*
// @include http://safebooru.org/*s=view*
// @include http://www.majhost.com/cgi-bin/gallery.cgi?i=*
// @include http://e-hentai.org/s/*
// @include https://e-hentai.org/s/*
// @include http://nijie.info/view.php?id=*
// @include http://www.pixiv.net/member_illust.php?mode=medium&illust_id=*
// @include https://www.pixiv.net/member_illust.php?mode=medium&illust_id=*
// @include http://*sleepymaid.com/*
// @include https://*.sankakucomplex.com/post/*
// @include http://*.bronibooru.com/posts/*
// @include https://luscious.net/c/*
// @include https://luscious.net/pictures/c/*
// @include http://imageboard.neko-sentai.com/post/*
// @include https://uberbooru.com/posts/*
// @include https://www.furiffic.com/*/view/*
// @include https://beta.furrynetwork.com/artwork/*
// @include http://hiccears.com/picture.php?pid=*
// @include https://hiccears.com/picture.php?pid=*
// @include http://www.hiccears.com/picture.php?pid=*
// @include https://www.hiccears.com/picture.php?pid=*
// @include http://www.jabarchives.com/main/post/*
// @include https://www.jabarchives.com/main/post/*
// @exclude http://www.deviantart.com/users/outgoing?*
// @exclude *#dnr
// @version 1.27.6
// ==/UserScript==
// Any single-image submission will redirect to the full-size image. On multi-image submissions, every page except the first will redirect to its full-size image.
// If you go "back" to the normal gallery page (to favorite the image, read its description, leave a comment, etc.) then this script will not send you forward again.
// https://greasyfork.org/scripts/4713-eza-s-image-glutton
// https://sleazyfork.org/scripts/4713-eza-s-image-glutton
// TO DO:
// for modify_tumblr: for photoset pages (but everywhere, to be safe) make unlinked images link to themselves. I want nice, clean, chronological tabs for multi-image comics.
// ugh. test without adblock enabled.
// modify_furaffinity to change prev/next/fav links with pre-appended #dnr. not raw html fiddling: use the DOM and getElementsByType or whatever. thingy.href=url_plus_dnr.
// flickr? maybe separately. that whole site is a mess. also full-size images are sometimes gigantic, like dozens of megabytes.
// Consider changing some @includes to @match.
// http://thehentaiworld.com/hentai-doujinshi/theres-something-about-sakura-naruto/ ? I already do rule34; there's no pretending this is just about "art."
// Almost deserves a more Pixiv Fixiv-like fix. Maybe just a link dump like that DeviantArt gallery script?
// Greasyfork install page as options page?
// This would work faster if I could delay or prevent the loading of images. E.g., execute script before loading page, define CSS that doesn't download embedded images, wait for page to load, scrape image_url, and then redirect as usual. Since the script wouldn't trigger on #dnr (which I should do as an @exclude, I guess) images would load as usual when you clicked 'back.'
// This thought is mostly driven by opening a bunch of e.g. Gelbooru links all at once. They spend long enough loading that the full-size images are usually half-done before the redirect happens.
// Escape function in JS is encodeURI. Also use in Tumblr Scraper, where we need 'safe' URLs as tag IDs.
// FurAffinity stories redirect to thumbnail, e.g. http://www.furaffinity.net/view/15903888/ - might need to break out a whole complex function here.
// http://seiga.nicovideo.jp/seiga/im4507046 ?
// Nijie.info support might be missing out on multipage submissions? I don't even have an account.
// My Nijie support is basically nonexistent because I didn't have an account. Turns out they're more like Pixiv now, including multi-image posts. This is problematic. (Animations work, though.)
// Make undersized images link to themselves on imgur.
// Eza's image glutton as described on http://cuddle.horse/post/109728993805/a-few-browser-extensions-that-make-furaffinity-a -
// Eza’s Image Glutton: This affects websites beyond just FA but is a unique tool for powerbrowsing and such. When you open a page with a single image it skips all comments and descriptions and just shows the image in the highest quality possible. If you want to see all the items that are hidden all you have to do is go back a page.
// good rundown from a third party. 'A page with a single image' is clearer than 'gallery submission page.'
// http://www.pixiv.net/member_illust.php?mode=medium&illust_id=52107168 404s.
// goes to http://i1.pixiv.net/img-original/img/2015/08/21/19/14/00/52107168_p0.jpg
// should be http://i1.pixiv.net/img-original/img/2015/08/21/19/14/00/52107168_p0.png
// Yet another filetype mismatch. Great!
// Single images submitted as manga don't work. E.g. http://www.pixiv.net/member_illust.php?mode=medium&illust_id=52465388 and that artist's other works.
// It's impossible to find this script after Greasyfork fucked over "adult" scripts. SleazyFork does not appear in search engines, at all. The empty GreasyFork page doesn't show up. All that people will see if they don't already know where to go is userscripts-mirror.org, which was last updated in twenty-fucking-twelve. This is completely unacceptable.
// On the other hand, Yahoo also won't find 'greasyfork ezalias,' so what the fuck. Google finds it. Yahoo just sucks.
// Apparently Chrome doesn't redirect properly - there's no 'back' functionality. Grand.
// ... there's back functionality if and only if the page finishes first. Fuck Chrome.
// Fixing what ain't broken - I could "//@include *" and keep the switch-case business, if only to skirt GreasyFork's dumb "adult" rules.
// Ugh, I'd probably be marked "adult" just for mentioning that it works on "adult" sites. Like e621... but not tumblr or pixiv. What even.
// Could also generalize all extract_image_url_after guff to a for-loop over an array of search strings.
// view-source:https://inkbunny.net/submissionview.php?id=1325657 - inkbunny 'friends only' page -
// Finally on Pawoo, let's add that. Ugh - Twitter-style presentation. Gimme a gallery grid.
// Might be more like that Tumblr bare-image resizer, which this script should also mimic. Bare images are linked. Are those max size?
// https://curate.mastodon.art/gallery/media_attachments/files/000/088/717/original/525713c353f7db21.jpg
// https://files.mastodon.social/media_attachments/files/000/963/359/original/ccfa625b17d2e1d0.png
// https://files.mastodon.social/media_attachments/files/000/963/345/original/d23781d67022b95f.png
// Compare:
// https://pawoo.net/media/r89DgtVaqQeydHyPb1g
// https://img.pawoo.net/media_attachments/files/001/246/940/original/b4a22a1ea4433a9f.jpg
// Totally arbitrary, no way to figure one from another. Hmm.
// The "&amp;" thing for SankakuComplex is called an "entity reference." See about decoding that better.
// Reload_if not working?
// FF57 broke FA. God dangit. Probably a Greasemonkey/WebExtensions problem, but fix it anyway. Broke Pixiv as well. I need an FF57 profile.
// Well, great. GreaseMonkey saved the trouble of juggling profiles by breaking FFDE55 as well.
// Greasemonkey 4 is fucked. They removed the menu items, because they removed the menu.
// https://www.pixiv.net/member_illust.php?mode=medium&illust_id=66547435 seems broken.
// https://www.hentai-foundry.com/pictures/user/teku/566798/Daisy-Darret-and-Penny#dnr loads an image from the description?
// Fiddled with @includes to consolidate http/https under http*. Seems to work? Might open execution to e.g. http://maliciousdomain.com/?//gelbooru etc.
// Yeah, it's an attack vector worth worrying about. Admittedly the switch case is on document.domain - the attack site would have to end correctly.
// Goddammit, this is possible anyway, since http://*.gelbooru.org matches http://maliciousdomain.com?.gelbooru.org as-is!
// Aaaugh Greasemonkey needs a goddamn domain inclusion method besides string-matching.
// Twitter broke videos - even on mobile. Fuck that, give me the MP4. You can't show me a video and pretend I don't have it.
// <video preload="none" playsinline="" style="width: 100%; height: 100%; position: absolute; transform: rotate(0deg) scale(1);" poster="https://pbs.twimg.com/media/DdFx3A9VAAEEqA9.jpg"><source src="https://video.twimg.com/amplify_video/995698796209225728/pl/qWXZDSRj7npFBnoS.m3u8?tag=2" type="application/x-mpegURL"><source src="https://video.twimg.com/amplify_video/995698796209225728/vid/720x720/hjcLz5e56ojDYS8j.mp4?tag=2" type="video/mp4"></video>
// And here's the "button" that steals clicks:
// <div style="position: relative; width: 100%; height: 100%; background-color: black;"><video preload="none" playsinline="" style="width: 100%; height: 100%; position: absolute; transform: rotate(0deg) scale(1);" poster="https://pbs.twimg.com/media/DdFx3A9VAAEEqA9.jpg"><source src="https://video.twimg.com/amplify_video/995698796209225728/pl/qWXZDSRj7npFBnoS.m3u8?tag=2" type="application/x-mpegURL"><source src="https://video.twimg.com/amplify_video/995698796209225728/vid/720x720/hjcLz5e56ojDYS8j.mp4?tag=2" type="video/mp4"></video></div>
// Pixiv animations are still broken, and inconsistently. I hate their new pages. They went from the nicest possible site to one of the worst.
// Deviantart broke again. What a shock.
// Paheal broke again, again. Seems related to a new feature this time: they added resizing options.
// Since I'm just leafing through HTML (usually), can I jump to the image /before/ trying to load the page? GreaseMonkey has a wonky option for running the script before the page runs, but I don't think we get all the HTML first. Maybe... maybe AJAX the page we're on? Like, @RunAtStart or whatever, then create a little blank page, then grab the URL via XmlHTTPgetObject or whatever, then read the HTML as responseText. The trouble (I expect) would be going back to the normal page when someone hits 'back.' This script shouldn't run... but any browser will probably have cached the fake page.
// Owyn Tyler has a ridiculously replete script with similar goals called Handy Just Image - http://userscripts.org/scripts/show/166494
// The supported-site list is waaay longer than mine, and/but his goals are more complex. Image Glutton exists only to deliver the image.
// He's having trouble with back-trapping, though. His solution sounds absurdly complex even compared to mine. Test the script and recommend help if possible.
// Changes since last upload:
// Fixed a bad redirect on Paheal... a different one, this time.
// Global variables, for simplicity. They are shared by the main script body and by the helper / scraper functions further down.
var image_url = ''; // Location of the full-size image to redirect to. An empty string means "do not redirect" - emptying it is how the scrapers fail safe.
var wait_for_dnr = false; // Some site URLs use "#" liberally. When this flag is true, a plain "#" in the address bar no longer blocks the redirect - only "#dnr" does.
var simple_redirect = false; // Some domains are kicking back my JS redirect (for native referral), so when this is true do a naive window.location.href = url instead.
var page_failed = false; // Set by reload_if() when the page 503s or otherwise forces us to reload; the main body then waits a moment and reloads instead of redirecting.
var interval_handle; // In case we need to set an interval (Furrynetwork polling), this is the global handle used to kill it with clearInterval.
// Detect the site from document.domain and extract the image URL into the global image_url.
// Each case either scrapes the page's HTML with extract_image_url_after(), reads the DOM directly,
// or hands off to a scrape_*() function defined further down. Cases may also set wait_for_dnr,
// simple_redirect, or (via reload_if) page_failed. Leaving image_url empty means "do not redirect."
switch( document.domain.replace( 'www.', '' ) ) { // Remove "www" to avoid cases where both example.com and www.example.com are supported.

    ////////// Simple extract_image_url_after sites
    case 'e621.net':
    case 'e926.net':
        image_url = document.getElementById( 'highres' ).href;
        break;
    case 'weasyl.com':
        extract_image_url_after( '<div id="detail-art">', '/' ); // also redirects to plaintext/HTML on stories, haha
        break;
    case 'y-gallery.net': // Site is broken at the moment, but they say they're coming back eventually
        extract_image_url_after( 'id="idPreviewImage"', 'http://' );
        break;
    case 'rule34.xxx':
        extract_image_url_after( '>Edit</a></li>', '//' );
        break;
    case 'derpibooru.org':
        extract_image_url_after( ' View</a>', '//' );
        simple_redirect = true;
        break;
    case 'chan.sankakucomplex.com':
    case 'idol.sankakucomplex.com':
        extract_image_url_after( '<li>Original:', '//' );
        // The URL is cut out of innerHTML, where every "&" inside an attribute value is serialized as the
        // entity reference "&amp;". Decode all of them (not just the first) so the query string survives.
        image_url = image_url.replace( /&amp;/g, '&' );
        break;
    case 'furiffic.com':
        extract_image_url_after( 'onload="$', '//' ); // Not using og:image because a different URL causes the image to re-load if the user hits Back
        break;
    case 'jabarchives.com':
        extract_image_url_after( 'class="group1"', '/main' );
        break;

    ////////// Slightly complicated extract_image_url_after sites
    case 'rule34hentai.net': // wtf? even 'view image' returns text nonsense. images save fine. I bet the site's lying about the mime type. I can't fix that, so 'meh' for now. sorry.
        extract_image_url_after( 'shm-zoomer', '/_images/' );
        wait_for_dnr = true;
        reload_if( '<h2>Rate limit hit' );
        break;
    case 'rule34.paheal.net':
        extract_image_url_after( 'Links</th>', 'http' );
        wait_for_dnr = true;
        break;
    case 'majhost.com':
        image_url = document.getElementsByTagName( "img" )[0].src; // first and only <img> tag
        break;
    case 'luscious.net':
        image_url = document.getElementsByClassName( 'icon-download' )[0].href;
        wait_for_dnr = true;
        break;
    case 'gelbooru.com':
    case 'youhate.us':
        extract_image_url_after( "og:image", '//' );
        simple_redirect = true;
        break;

    ////////// Simple custom sites
    case 'sofurry.com':
        image_url = window.location.href.replace('sofurry.com/view/','sofurryfiles.com/std/content?page=');
        if( document.body.outerHTML.indexOf( '<div id="sfContentImage' ) < 0 ) { image_url = ''; } // Do not redirect from stories
        if( document.body.outerHTML.indexOf( '<div class="sf-story"' ) > 0 ) { image_url = ''; } // Really do not redirect from stories
        break;
    case 'danbooru.donmai.us':
        extract_image_url_after( '% of original (', '/data/' ); // resized images will say "X% of original (view full" or something like that
        if( image_url === '' ) {
            extract_image_url_after( 'twitter:image', 'http://' ); // otherwise just grab the preview image (also works on pages claiming you need Gold)
            image_url = image_url.replace( '/sample/sample-', '/' ); // if the preview-sized image is a sample, fix that - this sometimes fails for PNG images with JPG previews
        }
        break;
    case 'furaffinity.net': // This is a mess because I'm trying not to redirect from stories / music... but FA kindly links the thumbnail images for those.
        reload_if( 'center;">Error 503' );
        extract_image_url_after( '<div class="alt1 actions', '//' ); // Works even when not signed in
        // Choosing not to redirect based on content type is impossible because FA's tags and categories are a complete joke. Nothing is reliable.
        if( document.getElementsByTagName('html')[0].innerHTML.indexOf('/themes/beta') > -1 ) { // Total kludge. If beta theme, use full-url, audio files be damned.
            image_url = unsafeWindow.full_url;
        }
        break;
    case 'e-hentai.org':
        image_url = document.getElementById( 'img' ).src;
        break;
    case 'nijie.info': // Lord, I don't even care about this site.
        extract_image_url_after( 'name="twitter:image"', 'http://' ); // some images are behind some sort of barrier, so let's grab the twitter-size image instead...
        image_url = image_url.replace( '/sp/', '/' ); // ... and drop the /sp/ to get the full-size URL.
        break;
    case 'sleepymaid.com':
    case 'yay.sleepymaid.com':
        image_url = document.getElementById( 'the-image' ).src;
        if( document.getElementById( 'next' ) ) { image_url = ''; } // Don't redirect on comic pages
        break;
    case 'imageboard.neko-sentai.com':
        image_url = document.getElementById( 'main_image' ).src;
        break;
    case 'uberbooru.com':
        extract_image_url_after( 'Size: <a', '/data' );
        if( image_url.indexOf( '<' ) > -1 ) { image_url = ''; } // Uberbooru is having back-end problems with missing images. Don't redirect if we grabbed HTML instead.
        break;
    case 'hiccears.com':
        extract_image_url_after( 'href="./upl0ads', './' ); // Wow, long garbage names. Can we use Download titles? Apparently not.
        break;
    case 'hentai-foundry.com':
        // NOTE(review): the "before" marker is a single space, which will match very early in the page, so this is
        // effectively "the first //pictures. URL in the document" - confirm that marker is what was intended.
        extract_image_url_after( ' ', '//pictures.' );
        if( image_url.indexOf( "';" ) > 0 ) { image_url = image_url.substring( 0, image_url.indexOf( "';" ) ) } // Singlequote terminate, more or less - only on resizable images
        reload_if( '<h1>An error occurred.' );
        break;

    ////////// Sites complex enough to shove into a function down below
    case 'inkbunny.net':
        scrape_inkbunny();
        break;
    case 'pixiv.net':
        scrape_pixiv();
        break;
    case 'mspabooru.com':
    case 'safebooru.org':
    case 'bronibooru.com':
        scrape_booru();
        break;
}
////////// Holdovers from the previous method: domains that don't neatly conform to the document.domain switch above,
////////// so they are recognised by looking for a substring anywhere in the address bar instead.
if( address_bar_contains( 'tumblr.com' ) ) {
    extract_image_url_after( '"og:image"', 'http' );
}
if( address_bar_contains( 'deviantart.com' ) ) {
    scrape_deviantart();
    wait_for_dnr = true;
}
if( address_bar_contains( '.booru.org' ) ) {
    scrape_booru();
}
if( address_bar_contains( 'beta.furrynetwork.com' ) ) {
    // This site's designers are loons: the content only shows up in the DOM later, so poll for it twice a second.
    interval_handle = setInterval( scrape_furrynetwork, 500 );
}

// If the page didn't load properly (503 or another 'please reload' error flagged by reload_if),
// skip the redirect this time and reload after a short random pause instead.
if( page_failed ) {
    image_url = ''; // do not redirect this time
    const reload_delay_ms = Math.floor((Math.random() * 10) + 1) * 1000; // 1s-10s pause
    setTimeout( function reload_page() { location.reload(); }, reload_delay_ms );
}
// Don't redirect if the filetype is obviously not an image. SWF, TXT, MP3, etc.
// Arguably include Webm? Opening many Gelbooru webms in tabs is a cacophony.
// It's tedious to detect flash, story, and music pages on every website supported, so instead let's just cancel redirection based on those file extensions.
// This could get silly, with text, music, and video formats galore: txt, doc, pdf, swf, mp3, mp4, webm, midi, mid, wav, ...
// A site case can leave image_url as something other than a string (e.g. a page variable that turned out not to exist).
// Treat that as "nothing to redirect to" instead of crashing on the string methods below.
if( typeof image_url !== 'string' ) { image_url = ''; }
//var not_images = [ 'mp3', 'swf', 'txt', 'webm', 'mp4', 'docx', 'pdf', 'doc', 'rtf', 'midi', 'mid', 'wav', 'flv', 'cab' ];
var not_images = [ 'mp3', 'swf', 'txt', 'mp4', 'docx', 'pdf', 'doc', 'rtf', 'midi', 'mid', 'wav', 'flv', 'cab' ];
// Extension of the URL exactly as given: everything after the last dot, e.g. "png". Lowercased so ".SWF" counts too.
var ext = image_url.substring( image_url.lastIndexOf( '.' ) + 1, image_url.length ).toLowerCase();
// Extension of the URL with any ?query or #fragment removed, so "clip.mp4?tag=2" is still recognised as "mp4".
var path_only = image_url.split( /[?#]/ )[0];
var path_ext = path_only.substring( path_only.lastIndexOf( '.' ) + 1 ).toLowerCase();
// If either extension is in our blacklist, don't redirect. (Checking both keeps every case the old check caught.)
if( not_images.indexOf( ext ) > -1 || not_images.indexOf( path_ext ) > -1 ) { image_url = ''; }
// Oh right. Doesn't work on FA because FA points to the icon.
// Having defined image_url based on the page's HTML or DOM, modify the current URL to prevent back-traps, then redirect to that full image.
var do_we_redirect = true; // If we've come this far we'll probably go to an image.
if( !image_url ) { do_we_redirect = false; } // Don't redirect to an empty (or missing) URL. Emptying this string is how some functions fail safe.
if( !wait_for_dnr && address_bar_contains( '#' ) ) { do_we_redirect = false; } // Don't redirect if the wait_for_dnr flag is false and there's a hash. (E.g. FA comments.)
if( address_bar_contains( '#dnr' ) ) { do_we_redirect = false; } // Don't redirect if there's a #dnr in the URL.
if( do_we_redirect ) // So much clearer than a mess of &&s and ||s.
{
    // some images don't redirect properly, even if you manually "view image" - so we append ".jpg" to URLs without extensions, forcing the browser to consider them images
    // even if this doesn't work, the new URL should just 404, which is better than the semi-modal "octet stream" dialog seen otherwise.
    if( image_url.lastIndexOf( '/' ) > image_url.lastIndexOf( '.' ) ) { image_url = image_url + '.jpg'; } // if there's not a "." after the last "/" then slap a file extension on there
    if( image_url[ image_url.length - 1 ] === '.' ) { image_url = image_url + 'jpg'; } // if the URL ends with a dot, slap a file extension on there

    // modify current location, so that when the user clicks "back," they aren't immediately sent forward again
    var modified_url = window.location.href + '#dnr'; // add do-not-redirect tag to current URL (declared here so it is no longer an implicit global)
    history.replaceState( {foo:'bar'}, 'Do-not-redirect version', modified_url ); // modify URL without redirecting. {foo:'bar'} is a meaningless but necessary state object.

    // Executing code with strings from the page has always been a mildly horrifying attack surface - hopefully this defangs it.
    // encodeURI percent-escapes double quotes and backslashes, so the URL cannot break out of the quoted string in the javascript: URL below.
    image_url = encodeURI( image_url );

    if( simple_redirect ) { window.location.href = image_url; } // This has different referral properties than clicking a link or displaying an image, so some sites 403
    else { location.assign("javascript:window.location.href=\""+image_url+"\";"); } // Pixiv-friendly redirect to full image: maintains referral, happens within document's scope.
} // end of main execution
// ----- // Functions for readability
/**
 * Extract the first doublequote-terminated string from the page's HTML that appears after a
 * unique marker and begins with a given prefix, and store it in the global image_url.
 *
 * If the marker, the prefix after it, or the terminating doublequote cannot be found, image_url
 * is left untouched, so callers can test for an unchanged (usually empty) image_url to detect failure.
 *
 * @param {string} string_before_url - unique text that occurs somewhere before the image URL
 * @param {string} url_begins_with - text the image URL starts with (e.g. '//' or 'http')
 */
function extract_image_url_after( string_before_url, url_begins_with ) {
    // Serialize the document once; every access to innerHTML re-serializes the whole page.
    var page_html = document.getElementsByTagName( 'html' )[0].innerHTML;

    var marker_index = page_html.indexOf( string_before_url ); // find a unique string somewhere before the image URL
    if( marker_index === -1 ) { return; }

    var url_start = page_html.indexOf( url_begins_with, marker_index ); // find where the image URL starts after the unique string
    if( url_start === -1 ) { return; } // without this, substring( -1, ... ) would return text from the very top of the page

    var url_end = page_html.indexOf( '"', url_start ); // find first doublequote after the image URL starts
    if( url_end === -1 ) { return; } // without this, substring( start, -1 ) would return everything *before* the URL

    image_url = page_html.substring( url_start, url_end ); // grab the image URL up to the next doublequote
}
// Shorthand for "does the current URL contain this text?" so call sites read as
// if( address_bar_contains( 'tld.com' ) ) { do tld.com stuff; } instead of a verbose indexOf comparison.
// Returns true when string_to_look_for occurs anywhere in window.location.href, false otherwise.
function address_bar_contains( string_to_look_for ) {
    const current_address = window.location.href;
    const found_at = current_address.indexOf( string_to_look_for );
    return found_at >= 0;
}
// Flag the page as failed when its HTML contains error_string (text indicating the page did not load properly).
// Sets the global page_failed to true on a match and leaves it alone otherwise; the main body reads that flag.
function reload_if( error_string ) {
    const page_html = document.getElementsByTagName('html')[0].innerHTML; // whole page as a string
    if( page_html.indexOf( error_string ) === -1 ) { return; } // error text absent: nothing to flag
    page_failed = true;
}
// ----- // Functions for individual websites (separated for being especially long)
// DeviantArt sometimes doesn't redirect until you F5. I suspect it's their fancy-pants not-actually-redirecting nonsense. Websites - stop acting stupid and just /be documents./ You are not an app.
// God damn, do I hate Deviantart. They've got half a dozen different URL structures, fifteen CDNs with incompatible directory structures, inconsistent page elements, inconsistent ways of implementing the /same/ page elements, and sometimes pages do something different based on what you clicked to get there. This is not an old site clunking along in modern times - that's the VCL. You suck by design. You suck by /choice./
// DeviantArt: prefer the "Download" link, fall back to the large image in the page HTML, and never redirect on flash pages.
// This doesn't scrape data-super-full-img because those appear for random links - avoid grabbing one from pages with small images.
// Writes its result to the global image_url (empty string = do not redirect).
function scrape_deviantart() {
    // "Download" ought to be an image, but is sometimes a page with an image on it, if you click on it.
    const download_links = document.getElementsByClassName( "dev-page-download" );
    const has_download_link = download_links.length > 0; // check the length first so we never index into an empty collection
    if( has_download_link ) {
        image_url = download_links[0].href;
    }

    // If there's no "download" link, grab the large size, defined after the preview size.
    const nothing_found_yet = ( image_url == '' );
    if( nothing_found_yet ) {
        extract_image_url_after( 'class="dev-content-normal', '//' );
    }
    // Right here: might need to handle images too small for separate preview / full sizes, because DA is a nightmare of exceptions.

    // Do not redirect on flash pages.
    const is_flash_page = document.body.outerHTML.indexOf( '<div id="flashed-in"' ) > 0;
    if( is_flash_page ) {
        image_url = '';
    }
}
// Haha, I fixed it for the 2/22/2016 redesign, and it doesn't work on my images. Only the user's own images? Ahh, probably the 'edit' options at top.
// Well... it's a shippable bug. Leave it for now, fix it with the next serious update.
// Nope, fails on https://inkbunny.net/submissionview.php?id=999328 - which isn't mine. Ditto https://inkbunny.net/submissionview.php?id=1019701 by Aogami.
// ... The old way works. Sure, whatever.
// view-source:https://inkbunny.net/submissionview.php?id=1325657 - private page / friends page
// Haha, /private_files/ vs. /files/ - literally a one-character fix. Removed the leading slash.
// Inkbunny: find the screen-sized image URL in the page, rewrite it to the full-sized URL, and store it in the global image_url.
// Also sets the global wait_for_dnr, and clears image_url on the landing page of a multi-image submission.
// History: this is the pre-2016 method. After the 2/22/2016 redesign, scraping after "class='widget_imageFromSubmission"
// and reading unsafeWindow.highdefURL were both tried, but the old way is the one that keeps working.
function scrape_inkbunny() {
    const page_html = document.body.outerHTML;

    // Find the middle of a screen-sized image URL ("files/screen/" also matches "/private_files/screen/")...
    const screen_marker_at = page_html.indexOf( 'files/screen/' );
    // ... then back up to the start of it.
    const url_start = page_html.lastIndexOf( 'https://', screen_marker_at );
    if( url_start !== -1 ) { // if that URL is found
        const url_end = page_html.indexOf( '"', url_start ); // first doublequote delimiter after the URL
        const screen_url = page_html.substring( url_start, url_end );
        // Turn screen URL into full URL - we don't care if /screen/ is already full-size, because /full/ will kindly redirect anyway.
        image_url = screen_url.replace( '/screen/', '/full/' );
    }

    wait_for_dnr = true;

    // If this page is the landing page for a multi-image submission, do not redirect.
    // Look for the 'show custom thumbnails' form (indicating a multi-page submission) and for #pictop in the
    // address (which doesn't appear on landing pages for multi-page submissions).
    // Note: we do redirect on URLs for individual pages, including the first.
    const has_thumbnail_form = page_html.indexOf( '<form id="changethumboriginal_form"' ) !== -1;
    if( has_thumbnail_form && !address_bar_contains( '#pictop' ) ) {
        image_url = '';
    }
}
// Furrynetwork is a joke because every single page has the same HTML. We have to use the DOM, but on an unknown delay, because these fools were too clever to just deliver a goddamn document.
// Furrynetwork: every page has the same HTML, so the image link has to be read from the DOM once it appears.
// This function is run repeatedly on an interval (handle kept in the global interval_handle). On the first run
// that finds a 't--reset-link' element it stops the interval, stores that link in the global image_url, and -
// unless the link is the page we are already on - tags the current URL with #dnr and redirects to it.
function scrape_furrynetwork() {
    const artwork_links = Array.from( document.getElementsByClassName( 't--reset-link' ) );
    if( artwork_links.length === 0 ) { return; } // Nothing in the DOM yet; the interval will try again.

    clearInterval( interval_handle ); // Once we detect something - anything - stop looping.
    image_url = artwork_links[0].href;
    if( image_url === window.location.href ) { return; } // Already there; don't redirect to ourselves.

    // Copy of the main redirect logic, because this runs faux-parallel and can't just 'return' into the main flow.
    const dnr_url = window.location.href + '#dnr'; // add do-not-redirect tag to current URL
    history.replaceState( {foo:'bar'}, 'Do-not-redirect version', dnr_url ); // modify URL without redirecting.
    window.location.href = image_url;
}
// Pixiv (mode=medium pages, post 22-05-2018 redesign): decide where to send the user and store it in the global image_url.
//  - Single-image submissions: the "original" URL scraped from the page's inline data.
//  - Anything not positively identified as single-image: the mode=manga version of the current URL.
//  - Pages whose HTML contains "romaji":"ugoira" (animations): empty string, i.e. do not redirect.
function scrape_pixiv() {
    // 22-05-2018 - Pixiv changed their mode=medium page.
    // Ugoku is broken because the "pixiv" object no longer exists.
    // Still fails on https://www.pixiv.net/member_illust.php?mode=medium&illust_id=68660204#dnr
    // Ah: pagecount is 10. Add a terminator.
    // https://www.pixiv.net/member_illust.php?mode=medium&illust_id=68401195#dnr - ugoku example.
    // https://www.pixiv.net/member_illust.php?mode=medium&illust_id=48788127#dnr - another.
    // Christ, does it work differently from the front page? Seems fine.
    // Trying to test with TamperMonkey / ViolentMonkey (there's only two hard problems in computer science!) and it's more like I'm testing if Pixiv works when logged-out.
    extract_image_url_after( '"original":', 'https:' );
    // image_url = image_url.replace( '\\', '' );

    let submission = window.location.href.split( '=' ).pop(); // Submission ID - #dnr won't matter; we don't redirect then - might want to grab from HTML anyway
    var page_html = document.getElementsByTagName('html')[0].innerHTML; // Page HTML as one string, serialized once.
    var illust_chunks = page_html.split( '{"illust' ); // Split by illustId definitions.
    illust_chunks.shift(); // Get rid of first element (everything prior to first illustId definition).

    var is_manga = true; // We have to start with 'true' because we're checking for pageCount:1.
    for( const chunk of illust_chunks ) { // Loop variable is declared here; the old "for( x in ... )" leaked a global x.
        // If this is the right ID and has a defined pagecount of 1, it's not a manga. Sometimes the ID shows up in other places but there's no associated pagecount.
        // The trailing comma in '"pageCount":1,' is the terminator that keeps pageCount 10, 11, ... from matching.
        if( chunk.indexOf( 'Id":"' + submission ) > -1 && chunk.indexOf( '"pageCount":1,' ) > -1 ) { is_manga = false; }
    }
    if( is_manga ) { image_url = window.location.href.replace( 'mode=medium', 'mode=manga' ); }
    if( page_html.indexOf( '"romaji":"ugoira"' ) > -1 ) { image_url = ''; } // Don't redirect on animated images ("ugoira").

    // Restoring a link to a download is nontrivial. I don't even know what the old URL format looks like.
    // In a pinch, I could restore an old version of my Firefox profile, then check the history for instances of 'ugoira600x600.zip'.
    // One of the randomly-named files appearing in FF's debugger references ugoiras, but only to distinguish illust/manga/ugoira... submissions? Links? Eh.
    // https://s.pximg.net/www/js/bundle/2.0907c36535b58a7e6ec6.js
    // Check if any random single-letter variable corresponds to the old "pixiv" object. Nope, none of them make it to the console's scope.
    // content.something? Mostly default DOM stuff, surely. No dice.
    // "Memory" under F12 doesn't provide an easy list of files accessed or cached, sadly. (Nor is anything cached as "ugoira zip" appearing in Everything.)
    // Viewing as Dominators points to... why can't I select this text, Mozilla? Ugh. It's a JS file named "runtime.28c4" etc.
    // https://s.pximg.net/www/js/spa/runtime.28c4035b820eee811d9d.js?1 under Debugger. Use the Prettify Source button at the bottom, labeled "{ }".
    // If I need to fetch this JS file it won't work due to CORS.
    // This script appends stuff to the DOM. Maybe just inspect the DOM, and fuck about with document.getwhateverybwhatever.
    // <canvas class="Jj6cgRQ _19hYROS" style="width: 600px; height: 411px; background-image: url("https://i.pximg.net/c/540x540_70/img-master/img/2018/07/18/10/22/36/69744872_master1200.jpg");" width="600" height="411"></canvas>
    // <button class="kbpwWEq"><svg viewBox="0 0 24 24" class="_3h8MJLq" style="width: 48px; height: 48px;"><circle cx="12" cy="12" r="10" class="Cmuq7tJ"></circle><path d="M9,8.74841664 L9,15.2515834 C9,15.8038681 9.44771525,16.2515834 10,16.2515834 C10.1782928,16.2515834 10.3533435,16.2039156 10.5070201,16.1135176 L16.0347118,12.8619342 C16.510745,12.5819147 16.6696454,11.969013 16.3896259,11.4929799 C16.3034179,11.3464262 16.1812655,11.2242738 16.0347118,11.1380658 L10.5070201,7.88648243 C10.030987,7.60646294 9.41808527,7.76536339 9.13806578,8.24139652 C9.04766776,8.39507316 9,8.57012386 9,8.74841664 Z"></path></svg></button>
    // Well those are useless. Even the button's click function expands to 'function o () {}' or something very similarly empty.
    // "Search HTML" - .zip, nothing. Ugoira, 2 results:
    // An inline script, where I detect the romaji:ugoira thing to begin with.
    // The link to the tag for ugoira submissions.
    // Maybe search for うごイラ, the hiragana(?) for ugoira? Only appears in the title for the submission I'm looking at. What a pain in the ass.
    // https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65889857 still redirects. Goddammit.
    // Oh for fuck's sake, 'ugoira' is a user-supplied tag, not a part of the site HTML.
}
// Legacy Pixiv scraper for the old "mode=medium" landing page.
// Takes no arguments and returns nothing; the result is left in the global image_url:
//  - a full-size image URL when one can be derived from the page,
//  - the "mode=manga" version of the current URL for multi-page submissions,
//  - '' for Ugoira animations, which must not be redirected.
// Relies on extract_image_url_after(), defined elsewhere in this file (presumably it writes image_url when its marker is found - confirm there).
function scrape_pixiv_old() {
	// Known filetype mismatches: the guessed URL ends in .jpg but the real file is .png, so the redirect 404s.
	// http://www.pixiv.net/member_illust.php?mode=medium&illust_id=52107168 404s.
	// goes to http://i1.pixiv.net/img-original/img/2015/08/21/19/14/00/52107168_p0.jpg
	// should be http://i1.pixiv.net/img-original/img/2015/08/21/19/14/00/52107168_p0.png
	// No clear idea how to handle this, aside from fetching ?mode=big.
	// http://i3.pixiv.net/img-original/img/2015/08/04/22/56/24/51785706_p0.jpg too
	// Possible solution: redirect from '.jpg' with 404 text to '.png'. Not a loop, because not finding the 404 message means we don't redirect.
	// https://www.pixiv.net/member_illust.php?mode=medium&illust_id=53192902#dnr 404's to
	// https://i.pximg.net/img-original/img/2015/10/24/23/35/39/53192902_p0.jpg
	// 22-05-2018 - Pixiv changed their mode=medium page.
	// https://www.pixiv.net/member_illust.php?mode=medium&illust_id=68871661
	// https://i.pximg.net/img-original/img/2018/05/22/21/40/26/68871661_p0.jpg
	// As before, I guess.
	// "original":"https:\/\/i.pximg.net\/img-original\/img\/2018\/05\/22\/21\/40\/26\/68871661_p0.jpg"
	// The page-level "pixiv" object no longer exists, so the Ugoira ZIP link below is strictly best-effort.

	// Only the last successful extraction matters, so the most specific marker goes first and can be overridden.
	extract_image_url_after( 'class="_illust_modal', '//' ); // Fallback for pages where the markers below fail.
	extract_image_url_after( 'class="big"', '//' ); // New Pixiv pages (Dec '14) provide the big URL rather directly.
	if( image_url === '' ) { // Last resort: neither marker matched, so use the older bookmark-thumbnail markup.
		extract_image_url_after( 'bookmark_modal_thumbnail', '//' ); // Grab bookmark-thumbnail image from "medium" landing page.
	}

	if( image_url.indexOf( '/c/' ) > -1 ) { // A '/c/' path segment marks a resized thumbnail; rewrite it to the full-size URL.
		image_url = image_url.replace( '_m.', '.' ); // Old style: remove _m for full-size URL.
		image_url = image_url.replace( '/c/600x600', '' ); // New style: drop the size segment,
		image_url = image_url.replace( '/c/150x150', '' ); // in either of the two known thumbnail sizes,
		image_url = image_url.replace( '/img-master/', '/img-original/' ); // swap img-master for img-original,
		image_url = image_url.replace( '_master1200', '' ); // and remove the _master1200 suffix.
	}

	// Serialize the page once; both checks below search the same markup.
	var page_html = document.getElementsByTagName( 'html' )[0].innerHTML;

	// Through sheer accident, the old manga code still works after Pixiv's latest change.
	if( page_html.indexOf( '<a href="member_illust.php?mode=manga' ) > -1 ) { // If the works_display preview links to the manga, go there instead.
		image_url = window.location.href.replace( 'mode=medium', 'mode=manga' ); // Manga pages deserve their own HTML, so just go to that page.
		// Users: please consider Eza's Pixiv Fixiv, which replaces the default manga HTML with full images and none of that scroll-to-load nonsense.
	}

	// Don't redirect to "Ugoira" animations (ZIP full of JPGs, played as HTML slideshow).
	if( page_html.indexOf( 'class="_ugoku' ) > -1 ) {
		image_url = ''; // Prevent redirect by blanking image_url.
		// Add link to ZIP for Ugoira, purely for archival purposes. Every lookup is guarded, because a missing
		// "pixiv" global or player container would otherwise throw and abort the caller.
		var ugoku_data = ( typeof pixiv !== 'undefined' && pixiv && pixiv.context ) ? pixiv.context.ugokuIllustData : null;
		var ugoku_player = document.getElementsByClassName( '_ugoku-illust-player-container' )[0];
		if( ugoku_data && ugoku_data.src && ugoku_player ) {
			// insertAdjacentHTML appends without re-parsing the container, so the existing player nodes are left alone.
			ugoku_player.insertAdjacentHTML( 'beforeend', '<br><a href="' + ugoku_data.src + '">Download Ugoku frames as ZIP file</a>' );
		}
	}
}
// TODO: simplify scrape_booru now that Gelbooru's special cases get their own function.
// Generic scraper for booru-style imageboards; works on a wide variety of them.
// Takes no arguments and returns nothing; the result is left in the global image_url.
// If no candidate is found at all, image_url is left as '' instead of throwing.
// Relies on extract_image_url_after(), defined elsewhere in this file (presumably it writes image_url when its marker is found - confirm there).
function scrape_booru() {
	// For boorus which have automatic resizing and images which require it.
	extract_image_url_after( '>Resize image</a>', 'http://' );
	// Gelbooru's anti-adblock behavior might make the script fail the FIRST time you load a page, but not subsequent times.
	// For boorus with automatic resizing on, use the Original Image link, which appears after the Edit button.
	extract_image_url_after( "$('edit_form')", '//' );
	// Gelbooru serves different sidebars when it detects an adblocker, so try the alternative markers in turn.
	if( image_url === '' ) { extract_image_url_after( "$('resized_notice')", '//' ); }
	if( image_url === '' ) { extract_image_url_after( 'class="showEditBox">', '//' ); }
	// (A former kludge that blanked image_url when it came back as '//gelbooru.com' is disabled.)
	if( image_url === '' ) { // Otherwise, use the image that's being displayed.
		// Instead of lurching through raw HTML, grab the display image via the DOM.
		var displayed_image = document.getElementById( 'image' );
		// getElementById returns null when the page has no #image element; without this guard, reading .src would throw.
		if( displayed_image && displayed_image.src ) { image_url = displayed_image.src; }
	}
}
/*
Test suite of random URLs from the relevant sites:
http://www.hentai-foundry.com/pictures/user/Bottlesoldier/133840/Akibabuse
http://www.hentai-foundry.com/pictures/user/Bottlesoldier/214533/Lil-Gwendolyn
https://inkbunny.net/submissionview.php?id=483550
https://inkbunny.net/submissionview.php?id=374519
http://rule34.xxx/index.php?page=post&s=view&id=1399731
http://rule34.xxx/index.php?page=post&s=view&id=1415193
http://equi.booru.org/index.php?page=post&s=view&id=56940
http://furry.booru.org/index.php?page=post&s=view&id=340299
http://derpibooru.org/470074?scope=scpe80a78d33e96a29ea172a0d93e6e90b47c6a431ea
http://mspabooru.com/index.php?page=post&s=view&id=131809
http://mspabooru.com/index.php?page=post&s=view&id=131804
http://shiniez.deviantart.com/art/thanx-for-5-m-alan-in-some-heavy-makeup-XD-413414430
http://danbooru.donmai.us/posts/1250724?tags=dennou_coil
http://danbooru.donmai.us/posts/1162284?tags=dennou_coil
data:text/html,<img src='http://example.com/image.jpg'>
http://www.furaffinity.net/view/12077223/
http://gamesbynick.tumblr.com/post/67039820534/the-secrets-out-guys-the-secret-is-out
http://honeyclop.tumblr.com/post/67122645946/stallion-foursome-commission-for-ciderbarrel-d
http://shubbabang.tumblr.com/post/20990300285/new-headcanon-karkat-is-ridiculously-good-at
http://www.furaffinity.net/view/12092394/
https://e621.net/post/show?md5=25385d2349ae11f2057874f0479422ad
http://sandralvv.tumblr.com/post/64933897836/how-did-varrick-get-that-film-cuz-i-want-a-copy
*/