Wrote this a few hours ago for the obvious reason.
# 4chan-image-downloader.rb
# Ruby script to download images from 4chan threads.
# Author: Fur
# Date: 15th Oct 2014
# Usage: 4chan-image-downloader.rb 4chan.org/<board>/thread/<thread_number> /dir/to/download/images/to
# Notes:
# Existing files in the directory that share a name with a downloaded image will be overwritten, so be careful!
require 'open-uri'
require 'json'
require 'fileutils'
require 'trollop'
require 'colored'
# Version string of this script; main passes it to Trollop's `version` line.
VERSION = "0.2.3"
# Parse a 4chan thread url.
# thread_url is a string containing "4chan.org/<board>/thread/<number>";
# anything before or after that part (scheme, subdomain, slug) is ignored.
# Returns: hash with :board and :thread_number, both as strings.
# Raises: ArgumentError if thread_url does not contain a 4chan thread url.
def parse_4chan_thread_url(thread_url)
  match_result = thread_url.match(%r{4chan\.org/(\w{1,4})/thread/(\d+)})

  # raise, not throw: throw belongs to catch/throw control flow and would
  # surface as an "uncaught throw" error instead of carrying this message.
  raise ArgumentError, "Url was not a valid 4chan thread url!" unless match_result

  board, thread_number = match_result.captures
  { :board => board, :thread_number => thread_number }
end
# Download the JSON description of a 4chan thread and parse it.
# See: https://github.com/4chan/4chan-API
# board is the board that the thread was posted in.
# thread_number is number of the thread as a string.
# Returns: the parsed JSON document for the thread (the result of JSON.parse).
# Errors raised by open-uri (e.g. OpenURI::HTTPError) or by JSON.parse are
# not handled here and propagate to the caller.
def download_thread_info(board, thread_number)
  thread_json_url = "http://a.4cdn.org/%s/thread/%s.json" % [board, thread_number]

  # Call #open on the URI object (added by open-uri) instead of Kernel#open:
  # Kernel#open no longer accepts urls as of Ruby 3.0.
  URI.parse(thread_json_url).open do |f|
    JSON.parse(f.read)
  end
end
# Collect the url of every downloadable image in a 4chan thread.
# thread_url is the url for the 4chan thread.
# Returns: array containing image urls as URI objects.
def get_images_in_thread(thread_url)
  board, thread_number = parse_4chan_thread_url(thread_url).values_at(:board, :thread_number)
  posts = download_thread_info(board, thread_number)["posts"]

  # Only posts that carry an image have a filename, and deleted images cannot be fetched.
  posts_with_image = posts.select { |post| post["filename"] && !post["filedeleted"] }

  posts_with_image.map do |post|
    name_on_server = post["tim"].to_s + post["ext"]
    URI.parse("http://i.4cdn.org/%s/%s" % [board, name_on_server])
  end
end
# Save a file from the Internet to disk.
# url is the location of the file to download: either an object that responds
# to #open (such as the URI objects open-uri extends) or a url string.
# path is the path to save the file to.
# Returns: the number of bytes written.
# Warning: will truncate an existing file at path once the download has started!
# Errors from opening the url (e.g. OpenURI::HTTPError) propagate to the caller.
def download_file(url, path)
  # Kernel#open no longer handles urls as of Ruby 3.0, so call #open on the
  # url object itself; plain strings are parsed into a URI first.
  source = url.respond_to?(:open) ? url : URI.parse(url)

  # Open the remote side first: if the request fails, the destination file is
  # never created or truncated.
  source.open("rb") do |remote|
    File.open(path, "wb") do |saved_file|
      # Stream the body instead of loading it into memory in one piece.
      IO.copy_stream(remote, saved_file)
    end
  end
end
# Fetch every image of a thread into a directory, reporting progress on stdout.
# thread_url is the url of the thread to download
# directory is the directory to download the images to. Must exist.
# Aborts the program if the thread information cannot be fetched or the url
# is invalid; an HTTP error on a single image is reported and skipped.
def download_images_in_thread(thread_url, directory)
  images =
    begin
      get_images_in_thread(thread_url)
    rescue OpenURI::HTTPError => e
      abort("Could not download thread information: #{e.message}!")
    rescue ArgumentError
      abort("Invalid thread url!")
    end

  total = images.size
  images.each.with_index(1) do |image, position|
    filename = File.basename(image.path)
    print "downloading %s (%i/%i)... " % [filename, position, total]

    # The else branch supplies the status only when the download raised nothing.
    status =
      begin
        download_file(image, File.join(directory, filename))
      rescue OpenURI::HTTPError => e
        "error! (#{e.message})".red
      else
        "done!".green
      end

    print status
    print "\n"
  end
end
# Entry point: parse the command line, make sure the download directory
# exists, then download every image in the thread.
# Takes the thread url and the download directory from ARGV, in that order,
# and calls Trollop::die if either one is missing.
def main
  # The returned options hash is not needed; the call is made for its option
  # handling and to register the version and banner text.
  Trollop::options do
    version "#{$0} v#{VERSION}"
    banner <<-EOS
#{$0} is a command-line program to download images from 4chan threads
Usage:
#{$0} <thread_url> <image_download_directory>
    EOS
  end

  thread_url = ARGV.shift
  directory = ARGV.shift
  Trollop::die("Required argument(s) missing!") unless thread_url && directory

  directory = File.expand_path(directory)
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  FileUtils.mkdir_p(directory) unless File.exist?(directory)

  download_images_in_thread(thread_url, directory)
end
# Run the downloader when this file is executed.
main
Dependency installation:
gem install trollop
gem install json
gem install colored
Changelog:
Added text colouring to the "done!" and "failed (error)!" output.
Lowered the default interval from 1 to 0.5
Script now automatically makes image download directory if it does not exist
Corrected the help banner.
Completely refactored code and removed interval parameter because it was useless, given how much bandwidth 4chan has.