#!/usr/bin/env ruby
# frozen_string_literal: true

# Prints sites.json entries containing links which may be out of operation or
# unreachable

require "net/http"
require "json"

# Minimal fixed-size thread pool.
# Taken from https://rossta.net/blog/a-ruby-antihero-thread-pool.html
#
# Worker threads are started by the constructor and pick up jobs as soon as
# they are scheduled. `run!` queues the shutdown jobs and waits for the
# workers to finish.
class ThreadPool
  # size - number of worker threads to start.
  def initialize(size)
    @size = size
    @jobs = Queue.new
    @pool = Array.new(@size) do |i|
      Thread.new do
        Thread.current[:id] = i
        # A job that throws :exit unwinds out of the loop and ends this worker.
        catch(:exit) do
          loop do
            job, args = @jobs.pop # blocks until a job is available
            job.call(*args)
          end
        end
      end
    end
  end

  # Add a job to the queue. The block is later called with `args` by one of
  # the worker threads.
  def schedule(*args, &block)
    @jobs << [block, args]
  end

  # Queue one :exit job per worker (behind every job scheduled so far), then
  # wait for all workers to terminate. An exception that escaped a job is
  # re-raised here by Thread#join.
  def run!
    @size.times { schedule { throw :exit } }
    @pool.each(&:join)
  end
end

# Requests `url_string` and reports entries that look dead.
#
# name       - label of the entry, used only in the printed messages.
# url_string - URL to request; surrounding whitespace is stripped.
#
# Returns true when the server answered (including redirects and certificate
# errors), false when it answered 404, the request failed, or the URL could
# not be parsed.
def url_exist(name, url_string)
  url = URI.parse(url_string.strip)
  res = Net::HTTP.get_response(url)

  # Redirects and every other non-404 status count as a responding site.
  return true unless res.code == "404"

  STDERR.puts "Entry #{name} returned HTTP 404"
  false
rescue Errno::EADDRNOTAVAIL,
       Errno::ECONNREFUSED,
       Errno::ECONNRESET,
       Errno::EHOSTUNREACH,
       Errno::ENETUNREACH,
       Errno::ENOENT,
       Errno::ETIMEDOUT,
       Net::OpenTimeout,
       Net::ReadTimeout,
       EOFError,
       SocketError,
       Zlib::DataError => e
  # All categories where a site is most definitely non-operational
  puts "HTTP request failed to #{name}: #{e.inspect}"
  false
rescue URI::InvalidURIError => e
  # A malformed URL must not take down the worker thread (and, via join, the
  # whole run); report it like any other failed entry.
  puts "HTTP request failed to #{name}: #{e.inspect}"
  false
rescue OpenSSL::SSL::SSLError
  # Websites with certificate errors are responding to requests
  true
end

# NOTE(review): assumes sites.json is an array of objects with "name" and
# "url" keys - confirm against the data file.
sites = JSON.parse(File.read('_data/sites.json'))
pool = ThreadPool.new(20)

# check if a website is alive
sites.each do |site|
  pool.schedule(site['name'], site['url']) do |site_name, site_url|
    url_exist(site_name, site_url)
  end
end

pool.run!