diff options
author | Tulio Leao <tupaschoal@gmail.com> | 2019-09-15 21:32:20 -0300 |
---|---|---|
committer | Tulio Leao <tupaschoal@gmail.com> | 2019-09-25 21:59:28 -0300 |
commit | d41459c3b3b1c2c72f298a85ac38ab2617899b9f (patch) | |
tree | 23fd6dfa06be26da5f52d4ab7d6376e58d8acca9 | |
parent | 7710591aa92dcc33bb3363c5d326fbbd12aee30a (diff) |
Add script to ping websites in sites.json
The script pings every entry to check whether it points to a valid website, so that the file can be periodically cleaned up.
The script previously called `URI.encode`, which was breaking links that were actually valid. It now uses `get_response`, which seems to be more reliable than `request_head`.
-rwxr-xr-x | script/cibuild | 1 | ||||
-rwxr-xr-x | script/ping_websites.rb | 88 |
2 files changed, 89 insertions, 0 deletions
diff --git a/script/cibuild b/script/cibuild index a2ae2d09..47c32708 100755 --- a/script/cibuild +++ b/script/cibuild @@ -7,6 +7,7 @@ bundle exec htmlproofer ./_site --checks-to-ignore 'LinkCheck' # Validate JSON ./script/validate_json.rb +./script/ping_websites.rb # Validate all files adhere to .editorconfig # Exclude files which should not be checked against .editorconfig diff --git a/script/ping_websites.rb b/script/ping_websites.rb new file mode 100755 index 00000000..ac892eb2 --- /dev/null +++ b/script/ping_websites.rb @@ -0,0 +1,88 @@ +#!/usr/bin/env ruby + +# Validates sites.json in the _data directory +# Exits 0 on success, exits 1 upon JSON parsing errors + +require "net/http" +require "json" + +# thread pool class +class ThreadPool + def initialize(size) + @size = size + @jobs = Queue.new + @pool = Array.new(@size) do |i| + Thread.new do + Thread.current[:id] = i + catch(:exit) do + loop do + job, args = @jobs.pop + job.call(*args) + end + end + end + end + end + + # add a job to queue + def schedule(*args, &block) + @jobs << [block, args] + end + + # run threads and perform jobs from queue + def run! 
+ @size.times do + schedule { throw :exit } + end + @pool.map(&:join) + end +end + +def url_exist(name, url_string) + url = URI.parse(url_string.strip) + res = Net::HTTP.get_response(url) + if res.kind_of?(Net::HTTPRedirection) + # Do nothing + elsif res.code == "404" + STDERR.puts "Entry #{name} returned HTTP 404" + end +rescue Errno::ECONNRESET, + Errno::EHOSTUNREACH, + Errno::ENOENT, + Errno::ETIMEDOUT, + Net::OpenTimeout, + Net::ReadTimeout, + SocketError => e + # All categories where a site is most definitely not operational anymore + puts "Rescued #{name}: #{e.inspect}" + false +rescue OpenSSL::SSL::SSLError + # Bad website has SSL certificate error, but at least it responds to requests + true +end + +begin + json = JSON.parse(File.read('_data/sites.json')) + pool = ThreadPool.new(20) + # check if a website is alive + json.each_with_index do |(key, _), i| + name = key['name'] + if key.key?('url') + url = key['url'] + pool.schedule(name, url) do |name , url| + url_exist(name, url) + end + else + # Forces all entries on the JSON to have an URL + STDERR.puts "Entry: #{name} has no URL" + exit 1 + end + end + pool.run! +rescue JSON::ParserError => error + STDERR.puts 'JSON parsing error encountered!' + STDERR.puts error.backtrace.join("\n") + exit 1 +end + +exit 0 |