aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tulio Leao <tupaschoal@gmail.com> 2019-09-15 21:32:20 -0300
committer: Tulio Leao <tupaschoal@gmail.com> 2019-09-25 21:59:28 -0300
commit: d41459c3b3b1c2c72f298a85ac38ab2617899b9f (patch)
tree: 23fd6dfa06be26da5f52d4ab7d6376e58d8acca9
parent: 7710591aa92dcc33bb3363c5d326fbbd12aee30a (diff)
Add script to ping websites in sites.json
The script pings all entries to see if they contain a valid website, so that the file can be periodically cleaned up. It previously called `URI.encode`, which was breaking actually valid links. Changed to `get_response`, which seems to be more reliable than `request_head`.
-rwxr-xr-x script/cibuild 1
-rwxr-xr-x script/ping_websites.rb 88
2 files changed, 89 insertions, 0 deletions
diff --git a/script/cibuild b/script/cibuild
index a2ae2d09..47c32708 100755
--- a/script/cibuild
+++ b/script/cibuild
@@ -7,6 +7,7 @@ bundle exec htmlproofer ./_site --checks-to-ignore 'LinkCheck'
# Validate JSON
./script/validate_json.rb
+./script/ping_websites.rb
# Validate all files adhere to .editorconfig
# Exclude files which should not be checked against .editorconfig
diff --git a/script/ping_websites.rb b/script/ping_websites.rb
new file mode 100755
index 00000000..ac892eb2
--- /dev/null
+++ b/script/ping_websites.rb
@@ -0,0 +1,88 @@
+#!/usr/bin/env ruby
+
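+# Pings every URL listed in sites.json in the _data directory and reports
+# entries that are unreachable or return HTTP 404
+# Exits 1 upon JSON parsing errors or when an entry has no URL, exits 0 otherwise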
+
+require "net/http"
+require "json"
+
# Fixed-size pool of worker threads draining a shared job queue.
#
# Workers are started by the constructor and block on the queue until jobs
# arrive, so scheduled jobs may begin running before #run! is called.
# A job that raises does not take its worker down: the error is recorded,
# the remaining jobs still run, and the first recorded error is raised from
# #run! once every worker has finished.
class ThreadPool
  # @param size [Integer] number of worker threads to start
  def initialize(size)
    @size = size
    @jobs = Queue.new
    # Queue is thread-safe, so workers can record failures without a lock
    @errors = Queue.new
    @pool = Array.new(@size) { |id| spawn_worker(id) }
  end

  # Adds a job to the queue.
  #
  # @param args [Array] arguments passed to the block when the job runs
  # @yield [*args] the work to perform on a worker thread
  # @raise [ArgumentError] when no block is given
  def schedule(*args, &block)
    # Fail here rather than letting a worker crash later on nil.call
    raise ArgumentError, "schedule requires a block" unless block

    @jobs << [block, args]
  end

  # Waits until every job queued so far has been performed, then stops the
  # workers.
  #
  # @return [Array<Thread>] the joined worker threads
  # @raise [StandardError] the first error raised by any job, if there was one
  def run!
    # One exit job per worker; each worker stops after consuming exactly one
    @size.times do
      schedule { throw :exit }
    end
    workers = @pool.map(&:join)
    raise @errors.pop unless @errors.empty?

    workers
  end

  private

  # Starts one worker that performs jobs until it receives an exit job.
  def spawn_worker(id)
    Thread.new do
      Thread.current[:id] = id
      catch(:exit) do
        loop do
          job, args = @jobs.pop
          begin
            job.call(*args)
          rescue StandardError => e
            # Keep the worker alive; `throw :exit` is not an exception and
            # still unwinds to the surrounding catch
            @errors << e
          end
        end
      end
    end
  end
end
+
# Checks whether the website at +url_string+ still answers an HTTP GET.
#
# Diagnostics are printed for every entry considered dead; nothing is printed
# for sites that respond.
#
# @param name [String] entry name, used only in the diagnostic messages
# @param url_string [String, nil] URL to request; surrounding whitespace is ignored
# @return [Boolean] true when the server answered with anything other than
#   404 (redirects included) or failed only on its TLS certificate; false for
#   a 404, an unusable URL, or a connection-level failure
def url_exist(name, url_string)
  url = URI.parse(url_string.to_s.strip)
  unless url.is_a?(URI::HTTP) && !url.host.to_s.empty?
    # Net::HTTP.get_response needs an absolute http(s) URL with a host
    STDERR.puts "Entry #{name} has an invalid URL: #{url_string.inspect}"
    return false
  end

  res = Net::HTTP.get_response(url)
  if res.code == "404"
    STDERR.puts "Entry #{name} returned HTTP 404"
    return false
  end
  true
rescue URI::InvalidURIError
  STDERR.puts "Entry #{name} has an invalid URL: #{url_string.inspect}"
  false
rescue Errno::ECONNREFUSED,
       Errno::ECONNRESET,
       Errno::EHOSTUNREACH,
       Errno::ENETUNREACH,
       Errno::ENOENT,
       Errno::ETIMEDOUT,
       EOFError,
       Net::OpenTimeout,
       Net::ReadTimeout,
       SocketError => e
  # All categories where a site is most definitely not operational anymore
  puts "Rescued #{name}: #{e.inspect}"
  false
rescue OpenSSL::SSL::SSLError
  # Bad website has SSL certificate error, but at least it responds to requests
  true
end
+
# Entry point: load the site list, make sure every entry has a URL, then ping
# all URLs concurrently.
# NOTE(review): assumes the top level of sites.json is an array of objects
# with 'name' and 'url' keys - confirm against the data file.
begin
  sites = JSON.parse(File.read('_data/sites.json'))

  # Forces all entries on the JSON to have an URL. Checked before any request
  # is sent so a malformed file fails fast instead of exiting while worker
  # threads are already in the middle of requests.
  missing = sites.reject { |entry| entry.key?('url') }
  unless missing.empty?
    missing.each { |entry| STDERR.puts "Entry: #{entry['name']} has no URL" }
    exit 1
  end

  pool = ThreadPool.new(20)
  # check if a website is alive
  sites.each do |entry|
    pool.schedule(entry['name'], entry['url']) do |site_name, site_url|
      url_exist(site_name, site_url)
    end
  end
  pool.run!
rescue JSON::ParserError => error
  STDERR.puts 'JSON parsing error encountered!'
  # The message says where parsing failed; the backtrace alone does not
  STDERR.puts error.message
  STDERR.puts error.backtrace.join("\n")
  exit 1
end

exit 0