lib/tasks/graphs.rake
require File.join(File.dirname(__FILE__), '../graphs')
namespace :graphs do
include Graphs
task generate_user_use_graph: :environment do
minimum_data_size = ENV.fetch('MINIMUM_DATA_SIZE', 1).to_i
# set the local font path for the current task
ENV["GDFONTPATH"] = "/usr/share/fonts/truetype/ttf-bitstream-vera"
active_users = "SELECT DATE(ir.created_at), COUNT(distinct user_id) " \
"FROM info_requests ir " \
"JOIN users on ir.user_id = users.id " \
"WHERE users.ban_text = '' " \
"AND users.closed_at IS NULL " \
"GROUP BY DATE(ir.created_at) " \
"ORDER BY DATE(ir.created_at)"
confirmed_users = "SELECT DATE(created_at), COUNT(*) FROM users " \
"WHERE email_confirmed = 't' " \
"AND ban_text = '' " \
"AND closed_at IS NULL " \
"GROUP BY DATE(created_at) " \
"ORDER BY DATE(created_at)"
# here be database-specific dragons...
# this uses a window function which is not supported by MySQL, but
# is reportedly available in MariaDB from 10.2 onward (and Postgres 9.1+)
aggregate_signups = "SELECT DATE(created_at), COUNT(*), SUM(count(*)) " \
"OVER (ORDER BY DATE(created_at)) " \
"FROM users " \
"WHERE ban_text = '' " \
"AND closed_at IS NULL " \
"GROUP BY DATE(created_at)"
Gnuplot.open(false) do |gp|
Gnuplot::Plot.new(gp) do |plot|
plot.terminal("png font 'Vera.ttf' 9 size 1200,400")
plot.output(File.expand_path("public/foi-user-use.png", Rails.root))
#general settings
plot.unset(:border)
plot.unset(:arrow)
plot.key("left")
plot.tics("out")
# x-axis
plot.xdata("time")
plot.set('timefmt "%Y-%m-%d"')
plot.set('format x "%d %b %Y"')
plot.set("xtics nomirror")
# primary y-axis
plot.set("ytics nomirror")
plot.ylabel("number of users on the calendar day")
plot.yrange("[0:]")
# secondary y-axis
plot.set("y2tics tc lt 2")
plot.set('y2label "cumulative total number of users" tc lt 2')
plot.set('format y2 "%.0f"')
plot.y2range("[0:]")
# start plotting the data from largest to smallest so
# that the shorter bars overlay the taller bars
state_list = [
{
title: "users each day ... who registered",
colour: :lightblue
},
{
title: "... and since confirmed their email",
with: "impulses",
linewidth: 15,
colour: :mauve,
sql: confirmed_users
},
{
title: "...who made an FOI request",
with: "lines",
linewidth: 1,
colour: :red,
sql: active_users
}
]
# plot all users
options = { with: "impulses",
linecolor: COLOURS[state_list[0][:colour]],
linewidth: 15, title: state_list[0][:title] }
all_users = select_as_columns(aggregate_signups)
# nothing to do, bail
unless all_users && (all_users[0].size >= minimum_data_size)
if verbose
exit "warning: no request data to graph, skipping task"
else
exit!
end
end
plot_data_from_columns(all_users, options, plot.data)
graph_param_sets = []
state_list.each_with_index do |state_info, index|
if index > 0
graph_param_sets << GraphParams.new(
state_info[:sql],
options.merge({
title: state_info[:title],
linecolor: COLOURS[state_info[:colour]],
with: state_info[:with],
linewidth: state_info[:linewidth] })
)
end
end
plot_datasets(graph_param_sets, plot.data)
# skip this if there is just a single datapoint
# (counts the number of values in the first column)
if all_users[0].size > 1
# plot cumulative user totals
options.merge!({
title: "cumulative total number of users",
axes: "x1y2",
with: "lines",
linewidth: 1,
linecolor: COLOURS[:lightgreen],
using: "1:3" })
plot_data_from_columns(all_users, options, plot.data)
end
end
end
end
task generate_request_creation_graph: :environment do
minimum_data_size = ENV.fetch('MINIMUM_DATA_SIZE', 2).to_i
# set the local font path for the current task
ENV["GDFONTPATH"] = "/usr/share/fonts/truetype/ttf-bitstream-vera"
def assemble_sql(where_clause="")
"SELECT DATE(info_requests.created_at), COUNT(*) " \
"FROM info_requests " \
"LEFT OUTER JOIN embargoes " \
"ON embargoes.info_request_id = info_requests.id " \
"WHERE #{where_clause} " \
"AND PROMINENCE = 'normal' " \
"AND (embargoes.id IS NULL) " \
"GROUP BY DATE(info_requests.created_at)" \
"ORDER BY DATE(info_requests.created_at)"
end
def state_exclusion_sql(states)
"described_state NOT IN ('#{states.join("','")}')"
end
Gnuplot.open(false) do |gp|
Gnuplot::Plot.new(gp) do |plot|
plot.terminal("png font 'Vera.ttf' 9 size 1600,600")
plot.output(File.expand_path("public/foi-live-creation.png", Rails.root))
#general settings
plot.unset(:border)
plot.unset(:arrow)
plot.key("left")
plot.tics("out")
# x-axis
plot.xdata("time")
plot.set('timefmt "%Y-%m-%d"')
plot.set('format x "%d %b %Y"')
plot.set("xtics nomirror")
plot.xlabel("status of requests that were created on each calendar day")
# primary y-axis
plot.ylabel("number of requests created on the calendar day")
plot.yrange("[0:]")
# secondary y-axis
plot.set("y2tics tc lt 2")
plot.set('y2label "cumulative total number of requests" tc lt 2')
plot.set('format y2 "%.0f"')
plot.y2range("[0:]")
# get the data, plot the graph
state_list = [ { state: 'waiting_response', colour: :darkblue },
{ state: 'waiting_clarification', colour: :lightblue },
{ state: 'not_held', colour: :yellow },
{ state: 'rejected', colour: :red },
{ state: 'successful', colour: :lightgreen },
{ state: 'partially_successful', colour: :darkgreen },
{ state: 'requires_admin', colour: :cyan },
{ state: 'gone_postal', colour: :darkyellow },
{ state: 'internal_review', colour: :mauve },
{ state: 'error_message', colour: :redbrown },
{ state: 'user_withdrawn', colour: :pink } ]
options = { with: "impulses",
linecolor: COLOURS[state_list[0][:colour]],
linewidth: 4, title: state_list[0][:state] }
# here be database-specific dragons...
# this uses a window function which is not supported by MySQL, but
# is reportedly available in MariaDB from 10.2 onward (and Postgres 9.1+)
sql = "SELECT DATE(created_at), COUNT(*), SUM(count(*)) " \
"OVER (ORDER BY DATE(created_at)) " \
"FROM info_requests " \
"WHERE prominence != 'backpage' " \
"GROUP BY DATE(created_at)"
all_requests = select_as_columns(sql)
# nothing to do, bail
# (both nil and a single datapoint will result in an undrawable graph)
unless all_requests && (all_requests[0].size >= minimum_data_size)
if verbose
abort "warning: no request data to graph, skipping task"
else
exit!
end
end
# start plotting the data from largest to smallest so
# that the shorter bars overlay the taller bars
plot_data_from_columns(all_requests, options, plot.data)
graph_param_sets = []
previous_states = []
state_list.each_with_index do |state_info, index|
if index > 0
graph_param_sets << GraphParams.new(
assemble_sql(state_exclusion_sql(previous_states)),
options.merge({
title: state_info[:state],
linecolor: COLOURS[state_info[:colour]] })
)
end
previous_states << state_list[index][:state]
end
plot_datasets(graph_param_sets, plot.data)
# plot the cumulative counts
options.merge!({
with: "lines",
linecolor: COLOURS[:lightgreen],
linewidth: 1,
title: "cumulative total number of requests",
using: "1:3",
axes: "x1y2"
})
plot_data_from_columns(all_requests, options, plot.data)
end
end
end
end