Plot data from CSSEGISandData/COVID-19: Ruby scripts that convert the JHU CSSE time series (plus Tokyo, Hokkaido, and Osaka open data) into tab-separated .dat files, and gnuplot scripts that plot them.
# ignore patterns (a .gitignore): generated plots and data, downloaded source
# files, and the local bundler install
*.png
*.dat
*.csv
*.xlsx
/.bundle
/vendor/bundle
#
# confirmed-global.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
load 'timeseries.plot'
set title "Time series from https://github.com/CSSEGISandData/COVID-19\n\n"
set ylabel "Cumulative confirmed in country/region"
plot \
'confirmed-global.dat' using 1:5 title 'US',\
'confirmed-global.dat' using 1:12 title 'Spain',\
'confirmed-global.dat' using 1:3 title 'Italy',\
'confirmed-global.dat' using 1:13 title 'Germany',\
'confirmed-global.dat' using 1:2 title 'China (exc. HK)',\
'confirmed-global.dat' using 1:10 title 'UK',\
'confirmed-global.dat' using 1:6 title 'Korea',\
'confirmed-global.dat' using 1:11 title 'Philippines',\
'confirmed-global.dat' using 1:4 title 'Japan',\
'confirmed-global.dat' using 1:9 title 'Hong Kong',\
'confirmed-global.dat' using 1:8 title 'Taiwan'
#
# confirmed-us.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
load 'timeseries.plot'
set title "Time series from https://github.com/CSSEGISandData/COVID-19\n*https://stopcovid19.metro.tokyo.lg.jp\n**http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm"
set ylabel "Cumulative confirmed in state/prefecture"
plot \
'confirmed-us.dat' using 1:10 title 'New York',\
'confirmed-us.dat' using 1:11 title 'New York, NY',\
'confirmed-us.dat' using 1:2 title 'California',\
'confirmed-us.dat' using 1:12 title 'Washington',\
'Tokyo.dat' using 1:2 title '*Tokyo',\
'confirmed-us.dat' using 1:3 title 'Hawaii',\
'confirmed-us.dat' using 1:5 title 'Honolulu, HI',\
'Hokkaido.dat' using 1:2 title '**Hokkaido',\
'confirmed-us.dat' using 1:7 title 'Maui, HI',\
'confirmed-us.dat' using 1:4 title 'Hawaii, HI',\
'confirmed-us.dat' using 1:6 title 'Kauai, HI'
# Gemfile: roo reads the Osaka spreadsheet, nokogiri parses the Hokkaido HTML
source 'https://rubygems.org'
gem "roo", "~> 2.8.0"
gem "nokogiri"
GEM
  remote: https://rubygems.org/
  specs:
    mini_portile2 (2.4.0)
    nokogiri (1.10.9)
      mini_portile2 (~> 2.4.0)
    roo (2.8.3)
      nokogiri (~> 1)
      rubyzip (>= 1.3.0, < 3.0.0)
    rubyzip (2.3.0)

PLATFORMS
  ruby

DEPENDENCIES
  nokogiri
  roo (~> 2.8.0)

BUNDLED WITH
   2.1.2
#!/usr/bin/ruby
#
# usage: ruby global2dat.rb time_series_covid19_confirmed_global.csv > confirmed.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
countries = [
  'China',
  'Italy',
  'Japan',
  'US',
  'Korea, South',
  'Iran',
  'Taiwan*',
  'Hong Kong',
  'United Kingdom',
  'Philippines',
  'Spain',
  'Germany',
]

# Column headers of the form m/d/yy are dates; anything else returns nil
def parsedate(str)
  if str =~ %r[\d+/\d+/\d+]
    Time.strptime(str + ' +0000', '%m/%d/%y %z')
  else
    nil
  end
end

# Sum the cumulative counts per country and date, matching rows whose
# Province/State or Country/Region appears in the countries list
def parsedata(raw, countries)
  raw = CSV.parse(raw, headers: true)
  out = Hash.new{|h, country| h[country] = Hash.new{|j, date| j[date] = 0}}
  raw.each do |data|
    country = nil
    %w(Province/State Country/Region).each do |header|
      if countries.include?(data[header])
        country = data[header]
        break
      end
    end
    next unless country
    data.each do |h, count|
      next unless count
      date = parsedate(h)
      next unless date
      begin
        i = Integer(count)
        out[country][date] += i if i > 0
      rescue TypeError => error
        raise error.exception(data.inspect)
      end
    end
  end
  return out
end

data = parsedata(ARGF.read, countries)
dates = data.values.map{|x| x.keys}.flatten.uniq.sort
$stderr.puts dates.first.strftime("%Y-%m-%d") + " - " + dates.last.strftime("%Y-%m-%d")

# One tab-separated column per country, in the order of the countries array
puts ['#date', countries].join("\t")
dates.each do |date|
  puts [
    date.strftime("%Y-%m-%d"), countries.map{|c| data[c][date] || 0}
  ].join("\t")
end
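For reference, the "using 1:N" column numbers in confirmed-global.plot line up with the countries array above, with column 1 being the date. A minimal sketch that prints that mapping from the same array (illustrative only, not part of the gist):

#!/usr/bin/ruby
# Illustrative only: print the gnuplot column number for each country column
# of confirmed-global.dat, in the same order global2dat.rb writes them.
countries = ['China', 'Italy', 'Japan', 'US', 'Korea, South', 'Iran',
             'Taiwan*', 'Hong Kong', 'United Kingdom', 'Philippines',
             'Spain', 'Germany']
countries.each_with_index do |country, i|
  puts "using 1:#{i + 2}\t#{country}"   # column 1 is the date
end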
#!/usr/bin/ruby
#
# usage: ruby hokkaido2dat.rb http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm > Hokkaido.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'nokogiri'
require 'open-uri'
require 'time'
charset = nil
html = URI.open(ARGV.shift) do |page|
  charset = page.charset
  page.read
end

# Tally newly announced cases per announcement date
counts = Hash.new{0}
col = nil
Nokogiri::HTML.parse(html, nil, charset).search('table tbody tr').each do |tr|
  unless col
    # The first row scanned locates the 公表日 (announcement date) column
    col = tr.search('td').find_index{|td| td.content.gsub("\u00A0", "").strip == '公表日'}
  else
    m, d = tr.search('td')[col].content.scan(/\d+/)
    date = Time.new(2020, m, d, 0, 0, 0, 'UTC')
    counts[date] += 1
  end
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
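hokkaido2dat.rb above, and osaka2dat.rb and tokyo2dat.rb below, share the same shape: tally newly reported cases per date into a Hash, then emit a running total per date. A minimal sketch of that pattern on made-up numbers (illustrative only, not part of the gist):

#!/usr/bin/ruby
# Illustrative only: per-day tallies turned into a cumulative series, in the
# same "#date<TAB>count" format the *2dat.rb scripts write.
counts = Hash.new{0}    # date string => newly reported cases that day
[['2020-02-01', 1], ['2020-02-01', 1], ['2020-02-03', 2]].each do |day, n|
  counts[day] += n
end
puts "#date\tcount"
total = 0
counts.keys.sort.each do |day|
  total += counts[day]
  puts [day, total].join("\t")
end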
#!/usr/bin/ruby
#
# usage: ruby osaka2dat.rb youseisyajyouhou.xlsx > Osaka.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'roo'
require 'time'
# Tally newly announced cases per 報道提供日 (press-release date); the
# spreadsheet stores it as a serial day count, converted here to a Time by
# adding that many days to the 1900-01-01 base
counts = Hash.new{0}
Roo::Spreadsheet.open(ARGV.shift).sheet('Sheet1').parse(header_search: [/\A番号\z/]).each do |record|
  next unless record['報道提供日']
  date = Time.new(1900, 1, 1, 0, 0, 0, 'UTC') + record['報道提供日']*24*3600
  counts[date] += 1
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
#
# timeseries.plot: shared settings loaded by confirmed-global.plot and
# confirmed-us.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
set grid
set xdata time
set timefmt "%Y-%m-%d"
set style data lines
set log y
set yrange [0.8:500000]
set xrange ["2020-02-06":*]
set xlabel "Date"
set key top left
#!/usr/bin/ruby
#
# usage: ruby tokyo2dat.rb 130001_tokyo_covid19_patients.csv > Tokyo.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
# Tally newly announced cases per 公表_年月日 (announcement date)
counts = Hash.new{0}
CSV.parse(ARGF.read, headers: true).each do |record|
  next unless record['公表_年月日']
  begin
    date = Time.strptime(record['公表_年月日'] + " UTC", "%Y-%m-%d %z")
  rescue NoMethodError => err
    raise err.exception(record.inspect)
  end
  counts[date] += 1
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
#!/bin/sh
#
# Refresh the JHU working copy, regenerate the .dat files, and render the
# PNG plots
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
set -e
# Location of a working copy for https://github.com/CSSEGISandData/COVID-19.git
jhuwc='../../../github.com/CSSEGISandData/COVID-19'
echo \# Refreshing working copy for JHU CSSE data
cd $jhuwc && git pull; cd - > /dev/null
# JHU time series
echo \# Parsing time series
ruby global2dat.rb $jhuwc/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv > confirmed-global.dat
ruby us2dat.rb $jhuwc/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv > confirmed-us.dat
# local data
echo \# Fetching and formatting data for Tokyo
curl -O https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_patients.csv
ruby tokyo2dat.rb 130001_tokyo_covid19_patients.csv > Tokyo.dat
echo \# Fetching and formatting data for Hokkaido
if test ! -d .bundle; then bundle install --path=vendor/bundle; fi
bundle exec ruby hokkaido2dat.rb http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm > Hokkaido.dat
# plot
echo \# Plotting data
for a in global us; do
gnuplot <<_END
set term png small
set output "confirmed-$a.png"
load "confirmed-$a.plot"
unset output
_END
done
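The gnuplot heredoc in the loop above is what actually renders the PNGs. For reference, an equivalent one-off render driven from Ruby would look like this (a sketch only, not part of the gist; it assumes gnuplot is on PATH and confirmed-global.dat already exists):

#!/usr/bin/ruby
# Sketch only: render one plot the same way the update script's heredoc does.
IO.popen('gnuplot', 'w') do |gnuplot|
  gnuplot.puts 'set term png small'
  gnuplot.puts 'set output "confirmed-global.png"'
  gnuplot.puts 'load "confirmed-global.plot"'
  gnuplot.puts 'unset output'
end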
#!/usr/bin/ruby
#
# usage: ruby us2dat.rb time_series_covid19_confirmed_US.csv > confirmed-us.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
regions = {
  states: [
    'Hawaii',
    'Washington',
    'California',
    'New York',
  ],
  # states whose individual counties are also tracked
  counties_in_states: [
    'Hawaii',
  ],
  counties: [
    # County, State
    ['New York', 'New York'],
  ],
}

# Column headers of the form m/d/yy are dates; anything else returns nil
def parsedate(str)
  if str =~ %r[\d+/\d+/\d+]
    Time.strptime(str + ' +0000', '%m/%d/%y %z')
  else
    nil
  end
end

# Add the row's counts into hash[region], keyed by date
def count_up(hash, row, region)
  row.each do |h, count|
    next unless count
    date = parsedate(h)
    next unless date
    begin
      i = Integer(count)
      hash[region][date] += i if i > 0
    rescue TypeError => error
      raise error.exception(row.inspect)
    end
  end
end

# Sum the cumulative counts per state, per selected county, and per county of
# the states listed under counties_in_states
def parsedata(raw, regions)
  raw = CSV.parse(raw, headers: true)
  out = Hash.new{|h, region| h[region] = Hash.new{|j, date| j[date] = 0}}
  raw.each do |data|
    if regions[:states].include?(data['Province_State'])
      count_up(out, data, data['Province_State'])
    end
    if regions[:counties_in_states].include?(data['Province_State'])
      count_up(out, data, "#{data['Province_State']}/#{data['Admin2']}")
    elsif regions[:counties].find{|c| c[0] == data['Admin2'] && c[1] == data['Province_State']}
      count_up(out, data, "#{data['Province_State']}/#{data['Admin2']}")
    end
  end
  return out
end

data = parsedata(ARGF.read, regions)
regions = data.keys.sort
dates = data.values.map{|x| x.keys}.flatten.uniq.sort
$stderr.puts dates.first.strftime("%Y-%m-%d") + " - " + dates.last.strftime("%Y-%m-%d")

# One tab-separated column per region, sorted alphabetically by region name
puts ['#date', regions].join("\t")
dates.each do |date|
  puts [
    date.strftime("%Y-%m-%d"), regions.map{|c| data[c][date] || 0}
  ].join("\t")
end
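Because the region list here is data.keys.sort, the column order of confirmed-us.dat depends on which counties actually appear in the JHU CSV; the "#date ..." header line written above is the authoritative mapping behind the "using 1:N" indices in confirmed-us.plot. A small helper to list column numbers against any of the .dat headers (hypothetical, not part of the gist):

#!/usr/bin/ruby
# Hypothetical helper: print the 1-based gnuplot column number next to each
# field of a .dat header line, e.g. ruby columns.rb confirmed-us.dat
header = File.open(ARGV.shift, &:gets).chomp.split("\t")
header.each_with_index do |name, i|
  puts "#{i + 1}\t#{name}"
end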