Plot data from CSSEGISandData/COVID-19: Ruby scripts that convert the JHU CSSE time series (plus Tokyo, Hokkaido, and Osaka open data) into tab-separated .dat files, and gnuplot scripts that plot them.
# ignore patterns (a .gitignore): generated plots and data, downloaded source
# files, and the local bundler install
*.png
*.dat
*.csv
*.xlsx
/.bundle
/vendor/bundle
#
# confirmed-global.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
load 'timeseries.plot'
set title "Time series from https://github.com/CSSEGISandData/COVID-19\n\n"
set ylabel "Cumulative confirmed in country/region"
plot \
'confirmed-global.dat' using 1:5 title 'US',\
'confirmed-global.dat' using 1:12 title 'Spain',\
'confirmed-global.dat' using 1:3 title 'Italy',\
'confirmed-global.dat' using 1:13 title 'Germany',\
'confirmed-global.dat' using 1:2 title 'China (exc. HK)',\
'confirmed-global.dat' using 1:10 title 'UK',\
'confirmed-global.dat' using 1:6 title 'Korea',\
'confirmed-global.dat' using 1:11 title 'Philippines',\
'confirmed-global.dat' using 1:4 title 'Japan',\
'confirmed-global.dat' using 1:9 title 'Hong Kong',\
'confirmed-global.dat' using 1:8 title 'Taiwan'
#
# confirmed-us.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
load 'timeseries.plot'
set title "Time series from https://github.com/CSSEGISandData/COVID-19\n*https://stopcovid19.metro.tokyo.lg.jp\n**http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm"
set ylabel "Cumulative confirmed in state/prefecture"
plot \
'confirmed-us.dat' using 1:10 title 'New York',\
'confirmed-us.dat' using 1:11 title 'New York, NY',\
'confirmed-us.dat' using 1:2 title 'California',\
'confirmed-us.dat' using 1:12 title 'Washington',\
'Tokyo.dat' using 1:2 title '*Tokyo',\
'confirmed-us.dat' using 1:3 title 'Hawaii',\
'confirmed-us.dat' using 1:5 title 'Honolulu, HI',\
'Hokkaido.dat' using 1:2 title '**Hokkaido',\
'confirmed-us.dat' using 1:7 title 'Maui, HI',\
'confirmed-us.dat' using 1:4 title 'Hawaii, HI',\
'confirmed-us.dat' using 1:6 title 'Kauai, HI'
# Gemfile: roo reads the Osaka spreadsheet, nokogiri parses the Hokkaido HTML
source 'https://rubygems.org'
gem "roo", "~> 2.8.0"
gem "nokogiri"
GEM
  remote: https://rubygems.org/
  specs:
    mini_portile2 (2.4.0)
    nokogiri (1.10.9)
      mini_portile2 (~> 2.4.0)
    roo (2.8.3)
      nokogiri (~> 1)
      rubyzip (>= 1.3.0, < 3.0.0)
    rubyzip (2.3.0)

PLATFORMS
  ruby

DEPENDENCIES
  nokogiri
  roo (~> 2.8.0)

BUNDLED WITH
   2.1.2
#!/usr/bin/ruby
#
# usage: ruby global2dat.rb time_series_covid19_confirmed_global.csv > confirmed.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
countries = [
  'China',
  'Italy',
  'Japan',
  'US',
  'Korea, South',
  'Iran',
  'Taiwan*',
  'Hong Kong',
  'United Kingdom',
  'Philippines',
  'Spain',
  'Germany',
]

# Column headers of the form m/d/yy are dates; anything else returns nil
def parsedate(str)
  if str =~ %r[\d+/\d+/\d+]
    Time.strptime(str + ' +0000', '%m/%d/%y %z')
  else
    nil
  end
end

# Sum the cumulative counts per country and date, matching rows whose
# Province/State or Country/Region appears in the countries list
def parsedata(raw, countries)
  raw = CSV.parse(raw, headers: true)
  out = Hash.new{|h, country| h[country] = Hash.new{|j, date| j[date] = 0}}
  raw.each do |data|
    country = nil
    %w(Province/State Country/Region).each do |header|
      if countries.include?(data[header])
        country = data[header]
        break
      end
    end
    next unless country
    data.each do |h, count|
      next unless count
      date = parsedate(h)
      next unless date
      begin
        i = Integer(count)
        out[country][date] += i if i > 0
      rescue TypeError => error
        raise error.exception(data.inspect)
      end
    end
  end
  return out
end

data = parsedata(ARGF.read, countries)
dates = data.values.map{|x| x.keys}.flatten.uniq.sort
$stderr.puts dates.first.strftime("%Y-%m-%d") + " - " + dates.last.strftime("%Y-%m-%d")

# One tab-separated column per country, in the order of the countries array
puts ['#date', countries].join("\t")
dates.each do |date|
  puts [
    date.strftime("%Y-%m-%d"), countries.map{|c| data[c][date] || 0}
  ].join("\t")
end
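For reference, the "using 1:N" column numbers in confirmed-global.plot line up with the countries array above, with column 1 being the date. A minimal sketch that prints that mapping from the same array (illustrative only, not part of the gist):

#!/usr/bin/ruby
# Illustrative only: print the gnuplot column number for each country column
# of confirmed-global.dat, in the same order global2dat.rb writes them.
countries = ['China', 'Italy', 'Japan', 'US', 'Korea, South', 'Iran',
             'Taiwan*', 'Hong Kong', 'United Kingdom', 'Philippines',
             'Spain', 'Germany']
countries.each_with_index do |country, i|
  puts "using 1:#{i + 2}\t#{country}"   # column 1 is the date
end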
#!/usr/bin/ruby
#
# usage: ruby hokkaido2dat.rb http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm > Hokkaido.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'nokogiri'
require 'open-uri'
require 'time'
charset = nil
html = URI.open(ARGV.shift) do |page|
  charset = page.charset
  page.read
end

# Tally newly announced cases per announcement date
counts = Hash.new{0}
col = nil
Nokogiri::HTML.parse(html, nil, charset).search('table tbody tr').each do |tr|
  unless col
    # The first row scanned locates the 公表日 (announcement date) column
    col = tr.search('td').find_index{|td| td.content.gsub("\u00A0", "").strip == '公表日'}
  else
    m, d = tr.search('td')[col].content.scan(/\d+/)
    date = Time.new(2020, m, d, 0, 0, 0, 'UTC')
    counts[date] += 1
  end
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
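hokkaido2dat.rb above, and osaka2dat.rb and tokyo2dat.rb below, share the same shape: tally newly reported cases per date into a Hash, then emit a running total per date. A minimal sketch of that pattern on made-up numbers (illustrative only, not part of the gist):

#!/usr/bin/ruby
# Illustrative only: per-day tallies turned into a cumulative series, in the
# same "#date<TAB>count" format the *2dat.rb scripts write.
counts = Hash.new{0}    # date string => newly reported cases that day
[['2020-02-01', 1], ['2020-02-01', 1], ['2020-02-03', 2]].each do |day, n|
  counts[day] += n
end
puts "#date\tcount"
total = 0
counts.keys.sort.each do |day|
  total += counts[day]
  puts [day, total].join("\t")
end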
#!/usr/bin/ruby
#
# usage: ruby osaka2dat.rb youseisyajyouhou.xlsx > Osaka.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'roo'
require 'time'
# Tally newly announced cases per 報道提供日 (press-release date); the
# spreadsheet stores it as a serial day count, converted here to a Time by
# adding that many days to the 1900-01-01 base
counts = Hash.new{0}
Roo::Spreadsheet.open(ARGV.shift).sheet('Sheet1').parse(header_search: [/\A番号\z/]).each do |record|
  next unless record['報道提供日']
  date = Time.new(1900, 1, 1, 0, 0, 0, 'UTC') + record['報道提供日']*24*3600
  counts[date] += 1
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
#
# timeseries.plot: shared settings loaded by confirmed-global.plot and
# confirmed-us.plot
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
set grid
set xdata time
set timefmt "%Y-%m-%d"
set style data lines
set log y
set yrange [0.8:500000]
set xrange ["2020-02-06":*]
set xlabel "Date"
set key top left
#!/usr/bin/ruby
#
# usage: ruby tokyo2dat.rb 130001_tokyo_covid19_patients.csv > Tokyo.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
# Tally newly announced cases per 公表_年月日 (announcement date)
counts = Hash.new{0}
CSV.parse(ARGF.read, headers: true).each do |record|
  next unless record['公表_年月日']
  begin
    date = Time.strptime(record['公表_年月日'] + " UTC", "%Y-%m-%d %z")
  rescue NoMethodError => err
    raise err.exception(record.inspect)
  end
  counts[date] += 1
end

# Write the cumulative count per date
puts "#date\tcount"
count = 0
counts.keys.sort.each do |date|
  count += counts[date]
  puts [date.strftime("%Y-%m-%d"), count].join("\t")
end
#!/bin/sh
#
# Refresh the JHU working copy, regenerate the .dat files, and render the
# PNG plots
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
set -e
# Location of a working copy for https://github.com/CSSEGISandData/COVID-19.git
jhuwc='../../../github.com/CSSEGISandData/COVID-19'
echo \# Refreshing working copy for JHU CSSE data
cd $jhuwc && git pull; cd - > /dev/null
# JHU time series
echo \# Parsing time series
ruby global2dat.rb $jhuwc/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv > confirmed-global.dat
ruby us2dat.rb $jhuwc/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv > confirmed-us.dat
# local data
echo \# Fetching and formatting data for Tokyo
curl -O https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_patients.csv
ruby tokyo2dat.rb 130001_tokyo_covid19_patients.csv > Tokyo.dat
echo \# Fetching and formatting data for Hokkaido
if test ! -d .bundle; then bundle install --path=vendor/bundle; fi
bundle exec ruby hokkaido2dat.rb http://www.pref.hokkaido.lg.jp/hf/kth/kak/hasseijoukyou.htm > Hokkaido.dat
# plot
echo \# Plotting data
for a in global us; do
gnuplot <<_END
set term png small
set output "confirmed-$a.png"
load "confirmed-$a.plot"
unset output
_END
done
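The gnuplot heredoc in the loop above is what actually renders the PNGs. For reference, an equivalent one-off render driven from Ruby would look like this (a sketch only, not part of the gist; it assumes gnuplot is on PATH and confirmed-global.dat already exists):

#!/usr/bin/ruby
# Sketch only: render one plot the same way the update script's heredoc does.
IO.popen('gnuplot', 'w') do |gnuplot|
  gnuplot.puts 'set term png small'
  gnuplot.puts 'set output "confirmed-global.png"'
  gnuplot.puts 'load "confirmed-global.plot"'
  gnuplot.puts 'unset output'
end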
#!/usr/bin/ruby
#
# usage: ruby us2dat.rb time_series_covid19_confirmed_US.csv > confirmed-us.dat
#
# Copyright 2020 by zunda <zundan at gmail.com>
#
# Permission is granted for use, copying, modification, distribution,
# and distribution of modified versions of this work as long as the
# above copyright notice is included.
#
require 'csv'
require 'time'
regions = {
  states: [
    'Hawaii',
    'Washington',
    'California',
    'New York',
  ],
  # states whose individual counties are also tracked
  counties_in_states: [
    'Hawaii',
  ],
  counties: [
    # County, State
    ['New York', 'New York'],
  ],
}

# Column headers of the form m/d/yy are dates; anything else returns nil
def parsedate(str)
  if str =~ %r[\d+/\d+/\d+]
    Time.strptime(str + ' +0000', '%m/%d/%y %z')
  else
    nil
  end
end

# Add the row's counts into hash[region], keyed by date
def count_up(hash, row, region)
  row.each do |h, count|
    next unless count
    date = parsedate(h)
    next unless date
    begin
      i = Integer(count)
      hash[region][date] += i if i > 0
    rescue TypeError => error
      raise error.exception(row.inspect)
    end
  end
end

# Sum the cumulative counts per state, per selected county, and per county of
# the states listed under counties_in_states
def parsedata(raw, regions)
  raw = CSV.parse(raw, headers: true)
  out = Hash.new{|h, region| h[region] = Hash.new{|j, date| j[date] = 0}}
  raw.each do |data|
    if regions[:states].include?(data['Province_State'])
      count_up(out, data, data['Province_State'])
    end
    if regions[:counties_in_states].include?(data['Province_State'])
      count_up(out, data, "#{data['Province_State']}/#{data['Admin2']}")
    elsif regions[:counties].find{|c| c[0] == data['Admin2'] && c[1] == data['Province_State']}
      count_up(out, data, "#{data['Province_State']}/#{data['Admin2']}")
    end
  end
  return out
end

data = parsedata(ARGF.read, regions)
regions = data.keys.sort
dates = data.values.map{|x| x.keys}.flatten.uniq.sort
$stderr.puts dates.first.strftime("%Y-%m-%d") + " - " + dates.last.strftime("%Y-%m-%d")

# One tab-separated column per region, sorted alphabetically by region name
puts ['#date', regions].join("\t")
dates.each do |date|
  puts [
    date.strftime("%Y-%m-%d"), regions.map{|c| data[c][date] || 0}
  ].join("\t")
end
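Because the region list here is data.keys.sort, the column order of confirmed-us.dat depends on which counties actually appear in the JHU CSV; the "#date ..." header line written above is the authoritative mapping behind the "using 1:N" indices in confirmed-us.plot. A small helper to list column numbers against any of the .dat headers (hypothetical, not part of the gist):

#!/usr/bin/ruby
# Hypothetical helper: print the 1-based gnuplot column number next to each
# field of a .dat header line, e.g. ruby columns.rb confirmed-us.dat
header = File.open(ARGV.shift, &:gets).chomp.split("\t")
header.each_with_index do |name, i|
  puts "#{i + 1}\t#{name}"
end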