#!/usr/bin/env python
output_csv_file_name = 'covid19_deaths_by_country.csv'
deaths_csv_url = 'https://covid19.who.int/WHO-COVID-19-global-table-data.csv'
pop_xls_url = 'https://population.un.org/wup/Download/Files/WUP2018-F05-Total_Population.xls'
import sqlite3
import pandas
import csv
# Ignore UserWarning messages from pandas library
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
# Define in-memory SQLite3 database
conn = sqlite3.connect(':memory:')
cur_1 = conn.cursor()
cur_2 = conn.cursor()
# Read WHO Covid detahs spreadsheet into db table
df = pandas.read_csv(deaths_csv_url)
df.to_sql(name='deaths', con=conn)
# Read UN popluation data into db table
df = pandas.read_excel(pop_xls_url)
df.to_sql(name='population', con=conn)
# Create CSV with country name, deaths, population and "1 death per NNN people" calculation
with open(output_csv_file_name, 'w', newline='') as csvfile:
csvwriter = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
csvwriter.writerow(['Country', 'Deaths', 'Population', '1 Death For Every NNN People'])
# Get country name, cumulative coronavirus deaths
cur_1.execute("SELECT `Name`,`Deaths - cumulative total` FROM 'deaths'")
for row in cur_1:
# Manually fix mismatched country names
country = row[0]
if country == 'Global': # Skip this row
continue
elif country == 'The United Kingdom':
country = 'United Kingdom'
elif country == 'North Macedonia':
country = 'TFYR Macedonia'
elif 'Mariana Islands' in country:
country = 'Northern Mariana Islands'
elif country == "Democratic People's Republic of Korea":
country == "Dem. People's Republic of Korea"
# Get 2020 population when country names match across spreadsheets
sql = "SELECT `Unnamed: 18` FROM 'population' WHERE instr(`Unnamed: 1`, '{}') > 0".format(country.replace("'", "''"))
pop = cur_2.execute(sql).fetchone()
if pop:
# Calculate the "1 person out of every NNN people" number
if row[1] == 0:
calc = 0
else:
calc = pop[0] * 1000 / row[1]
csvwriter.writerow([country, row[1], pop[0] * 1000, calc])
else:
print("Unable to match country name from WHO Covid deaths table: {} ({} deaths)".format(country, row[1]))
print('\nCSV file is ready: {}'.format(output_csv_file_name))