Snippets

Dénes Türei Epidemics excess deaths in Europe

Created by Dénes Türei last modified
#!/usr/bin/env Rscript

require(readr)
require(tibble)
require(dplyr)
require(tidyr)
require(magrittr)
require(ggplot2)
require(eurostat)
require(COVID19)
require(countrycode)
require(lubridate)
require(colorspace)
require(pals)

# Eurostat data online:
# https://ec.europa.eu/eurostat/databrowser/view/demo_r_mwk_ts/default/table?lang=en

options(width = 200, dplyr.width = 200)

.baseline_years <- c(2016, 2017, 2018, 2019)
.small_countries <- c('Andorra', 'Liechtenstein')


print_colnames <- function(d){

    print(names(d))

    invisible(d)

}


read_deaths <- function(...){

    get_eurostat('demo_r_mwk_ts', ...) %>%
    mutate(
        country = countrycode(geo, 'eurostat', 'country.name.en'),
        year = substr(time, 1, 4),
        week = substr(time, 6, 8)
    ) %>%
    filter(sex == 'T') %>% # T = total
    select(country, year, week, values) %>%
    mutate(
        week = as.numeric(week),
        year = as.numeric(year)
    )

}


process_deaths <- function(){

    manual_updates <- tibble(
        year =           c(2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2021, 2021),
        week =           c(  45,   46,   47,   48,   49,   50,   51,   52,   53,    1,   2),
        deaths_hungary = c(3541, 3941, 3919, 3991, 4054, 3938, 3586, 3575, 3277, 2973, 2810)
    )

    d <- read_deaths()

    d %>%
    filter(year %in% .baseline_years) %>%
    # week 53 we have only in every few years, and 2020 is such a year,
    # while all the years 2016-2019 have 52 weeks;
    # here we create a fake week 53 from the weeks 1 and 52 of all the
    # baseline years
    bind_rows(
        filter(., week %in% c(1, 52)) %>%
        group_by(country) %>%
        mutate(
            values = mean(values, na.rm = TRUE),
            week = 53
        ) %>%
        summarize_all(first) %>%
        ungroup()
    ) %>%
    group_by(country, week) %>%
    mutate(values = mean(values, na.rm = TRUE)) %>%
    summarize_all(first) %>%
    ungroup() %>%
    select(-year) %>%
    left_join(
        d %>%
        filter(year >= 2020),
        by = c('week', 'country'),
        suffix = c('_baseline', '_epidemics')
    ) %>%
    left_join(
        manual_updates,
        by = c('year', 'week')
    ) %>%
    mutate(
        year = ifelse(!is.na(year), year, 2020),
        values_epidemics = ifelse(
            country == 'Hungary' & !is.na(deaths_hungary),
            deaths_hungary,
            values_epidemics
        ),
        diff_pct = values_epidemics / values_baseline * 100
    ) %>%
    select(-deaths_hungary) %>%
    filter(
        week != 99 &
        # small countries come with high fluctuation
        # resulting meaningless data and expanding scale limits
        !(country %in% .small_countries)
    ) %>%
    mutate(
        diff_pct = ifelse(diff_pct > 0, diff_pct, NA)
    )

}


add_covid_deaths <- function(d, ...){

    all_countries <- d %>%
        pull(country) %>%
        unique() %>%
        countrycode('country.name.en', 'iso3c')

    d %>%
    full_join(
        covid19(all_countries, ...) %>%
        mutate(
            country = countrycode(id, 'iso3c', 'country.name.en'),
            new_deaths = deaths - lag(deaths),
            week = isoweek(date),
            year = year(date),
            year = ifelse(week == 53, 2020, year)
        ) %>%
        ungroup() %>%
        group_by(country, week, year) %>%
        mutate(weekly_new_deaths = sum(new_deaths, na.rm = TRUE)) %>%
        summarize_all(first) %>%
        filter(
            week != week(Sys.Date()) |
            year != year(Sys.Date())
        ) %>% # current week is not complete
        select(country, week, year, covid_deaths = weekly_new_deaths),
        by = c('country', 'week', 'year')
    ) %>%
    left_join(
        select(., country, week, values_baseline) %>%
        filter(!is.na(values_baseline)) %>%
        distinct_all(),
        by = c('country', 'week')
    ) %>%
    select(-values_baseline.x) %>%
    rename(values_baseline = values_baseline.y) %>%
    mutate(
        covid_pct = (values_baseline + covid_deaths) / values_baseline * 100,
        week2020 = week + (year - 2020) * 53
    )

}


deaths_heatmap <- function(d){

    cairo_pdf(
        'epidemics_deaths_2020_heatmap.pdf',
        width = 9,
        height = 7,
        family = 'DINPro'
    )

        p <- ggplot(d, aes(fill = diff_pct, x = week, y = country)) +
            geom_tile() +
            scale_fill_continuous_divergingx(
                palette = 'Geyser',
                mid = 100.,
                na.value = '#FFFFFF',
                guide = guide_colorbar(
                    title = '%',
                    title.hjust = 0
                )
            ) +
            xlab('Weeks') +
            ylab('Countries') +
            ggtitle(
                paste0(
                    'Weekly deaths in 2020, ',
                    'percentage relative to baseline (2016-2019)'
                )
            ) +
            theme_bw()

        print(p)

    dev.off()

    invisible(d)

}


deaths_lineplot <- function(d){

    cairo_pdf(
        'epidemics_deaths_2020_lineplot.pdf',
        width = 9,
        height = 9,
        family = 'DINPro'
    )

        pal <- tolochko.redblue(9)

        d__long <- d %>%
            select(
                country,
                week2020,
                extra = diff_pct,
                covid = covid_pct
            ) %>%
            pivot_longer(cols = c('extra', 'covid')) %>%
            mutate(value = value - 100)

        p <- ggplot(d__long, aes(y = value, x = week2020, color = name)) +
            geom_line(lwd = .35) +
            scale_color_manual(
                values = c(
                    extra = pal[3],
                    covid = pal[7]
                ),
                labels = c(
                    extra = 'Total excess',
                    covid = 'Reported COVID'
                ),
                guide = guide_legend(
                    title = ''
                )
            ) +
            facet_wrap(~country, ncol = 4) +
            xlab('Weeks') +
            ylab('Deaths relative to baseline [%]') +
            ggtitle(
                paste0(
                    'Weekly deaths in 2020, ',
                    'percentage relative to baseline (2016-2019)'
                )
            ) +
            theme_bw() +
            theme(
                legend.position = 'bottom'
            )

        print(p)

    dev.off()

    invisible(d)

}


covid_excess_scatter <- function(d){

    d__is.na %<>% filter(
        !is.na(diff_pct) &
        !is.na(covid_pct)
    )


    print(lm(diff_pct~covid_pct, d))

    cairo_pdf(
        'excess_covid_corr.pdf',
        width = 4,
        height = 3,
        family = 'DINPro'
    )

        p <- ggplot(d__is.na, aes(x = diff_pct, y = covid_pct)) +
            geom_point(alpha = .33, shape = 16) +
            geom_smooth(method = 'lm', formula = y~x) +
            ggtitle(paste0(
                'Reported COVID deaths vs. total excess deaths\n',
                'percentage above baseline (2016-2019)'
            )) +
            ylab('Reported COVID [%]') +
            xlab('Total excess [%]') +
            scale_x_continuous(minor_breaks = seq(0, 500, by = 12.5)) +
            coord_fixed(ratio = 1) +
            theme_bw() +
            theme(
                plot.title = element_text(size = 11)
            )

        print(p)

    dev.off()

    invisible(d)

}


excess_by_country <- function(d){



}


covid_excess_deaths_main <- function(){

    process_deaths() %>%
    add_covid_deaths() %>%
    deaths_heatmap() %>%
    deaths_lineplot() %>%
    covid_excess_scatter() %>%
    invisible()

}

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.