# Snippets
#
# Dénes Türei: Spain property prices map visualisation
#
# Created by Dénes Türei
#!/usr/bin/env Rscript

#
# Denes Turei (turei.denes@gmail.com) 2023
#
# Visualise real estate price statistics from idealista.com
# on maps of municipalities.
#

library(mapSpain)
library(tidyterra)

library(rvest)
library(httr)

library(ggplot2)

library(magrittr)
library(rlang)

library(readr)
library(stringr)
library(stringi)

library(dplyr)
library(purrr)


# Request headers sent along with every idealista.com download; the values
# imitate a desktop Firefox 109 session.
HTTP_HEADERS <- c(
    "Host" = "www.idealista.com",
    "User-Agent" = paste(
        "Mozilla/5.0 (X11; Linux x86_64; rv:109.0)",
        "Gecko/20100101 Firefox/109.0"
    ),
    "Accept" = paste(
        c(
            "text/html",
            "application/xhtml+xml",
            "application/xml;q=0.9",
            "image/avif",
            "image/webp",
            "*/*;q=0.8"
        ),
        collapse = ","
    ),
    "Accept-Language" = "en-GB,en;q=0.5",
    "Accept-Encoding" = "gzip, deflate, br",
    "DNT" = "1",
    "Connection" = "keep-alive",
    "Upgrade-Insecure-Requests" = "1",
    "Sec-Fetch-Dest" = "document",
    "Sec-Fetch-Mode" = "navigate",
    "Sec-Fetch-Site" = "none",
    "Sec-Fetch-User" = "?1",
    "Pragma" = "no-cache",
    "Cache-Control" = "no-cache"
)

# Number format used when parsing the scraped figures: comma as the decimal
# mark and dot as the grouping (thousands) mark.
LOCALE_ES <- locale(decimal_mark = ',', grouping_mark = '.')

# Regions and price-table variables that `main` iterates over.
REGIONS <- c("Asturias", "Galicia", "Cantabria")
VARIABLES <- c("price", "var_max", "var_annual")


# Unit of measurement for each variable, looked up by variable name.
UNITS <- as.list(c(
    price = "€/sqm",
    var_annual = "%",
    var_max = "%",
    var_monthly = "%",
    var_trimester = "%",
    price_max = "€/sqm"
))

# Plot title stem for each variable, looked up by variable name.
TITLES <- as.list(c(
    price = "Real estate prices",
    var_annual = "Change of prices, last year",
    var_max = "Change of prices compared to historical maximum",
    var_monthly = "Change of prices, last month",
    var_trimester = "Change of prices, last trimester",
    price_max = "Historical maximum of prices"
))

# Colour bar label, looked up by the leading alphabetic run of the variable
# name (`price*` or `var*`).
LABELS <- as.list(c(
    price = "Price",
    var = "Change"
))


# Download the price table of one province from the idealista.com price
# reports.
#
# Args:
#   region: Name of the region; converted to a URL slug with `url_proc`.
#   province: Name of the province; converted to a URL slug, see below.
#   process: If TRUE, the raw table is passed through `process_prices` and
#       the `region` and `province` URL slugs are added as columns. If
#       FALSE, the table is returned as parsed from the HTML.
#
# Returns:
#   A data frame: the first HTML table found on the report page.
#
# Raises an error if the HTTP request fails or the page contains no table.
price_data <- function(
    region = 'asturias',
    province = 'asturias',
    process = TRUE
) {

    region %<>% url_proc

    province %<>%
        url_proc %>%
        # A slug ending in ",-a" (e.g. from a name written as "Coruña, A")
        # is turned around into the "a-<name>" form.
        {`if`(
            endsWith(., ',-a'),
            str_extract(., '[:alpha:]+') %>% sprintf('a-%s', .),
            .
        )} %>%
        # Provinces named differently from their region carry a
        # "-provincia" suffix in the URL.
        {`if`(. != region, sprintf('%s-provincia', .), .)}

    url <-
        paste0(
            "https://www.idealista.com/sala-de-prensa/",
            "informes-precio-vivienda/venta/%s/%s/"
        ) %>%
        sprintf(region, province)

    cat(sprintf('\t:: Downloading %s\n', url))

    # The headers are attached to this request only. Installing them with
    # `set_config` would send the idealista-specific `Host` header with
    # every later httr request of the session, whatever its target.
    response <- GET(url, add_headers(.headers = HTTP_HEADERS))

    # Fail here with the HTTP status instead of an obscure subscript error
    # further down when the request was refused.
    stop_for_status(response, task = sprintf('download %s', url))

    tables <-
        response %>%
        content(as = 'text') %>%
        read_html %>%
        html_nodes('table')

    if (length(tables) == 0L) {
        stop(sprintf('No price table found at %s', url), call. = FALSE)
    }

    prices <-
        tables %>%
        magrittr::extract(1L) %>%
        html_table(fill = TRUE) %>%
        magrittr::extract2(1L)

    if (!process) {
        return(prices)
    }

    prices %>%
    process_prices %>%
    mutate(region = region, province = province)

}


# Convert a place name to the form used in URLs: lower case, every space
# replaced by a hyphen, accented letters transliterated to ASCII.
#
# Args:
#   name: Character vector of names.
#
# Returns:
#   Character vector of the same length.
url_proc <- function(name) {

    name %>%
    str_to_lower %>%
    # Replace all spaces, not only the first one, so that names of more
    # than two words ("Santa Cruz de Tenerife") are fully hyphenated.
    str_replace_all(' ', '-') %>%
    stri_trans_general("Latin-ASCII")

}


# Clean up a raw price table: give the columns short English names and
# convert every column except `name` to numeric.
#
# Args:
#   prices: Data frame with the Spanish column headers of the report table.
#
# Returns:
#   Data frame with columns `name`, `price`, `var_monthly`,
#   `var_trimester`, `var_annual`, `price_max` and `var_max`.
process_prices <- function(prices) {

    prices %>%
    rename(
        name = `Localización`,
        # The header of the price column includes the month of the report
        # (e.g. "Precio m2 ene 2023"), hence it is selected by its prefix.
        price = starts_with('Precio m2'),
        var_monthly = `Variación mensual`,
        var_trimester = `Variación trimestral`,
        var_annual = `Variación anual`,
        price_max = `Máximo histórico`,
        var_max = `Variación máximo`
    ) %>%
    mutate(
        across(
            c(-name, -price, -price_max),
            # Attach the minus sign to the digits ("- 1,2" becomes "-1,2")
            # before the numbers are parsed.
            ~str_replace(.x, '- ', '-')
        )
    ) %>%
    mutate(
        across(
            -name,
            # "n.d." cells become NA.
            ~parse_number(.x, locale = LOCALE_ES, na = 'n.d.')
        )
    )

}

# Municipality map of a region with the price statistics joined to it.
#
# Args:
#   region: Name of the region, passed to `esp_get_munic_siane` and to
#       `price_data`.
#
# Returns:
#   The municipality map with the columns of the price tables added;
#   municipalities are matched by the `name` column.
region_map <- function(region = 'Asturias') {

    the_map <- esp_get_munic_siane(region = region)

    provinces <-
        the_map %>%
        pull(ine.prov.name) %>%
        unique %>%
        map_chr(url_proc)

    # One price table per province, stacked into a single data frame.
    # The list is piped in as the only argument: with `bind_rows(!!!.)`
    # the pipe inserts the list as first argument in addition to the
    # spliced copy, and each row ends up in the result twice.
    prices <-
        provinces %>%
        map(~price_data(region = region, province = .x)) %>%
        bind_rows

    left_join(the_map, prices, by = 'name')

}


# Draw one variable of the price statistics on the municipality map of a
# region, over a background of map tiles.
#
# Args:
#   region: Name of the region; used in the title and, if `the_map` is not
#       provided, to build the map with `region_map`.
#   var: Name of the column to map to the fill colour.
#   the_map: Map with price data as returned by `region_map`; built on
#       demand if NULL.
#   label: Title of the colour bar; by default looked up in `LABELS`,
#       falling back to the title-cased variable name. The unit is
#       appended to it.
#   unit: Unit of the variable; by default looked up in `UNITS`.
#   title: Stem of the plot title; by default looked up in `TITLES`. The
#       region, the unit and `post_title` are appended to it.
#   post_title: Last element of the plot title.
#
# Returns:
#   The ggplot object, invisibly; the plot is also printed to the active
#   graphics device.
map_plot <- function(
    region = 'Asturias',
    var = 'price',
    the_map = NULL,
    label = NULL,
    unit = NULL,
    title = NULL,
    post_title = 'Jan 2023'
) {

    if (is.null(the_map)) {
        the_map <- region_map(region)
    }

    bgtiles <- esp_getTiles(
        the_map,
        "IGNBase.Todo",
        bbox_expand = .1,
        zoommin = 1L
    )

    if (is.null(unit)) {
        unit <- UNITS[[var]]
    }

    # Labels are keyed by the leading alphabetic run of the variable name,
    # e.g. "var" for "var_annual".
    if (is.null(label)) {
        label <- LABELS[[str_extract(var, '[:alpha:]+')]]
    }
    if (is.null(label)) {
        label <- str_to_title(var)
    }
    label <- paste(c(label, unit), collapse = ', ')

    if (is.null(title)) {
        title <- TITLES[[var]]
    }
    title <- paste(
        c(sprintf('%s in %s', title, region), unit, post_title),
        collapse = ', '
    )

    fill_scale <- scale_fill_viridis_c(
        guide = guide_colorbar(title = label),
        alpha = .6,
        na.value = '#88888888'
    )

    # Layers from bottom to top: background tiles, then the municipalities.
    p <- ggplot(the_map)
    p <- p + geom_spatraster_rgb(data = bgtiles, alpha = 1.)
    p <- p + geom_sf(aes(fill = !!sym(var)), color = "grey70")
    p <- p + labs(title = title)
    p <- p + fill_scale
    p <- p + theme_bw()

    print(p)
    invisible(p)

}


# Plot every variable in `VARIABLES` for every region in `REGIONS`, one map
# per page, into `prices.pdf` in the working directory.
#
# Returns NULL, invisibly.
main <- function() {

    cairo_pdf('prices.pdf', onefile = TRUE)
    # Close the device also when one of the plots fails, otherwise the
    # file stays open and incomplete.
    on.exit(dev.off(), add = TRUE)

    for (region in REGIONS) {

        # Build the map and download the prices once per region; all
        # variables of the region are plotted from the same data.
        the_map <- region_map(region)

        for (var in VARIABLES) {
            map_plot(region = region, var = var, the_map = the_map)
        }

    }

    invisible(NULL)

}

# Comments (0)
#
# HTTPS SSH
#
# You can clone a snippet to your computer for local editing. Learn more.