eurostat

A key to European statistics

Anne Sophie Gill https://www.skemagloballab.io/gillAnneSophie.html (SKEMA Global Lab in AI)https://skemagloballab.io , Marine Leroi https://www.skemagloballab.io/leroiMarine.html (SKEMA Global Lab in AI)https://skemagloballab.io , Thierry Warin https://www.nuance-r.com/principalInvestigator.html (SKEMA Business School (Raleigh, NC))https://www.skemagloballab.io
02-24-2020

Using the SKEMA Quantum Studio (Warin 2019) framework, this tutorial shows you how to use the eurostat package.

How to use the eurostat package


# Load the package
library(eurostat)
library(rvest)

# Download the Eurostat table of contents (the listing of available data)
eurostats <- get_eurostat_toc()

# Search the listing for entries of type "table" matching "waste"
search_eurostat(pattern = "waste", type = "table")

# A tibble: 33 x 8
   title code  type  `last update of… `last table str… `data start`
   <chr> <chr> <chr> <chr>            <chr>            <chr>       
 1 Popu… tgs0… table 31.01.2020       27.02.2020       2002        
 2 Gene… ten0… table 31.01.2020       27.02.2020       2004        
 3 Gene… ten0… table 31.01.2020       27.02.2020       2004        
 4 Wast… ten0… table 30.01.2020       27.02.2020       2004        
 5 Gene… sdg_… table 31.01.2020       27.02.2020       2004        
 6 Land… t202… table 31.01.2020       27.02.2020       2010        
 7 Reco… ten0… table 31.01.2020       27.02.2020       2006        
 8 Recy… ten0… table 31.01.2020       27.02.2020       2006        
 9 Recy… t202… table 05.02.2020       27.02.2020       1995        
10 Recy… t202… table 05.03.2020       05.03.2020       2008        
# … with 23 more rows, and 2 more variables: `data end` <chr>,
#   values <chr>

# Download table "ten00108" with labelled categories and numeric time values
dataframe <- get_eurostat(id = "ten00108", type = "label", time_format = "num")

# Data wrangling

# Keep only the columns needed below
data_good <- dplyr::select(dataframe, geo, time, unit, values)

# Drop the first 1704 rows.
# NOTE(review): these positions are hard-coded against the data as it was
# downloaded; confirm which observations they are meant to remove and
# consider filtering on a column value instead.
data_good <- data_good[-seq_len(1704), ]

# Sum `values` within each `geo`, leaving one row per geo label
data_good <- aggregate(values ~ geo, data = data_good, FUN = sum)

# Shorten the geo labels: everything from the first "(" or "-" onwards is
# replaced by a single space, and the trailing whitespace is then stripped
geo_label <- gsub("\\(.*", " ", data_good$geo)
geo_label <- gsub("\\-.*", " ", geo_label)
data_good$geo <- trimws(geo_label, which = "right", whitespace = "[ \t\r\n]")

# Rename the `geo` column to `NAME_SORT`, the key used later for the join
names(data_good) <- sub("^geo$", "NAME_SORT", names(data_good))

# Drop rows 11 and 12 of the aggregated table.
# NOTE(review): positional as well; verify these are still the intended rows
# whenever the source table is refreshed.
data_good <- data_good[-c(11, 12), ]

# Final data frame used for the map
datafinal <- data_good

Visualize your data


# Load the following libraries
library(dplyr)
library(spdep)
library(maptools)
library(leaflet)
library(maps)
library(rgdal)
library(RColorBrewer)

# Read the shapefile "world.shp" with readOGR()

Europe <- readOGR(dsn = "world.shp")

OGR data source with driver: ESRI Shapefile 
Source: "/home/gilla/mondo/skemalab/blog/_posts/2020-02-24-eurostat/world.shp", layer: "world"
with 241 features
It has 94 fields
Integer64 fields read as strings:  POP_EST NE_ID 

Shapefile source: Natural Earth (https://www.naturalearthdata.com)


# Combine the shapefile attributes with the Eurostat totals using a left join

# Convert the join key to character BEFORE joining, so that both tables use
# the same key type (the shapefile column may have been read as a factor).
Europe@data$NAME_SORT <- as.character(Europe@data$NAME_SORT)

# A duplicated key in `datafinal` would make the left join return more rows
# than there are polygons, silently misaligning attributes and geometries.
stopifnot(anyDuplicated(datafinal$NAME_SORT) == 0)

# Rows of the left table keep their order, so attributes stay aligned with
# the polygons; features without a match get NA in `values`.
Europe@data <- left_join(Europe@data, datafinal, by = "NAME_SORT")

# Create the HTML hover labels: name in bold, value on the next line

labels <- sprintf("<strong>%s</strong><br/>%g", Europe@data$NAME_SORT, Europe@data$values) %>% lapply(htmltools::HTML)

# Define the intervals shown in the map legend (the last one is open-ended)
bins <- c(0, 1e8, 3e8, 6e8, Inf)

# Build the colour function: the "Reds" palette binned by those intervals

colors <- colorBin(palette = "Reds", domain = Europe@data$values, bins = bins)

# Plot the data using leaflet: a choropleth centred on Europe, with a legend
# in the bottom-left corner and a hover label on each polygon

leaflet(Europe) %>%
  setView(lat = 53.0000, lng = 9.0000, zoom = 3) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addLegend(pal = colors, values = Europe@data$values, opacity = 0.7, title = NULL, position = "bottomleft") %>%
  # `~colors(values)` is evaluated against the data of the map object, i.e.
  # the `values` column of `Europe`
  addPolygons(fillColor = ~colors(values),
              weight = 2,
              opacity = 1,
              color = "white",
              dashArray = 1,
              fillOpacity = 0.8,
              # The argument name is spelled out in full; the shorter
              # `highlight =` only worked through partial argument matching.
              highlightOptions = highlightOptions(weight = 2,
                                                  color = "black",
                                                  dashArray = 1,
                                                  fillOpacity = 0.7,
                                                  bringToFront = TRUE),
              label = labels
              )