作者:李誉辉,四川大学研究生,研究方向为:数据分析与可视化,机器学习,网络爬虫;擅长:R语言
丁香园·丁香医生
数据介绍:
该数据是通过爬虫从丁香园·丁香医生获取的全国2019-nCoV病毒感染病例数据。
devtools::install_github("microly/alimap")
library(alimap) # to get China map at the prefecture city level
library(sf)
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
library(magrittr)
library(purrr)
library(readr)
library(stringr)
library(gganimate)
library(lubridate)
library(Cairo)
library(magick)
library(RColorBrewer)
如果有本地数据,可以自行读取。由于很多市级地名存在变化,而且爬取的数据比较乱,部分地名没有“市”这个字,所以使用地名的前2个汉字作为键来联结两张表,并以地图数据集中的城市名为准。
# Prefecture-level city map from alimap, extended with `c2`: the first two
# characters of each city name, used later as the join key.
Chinamap_cities_sf <- map_prefecture_city()
Chinamap_cities_sf <- mutate(Chinamap_cities_sf, c2 = str_sub(name, 1, 2))
数据每12小时更新1次,更新时间为每天早上9点和晚上9点。
# First snapshot: 2020-01-25 21:00, Shanghai time.
startTime <- ymd_h("2020/1/25 21", tz = "Asia/Shanghai")
nowTime <- with_tz(Sys.time(), tz = "Asia/Shanghai") # only support Shanghai timezone
# Number of complete 12-hour steps between the first snapshot and now.
timeLength <- time_length(interval(startTime, nowTime), "hour") %/% 12
endTime <- startTime + dhours(timeLength * 12)
# One timestamp per 12-hour step; the sixth offset is dropped because that
# snapshot returned a 404 at the time.
mytime <- startTime + dhours(12 * (0:timeLength))[-6]
mymonth <- month(mytime)
myday <- day(mytime)
# Zero-pad the hour so every string has the same length.
myhour <- str_pad(as.character(hour(mytime)),
                  width = 2, side = "left", pad = "0")
# File-name fragment for each snapshot: <date>T<two-digit hour>.
myAPI <- paste(date(mytime), myhour, sep = "T")
通过API接口读取疫情历史数据,API接口由网友提供,爬取自丁香园。
# Read the city-level epidemic CSV for one snapshot.
#
# oneAPI: string spliced into the download URL to pick the snapshot file.
# Returns the data read by read_csv(), reduced to the columns `city` and
# `confirmed_c`.
read_epidemic <- function(oneAPI) {
  snapshot_url <- paste0("http://69.171.70.18:5000/download/city_level_",
                         oneAPI, ".csv")
  snapshot <- read_csv(file = snapshot_url)
  # Columns are renamed by position.
  # NOTE(review): assumes the CSV always has exactly these 14 columns.
  colnames(snapshot) <- c(
    "x1", "unnamed", "city", "confirmed_c", "suspected_c",
    "cured_c", "dead_c", "province", "short_p", "confirmed_p",
    "suspected_p", "cured_p", "dead_p", "comment"
  )
  select(snapshot, city, confirmed_c)
}
# Download every snapshot and stack them into one long table with columns
# `time`, `city` and `confirmed_c`.
epidemic_nest <- tibble(time = mytime, myAPI = myAPI) %>%
  mutate(data = map(myAPI, read_epidemic)) %>%
  select(-myAPI) %>%
  # Name the list-column explicitly: calling unnest() with no columns is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(data)
由于很多市级地名存在变化,而且爬取的数据比较乱,部分地名没有“市”这个字,所以使用地名的前2个汉字作为键来联结两张表,并以地图数据集中的城市名为准。
# Bin edges, bin labels and fill colours for the confirmed-case counts:
# eight bins, eight labels, eight colours (white for the "0" bin).
mybreaks <- c(0, 1, 10, 50, 100, 500, 1000, 5000, 100000)
mylabels <- c(
  "0", "1-9", "10-49", "50-99", "100-499",
  "500-999", "1000-4999", ">=5000"
)
mypal <- c("white", brewer.pal(7, "YlOrRd"))
# Add `conf2` (ordered bin of confirmed_c; intervals are closed on the left)
# and `c2` (first two characters of the city name, the join key).
epidemic_df <- mutate(
  epidemic_nest,
  conf2 = cut(
    confirmed_c,
    breaks = mybreaks,
    labels = mylabels,
    include.lowest = TRUE,
    right = FALSE,
    ordered_result = TRUE
  ),
  c2 = str_sub(city, 1, 2)
)
# Render one choropleth PNG per snapshot time into a scratch directory
# under tempdir(). File names are <date>_<hour>.png.
dir1 <- file.path(tempdir(), "testdir")
dir.create(dir1, showWarnings = FALSE) # may already exist from an earlier run
for (i in seq_along(mytime)) {
  # join this snapshot's epidemic data onto the map by the two-character key
  epidemic_time <- epidemic_df %>% filter(time == mytime[i])
  epidemic_city <- Chinamap_cities_sf %>% left_join(epidemic_time, by = "c2")
  # Map cities without a match have NA in conf2 after the join. conf2 is an
  # ordered factor, so fill with the "0" level (mylabels[1]); the number 0 is
  # not a level and cannot be used as the replacement.
  conf2 <- epidemic_city$conf2
  conf2[is.na(conf2)] <- mylabels[1]
  epidemic_city %<>% select(-c2, -city, -conf2)
  epidemic_city$conf2 <- conf2
  # plot
  gg_epidemic <- ggplot(epidemic_city) +
    geom_sf(aes(fill = conf2)) +
    coord_sf() +
    # limits = mylabels lists every bin in the scale for every frame
    scale_fill_manual(values = mypal, limits = mylabels, labels = mylabels) +
    guides(fill = guide_legend(title = "确诊人数", reverse = TRUE)) +
    labs(
      title = "2019-ncov疫情数据可视化",
      subtitle = mytime[i],
      caption = "数据来源:丁香园·丁香医生"
    ) +
    theme(
      # titles
      plot.title = element_text(face = "bold", hjust = 0.5,
                                color = "black"),
      plot.subtitle = element_text(face = "bold", hjust = 0.5, size = 20,
                                   color = "red"),
      plot.caption = element_text(face = "bold", hjust = 1,
                                  color = "blue"),
      # legend
      legend.title = element_text(face = "bold",
                                  color = "black"),
      legend.text = element_text(face = "bold",
                                 color = "black"),
      legend.background = element_rect(colour = "black"),
      legend.key = element_rect(fill = NA), # legend keys have no background
      legend.position = c(0.85, 0.2),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      # plot panel
      panel.background = element_blank(),
      panel.border = element_rect(color = "black", linetype = "solid", size = 1, fill = NA)
    )
  # save picture
  ggsave(
    filename = paste0(date(mytime[i]), "_", hour(mytime[i]), ".png"),
    plot = gg_epidemic, path = dir1,
    width = 20, height = 20, units = "cm"
  )
}
#
# Read the saved PNG frames in time order, combine them into an animation at
# 5 frames per second, and save it as a GIF under path_pre.
path_pre <- "./"
animate_epidemic <- image_animate(
  image = image_read(
    path = paste0(dir1, "/", date(mytime), "_", hour(mytime), ".png")
  ),
  fps = 5
)
anim_save(filename = "疫情地图可视化动态图.gif",
          animation = animate_epidemic, path = path_pre)
# dir1 is a directory: unlink() only deletes directories when
# recursive = TRUE, otherwise the frames are left behind in tempdir().
unlink(dir1, recursive = TRUE)
ps:具体代码可见:https://github.com/Easy-Shu/EasyShu-WeChat
参考来源: