山脊图可以同时显示几个组的数值分布情况,并且可以在同一水平下,直观地对比多个分布的变化。
# Daily mean temperature in Seattle, grouped by calendar month, shown as one distribution per month
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Give every axes a fully transparent background (RGBA alpha 0) so stacked
# facets can overlap without hiding one another.
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Load the daily Seattle weather records.
temp = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv'
)

# Lookup table: month number (1-12) -> lowercase English month name.
month_dict = dict(enumerate(
    [
        'january', 'february', 'march', 'april', 'may', 'june',
        'july', 'august', 'september', 'october', 'november', 'december',
    ],
    start=1,
))

# Tag every record with the name of the month its 'Date' falls in.
temp['month'] = pd.to_datetime(temp['Date']).dt.month.map(month_dict)

# Average of the daily mean temperature per month, broadcast back onto each
# row so it can later drive the per-month colour.
month_mean_serie = temp.groupby('month')['Mean_TemperatureC'].mean()
temp['mean_month'] = temp['month'].map(month_mean_serie)

# Preview the first rows (notebook cell output).
temp.head()
| | Date | Max_TemperatureC | Mean_TemperatureC | Min_TemperatureC | month | mean_month |
---|---|---|---|---|---|---|
0 | 1/1/1948 | 10 | 8.0 | 7.0 | january | 4.493982 |
1 | 1/2/1948 | 6 | 4.0 | 3.0 | january | 4.493982 |
2 | 1/3/1948 | 7 | 4.0 | 2.0 | january | 4.493982 |
3 | 1/4/1948 | 7 | 4.0 | 2.0 | january | 4.493982 |
4 | 1/5/1948 | 7 | 3.0 | 0.0 | january | 4.493982 |
# Draw the ridgeline plot with a seaborn FacetGrid: one density facet per month.

# Palette of 12 colours running from cool to warm.
pal = sns.color_palette(palette='coolwarm', n_colors=12)

# One short, very wide facet row per month. Hue is keyed on the monthly mean
# temperature, so each month is drawn in its own palette colour.
g = sns.FacetGrid(temp, row='month', hue='mean_month', aspect=15, height=0.75, palette=pal)

# Filled density of Mean_TemperatureC for each month.
g.map(sns.kdeplot, 'Mean_TemperatureC',
      bw_adjust=1, clip_on=False,
      fill=True, alpha=1, linewidth=1.5)

# White outline over each density so overlapping ridges stay distinguishable.
g.map(sns.kdeplot, 'Mean_TemperatureC',
      bw_adjust=1, clip_on=False,
      color="w", lw=2)

# Horizontal baseline under each density.
g.map(plt.axhline, y=0,
      lw=2, clip_on=False)

# Label every facet with its month, using the colour of the line added last
# to that facet (the baseline drawn just above).
# The name is read from g.row_names -- the row value each facet actually
# shows -- instead of being derived from the facet's position, so the label
# cannot drift from the data if the facet order is not the calendar order.
for month_name, ax in zip(g.row_names, g.axes.flat):
    ax.text(-15, 0.02, month_name,
            fontweight='bold', fontsize=15,
            color=ax.lines[-1].get_color())

# Negative spacing makes consecutive facets overlap vertically.
g.fig.subplots_adjust(hspace=-0.3)

# Strip facet titles, y labels, y ticks and the bottom/left spines.
g.set_titles("")
g.set_ylabels("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

# Style the x axis on the bottom facet, referenced explicitly rather than
# through a leftover loop variable or pyplot's implicit current axes.
bottom_ax = g.axes.flat[-1]
plt.setp(bottom_ax.get_xticklabels(), fontsize=15, fontweight='bold')
bottom_ax.set_xlabel('Temperature in degree Celsius', fontweight='bold', fontsize=15)
g.fig.suptitle('Daily average temperature in Seattle per month',
               ha='right',
               fontsize=20,
               fontweight=20)
plt.show()
# 1950~2010年西雅图的平均气温,并展示其分布
import plotly.graph_objects as go
import numpy as np
import pandas as pd
# NOTE: no seaborn theme is set in this section. The figure below is rendered
# by plotly, and this section's imports do not include seaborn.

# Load the daily Seattle weather records and tag each one with its year.
temp = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv')
temp['year'] = pd.to_datetime(temp['Date']).dt.year

# Keep one sample year per decade.
year_list = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
temp = temp[temp['year'].isin(year_list)]

# For every (year, mean temperature) pair, count how many daily records have
# that value; the result has the columns year, Mean_TemperatureC and count.
temp = (
    temp.groupby(['year', 'Mean_TemperatureC'])
    .size()
    .reset_index(name='count')
)

# Preview the first rows (notebook cell output).
temp.head()
| | year | Mean_TemperatureC | count |
---|---|---|---|
0 | 1950 | -12.0 | 1 |
1 | 1950 | -11.0 | 1 |
2 | 1950 | -10.0 | 2 |
3 | 1950 | -9.0 | 6 |
4 | 1950 | -8.0 | 4 |
# Split the frame into per-year arrays stored in a dict, keyed 'x_<year>'
# (temperatures) and 'y_<year>' (scaled counts), for easy iteration later.
array_dict = {}
for year in year_list:
    # Select this year's rows once and reuse the selection for both columns.
    year_rows = temp[temp['year'] == year]
    counts = year_rows['count']

    # Temperatures observed in this year.
    array_dict[f'x_{year}'] = year_rows['Mean_TemperatureC']

    # Min-max scale the counts to [0, 1]. When every count is identical the
    # range is zero and the division would produce NaN, so use zeros instead.
    lowest = counts.min()
    span = counts.max() - lowest
    if span == 0:
        array_dict[f'y_{year}'] = counts * 0.0
    else:
        array_dict[f'y_{year}'] = (counts - lowest) / span
fig = go.Figure()
n_years = len(year_list)
for position, year in enumerate(year_list):
    # Vertical slot of this year's ridge; the first year gets the highest slot.
    baseline = n_years - position

    # White horizontal baseline from x=-20 to x=40 at this year's slot.
    baseline_trace = go.Scatter(
        x=[-20, 40],
        y=np.full(2, baseline),
        mode='lines',
        line={'color': 'white'},
    )

    # The year's scaled counts, lifted onto its slot (plus 0.4) so the ridges
    # stack; the y values therefore no longer carry numeric meaning.
    # 'tonexty' fills the area between this trace and the baseline added
    # immediately before it.
    ridge_trace = go.Scatter(
        x=array_dict[f'x_{year}'],
        y=array_dict[f'y_{year}'] + baseline + 0.4,
        fill='tonexty',
        name=f'{year}',
    )

    fig.add_trace(baseline_trace)
    fig.add_trace(ridge_trace)

    # Year label at the left end of the baseline, nudged up by 10 px.
    fig.add_annotation(
        x=-20,
        y=baseline,
        text=f'{year}',
        showarrow=False,
        yshift=10,
    )

# Title and axis cosmetics; y tick labels are hidden because the stacked
# y positions are arbitrary.
fig.update_layout(
    title='Average temperature from 1950 until 2010 in Seattle',
    showlegend=False,
    xaxis={'title': 'Temperature in degree Celsius'},
    yaxis={'showticklabels': False},
)
fig.show()
以上介绍了两种绘制山脊图的方式:一种结合 seaborn 的 `FacetGrid` 和 `kdeplot` 绘制;另一种则利用 plotly 的 `go.Scatter`,并设置参数 `fill='tonexty'` 以实现区域图的效果。