
你的股票 Beta 是 1.5,但这个数字靠谱吗?
残差有没有自相关?异方差会不会毁了你的结论?
Beta 不是常数,残差不是噪音——它们是理解模型边界的钥匙。
资本资产定价模型:

$$E[R_i] = R_f + \beta_i \,\bigl(E[R_m] - R_f\bigr)$$
Beta 的投资含义:
| Beta | 含义 |
|---|---|
| < 1 | 防御性,波动低于市场 |
| = 1 | 与市场同步 |
| > 1 | 进攻性,波动高于市场 |
最小二乘法:找到让残差平方和最小的 β。
/// Fits `y = alpha + beta * x` by ordinary least squares (one regressor plus intercept).
///
/// Observations are paired by position. If the two slices differ in length, only
/// their common prefix is used, so that the means, the sums of squares and the
/// degrees of freedom all describe the same sample.
///
/// # Returns
///
/// A `RegressionResult` with the coefficient estimates, R², the classical
/// (homoskedastic) standard errors, and the fitted value and residual of every
/// observation that was used.
///
/// Degenerate inputs are not rejected; they show up as non-finite numbers:
/// the standard errors divide by `n - 2`, `beta` divides by the variation of `x`,
/// and `r_squared` divides by the variation of `y`.
fn ols_regression(y: &[f64], x: &[f64]) -> RegressionResult {
    // Restrict both series to the observations that actually form pairs.
    let len = y.len().min(x.len());
    let (y, x) = (&y[..len], &x[..len]);
    let n = len as f64;

    let y_mean = y.iter().sum::<f64>() / n;
    let x_mean = x.iter().sum::<f64>() / n;

    // Centered cross-product and sum of squares. The 1 / (n - 1) factors of the
    // sample covariance and variance cancel in the slope, so they are omitted.
    let sxy: f64 = y
        .iter()
        .zip(x)
        .map(|(yi, xi)| (yi - y_mean) * (xi - x_mean))
        .sum();
    let sxx: f64 = x.iter().map(|xi| (xi - x_mean).powi(2)).sum();

    // Coefficients.
    let beta = sxy / sxx;
    let alpha = y_mean - beta * x_mean;

    // Fitted values and residuals.
    let predicted: Vec<f64> = x.iter().map(|xi| alpha + beta * xi).collect();
    let residuals: Vec<f64> = y
        .iter()
        .zip(&predicted)
        .map(|(yi, fitted)| yi - fitted)
        .collect();

    // R² = 1 - SSR / SST.
    let ss_total: f64 = y.iter().map(|yi| (yi - y_mean).powi(2)).sum();
    let ss_residual: f64 = residuals.iter().map(|r| r.powi(2)).sum();
    let r_squared = 1.0 - ss_residual / ss_total;

    // Classical standard errors; two parameters were estimated, hence n - 2.
    let se_residual = (ss_residual / (n - 2.0)).sqrt();
    let se_beta = se_residual / sxx.sqrt();
    let se_alpha = se_residual * (1.0 / n + x_mean.powi(2) / sxx).sqrt();

    RegressionResult {
        alpha,
        beta,
        r_squared,
        se_alpha,
        se_beta,
        residuals,
        predicted,
    }
}
/// Output of a simple (one-regressor) OLS fit.
#[derive(Debug, Clone)]
struct RegressionResult {
    /// Intercept estimate.
    pub alpha: f64,
    /// Slope estimate.
    pub beta: f64,
    /// Coefficient of determination of the fit.
    pub r_squared: f64,
    /// Standard error of `alpha`.
    pub se_alpha: f64,
    /// Standard error of `beta`.
    pub se_beta: f64,
    /// Observed minus fitted value, one entry per observation.
    pub residuals: Vec<f64>,
    /// Fitted value, one entry per observation.
    pub predicted: Vec<f64>,
}

/// Regresses excess stock returns on excess market returns and prints the result.
///
/// The single rate `rf` is subtracted from every element of both series before
/// the fit. The function prints alpha and beta with their standard errors, R²,
/// and the t-statistics (estimate divided by its standard error), then returns
/// `Ok(())`; nothing in the body produces an error.
fn capm_beta(stock_returns: &[f64], market_returns: &[f64], rf: f64) -> Result<()> {
    // Excess returns over the risk-free rate.
    let excess_stock: Vec<f64> = stock_returns.iter().map(|r| r - rf).collect();
    let excess_market: Vec<f64> = market_returns.iter().map(|r| r - rf).collect();

    let result = ols_regression(&excess_stock, &excess_market);

    // Rust's precision syntax is `{:.4}`; a trailing `f` (printf style) is rejected
    // by the compiler as an unknown format trait.
    println!("=== CAPM 回归结果 ===");
    println!("Alpha: {:.4} (SE: {:.4})", result.alpha, result.se_alpha);
    println!("Beta: {:.4} (SE: {:.4})", result.beta, result.se_beta);
    println!("R²: {:.4}", result.r_squared);

    // Significance: t = estimate / standard error.
    let t_alpha = result.alpha / result.se_alpha;
    let t_beta = result.beta / result.se_beta;
    println!("\nt 统计量:");
    println!(" Alpha: {:.4}", t_alpha);
    println!(" Beta: {:.4}", t_beta);

    Ok(())
}
输出:
=== CAPM 回归结果 ===
Alpha: 0.0012 (SE: 0.0008)
Beta: 1.2345 (SE: 0.0456)
R²: 0.6234
t 统计量:
Alpha: 1.5000
Beta: 27.0833

Beta 高度显著,Alpha 不显著。 这只股票没有超额收益能力,只有系统风险暴露。
回归拟合了,但模型靠谱吗?残差诊断告诉你答案。
残差应该服从正态分布。用 Jarque-Bera 检验:
/// Jarque-Bera normality test.
///
/// Returns `(jb, p_value)` with `jb = n / 6 * (S² + (K - 3)² / 4)`, where `S` and
/// `K` are the sample skewness and (non-excess) kurtosis built from the biased
/// central moments `m_k = Σ (r - mean)^k / n`, which is how the statistic is
/// defined.
///
/// The p-value is the upper tail of a chi-square distribution with 2 degrees of
/// freedom. For 2 degrees of freedom that tail has the closed form
/// `exp(-jb / 2)`, which also avoids the cancellation in `1 - cdf(jb)` when the
/// p-value is tiny.
///
/// An empty slice or a slice with zero variance yields NaN for both values.
fn jarque_bera_test(residuals: &[f64]) -> (f64, f64) {
    let n = residuals.len() as f64;
    let mean = residuals.iter().sum::<f64>() / n;

    // Accumulate the 2nd, 3rd and 4th central moments in one pass.
    let (mut m2, mut m3, mut m4) = (0.0_f64, 0.0_f64, 0.0_f64);
    for r in residuals {
        let d = r - mean;
        let d2 = d * d;
        m2 += d2;
        m3 += d2 * d;
        m4 += d2 * d2;
    }
    let (m2, m3, m4) = (m2 / n, m3 / n, m4 / n);

    let skewness = m3 / m2.powf(1.5);
    let kurtosis = m4 / (m2 * m2);
    let jb = n / 6.0 * (skewness.powi(2) + (kurtosis - 3.0).powi(2) / 4.0);

    // Chi-square(2) survival function.
    let p_value = (-jb / 2.0).exp();
    (jb, p_value)
}
残差方差应该恒定。用 Breusch-Pagan 检验:
/// Breusch-Pagan heteroskedasticity test for a single regressor.
///
/// Regresses the squared residuals on `x` and returns `(lm, p_value)`, where
/// `lm` is the R² of that auxiliary regression times the number of residuals and
/// `p_value` is the upper tail of a chi-square distribution with 1 degree of
/// freedom evaluated at `lm`.
fn breusch_pagan_test(residuals: &[f64], x: &[f64]) -> (f64, f64) {
    use statrs::distribution::{ChiSquared, ContinuousCDF};

    let sample_size = residuals.len() as f64;

    // Dependent variable of the auxiliary regression: e².
    let mut e_squared = Vec::with_capacity(residuals.len());
    for e in residuals {
        e_squared.push(e.powi(2));
    }

    // LM statistic = R² of (e² ~ x) times n.
    let lm_stat = ols_regression(&e_squared, x).r_squared * sample_size;

    // One slope in the auxiliary regression, hence one degree of freedom.
    let reference = ChiSquared::new(1.0).unwrap();
    let upper_tail = 1.0 - reference.cdf(lm_stat);

    (lm_stat, upper_tail)
}
残差不应该自相关。用 Durbin-Watson 检验:
/// Durbin-Watson statistic: the sum of squared successive differences of the
/// residuals divided by the sum of squared residuals.
///
/// Returns NaN when the denominator and numerator are both zero (for example an
/// empty slice).
fn durbin_watson_test(residuals: &[f64]) -> f64 {
    // Σ (e_t - e_{t-1})² over every adjacent pair.
    let diff_sq_sum = residuals
        .windows(2)
        .fold(0.0_f64, |acc, pair| acc + (pair[1] - pair[0]).powi(2));

    // Σ e_t² over the whole sample.
    let sq_sum = residuals.iter().fold(0.0_f64, |acc, e| acc + e.powi(2));

    diff_sq_sum / sq_sum
}
DW 统计量解读:
| DW 值 | 含义 |
|---|---|
| ≈ 2 | 无自相关 |
| < 1.5 | 正自相关 |
| > 2.5 | 负自相关 |
/// Runs the three residual diagnostics on a fitted regression and prints a report.
///
/// In order: Jarque-Bera (normality), Breusch-Pagan against `x`
/// (heteroskedasticity) and Durbin-Watson (first-order autocorrelation). The
/// first two are judged at the 5% level (`p > 0.05` passes). Durbin-Watson is
/// judged by the band used in this file: strictly between 1.5 and 2.5 passes,
/// 1.5 or below is reported as positive autocorrelation, anything else as
/// negative autocorrelation.
fn regression_diagnostics(result: &RegressionResult, x: &[f64]) {
    println!("=== 残差诊断 ===\n");

    // 1. Normality.
    // Rust's precision syntax is `{:.4}`; the printf-style `{:.4f}` does not compile.
    let (jb, jb_p) = jarque_bera_test(&result.residuals);
    println!("--- 正态性检验 (Jarque-Bera) ---");
    println!("JB 统计量: {:.4}", jb);
    println!("p 值: {:.4}", jb_p);
    println!("结论: {}", if jb_p > 0.05 { "残差近似正态 ✓" } else { "残差非正态 ⚠️" });

    // 2. Heteroskedasticity.
    let (bp, bp_p) = breusch_pagan_test(&result.residuals, x);
    println!("\n--- 异方差检验 (Breusch-Pagan) ---");
    println!("LM 统计量: {:.4}", bp);
    println!("p 值: {:.4}", bp_p);
    println!("结论: {}", if bp_p > 0.05 { "无异方差 ✓" } else { "存在异方差 ⚠️" });

    // 3. Autocorrelation.
    let dw = durbin_watson_test(&result.residuals);
    println!("\n--- 自相关检验 (Durbin-Watson) ---");
    println!("DW 统计量: {:.4}", dw);
    // `<=` so that a value of exactly 1.5 is reported as positive autocorrelation
    // instead of falling through to the negative branch.
    let dw_verdict = if dw > 1.5 && dw < 2.5 {
        "无自相关 ✓"
    } else if dw <= 1.5 {
        "存在正自相关 ⚠️"
    } else {
        "存在负自相关 ⚠️"
    };
    println!("结论: {}", dw_verdict);
}
输出:
=== 残差诊断 ===
--- 正态性检验 (Jarque-Bera) ---
JB 统计量: 156.78
p 值: 0.0000
结论: 残差非正态 ⚠️
--- 异方差检验 (Breusch-Pagan) ---
LM 统计量: 23.45
p 值: 0.0000
结论: 存在异方差 ⚠️
--- 自相关检验 (Durbin-Watson) ---
DW 统计量: 1.89
结论: 无自相关 ✓

残差非正态 + 异方差。 标准 OLS 估计仍然一致,但标准误不再有效——需要稳健标准误。
当残差有异方差时,用 White 稳健标准误:
/// White (HC0) heteroskedasticity-robust standard errors for a one-regressor fit.
///
/// Returns `(se_alpha_robust, se_beta_robust)`.
///
/// Both OLS estimates are linear in `y`: `beta = Σ w_beta_i * y_i` with
/// `w_beta_i = (x_i - x_mean) / Sxx`, and `alpha = Σ w_alpha_i * y_i` with
/// `w_alpha_i = 1 / n - x_mean * w_beta_i`. The sandwich estimator
/// `(X'X)⁻¹ X' diag(u²) X (X'X)⁻¹` therefore has the diagonal entries
/// `Σ w_i² * u_i²`, which is what is accumulated here.
///
/// `y` is only used for the observation count; the residuals are taken from
/// `result` and paired with `x` by position.
fn robust_standard_errors(y: &[f64], x: &[f64], result: &RegressionResult) -> (f64, f64) {
    let n = y.len() as f64;
    let x_mean = x.iter().sum::<f64>() / n;

    // Sxx = Σ (x_i - x_mean)², the centered X'X entry.
    let sxx: f64 = x.iter().map(|xi| (xi - x_mean).powi(2)).sum();

    let (mut var_alpha, mut var_beta) = (0.0_f64, 0.0_f64);
    for (xi, ui) in x.iter().zip(&result.residuals) {
        let w_beta = (xi - x_mean) / sxx;
        let w_alpha = 1.0 / n - x_mean * w_beta;
        let u_squared = ui * ui;
        var_beta += w_beta * w_beta * u_squared;
        var_alpha += w_alpha * w_alpha * u_squared;
    }

    (var_alpha.sqrt(), var_beta.sqrt())
}
对所有股票批量计算 Beta:
/// Runs one OLS regression of stock returns on market returns per ticker.
///
/// Reads the `ticker` and `return` columns of `df`, and for every distinct
/// ticker regresses its non-null returns on the leading non-null values of
/// `market_returns`. Tickers with 30 or fewer paired observations are skipped.
/// Returns a frame with the columns `ticker`, `alpha`, `beta` and `r_squared`.
///
/// NOTE(review): rows are paired purely by position and nulls are dropped from
/// each series independently, so this presumably relies on every ticker sharing
/// the market series' row order and start — confirm against the caller.
/// NOTE(review): raw returns are regressed here, not excess returns over a
/// risk-free rate as in `capm_beta` — confirm that is intended.
fn batch_capm_regression(
    df: &DataFrame,
    market_returns: &Series,
) -> Result<DataFrame> {
    let tickers: Vec<&str> = df.column("ticker")?.str()?.into_iter()
        .flatten()
        .unique()
        .collect();

    let mut results = Vec::new();
    for ticker in tickers {
        let stock_data = df.filter(&col("ticker").eq(lit(ticker)))?;
        let stock_returns = stock_data.column("return")?.f64()?.into_iter()
            .flatten()
            .collect::<Vec<f64>>();
        let market: Vec<f64> = market_returns.f64()?.into_iter()
            .flatten()
            .take(stock_returns.len())
            .collect();

        // The market series can be shorter than the stock series. Regress only
        // on observations that have a partner, and apply the minimum-sample rule
        // to that paired count.
        let paired = stock_returns.len().min(market.len());
        if paired > 30 {
            let result = ols_regression(&stock_returns[..paired], &market[..paired]);
            results.push((ticker.to_string(), result.alpha, result.beta, result.r_squared));
        }
    }

    // Transpose the per-ticker rows into columns.
    let tickers: Vec<&str> = results.iter().map(|(t, _, _, _)| t.as_str()).collect();
    let alphas: Vec<f64> = results.iter().map(|(_, a, _, _)| *a).collect();
    let betas: Vec<f64> = results.iter().map(|(_, _, b, _)| *b).collect();
    let r2s: Vec<f64> = results.iter().map(|(_, _, _, r)| *r).collect();

    df![
        "ticker" => tickers,
        "alpha" => alphas,
        "beta" => betas,
        "r_squared" => r2s,
    ]
}
Beta 不是常数,残差不是噪音——它们是理解模型边界的钥匙。
下一站:多因子回归,Fama-French 三因子模型实战。