井出草平の研究ノート

ルームエアコンの普及率へのモデルフィッティング[R]

「主要耐久消費財の普及率」の推移のデータを用いて、ルームエアコンの普及率へのモデルフィッティングを行う。 https://www.e-stat.go.jp/stat-search/files?page=1&layout=datalist&toukei=00100405&tstat=000001014549&cycle=0&tclass1=000001200820&tclass2=000001203960&tclass3val=0

データの読み込み

data <- read.csv("air_conditioner.csv")

多項式回帰モデルのフィッティング

library(ggplot2)
poly_model <- lm(air_conditioner ~ poly(year, 3), data = data)
data$poly_pred <- predict(poly_model, newdata = data)
poly_coefs <- coefficients(poly_model)

ggplot(data, aes(x = year, y = air_conditioner)) + 
  geom_point() +
  geom_line(aes(y = poly_pred), col = "blue") +
  labs(title = "Polynomial Regression Fit", x = "Year", y = "Adoption Rate") +
  theme_minimal()

 \eta = 53.27 + 256.07{\xi} + -46.78{\xi}^2 + -39.97{\xi}^3

スプライン回帰モデルのフィッティング

library(splines)
spline_model <- lm(air_conditioner ~ ns(year, 3), data = data)
data$spline_pred <- predict(spline_model, newdata = data)
spline_coefs <- coefficients(spline_model)

ggplot(data, aes(x = year, y = air_conditioner)) + 
  geom_point() +
  geom_line(aes(y = spline_pred), col = "red") +
  labs(title = "Spline Regression Fit", x = "Year", y = "Adoption Rate") +
  theme_minimal()

 {\eta} = -6.46 + 94.36{\xi} + 125.06{\xi} + 76.55{\xi}^3

ロジスティック回帰

library(nls2)
library(broom)
library(dplyr)

logistic_model <- nls(air_conditioner ~ SSlogis(year, Asym, xmid, scal), data = data)
data$logistic_pred <- predict(logistic_model, newdata = data)
logistic_coefs <- coefficients(logistic_model)

residuals <- resid(logistic_model)
s <- summary(logistic_model)$sigma

data <- data %>%
  mutate(
    log.lwr = logistic_pred - s,
    log.upr = logistic_pred + s
  )

ggplot(data, aes(x = year, y = air_conditioner)) + 
  geom_point() +
  geom_line(aes(y = logistic_pred), color = "blue") +
  geom_ribbon(aes(ymin = log.lwr, ymax = log.upr), alpha = 0.2, fill = "darkgray") +
  labs(title = "Logistic Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") +
  theme_minimal()

 {\eta} =
  \frac{90.32}
    {1 + e^{
      \frac{1983.32- {\xi}}{6.16}
   }}

ガウス過程回帰 (Gaussian Process Regression)

library(kernlab)
library(dplyr)
library(ggplot2)

gp_model <- gausspr(air_conditioner ~ year, data = data)
data$gp_pred <- predict(gp_model, newdata = data)

data$gp_residuals <- data$air_conditioner - data$gp_pred
gp_se <- sqrt(mean(data$gp_residuals^2))

data <- data %>%
  mutate(
    gp.lwr = gp_pred - gp_se,
    gp.upr = gp_pred + gp_se
  )

ggplot(data, aes(x = year, y = air_conditioner)) + 
  geom_point() +
  geom_line(aes(y = gp_pred), color = "purple") +
  geom_ribbon(aes(ymin = gp.lwr, ymax = gp.upr), alpha = 0.2, fill = "darkgray") +
  labs(title = "Gaussian Process Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") +
  theme_minimal()


\hat{y} =
 \mathcal{GP}(X)

一般化加法モデル (GAM)

library(mgcv)
]
gam_model <- gam(air_conditioner ~ s(year), data = data)
data$gam_pred <- predict(gam_model, newdata = data)

ggplot(data, aes(x = year, y = air_conditioner)) + 
  geom_point() +
  geom_smooth(method = "gam", formula = y ~ s(x), col = "orange", se = TRUE) +
  labs(title = "Generalized Additive Model (GAM) Fit with Standard Error", x = "Year", y = "Adoption Rate") +
  theme_minimal()

 \hat{y} = {\beta}_0 +  f_1(X_1) + f_2(X_2) + {\ldots} + f_p(X_p)

評価指標の計算式の作成

calc_metrics <- function(actual, predicted) {
  mse <- mean((actual - predicted)^2)
  rmse <- sqrt(mse)
  mae <- mean(abs(actual - predicted))
  mape <- mean(abs((actual - predicted) / ifelse(actual == 0, 0.1, actual))) * 100  # 0を0.1に置き換え
  r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
  
  return(c(R2 = r_squared, RMSE = rmse, MSE = mse, MAE = mae, MAPE = mape))
}

評価指標の計算

# 各モデルの評価指標を計算
spline_metrics <- calc_metrics(data$air_conditioner, data$spline_pred)
logistic_metrics <- calc_metrics(data$air_conditioner, data$logistic_pred)
poly_metrics <- calc_metrics(data$air_conditioner, data$poly_pred)
gp_metrics <- calc_metrics(data$air_conditioner, data$gp_pred)
gam_metrics <- calc_metrics(data$air_conditioner, data$gam_pred)

# 結果をデータフレームにまとめる
results <- data.frame(
  Model = c("多項式回帰", "スプライン回帰", "ロジスティック回帰", "ガウス過程回帰", "一般化加法モデル"),
  R2 = c(poly_metrics["R2"], spline_metrics["R2"], logistic_metrics["R2"], gp_metrics["R2"], gam_metrics["R2"]),
  RMSE = c(poly_metrics["RMSE"], spline_metrics["RMSE"], logistic_metrics["RMSE"], gp_metrics["RMSE"], gam_metrics["RMSE"]),
  MSE = c(poly_metrics["MSE"], spline_metrics["MSE"], logistic_metrics["MSE"], gp_metrics["MSE"], gam_metrics["MSE"]),
  MAE = c(poly_metrics["MAE"], spline_metrics["MAE"], logistic_metrics["MAE"], gp_metrics["MAE"], gam_metrics["MAE"]),
  MAPE = c(poly_metrics["MAPE"], spline_metrics["MAPE"], logistic_metrics["MAPE"], gp_metrics["MAPE"], gam_metrics["MAPE"])
)

# 結果を表示
library(knitr)
knitr::kable(results, caption = "モデルごとの評価指標")
               Model        R2      RMSE        MSE       MAE      MAPE
1         多項式回帰 0.9910743 3.2538000 10.5872147 2.7796622 109.24270
2     スプライン回帰 0.9916413 3.1487624  9.9147045 2.5487960 160.47906
3 ロジスティック回帰 0.9951085 2.4087451  5.8020531 1.9865553  66.24820
4     ガウス過程回帰 0.9908456 3.2952201 10.8584755 2.4824953 268.18665
5   一般化加法モデル 0.9992883 0.9187766  0.8441504 0.6841114   7.16261