「主要耐久消費財の普及率」の推移のデータを用いて、ルームエアコンの普及率へのモデルフィッティングを行う。 https://www.e-stat.go.jp/stat-search/files?page=1&layout=datalist&toukei=00100405&tstat=000001014549&cycle=0&tclass1=000001200820&tclass2=000001203960&tclass3val=0
データの読み込み
data <- read.csv("air_conditioner.csv")
多項式回帰モデルのフィッティング
library(ggplot2) poly_model <- lm(air_conditioner ~ poly(year, 3), data = data) data$poly_pred <- predict(poly_model, newdata = data) poly_coefs <- coefficients(poly_model) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = poly_pred), col = "blue") + labs(title = "Polynomial Regression Fit", x = "Year", y = "Adoption Rate") + theme_minimal()

スプライン回帰モデルのフィッティング
library(splines) spline_model <- lm(air_conditioner ~ ns(year, 3), data = data) data$spline_pred <- predict(spline_model, newdata = data) spline_coefs <- coefficients(spline_model) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = spline_pred), col = "red") + labs(title = "Spline Regression Fit", x = "Year", y = "Adoption Rate") + theme_minimal()

ロジスティック回帰
library(nls2)
library(broom)
library(dplyr)
logistic_model <- nls(air_conditioner ~ SSlogis(year, Asym, xmid, scal), data = data)
data$logistic_pred <- predict(logistic_model, newdata = data)
logistic_coefs <- coefficients(logistic_model)
residuals <- resid(logistic_model)
s <- summary(logistic_model)$sigma
data <- data %>%
mutate(
log.lwr = logistic_pred - s,
log.upr = logistic_pred + s
)
ggplot(data, aes(x = year, y = air_conditioner)) +
geom_point() +
geom_line(aes(y = logistic_pred), color = "blue") +
geom_ribbon(aes(ymin = log.lwr, ymax = log.upr), alpha = 0.2, fill = "darkgray") +
labs(title = "Logistic Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") +
theme_minimal()

ガウス過程回帰 (Gaussian Process Regression)
library(kernlab)
library(dplyr)
library(ggplot2)
gp_model <- gausspr(air_conditioner ~ year, data = data)
data$gp_pred <- predict(gp_model, newdata = data)
data$gp_residuals <- data$air_conditioner - data$gp_pred
gp_se <- sqrt(mean(data$gp_residuals^2))
data <- data %>%
mutate(
gp.lwr = gp_pred - gp_se,
gp.upr = gp_pred + gp_se
)
ggplot(data, aes(x = year, y = air_conditioner)) +
geom_point() +
geom_line(aes(y = gp_pred), color = "purple") +
geom_ribbon(aes(ymin = gp.lwr, ymax = gp.upr), alpha = 0.2, fill = "darkgray") +
labs(title = "Gaussian Process Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") +
theme_minimal()

一般化加法モデル (GAM)
library(mgcv) ] gam_model <- gam(air_conditioner ~ s(year), data = data) data$gam_pred <- predict(gam_model, newdata = data) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_smooth(method = "gam", formula = y ~ s(x), col = "orange", se = TRUE) + labs(title = "Generalized Additive Model (GAM) Fit with Standard Error", x = "Year", y = "Adoption Rate") + theme_minimal()

評価指標の計算式の作成
calc_metrics <- function(actual, predicted) {
mse <- mean((actual - predicted)^2)
rmse <- sqrt(mse)
mae <- mean(abs(actual - predicted))
mape <- mean(abs((actual - predicted) / ifelse(actual == 0, 0.1, actual))) * 100 # 0を0.1に置き換え
r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
return(c(R2 = r_squared, RMSE = rmse, MSE = mse, MAE = mae, MAPE = mape))
}
評価指標の計算
# 各モデルの評価指標を計算
spline_metrics <- calc_metrics(data$air_conditioner, data$spline_pred)
logistic_metrics <- calc_metrics(data$air_conditioner, data$logistic_pred)
poly_metrics <- calc_metrics(data$air_conditioner, data$poly_pred)
gp_metrics <- calc_metrics(data$air_conditioner, data$gp_pred)
gam_metrics <- calc_metrics(data$air_conditioner, data$gam_pred)
# 結果をデータフレームにまとめる
results <- data.frame(
Model = c("多項式回帰", "スプライン回帰", "ロジスティック回帰", "ガウス過程回帰", "一般化加法モデル"),
R2 = c(poly_metrics["R2"], spline_metrics["R2"], logistic_metrics["R2"], gp_metrics["R2"], gam_metrics["R2"]),
RMSE = c(poly_metrics["RMSE"], spline_metrics["RMSE"], logistic_metrics["RMSE"], gp_metrics["RMSE"], gam_metrics["RMSE"]),
MSE = c(poly_metrics["MSE"], spline_metrics["MSE"], logistic_metrics["MSE"], gp_metrics["MSE"], gam_metrics["MSE"]),
MAE = c(poly_metrics["MAE"], spline_metrics["MAE"], logistic_metrics["MAE"], gp_metrics["MAE"], gam_metrics["MAE"]),
MAPE = c(poly_metrics["MAPE"], spline_metrics["MAPE"], logistic_metrics["MAPE"], gp_metrics["MAPE"], gam_metrics["MAPE"])
)
# 結果を表示
library(knitr)
knitr::kable(results, caption = "モデルごとの評価指標")
Model R2 RMSE MSE MAE MAPE 1 多項式回帰 0.9910743 3.2538000 10.5872147 2.7796622 109.24270 2 スプライン回帰 0.9916413 3.1487624 9.9147045 2.5487960 160.47906 3 ロジスティック回帰 0.9951085 2.4087451 5.8020531 1.9865553 66.24820 4 ガウス過程回帰 0.9908456 3.2952201 10.8584755 2.4824953 268.18665 5 一般化加法モデル 0.9992883 0.9187766 0.8441504 0.6841114 7.16261