「主要耐久消費財の普及率」の推移のデータを用いて、ルームエアコンの普及率へのモデルフィッティングを行う。 https://www.e-stat.go.jp/stat-search/files?page=1&layout=datalist&toukei=00100405&tstat=000001014549&cycle=0&tclass1=000001200820&tclass2=000001203960&tclass3val=0
データの読み込み
data <- read.csv("air_conditioner.csv")
多項式回帰モデルのフィッティング
library(ggplot2) poly_model <- lm(air_conditioner ~ poly(year, 3), data = data) data$poly_pred <- predict(poly_model, newdata = data) poly_coefs <- coefficients(poly_model) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = poly_pred), col = "blue") + labs(title = "Polynomial Regression Fit", x = "Year", y = "Adoption Rate") + theme_minimal()
スプライン回帰モデルのフィッティング
library(splines) spline_model <- lm(air_conditioner ~ ns(year, 3), data = data) data$spline_pred <- predict(spline_model, newdata = data) spline_coefs <- coefficients(spline_model) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = spline_pred), col = "red") + labs(title = "Spline Regression Fit", x = "Year", y = "Adoption Rate") + theme_minimal()
ロジスティック回帰
library(nls2) library(broom) library(dplyr) logistic_model <- nls(air_conditioner ~ SSlogis(year, Asym, xmid, scal), data = data) data$logistic_pred <- predict(logistic_model, newdata = data) logistic_coefs <- coefficients(logistic_model) residuals <- resid(logistic_model) s <- summary(logistic_model)$sigma data <- data %>% mutate( log.lwr = logistic_pred - s, log.upr = logistic_pred + s ) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = logistic_pred), color = "blue") + geom_ribbon(aes(ymin = log.lwr, ymax = log.upr), alpha = 0.2, fill = "darkgray") + labs(title = "Logistic Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") + theme_minimal()
ガウス過程回帰 (Gaussian Process Regression)
library(kernlab) library(dplyr) library(ggplot2) gp_model <- gausspr(air_conditioner ~ year, data = data) data$gp_pred <- predict(gp_model, newdata = data) data$gp_residuals <- data$air_conditioner - data$gp_pred gp_se <- sqrt(mean(data$gp_residuals^2)) data <- data %>% mutate( gp.lwr = gp_pred - gp_se, gp.upr = gp_pred + gp_se ) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_line(aes(y = gp_pred), color = "purple") + geom_ribbon(aes(ymin = gp.lwr, ymax = gp.upr), alpha = 0.2, fill = "darkgray") + labs(title = "Gaussian Process Regression Fit with Standard Error Interval", x = "Year", y = "Adoption Rate") + theme_minimal()
一般化加法モデル (GAM)
library(mgcv) ] gam_model <- gam(air_conditioner ~ s(year), data = data) data$gam_pred <- predict(gam_model, newdata = data) ggplot(data, aes(x = year, y = air_conditioner)) + geom_point() + geom_smooth(method = "gam", formula = y ~ s(x), col = "orange", se = TRUE) + labs(title = "Generalized Additive Model (GAM) Fit with Standard Error", x = "Year", y = "Adoption Rate") + theme_minimal()
評価指標の計算式の作成
calc_metrics <- function(actual, predicted) { mse <- mean((actual - predicted)^2) rmse <- sqrt(mse) mae <- mean(abs(actual - predicted)) mape <- mean(abs((actual - predicted) / ifelse(actual == 0, 0.1, actual))) * 100 # 0を0.1に置き換え r_squared <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2) return(c(R2 = r_squared, RMSE = rmse, MSE = mse, MAE = mae, MAPE = mape)) }
評価指標の計算
# 各モデルの評価指標を計算 spline_metrics <- calc_metrics(data$air_conditioner, data$spline_pred) logistic_metrics <- calc_metrics(data$air_conditioner, data$logistic_pred) poly_metrics <- calc_metrics(data$air_conditioner, data$poly_pred) gp_metrics <- calc_metrics(data$air_conditioner, data$gp_pred) gam_metrics <- calc_metrics(data$air_conditioner, data$gam_pred) # 結果をデータフレームにまとめる results <- data.frame( Model = c("多項式回帰", "スプライン回帰", "ロジスティック回帰", "ガウス過程回帰", "一般化加法モデル"), R2 = c(poly_metrics["R2"], spline_metrics["R2"], logistic_metrics["R2"], gp_metrics["R2"], gam_metrics["R2"]), RMSE = c(poly_metrics["RMSE"], spline_metrics["RMSE"], logistic_metrics["RMSE"], gp_metrics["RMSE"], gam_metrics["RMSE"]), MSE = c(poly_metrics["MSE"], spline_metrics["MSE"], logistic_metrics["MSE"], gp_metrics["MSE"], gam_metrics["MSE"]), MAE = c(poly_metrics["MAE"], spline_metrics["MAE"], logistic_metrics["MAE"], gp_metrics["MAE"], gam_metrics["MAE"]), MAPE = c(poly_metrics["MAPE"], spline_metrics["MAPE"], logistic_metrics["MAPE"], gp_metrics["MAPE"], gam_metrics["MAPE"]) ) # 結果を表示 library(knitr) knitr::kable(results, caption = "モデルごとの評価指標")
Model R2 RMSE MSE MAE MAPE 1 多項式回帰 0.9910743 3.2538000 10.5872147 2.7796622 109.24270 2 スプライン回帰 0.9916413 3.1487624 9.9147045 2.5487960 160.47906 3 ロジスティック回帰 0.9951085 2.4087451 5.8020531 1.9865553 66.24820 4 ガウス過程回帰 0.9908456 3.2952201 10.8584755 2.4824953 268.18665 5 一般化加法モデル 0.9992883 0.9187766 0.8441504 0.6841114 7.16261