# Read the measurements from the remote text file; header = TRUE takes the
# column names from the file's first line.
eficiency <- read.table(
  file = "https://themys.sid.uncu.edu.ar/rpalma/R-cran/Solar.txt",
  header = TRUE
)

# Column names of the loaded data frame.
colnames(eficiency)
## [1] "kWh" "gas" "solar"

# First six rows.
head(eficiency, n = 6L)
## kWh gas solar
## 1 84 46 354
## 2 73 20 190
## 3 65 52 405
## 4 70 30 263
## 5 76 57 451
## 6 69 25 302

# Scatterplot matrix of every pair of columns.
pairs(eficiency)
# Mejorando la apariencia de los gráficos
# The two plotting helpers below come from the psych package.
library(psych)
pairs.panels(eficiency)
multi.hist(eficiency)

# Correlation matrix of the columns; "pearson" is cor()'s default method,
# spelled out here for clarity.
cor(eficiency, method = "pearson")
## kWh gas solar
## kWh 1.0000000 0.2400133 0.2652935
## gas 0.2400133 1.0000000 0.8373534
## solar 0.2652935 0.8373534 1.0000000
# Simple linear regression with solar as the response and gas as the only
# predictor.
regresion <- lm(formula = solar ~ gas, data = eficiency)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(regresion)
##
## Call:
## lm(formula = solar ~ gas, data = eficiency)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.478 -26.816 -3.854 28.315 90.881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 102.5751 29.6376 3.461 0.00212 **
## gas 5.3207 0.7243 7.346 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.46 on 23 degrees of freedom
## Multiple R-squared: 0.7012, Adjusted R-squared: 0.6882
## F-statistic: 53.96 on 1 and 23 DF, p-value: 1.794e-07
# Predicted solar for each integer gas value from 30 to 50.
nuevas.gases <- data.frame(gas = 30:50)
predict(regresion, newdata = nuevas.gases)
## 1 2 3 4 5 6 7 8
## 262.1954 267.5161 272.8368 278.1575 283.4781 288.7988 294.1195 299.4402
## 9 10 11 12 13 14 15 16
## 304.7608 310.0815 315.4022 320.7229 326.0435 331.3642 336.6849 342.0056
## 17 18 19 20 21
## 347.3263 352.6469 357.9676 363.2883 368.6090

# 95% confidence intervals for the intercept and the slope (0.95 is the
# default level, written out explicitly).
confint(regresion, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 41.265155 163.885130
## gas 3.822367 6.818986
# Limit printed output to 50 columns from here on.
# NOTE(review): `tidy` is not a documented base R option (it looks like a
# knitr chunk option) -- confirm it has any effect when set via options().
options(tidy = TRUE, width = 50)

# Grid of integer gas values (10 to 90) on which the bands are evaluated.
nuevas.gases <- data.frame(gas = 10:90)

# Observed data with the fitted regression line on top.
with(eficiency, plot(gas, solar, xlab = "gas", ylab = "solar"))
abline(reg = regresion)

# Confidence band for the mean response: dashed lines in the default colour.
# predict() returns a matrix with columns fit, lwr and upr.
ic <- predict(regresion, newdata = nuevas.gases, interval = "confidence")
lines(nuevas.gases$gas, ic[, "lwr"], lty = 2)
lines(nuevas.gases$gas, ic[, "upr"], lty = 2)

# Prediction band for new observations: dashed red lines.
# `ic` is reassigned, so it holds the prediction intervals after this point.
ic <- predict(regresion, newdata = nuevas.gases, interval = "prediction")
lines(nuevas.gases$gas, ic[, "lwr"], lty = 2, col = "red")
lines(nuevas.gases$gas, ic[, "upr"], lty = 2, col = "red")
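# Intervalos de confianza de la respuesta media: ic es una matriz con tres
# columnas: la primera es la predicción, las otras dos son los extremos del
# intervalo. (Tras la segunda llamada a predict(), ic contiene los intervalos
# de predicción, dibujados en rojo.)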
# Analysis-of-variance table for the fitted model.
anova(object = regresion)
## Analysis of Variance Table
##
## Response: solar
## Df Sum Sq Mean Sq F value Pr(>F)
## gas 1 101933 101933 53.964 1.794e-07 ***
## Residuals 23 43444 1889
## ---
## Signif. codes:
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Residual diagnostics: standardized residuals against fitted values.
valores.ajustados <- fitted(regresion)
residuos <- rstandard(regresion)
plot(x = valores.ajustados, y = residuos)

# Normal Q-Q plot of the standardized residuals with its reference line.
qqnorm(residuos)
qqline(residuos)