library(neuralnet) # regression
library(nnet) # classification
library(NeuralNetTools)
library(plyr)
library(readr)
Startups <- read_csv("/home/rpalma/AAA_Datos/2020/Posgrado/Di3/Datasets/50 Start Ups/50_Startups_LAC.csv")
## Rows: 50 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Pais
## dbl (5): R_D_Spend, POM, Logist_Market, Profit, Supervivencia
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Categoric <- read_csv("/home/rpalma/AAA_Datos/2020/Posgrado/Di3/Datasets/50 Start Ups/50_Startups_Categoric_LAC.csv")
## Rows: 50 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Pais, Supervivencia
## dbl (4): R_D_Spend, POM, Logist_Market, Profit
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
La columna supervivencia de la tabla Categoric tiene etiquetas que identifican la situación en la que terminaron los emprendimientos, a saber:
SpinOf La empresa se separó de una principal y logró su independencia económica
RevEq La empresa alcanzó su punto de equilibrio canceló su deuda con el banco y fue comprada por la competencia.
BankR La empresa no prosperó y el banco se quedó con los activos que vendidos no compensaron el preśtamo inicial (Banca Rota).
tabla1 <- table(Categoric$Pais)
tabla2 <- table(Categoric$Supervivencia)
tabla3 <- table(Categoric$Pais,Categoric$Supervivencia)
plot(tabla1, col=c("red","green","blue"))
plot(tabla2, col=c("red","green","blue"))
plot(tabla3, col=c("red","green","blue"))
ind_1 <- which(Categoric$Pais=="Colombia")
p1 <- as.matrix(Categoric[ind_1,5])
ind_2 <- which(Categoric$Pais=="Ecuador")
p2 <- as.matrix(Categoric[ind_2,5])
ind_3 <- which(Categoric$Pais=="Chile")
p3 <- as.matrix(Categoric[ind_3,5])
hp1 <- hist(p1)
hp2 <- hist(p2)
hp3 <- hist(p3)
par(mfrow=c(3,1))
plot( hp1, col=rgb(0,0,1,1/4), xlim=c(30000,200000),ylim=c(0,5),main="Ecuador")
plot( hp2, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Colombia")
plot( hp3, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Chile")
par(mfrow=c(1,3))
plot( hp1, col=rgb(0,0,1,1/4), xlim=c(30000,200000),ylim=c(0,5),main="Ecuador")
plot( hp2, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Colombia")
plot( hp3, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Chile")
pairs(Categoric[ ,1:3])
boxplot(Categoric[ ,1:3])
Categoric$Pais <- as.numeric(revalue(Categoric$Pais,
c("Colombia"="0", "Ecuador"="1",
"Chile"="2")))
Categoric$Supervivencia <- as.numeric(revalue(Categoric$Supervivencia,
c("BankR"="0", "RevEq"="1",
"SpinOff"="2")))
plot(Categoric$Pais, Categoric$Profit)
Tabla Textual
library(kableExtra)
kable(head(Categoric), "pipe")
R_D_Spend | POM | Logist_Market | Pais | Profit | Supervivencia |
---|---|---|---|---|---|
165349.2 | 136897.80 | 471784.1 | 0 | 192261.8 | 2 |
162597.7 | 151377.59 | 443898.5 | 1 | 191792.1 | 2 |
153441.5 | 101145.55 | 407934.5 | 2 | 191050.4 | 2 |
144372.4 | 118671.85 | 383199.6 | 0 | 182902.0 | 2 |
142107.3 | 91391.77 | 366168.4 | 2 | 166187.9 | 2 |
131876.9 | 99814.71 | 362861.4 | 0 | 156991.1 | 2 |
Tabla Simple
kable(head(Categoric), "simple")
R_D_Spend | POM | Logist_Market | Pais | Profit | Supervivencia |
---|---|---|---|---|---|
165349.2 | 136897.80 | 471784.1 | 0 | 192261.8 | 2 |
162597.7 | 151377.59 | 443898.5 | 1 | 191792.1 | 2 |
153441.5 | 101145.55 | 407934.5 | 2 | 191050.4 | 2 |
144372.4 | 118671.85 | 383199.6 | 0 | 182902.0 | 2 |
142107.3 | 91391.77 | 366168.4 | 2 | 166187.9 | 2 |
131876.9 | 99814.71 | 362861.4 | 0 | 156991.1 | 2 |
normalize<-function(x){
return ( (x-min(x))/(max(x)-min(x)))
}
Startups_norm<-as.data.frame(lapply(Categoric,FUN=normalize))
summary(Startups_norm$Profit)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.4249 0.5254 0.5481 0.7044 1.0000
Datos Originales y Datos normalizados
head(Categoric$Profit)
## [1] 192261.8 191792.1 191050.4 182902.0 166187.9 156991.1
head(Startups_norm)
## R_D_Spend POM Logist_Market Pais Profit Supervivencia
## 1 1.0000000 0.6517439 1.0000000 0.0 1.0000000 1
## 2 0.9833595 0.7619717 0.9408934 0.5 0.9973546 1
## 3 0.9279846 0.3795790 0.8646636 1.0 0.9931781 1
## 4 0.8731364 0.5129984 0.8122351 0.0 0.9472924 1
## 5 0.8594377 0.3053280 0.7761356 1.0 0.8531714 1
## 6 0.7975660 0.3694479 0.7691259 0.0 0.8013818 1
Muestreo para entrenamento
indice <- sample(2, nrow(Startups_norm), replace = TRUE, prob = c(0.7,0.3))
startups_train <- Startups_norm[indice==1,]
startups_test <- Startups_norm[indice==2,]
library(neuralnet)
attach(Categoric)
startups_model <- neuralnet(Profit ~ R_D_Spend+ POM + Logist_Market + Pais , data = startups_train)
str(startups_model)
## List of 14
## $ call : language neuralnet(formula = Profit ~ R_D_Spend + POM + Logist_Market + Pais, data = startups_train)
## $ response : num [1:38, 1] 1 0.997 0.993 0.947 0.853 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:38] "1" "2" "3" "4" ...
## .. ..$ : chr "Profit"
## $ covariate : num [1:38, 1:4] 1 0.983 0.928 0.873 0.859 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:38] "1" "2" "3" "4" ...
## .. ..$ : chr [1:4] "R_D_Spend" "POM" "Logist_Market" "Pais"
## $ model.list :List of 2
## ..$ response : chr "Profit"
## ..$ variables: chr [1:4] "R_D_Spend" "POM" "Logist_Market" "Pais"
## $ err.fct :function (x, y)
## ..- attr(*, "type")= chr "sse"
## $ act.fct :function (x)
## ..- attr(*, "type")= chr "logistic"
## $ linear.output : logi TRUE
## $ data :'data.frame': 38 obs. of 6 variables:
## ..$ R_D_Spend : num [1:38] 1 0.983 0.928 0.873 0.859 ...
## ..$ POM : num [1:38] 0.652 0.762 0.38 0.513 0.305 ...
## ..$ Logist_Market: num [1:38] 1 0.941 0.865 0.812 0.776 ...
## ..$ Pais : num [1:38] 0 0.5 1 0 1 0.5 1 0.5 1 0.5 ...
## ..$ Profit : num [1:38] 1 0.997 0.993 0.947 0.853 ...
## ..$ Supervivencia: num [1:38] 1 1 1 1 1 1 1 0.5 0.5 0.5 ...
## $ exclude : NULL
## $ net.result :List of 1
## ..$ : num [1:38, 1] 0.974 0.96 0.932 0.902 0.889 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:38] "1" "2" "3" "4" ...
## .. .. ..$ : NULL
## $ weights :List of 1
## ..$ :List of 2
## .. ..$ : num [1:5, 1] -1.6669 2.9771 -0.0746 0.5157 -0.0244
## .. ..$ : num [1:2, 1] 0.0569 1.0724
## $ generalized.weights:List of 1
## ..$ : num [1:38, 1:4] 15.68 11.12 7.55 6.03 5.64 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:38] "1" "2" "3" "4" ...
## .. .. ..$ : NULL
## $ startweights :List of 1
## ..$ :List of 2
## .. ..$ : num [1:5, 1] -0.6355 0.4437 0.3312 -0.0529 -1.3376
## .. ..$ : num [1:2, 1] -0.632 -0.58
## $ result.matrix : num [1:10, 1] 0.06428 0.00725 339 -1.66687 2.97714 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:10] "error" "reached.threshold" "steps" "Intercept.to.1layhid1" ...
## .. ..$ : NULL
## - attr(*, "class")= chr "nn"
plot(startups_model, rep = "best")
Esto me indica cuales son los KPI
par(mar = numeric(4), family = 'serif')
plotnet(startups_model, alpha = 0.6)
model_results <- compute(startups_model,startups_test[1:4])
predicted_profit <- model_results$net.result
cor(predicted_profit,startups_test$Profit)
## [,1]
## [1,] 0.973849
Dado que hicimos la predicciones con los datos normalizados, ahora deberemos des-normalizarlos
str_max <- max(Startups$Profit)
str_min <- min(Startups$Profit)
unnormalize <- function(x, min, max) {
return( (max - min)*x + min )
}
ActualProfit_pred <- unnormalize(predicted_profit,str_min,str_max)
head(ActualProfit_pred)
## [,1]
## 6 166817.6
## 9 155717.0
## 14 129753.0
## 17 118788.3
## 22 120662.2
## 23 116761.1
Es posible mejorar la performance con el agregado de más capas ocultas.
Startups_model2 <- neuralnet(Profit~R_D_Spend+ POM + Logist_Market + Pais , data = startups_train, hidden = c(2,4))
plot(Startups_model2 ,rep = "best")
model_results2<-compute(Startups_model2,startups_test[1:4])
predicted_Profit2<-model_results2$net.result
cor(predicted_Profit2,startups_test$Profit)
## [,1]
## [1,] 0.9737566
par(mar = numeric(4), family = 'serif')
plotnet(Startups_model2, alpha = 0.6)
Armamos el dataset de datos a clasificar
library(nnet)
supervivencia <- as.factor(Categoric$Supervivencia)
R_D_Spend <- as.matrix(Categoric$R_D_Spend)
POM <- as.matrix(Categoric$POM)
Logist_Market <- as.matrix(Categoric$Logist_Market)
Clasificar <- data.frame (supervivencia,R_D_Spend,POM,Logist_Market)
indice <- sample(2, nrow(Clasificar), replace = TRUE, prob = c(0.7,0.3))
clasificar_train <- Startups_norm[indice==1,]
clasificar_test <- Startups_norm[indice==2,]
supervivientes_clasificados <- factor(clasificar_train$Supervivencia)
Entrenamiento de nnet como clasificador
supervivientes_train<-nnet(supervivientes_clasificados~clasificar_train$R_D_Spend + clasificar_train$POM+ clasificar_train$Logist_Market ,data=clasificar_train,size=5, decay=5e-4, maxit=2000)
## # weights: 38
## initial value 45.168420
## iter 10 value 14.655054
## iter 20 value 3.731461
## iter 30 value 1.901369
## iter 40 value 1.692317
## iter 50 value 1.524971
## iter 60 value 1.455029
## iter 70 value 1.436595
## iter 80 value 1.422593
## iter 90 value 1.402283
## iter 100 value 1.359745
## iter 110 value 1.349081
## iter 120 value 1.338589
## iter 130 value 1.332154
## iter 140 value 1.328070
## iter 150 value 1.326972
## iter 160 value 1.326546
## iter 170 value 1.325602
## iter 180 value 1.324245
## iter 190 value 1.322318
## iter 200 value 1.320788
## iter 210 value 1.317902
## iter 220 value 1.317289
## iter 230 value 1.316978
## iter 240 value 1.316919
## iter 250 value 1.316900
## iter 260 value 1.316882
## iter 270 value 1.316852
## iter 280 value 1.316824
## iter 290 value 1.316774
## iter 300 value 1.316683
## iter 310 value 1.316667
## iter 320 value 1.316644
## iter 330 value 1.316622
## iter 340 value 1.316613
## iter 350 value 1.316605
## iter 360 value 1.316599
## iter 370 value 1.316593
## iter 380 value 1.316588
## iter 390 value 1.316566
## iter 400 value 1.316558
## iter 410 value 1.316555
## iter 420 value 1.316549
## iter 430 value 1.316547
## iter 440 value 1.316544
## iter 450 value 1.316540
## iter 460 value 1.316537
## iter 470 value 1.316531
## iter 480 value 1.316530
## iter 490 value 1.316527
## iter 500 value 1.316523
## iter 510 value 1.316522
## iter 520 value 1.316520
## iter 530 value 1.316519
## iter 540 value 1.316516
## iter 540 value 1.316516
## final value 1.316516
## converged
plotnet(supervivientes_train, alpha = 0.6)
https://stackoverrun.com/es/q/3338607
crs\(nnet <- nnet(as.factor(Target) ~ ., data=crs\)dataset[crs\(sample,c(crs\)input, crs$target)], size=10, skip=TRUE, MaxNWts=10000, trace=FALSE, maxit=100)