9 Taller Redes Neuronales

9.1 Carge de bibliotecas

library(neuralnet)  # regression

library(nnet) # classification 

library(NeuralNetTools)

library(plyr)

library(kableExtra)

9.2 Carga de Datos

library(readr)
Startups <- read_csv("/media/rpalma/OS/AAA_Datos/2020/Posgrado/Di3/Datasets/50 Start Ups/50_Startups_LAC.csv")

## Parsed with column specification:
## cols(
##   R_D_Spend = col_double(),
##   POM = col_double(),
##   Logist_Market = col_double(),
##   Pais = col_character(),
##   Profit = col_double(),
##   Supervivencia = col_double()
## )

Categoric <- read_csv("/media/rpalma/OS/AAA_Datos/2020/Posgrado/Di3/Datasets/50 Start Ups/50_Startups_Categoric_LAC.csv")

## Parsed with column specification:
## cols(
##   R_D_Spend = col_double(),
##   POM = col_double(),
##   Logist_Market = col_double(),
##   Pais = col_character(),
##   Profit = col_double(),
##   Supervivencia = col_character()
## )

9.3 Tratamiento de variables categóricas

tabla1 <- table(Categoric$Pais)
tabla2 <- table(Categoric$Supervivencia)
tabla3 <- table(Categoric$Pais,Categoric$Supervivencia)

plot(tabla1, col=c("red","green","blue"))

plot(tabla2, col=c("red","green","blue"))

plot(tabla3, col=c("red","green","blue"))

9.4 Histogramas superpuestos

ind_1 <- which(Categoric$Pais=="Colombia")
p1 <- as.matrix(Categoric[ind_1,5])

ind_2 <- which(Categoric$Pais=="Ecuador")
p2 <- as.matrix(Categoric[ind_2,5])

ind_3 <- which(Categoric$Pais=="Chile")
p3 <- as.matrix(Categoric[ind_3,5])

hp1 <- hist(p1)

hp2 <- hist(p2)

hp3 <- hist(p3)

par(mfrow=c(3,1))
plot( hp1, col=rgb(0,0,1,1/4), xlim=c(30000,200000),ylim=c(0,5),main="Ecuador")
plot( hp2, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Colombia") 
plot( hp3, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Chile")

par(mfrow=c(1,3))
plot( hp1, col=rgb(0,0,1,1/4), xlim=c(30000,200000),ylim=c(0,5),main="Ecuador")
plot( hp2, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Colombia") 
plot( hp3, col=rgb(1,0,0,1/4),xlim=c(30000,200000),ylim=c(0,10),main="Chile")

pairs(Categoric[ ,1:3])

boxplot(Categoric[ ,1:3])

Categoric$Pais <- as.numeric(revalue(Categoric$Pais,
                          c("Colombia"="0", "Ecuador"="1",
                            "Chile"="2")))

9.5 Cuadro de campos categóricos

Categoric$Supervivencia <- as.numeric(revalue(Categoric$Supervivencia,
                          c("BankR"="0", "RevEq"="1",
                            "SpinOff"="2")))

9.6 Profit versus País

plot(Categoric$Pais, Categoric$Profit)

9.7 Visualización de Tablas

Tabla Textual

library(kableExtra)
kable(head(Categoric), "pipe")

R_D_Spend	POM	Logist_Market	Pais	Profit	Supervivencia
165349.2	136897.80	471784.1	0	192261.8	2
162597.7	151377.59	443898.5	1	191792.1	2
153441.5	101145.55	407934.5	2	191050.4	2
144372.4	118671.85	383199.6	0	182902.0	2
142107.3	91391.77	366168.4	2	166187.9	2
131876.9	99814.71	362861.4	0	156991.1	2

Tabla Simple

kable(head(Categoric), "simple")

R_D_Spend	POM	Logist_Market	Pais	Profit	Supervivencia
165349.2	136897.80	471784.1	0	192261.8	2
162597.7	151377.59	443898.5	1	191792.1	2
153441.5	101145.55	407934.5	2	191050.4	2
144372.4	118671.85	383199.6	0	182902.0	2
142107.3	91391.77	366168.4	2	166187.9	2
131876.9	99814.71	362861.4	0	156991.1	2

9.8 Normailización

normalize<-function(x){
  return ( (x-min(x))/(max(x)-min(x)))
}

Startups_norm<-as.data.frame(lapply(Categoric,FUN=normalize))
summary(Startups_norm$Profit)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.4249  0.5254  0.5481  0.7044  1.0000

Datos Originales y Datos normalizados

head(Categoric$Profit)

## [1] 192261.8 191792.1 191050.4 182902.0 166187.9
## [6] 156991.1

head(Startups_norm)

##   R_D_Spend       POM Logist_Market Pais
## 1 1.0000000 0.6517439     1.0000000  0.0
## 2 0.9833595 0.7619717     0.9408934  0.5
## 3 0.9279846 0.3795790     0.8646636  1.0
## 4 0.8731364 0.5129984     0.8122351  0.0
## 5 0.8594377 0.3053280     0.7761356  1.0
## 6 0.7975660 0.3694479     0.7691259  0.0
##      Profit Supervivencia
## 1 1.0000000             1
## 2 0.9973546             1
## 3 0.9931781             1
## 4 0.9472924             1
## 5 0.8531714             1
## 6 0.8013818             1

Muestreo para entrenamento

indice <- sample(2, nrow(Startups_norm), replace = TRUE, prob = c(0.7,0.3))
startups_train <- Startups_norm[indice==1,]
startups_test  <- Startups_norm[indice==2,]

9.9 Modelo de Neural Net

attach(Categoric)

startups_model <- neuralnet(Profit~R_D_Spend+ POM + Logist_Market + Pais
, data = startups_train)
str(startups_model)

## List of 14
##  $ call               : language neuralnet(formula = Profit ~ R_D_Spend +      POM + Logist_Market + Pais, data = startups_train)
##  $ response           : num [1:26, 1] 0.997 0.947 0.853 0.796 0.774 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:26] "2" "4" "5" "7" ...
##   .. ..$ : chr "Profit"
##  $ covariate          : num [1:26, 1:4] 0.983 0.873 0.859 0.814 0.729 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:26] "2" "4" "5" "7" ...
##   .. ..$ : chr [1:4] "R_D_Spend" "POM" "Logist_Market" "Pais"
##  $ model.list         :List of 2
##   ..$ response : chr "Profit"
##   ..$ variables: chr [1:4] "R_D_Spend" "POM" "Logist_Market" "Pais"
##  $ err.fct            :function (x, y)  
##   ..- attr(*, "type")= chr "sse"
##  $ act.fct            :function (x)  
##   ..- attr(*, "type")= chr "logistic"
##  $ linear.output      : logi TRUE
##  $ data               :'data.frame': 26 obs. of  6 variables:
##   ..$ R_D_Spend    : num [1:26] 0.983 0.873 0.859 0.814 0.729 ...
##   ..$ POM          : num [1:26] 0.762 0.513 0.305 0.73 0.742 ...
##   ..$ Logist_Market: num [1:26] 0.941 0.812 0.776 0.271 0.66 ...
##   ..$ Pais         : num [1:26] 0.5 0 1 0.5 0 1 0.5 1 0.5 0.5 ...
##   ..$ Profit       : num [1:26] 0.997 0.947 0.853 0.796 0.774 ...
##   ..$ Supervivencia: num [1:26] 1 1 1 1 1 0.5 0.5 0.5 0.5 0.5 ...
##  $ exclude            : NULL
##  $ net.result         :List of 1
##   ..$ : num [1:26, 1] 0.935 0.909 0.886 0.807 0.81 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:26] "2" "4" "5" "7" ...
##   .. .. ..$ : NULL
##  $ weights            :List of 1
##   ..$ :List of 2
##   .. ..$ : num [1:5, 1] -1.438 3.448 -0.419 0.556 -0.2
##   .. ..$ : num [1:2, 1] 0.087 0.956
##  $ generalized.weights:List of 1
##   ..$ : num [1:26, 1:4] 5.43 4.8 4.49 3.94 3.95 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:26] "2" "4" "5" "7" ...
##   .. .. ..$ : NULL
##  $ startweights       :List of 1
##   ..$ :List of 2
##   .. ..$ : num [1:5, 1] 0.5926 -0.7423 -0.0559 2.1104 -0.6281
##   .. ..$ : num [1:2, 1] 0.678 1.606
##  $ result.matrix      : num [1:10, 1] 0.01743 0.00727 93 -1.43786 3.44781 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:10] "error" "reached.threshold" "steps" "Intercept.to.1layhid1" ...
##   .. ..$ : NULL
##  - attr(*, "class")= chr "nn"

9.10 Ploteo de la red Neuronal

plot(startups_model, rep = "best")

9.11 Ploteo de la red proporcional

Esto me indica cuales son los KPI

par(mar = numeric(4), family = 'serif')
plotnet(startups_model, alpha = 0.6)

9.11.1 Evaluación de la performance del modelo

model_results <- compute(startups_model,startups_test[1:4])
predicted_profit <- model_results$net.result

9.12 Predicted profit Vs Actual profit of test data.

cor(predicted_profit,startups_test$Profit)

##           [,1]
## [1,] 0.9522906

9.13 Desnormalización de los resultados

Dado que hicimos la predicciones con los datos normalizados, ahora deberemos des-normalizarlos

str_max <- max(Startups$Profit)
str_min <- min(Startups$Profit)

unnormalize <- function(x, min, max) { 
  return( (max - min)*x + min )
}

ActualProfit_pred <- unnormalize(predicted_profit,str_min,str_max)
head(ActualProfit_pred)

##        [,1]
## 1  184342.2
## 3  177499.6
## 6  171030.0
## 8  159389.2
## 10 160917.4
## 12 144564.5

9.14 Mejoramiento de la performance del modelo

Es posible mejorar la performance con el agregado de más capas ocultas.

Startups_model2 <- neuralnet(Profit~R_D_Spend+ POM + Logist_Market + Pais
, data = startups_train, hidden = 2)
plot(Startups_model2 ,rep = "best")

9.15 Performance del modelo mejorado

model_results2<-compute(Startups_model2,startups_test[1:4])
predicted_Profit2<-model_results2$net.result
cor(predicted_Profit2,startups_test$Profit)

##           [,1]
## [1,] 0.9428186

9.16 Modelo Mejorado KPI

par(mar = numeric(4), family = 'serif')
plotnet(Startups_model2, alpha = 0.6)

9.17 Neural Net Clasificación

Armamos el dataset de datos a clasificar

library(nnet)

supervivencia <- as.factor(Categoric$Supervivencia)
R_D_Spend <- as.matrix(Categoric$R_D_Spend)
POM <- as.matrix(Categoric$POM)
Logist_Market <- as.matrix(Categoric$Logist_Market)
Clasificar <- data.frame (supervivencia,R_D_Spend,POM,Logist_Market)

9.18 Muestreo

indice <- sample(2, nrow(Clasificar), replace = TRUE, prob = c(0.7,0.3))

clasificar_train <- Startups_norm[indice==1,]
clasificar_test  <- Startups_norm[indice==2,]

supervivientes_clasificados <- factor(clasificar_train$Supervivencia)

Entrenamiento de nnet como clasificador

supervivientes_train<-nnet(supervivientes_clasificados~clasificar_train$R_D_Spend + clasificar_train$POM+ clasificar_train$Logist_Market ,data=clasificar_train,size=5, decay=5e-4, maxit=2000)

## # weights:  38
## initial  value 33.902626 
## iter  10 value 8.725337
## iter  20 value 3.961923
## iter  30 value 2.486946
## iter  40 value 1.878157
## iter  50 value 1.480637
## iter  60 value 1.378036
## iter  70 value 1.304077
## iter  80 value 1.252581
## iter  90 value 1.229374
## iter 100 value 1.211815
## iter 110 value 1.195886
## iter 120 value 1.187665
## iter 130 value 1.183899
## iter 140 value 1.180618
## iter 150 value 1.176144
## iter 160 value 1.173408
## iter 170 value 1.172258
## iter 180 value 1.170433
## iter 190 value 1.169332
## iter 200 value 1.166716
## iter 210 value 1.164242
## iter 220 value 1.163139
## iter 230 value 1.161066
## iter 240 value 1.160280
## iter 250 value 1.159733
## iter 260 value 1.159332
## iter 270 value 1.158817
## iter 280 value 1.158632
## iter 290 value 1.157832
## iter 300 value 1.156969
## iter 310 value 1.156118
## iter 320 value 1.155847
## iter 330 value 1.155571
## iter 340 value 1.154717
## iter 350 value 1.154154
## iter 360 value 1.153444
## iter 370 value 1.152567
## iter 380 value 1.152300
## iter 390 value 1.152219
## iter 400 value 1.152196
## iter 410 value 1.152148
## iter 420 value 1.152125
## iter 430 value 1.152108
## iter 440 value 1.152100
## iter 450 value 1.152095
## iter 460 value 1.152090
## final  value 1.152088 
## converged

9.19 Visualización del clasificador

plotnet(supervivientes_train, alpha = 0.6)

9.20 Aramdos de set de entrenamiento y de predicción

https://stackoverrun.com/es/q/3338607

crs$nnet <- nnet(as.factor(Target) ~ ., data=crs$dataset[crs$sample,c(crs$input, crs$target)], size=10, skip=TRUE, MaxNWts=10000, trace=FALSE, maxit=100)