Define Artificial Intelligence

Disambiguate Terms Between Different Fields

See Some Code

2010

2010 + 20 Minutes

Same Thing

Different Words

2019

Same Thing

Different Words

Let’s Define AI

Let’s Try Again

What Humans Find Easy but Computers Find Hard

“People want to regulate AI but no one cared when it was called logistic regression.”

Same Thing

Different Words

\[ y = a + bx \]

Traditional    Newer
-----------    -------
Intercept      Bias
Coefficients   Weights
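
A minimal sketch in R, on hypothetical toy data, showing that the "bias" and "weights" of a linear model are exactly what lm() has always reported as the intercept and coefficients:

# hypothetical toy data; the fit is only for illustration
x <- 1:10
y <- 3 + 2 * x + rnorm(10)
toy_fit <- lm(y ~ x)
# "(Intercept)" is the bias, the coefficient on x is the weight
coef(toy_fit)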

\[ \frac{1}{1 + e^{-x}} = \frac{e^x}{e^x + 1} \]

Traditional      Newer
-------------    -------
Inverse Logit    Sigmoid
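
A quick numeric check, as a sketch, that the inverse logit and the sigmoid really are one function with two names:

# two names, one function
inverse_logit <- function(x) exp(x) / (exp(x) + 1)
sigmoid <- function(x) 1 / (1 + exp(-x))
all.equal(inverse_logit(-3:3), sigmoid(-3:3))  # TRUE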

\[ \hat{y} = \hat{f}(\tilde{x}) \]

Traditional    Newer
-----------    ---------
Prediction     Inference
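
Whichever word you use, the operation is the same: run the fitted model on new inputs. A sketch, reusing the hypothetical toy lm() fit from above:

# prediction (traditional) and inference (newer) are the same call in R
predict(toy_fit, newdata = data.frame(x = 11:13))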

Same Thing

Different Words

if_else(price > 100, 'sell', 'buy')
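
Even that one-liner counts as a model. A sketch applying it to a hypothetical vector of prices (if_else() comes from dplyr):

# hypothetical prices; the rule maps each one to an action
library(dplyr)
prices <- c(95, 102, 100.5, 87)
if_else(prices > 100, 'sell', 'buy')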

More Advanced

\[ \min_{\beta \in \mathbb{R}^p} -\left[ \frac{1}{N} \sum_{i=1}^N y_i X_{i:}\beta - \log\left(1 + e^{X_{i:}\beta}\right) \right] + \lambda\left[ \frac{1}{2}(1 - \alpha) \|\beta\|_2^2 + \alpha\|\beta\|_1 \right] \]
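
Here λ sets the overall strength of the penalty and α mixes the two penalty types: α = 1 gives the lasso (L1) penalty, α = 0 gives the ridge (L2) penalty.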

Prepare Data

logistic_rec <- recipe(Credit ~ ., data = train) %>%
  # collapse some levels into "other"
  step_other(all_nominal(), -all_outcomes(), threshold = 0.01) %>%
  # remove variables with zero variance
  step_zv(all_predictors()) %>% 
  # center and scale numeric variables
  step_center(all_numeric()) %>% step_scale(all_numeric()) %>% 
  # turn categoricals into dummies
  step_dummy(all_nominal(), -all_outcomes()) %>%
  # ensure there is an intercept
  step_intercept() %>% 
  # perform calculations
  prep(training = train, retain = TRUE)

# carry out procedure on data
train_prepped <- juice(logistic_rec)
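
The prepped recipe can apply the identical steps to held-out data. A sketch, assuming a `test` data frame split off the same way as `train`:

# apply the same preprocessing to hypothetical held-out data
test_prepped <- bake(logistic_rec, new_data = test)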

Define the Model

# fit a logistic regression
credit_logistic_1 <- logistic_reg() %>% 
  set_engine("glmnet") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

coefpath(credit_logistic_1$fit)
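
Each plot below fixes one value of λ along that path: as the penalty grows from 0.00017 to 0.051, more coefficients are shrunk toward, and eventually to, zero.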

coefplot(credit_logistic_1$fit, sort='magnitude', lambda=0.00017)

coefplot(credit_logistic_1$fit, sort='magnitude', lambda=0.015)

coefplot(credit_logistic_1$fit, sort='magnitude', lambda=0.051)

Hyperparameters

# fit a logistic regression
credit_logistic_2 <- logistic_reg(mixture=0) %>% 
  set_engine("glmnet") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

coefpath(credit_logistic_2$fit)
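
Setting mixture = 0 gives a pure ridge (L2) fit. A sketch of the lasso (L1) counterpart, with mixture = 1 (the object name is ours):

# fit a lasso-penalized logistic regression
credit_logistic_lasso <- logistic_reg(mixture = 1) %>% 
  set_engine("glmnet") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)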

Different Engines

credit_logistic_3 <- logistic_reg() %>% 
  set_engine("glm") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

credit_logistic_4 <- logistic_reg() %>% 
  set_engine("stan") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

credit_logistic_5 <- logistic_reg() %>% 
  set_engine("spark") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

credit_logistic_6 <- logistic_reg() %>% 
  set_engine("keras") %>%
  # provide the data
  fit(Credit ~ ., data = train_prepped)

credit_logistic_7 <- keras_model_sequential() %>% 
    # fully connected layer; input_shape is the column count minus the outcome
    layer_dense(units = 512, activation = 'relu', 
                input_shape=dim(train_prepped)[[-1]] - 1, name='fc1') %>% 
    # batch normalization
    layer_batch_normalization(name='batchnorm1') %>% 
    # dropout
    layer_dropout(rate=0.5, name='dropout1') %>% 
    # fully connected layer
    layer_dense(units=256, activation='relu', name='fc2') %>% 
    # batch normalization
    layer_batch_normalization(name='batchnorm2') %>% 
    # dropout
    layer_dropout(rate=0.5, name='dropout2') %>% 
    # fully connected layer
    layer_dense(units=128, activation='relu', name='fc3') %>% 
    # batch normalization
    layer_batch_normalization(name='batchnorm3') %>% 
    # dropout
    layer_dropout(rate=0.5, name='dropout3') %>% 
    # output layer
    layer_dense(units=1, activation="sigmoid", name='out')
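
As written the network is only defined, never trained. A sketch of the remaining steps, assuming the prepped data has been converted to a numeric matrix x_train and a 0/1 outcome vector y_train (both names are ours):

# specify the loss, optimizer, and metric
credit_logistic_7 %>% 
    compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = 'accuracy')
# train the network on the hypothetical matrices
credit_logistic_7 %>% 
    fit(x_train, y_train, epochs = 10, batch_size = 32)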

Same Thing

Different Words

Let’s See Something Different

But Same Idea

credit_tree_1 <- decision_tree(mode='classification') %>% 
    set_engine("rpart") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)

rpart.plot(credit_tree_1$fit, uniform=FALSE, under=TRUE, minbranch=.9, cex=.8)
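
The same fitted tree can be printed as plain if/else rules, which loops back to the one-line if_else() model from earlier. A sketch using rpart.plot's rule printer:

# express the fitted tree as a set of if/else rules
rpart.rules(credit_tree_1$fit)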

More of the Same

credit_tree_2 <- decision_tree(mode='classification') %>% 
    set_engine("C5.0") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)

credit_tree_3 <- decision_tree(mode='classification') %>% 
    set_engine("spark") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)

Now with Boosting

boost_tree_1 <- boost_tree(mode='classification') %>% 
    set_engine("xgboost") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)

xgb.importance(feature_names=boost_tree_1$fit$feature_names, 
               model=boost_tree_1$fit)[1:15] %>% 
  xgb.plot.importance()

Other Ways to Boost

boost_tree_2 <- boost_tree(mode='classification') %>% 
    set_engine("C5.0") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)

boost_tree_3 <- boost_tree(mode='classification') %>% 
    set_engine("spark") %>%
    # provide the data
    fit(Credit ~ ., data = train_prepped)
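
However the model was fit, it is judged the same way: by its predictions on held-out data. A sketch, assuming the `test_prepped` data from the bake() sketch above and the dplyr and yardstick packages:

# yardstick supplies the accuracy metric
library(yardstick)
boost_tree_1 %>% 
    predict(new_data = test_prepped) %>% 
    # attach the true outcomes from the hypothetical held-out data
    bind_cols(test_prepped) %>% 
    accuracy(truth = Credit, estimate = .pred_class)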

Many Ways to Accomplish Our Goal

So Remember

AI Code Can be Complex or Simple

Same Thing

Different Words

Thank You