Regression Modeling in R: Case Studies
Adding Factors and Interactions
REGRESSION MODELING IN R: CASE STUDIES
Adding Factors and Interactions Danielle Quinn PhD Candidate, - - PowerPoint PPT Presentation
Regression Modeling in R: Case Studies REGRESSION MODELING IN R : CASE STUDIES Adding Factors and Interactions Danielle Quinn PhD Candidate, Memorial University Regression Modeling in R: Case Studies Identifying additional factors
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
pr_fac(poisson_glm, dragonflies$season, xlabel = "season", modeltype = "poisson")
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
# Poisson GLM poisson_glm <- glm(abundance ~ stream_flow, data = dragonflies, family = "poisson") # Poisson GLM with an interaction added poisson_glm_factor <- glm(abundance ~ stream_flow * season, data = dragonflies, family = "poisson")
Regression Modeling in R: Case Studies
# Create pred_df data frame pred_df <- expand.grid(stream_flow = seq(from = 1, to = 5, length = 10), season = c("summer", "autumn")) # Add predictions to pred_df pred_df$predicted <- predict(poisson_glm_factor, pred_df, type = "response") pred_df stream_flow season predicted 1.00 summer 75.61 1.44 summer 60.98 1.89 summer 49.17 ... ... ... ... ... ... ... ... ... 1.00 autumn 64.88 1.44 autumn 51.21 1.89 autumn 40.41
Regression Modeling in R: Case Studies
ggplot(dragonflies) + geom_point(aes(x = stream_flow, y = abundance)) + geom_line(aes(x = stream_flow, y = predicted, col = season), data = pred_df)
Regression Modeling in R: Case Studies
diag <- data.frame(residuals = resid(poisson_glm_factor), fitted = fitted(poisson_glm_factor)) ggplot(diag) + geom_point(aes(x = fitted, y = residuals))
Regression Modeling in R: Case Studies
dispersion(poisson_glm_factor, modeltype = "poisson") 20.65
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
head(dragonflies, n = 1) abundance feeding_events area stream_flow time season 1 16 69 3.671 1.288379 day summer
Regression Modeling in R: Case Studies
# Birds per square meter 15 / 1 15 # Birds per square meter 44 / 3 14.67
Regression Modeling in R: Case Studies
# Create column containing natural log of area dragonflies$logarea <- log(dragonflies$area) head(dragonflies) abundance feeding_events area stream_flow time season logarea 1 16 69 3.671 1.2883787 day summer 1.300464 2 32 153 4.574 1.2787605 night autumn 1.520388 3 88 408 5.100 0.5956905 day summer 1.629241 4 140 691 3.188 1.4999930 day summer 1.159394 5 62 355 3.830 1.1653945 day summer 1.342865 6 143 678 3.826 1.4268238 day summer 1.341820 # Add offset to the model poisson_glm_offset <- glm(abundance ~ stream_flow * season + offset(logarea), data = dragonflies, family = "poisson")
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
library(MASS) neg_binom_glm <- glm.nb(abundance ~ stream_flow * season + offset(logarea), data = dragonflies)
Regression Modeling in R: Case Studies
# Use drop1 to test influence of each term drop1(neg_binom_glm, test = "Chisq") Single term deletions Model: abundance ~ stream_flow * season + offset(logarea) Df Deviance AIC LRT Pr(>Chi) <none> 159.93 1361.1 stream_flow:season 1 160.10 1359.3 0.16363 0.6858 # Remove the interaction from the model neg_binom_glm_small <- glm.nb(abundance ~ stream_flow + season + offset(logarea), data = dragonflies)
Regression Modeling in R: Case Studies
drop1(neg_binom_glm_small, test = "Chisq") Single term deletions Model: abundance ~ stream_flow + season + offset(logarea) Df Deviance AIC LRT Pr(>Chi) <none> 160.04 1359.3 stream_flow 1 351.37 1548.6 191.323 <2e-16 *** season 1 161.48 1358.7 1.434 0.0485 *
Regression Modeling in R: Case Studies
neg_binom_glm neg_binom_glm_small
Regression Modeling in R: Case Studies
dispersion(neg_binom_glm, modeltype = "nb") 1.11 dispersion(neg_binom_glm_small, modeltype = "nb") 1.10
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
AIC(neg_binom_glm, neg_binom_glm_small) df AIC neg_binom_glm 5 1363.135 neg_binom_glm_small 4 1361.299
Regression Modeling in R: Case Studies
# Create data frame pred_df pred_df <- expand.grid(stream_flow = seq(from = 1, to = 5, length = 10), season = c("summer", "autumn"), logarea = mean(dragonflies$logarea)) # Add predicted values to pred_df pred_df$predicted <- predict(neg_binom_glm_small, pred_df, type = "response") head(pred_df) stream_flow season logarea predicted 1 1.000000 summer 1.73009 126.37291 2 1.444444 summer 1.73009 85.21510 3 1.888889 summer 1.73009 57.46179 4 2.333333 summer 1.73009 38.74732 5 2.777778 summer 1.73009 26.12789 6 3.222222 summer 1.73009 17.61842
Regression Modeling in R: Case Studies
ggplot(dragonflies) + geom_point(aes(x = stream_flow, y = abundance)) + geom_line(aes(x = stream_flow, y = predicted, col = season), data = pred_df)
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
# Extract fitted values raw_fit <- predict(neg_binom_glm_small, pred_df, type = "link") # Extract standard errors raw_se <- predict(neg_binom_glm_small, pred_df, type = "link", se.fit = TRUE)$se # Calculate upper and lower 95% confidence limits (fit +/- 1.96 standard errors, back-transformed from the link scale) and add to pred_df pred_df$lower <- exp(raw_fit - 1.96 * raw_se) pred_df$upper <- exp(raw_fit + 1.96 * raw_se) head(pred_df) stream_flow season logarea predicted upper lower 1 1.000000 summer 1.73009 126.37291 153.46921 104.06068 2 1.444444 summer 1.73009 85.21510 101.30002 71.68422 3 1.888889 summer 1.73009 57.46179 67.56242 48.87121 4 2.333333 summer 1.73009 38.74732 45.62704 32.90494 5 2.777778 summer 1.73009 26.12789 31.19052 21.88699 6 3.222222 summer 1.73009 17.61842 21.52939 14.41790
Regression Modeling in R: Case Studies
ggplot(dragonflies) + geom_point(aes(x = stream_flow, y = abundance)) + geom_line(aes(x = stream_flow, y = predicted, col = season), data = pred_df) + geom_line(aes(x = stream_flow, y = upper, col = season), linetype = "dashed", data = pred_df) + geom_line(aes(x = stream_flow, y = lower, col = season), linetype = "dashed", data = pred_df)
Regression Modeling in R: Case Studies
Regression Modeling in R: Case Studies
REGRESSION MODELING IN R: CASE STUDIES