This project is aimed at characterizing the life expectancy and survivorship of FFOs, with the ultimate aim of answering several questions: What is the life expectancy and 5-year survivorship of current family forest owners in the USA? To what extent does expected mortality predict future plans, including plans to transfer land, manage land, or engage in estate planning activities?
This project uses actuarial life tables in order to estimate life expectancy and survivorship. The tables used come from the National Center for Health Statistics (Arias E, Xu JQ. United States life tables, 2017. National Vital Statistics Reports; vol 68 no 7. Hyattsville, MD: National Center for Health Statistics. 2019.)
First, the NCHS data products (spreadsheets) are imported and functions for filtering by demographic variables are defined:
library(readxl)
library(ggplot2)
library(scales)
library(nwos)
library(car)
library(ResourceSelection)
#read in all individual life tables into a list
files <- dir('..')[grepl("xlsx",dir('..'))] #list of tables
files <- paste('..',files,sep='/')
#labels for the demographic categories associated with each table in files (same order)
cats <- c('M','F','HM','HF','NHWM','NHWF','NHBM','NHBF')
lt <- rep(list(NA),length(files))
for (i in 1:length(lt)){
table <- read_excel(files[i],sheet=1)[3:102,]
names(table)[1] <- 'AGE'
table$TAB <- files[i]
table$CAT <- cats[i]
lt[[i]] <- table
rm(table)
}
lt <- do.call("rbind",lt) #merge list into a dataframe
#reformat age
lt$AGE <- as.numeric(gsub('\\–.*','',lt$AGE))
#calculate survivorship as inverse of mortality
lt$qxINV <- 1-as.numeric(lt[[2]])
#function for determining demographic of a respondent
dcat <- function(SEX='1',HISPANIC='0',BLACK='0',WHITE='1'){
if (SEX=='1' & HISPANIC=='1'){
dc <- 'HM'
} else if (SEX=='2' & HISPANIC=='1'){
dc <- 'HF'
} else if (SEX=='1' & HISPANIC=='0' & BLACK=='1'){
dc <- 'NHBM'
} else if (SEX=='2' & HISPANIC=='0' & BLACK=='1'){
dc <- 'NHBF'
} else if (SEX=='1' & HISPANIC=='0' & WHITE=='1'){
dc <- 'NHWM'
} else if (SEX=='2' & HISPANIC=='0' & WHITE=='1'){
dc <- 'NHWF'
} else if (SEX=='1'){
dc <- 'M'
} else if (SEX=='2'){
dc <- 'F'
}
return(dc)
}
#function for calculating 5-year survival probability of a respondent
surv5 <- function(age=60,demo='M'){
df <- lt[lt$CAT==demo,] #subset by demographic category
df <- df[df$AGE %in% age:(age+4),] #subset to five years
if (nrow(df)!=5){
message("less than five records in life table")
}
total <- prod(df$qxINV) #calculate total survivorship
total <- ifelse(total>1,1,total)
return(total)
}
#function for calculating life expectancy of a respondent
lex <- function(age=60,demo='M'){
df <- lt[lt$CAT==demo,] #subset by demographic category
df <- df[df$AGE==age,] #subset by age
ex <- as.numeric(df[[7]][1])
return(ex)
}
The NWOS data are imported and wide dataframes are created, one for the plots and one for the survey results (with imputed data inserted):
quest <- get_nwos(cycle='2018',study='base')
qu <- nwos_wide(quest,imputations="1")
qu <- qu[qu$OWNCD_NWOS=='45',]
qu$AC_WOOD <- as.numeric(qu$AC_WOOD)
qu$OWN1_AGE <- as.numeric(qu$OWN1_AGE)
qu$FINAL_WEIGHT <- as.numeric(qu$FINAL_WEIGHT)
These tables only go up to age 99, so OWN1_AGE is truncated:
table(qu$OWN1_AGE>99) #how many?
##
## FALSE TRUE
## 9520 4
qu$OWN1_AGE <- ifelse(qu$OWN1_AGE>99,99,qu$OWN1_AGE)
The correct life table demographic categories for each respondent are determined based on sex, ethnicity, and race (OWN1_GENDER, OWN1_ETH, and OWN1_RACE). These categories include Hispanic male (HM), Hispanic female (HF), non-Hispanic black male (NHBM), non-Hispanic black female (NHBF), non-Hispanic white male (NHWM), non-Hispanic white female (NHWF), general male (M), and general female (F).
Any respondent who identifies as Hispanic (i.e. OWN1_ETH=1) is identified as Hispanic. Any respondent who identifies as non-Hispanic (i.e. OWN1_ETH=0) and both black (i.e. OWN1_RACE_BLACK=1) and white (i.e. OWN1_RACE_WHITE=1) is classified as non-Hispanic black. Races other than black and white are not included in these life tables. Consequently, respondents who identify as non-Hispanic and neither black nor white are classified by gender only (i.e. the population-wide tables for the appropriate genders are used).
qu$CAT <- mapply(dcat,qu$OWN1_GENDER,qu$OWN1_ETH,qu$OWN1_RACE_BLACK,qu$OWN1_RACE_WHITE)
For each respondent, 5-year survivorship (i.e. the probability of surviving another five years) and life expectancy are calculated from the tables based on demographic category. A period of 5 years was adopted to complement the 5-year period of future activities and land transfer questions in the NWOS.
qu$SURV5 <- mapply(surv5,qu$OWN1_AGE,qu$CAT)
qu$LEX <- mapply(lex,qu$OWN1_AGE,qu$CAT)
qu$LE_CAT <- ifelse(qu$LEX<5,'<5yr','>5yr') #create categorical grouping variable based on life expectancy
qu$LE_CAT <- ifelse(qu$LEX>=5&qu$LEX<=10,'5-10yr',qu$LE_CAT)
qu$LE_CAT <- ifelse(qu$LEX>10,'>10yr',qu$LE_CAT)
qu$LE_CAT <- ordered(qu$LE_CAT,levels=c('<5yr','5-10yr','>10yr'))
Plot and tabulate demographic category and age.
qu$CAT <- ordered(qu$CAT,
levels=c('HF','HM','NHBF','NHBM',
'NHWF','NHWM','F','M'))
table(qu$CAT)
##
## HF HM NHBF NHBM NHWF NHWM F M
## 26 80 29 50 1938 7278 32 91
ggplot(data=qu,aes(x='1',fill=CAT))+geom_bar(color="black")+
coord_flip()+
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.y=element_blank(),
legend.title=element_blank())
sum(qu$CAT=='NHWM')/length(qu$CAT) #proportion NHWM
## [1] 0.7641747
ggplot(data=qu,aes(x=OWN1_AGE))+geom_histogram(color="black")+
labs(x="years")
mean(qu$OWN1_AGE) #mean of age
## [1] 65.43879
sd(qu$OWN1_AGE)
## [1] 11.67343
Using the survey weights (which represent number of ownerships), we can generate histograms and density plots that represent the entire population.
ggplot(data=qu,aes(SURV5,weight=FINAL_WEIGHT*PLOT_COUNT/1e3))+
geom_histogram(fill="red",color="black")+
labs(x="probability of surviving an additional five years",
y="thousand FFOs")
ggplot(data=qu,aes(LEX,weight=FINAL_WEIGHT*PLOT_COUNT/1e3))+
geom_histogram(fill="red",color="black")+
labs(x="years remaining",
y="thousand FFOs")
We can also feed the mortality variables into the NWOS estimation engine in order to produce population estimates and then examine the results.
#these tables come from the same raw data, from the estimation tool on the Forest Service server
pop.est <- readRDS("../NWOS_2018_FFO_ONE_PLUS_W_MORTALITY.RDS")
pop.est$SE <- sqrt(pop.est$VARIANCE) #calculate standard errors
pop.est$LEVEL <- ordered(pop.est$LEVEL,levels=c('1','<5yr','5-10yr','>10yr')) #order levels
pop.est <- pop.est[order(pop.est$LEVEL),]
For example, mean 5-year survivorship and life expectancy:
#survivorship
pop.est[pop.est$VARIABLE=='SURV5' & pop.est$STATISTIC=='MEAN',c("VALUE","SE")]
## # A tibble: 1 x 2
## VALUE SE
## <dbl> <dbl>
## 1 0.893 0.00904
#life expectancy
pop.est[pop.est$VARIABLE=='LEX' & pop.est$STATISTIC=='MEAN',c("VALUE","SE")]
## # A tibble: 1 x 2
## VALUE SE
## <dbl> <dbl>
## 1 21.0 1.60
…and also totals, such as the total amount of area by whether life expectancy of the current owner is less than five years, five to 10 years, or greater than 10 years:
pop.est[pop.est$VARIABLE=='LE_CAT' &
pop.est$STATISTIC=='TOTAL' &
pop.est$UNITS=='ACRES',
c("LEVEL","VALUE","SE")]
## # A tibble: 3 x 3
## LEVEL VALUE SE
## <ord> <dbl> <dbl>
## 1 <5yr 6126003. 638622.
## 2 5-10yr 39382164. 6494167.
## 3 >10yr 230283588. 6769608.
…and proportions.
pop.est[pop.est$VARIABLE=='LE_CAT' &
pop.est$STATISTIC=='PROPORTION' &
pop.est$UNITS=='ACRES',
c("LEVEL","VALUE","SE")]
## # A tibble: 3 x 3
## LEVEL VALUE SE
## <ord> <dbl> <dbl>
## 1 <5yr 0.0222 0.00181
## 2 5-10yr 0.143 0.0185
## 3 >10yr 0.835 0.0183
In addition to producing simple estimates, we can fit statistical models and test hypotheses (Note: these are unweighted models). First, we create some additional variables:
#assign region
west <- c(2.1,2.2,15,8,6,41,16,32,35,30,56,53,49,4)
south <- c(5,12,1,51,13,40.1,40.2,48.1,48.2,37,45,21,28,22,47)
qu$REGION <- ifelse(qu$STATECD_NWOS %in% west,'WEST',
ifelse(qu$STATECD_NWOS %in% south,'SOUTH','NORTH'))
qu$TRAN_FUT2 <- ifelse(qu$TRAN_FUT %in% 5,1,0) #box1, TRAN_FUT
qu$EMO_WOOD2 <- ifelse(qu$EMO_WOOD %in% 4:5,"1","0") #box2, EMO_WOOD
qu$FUT_ANY <- ifelse(qu$FUT_NONE==1|qu$FUT_DONTKNOW==1,0,1) #binary, any future actions
qu$TRAN_RECENT2 <- ifelse(qu$TRAN_RECENT==1,"1","0") #recode, TRAN_RECENT
qu$MAN_PLAN2 <- ifelse(qu$MAN_PLAN==1,"1","0") #recode, MAN_PLAN
qu$OWNERS_NUMBER2 <- ifelse(as.numeric(qu$OWNERS_NUMBER)>1,"1","0") #recode, OWNERS_NUMBER
qu$OWN1_EDU2 <- ifelse(qu$OWN1_EDU %in% 3:6,"1","0") #recode, EDUCATION
qu$TRAN_FAM <- ifelse(qu$TRAN_FUT_TO_CHILD=='1'|qu$TRAN_FUT_TO_FAM=='1',"1","0") #binary, whether likely heir is child or family
Tjur <- function(x){ #function for calculating Tjur's statistic
pred <- predict(x,newdata=x$data,type='response')
ts <- mean(pred[x$y=='1'])-mean(pred[x$y=='0'])
return(ts)
}
First, we look to see whether life expectancy and survivorship vary by region.
reg.surv <- aov(log(SURV5)~REGION,qu) #survivorship
summary(reg.surv)
## Df Sum Sq Mean Sq F value Pr(>F)
## REGION 2 0.5 0.25297 6.04 0.00239 **
## Residuals 9521 398.7 0.04188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(reg.surv)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log(SURV5) ~ REGION, data = qu)
##
## $REGION
## diff lwr upr p adj
## SOUTH-NORTH -0.015822063 -0.0269212455 -0.004722881 0.0024105
## WEST-NORTH -0.001627558 -0.0148664666 0.011611351 0.9552556
## WEST-SOUTH 0.014194506 0.0002506734 0.028138338 0.0449032
#checking homoscedasticity, region variable
qu$reg.surv.res <- reg.surv$residuals[match(rownames(qu),names(reg.surv$residuals))]
ggplot(qu,aes(y=reg.surv.res,fill=REGION))+geom_boxplot()+ #compare variances
scale_y_continuous(limits=c(-0.25,0.25))
reg.lex <- aov(log(LEX)~REGION,qu) #life expectancy
summary(reg.lex)
## Df Sum Sq Mean Sq F value Pr(>F)
## REGION 2 6.4 3.188 12.57 0.00000354 ***
## Residuals 9521 2414.7 0.254
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(reg.lex)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log(LEX) ~ REGION, data = qu)
##
## $REGION
## diff lwr upr p adj
## SOUTH-NORTH -0.05811619 -0.085429554 -0.03080282 0.0000019
## WEST-NORTH -0.01789751 -0.050476408 0.01468139 0.4021130
## WEST-SOUTH 0.04021868 0.005905074 0.07453229 0.0165715
#checking homoscedasticity, region variable
qu$reg.lex.res <- reg.lex$residuals[match(rownames(qu),names(reg.lex$residuals))]
ggplot(qu,aes(y=reg.lex.res,fill=REGION))+geom_boxplot()+ #compare variances
scale_y_continuous(limits=c(-1,1))
Next, we look to see whether survivorship predicts whether a respondent is very likely to transfer land in the next five years.
lr1 <- glm(TRAN_FUT2~SURV5+
EMO_WOOD2+
ACT_NONE+
TRAN_RECENT2+
TRAN_FAM+
AC_WOOD+
REGION+
MAN_PLAN2+
OWN1_EDU2,family="binomial",data=qu)
summary(lr1)
##
## Call:
## glm(formula = TRAN_FUT2 ~ SURV5 + EMO_WOOD2 + ACT_NONE + TRAN_RECENT2 +
## TRAN_FAM + AC_WOOD + REGION + MAN_PLAN2 + OWN1_EDU2, family = "binomial",
## data = qu)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5313 -0.3997 -0.2589 -0.2230 2.7877
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.19024670 0.26602530 0.715 0.475
## SURV5 -2.57675965 0.27036808 -9.531 < 0.0000000000000002 ***
## EMO_WOOD21 -0.38039953 0.09532729 -3.990 0.0000659 ***
## ACT_NONE1 0.11981130 0.11386644 1.052 0.293
## TRAN_RECENT21 1.15006881 0.11012282 10.444 < 0.0000000000000002 ***
## TRAN_FAM1 -1.25327545 0.09185543 -13.644 < 0.0000000000000002 ***
## AC_WOOD 0.00001251 0.00001330 0.941 0.347
## REGIONSOUTH 0.10829190 0.09960329 1.087 0.277
## REGIONWEST -0.03002545 0.12178896 -0.247 0.805
## MAN_PLAN21 0.00246075 0.10773037 0.023 0.982
## OWN1_EDU21 0.14498114 0.11434953 1.268 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4451.9 on 9523 degrees of freedom
## Residual deviance: 4037.3 on 9513 degrees of freedom
## AIC: 4059.3
##
## Number of Fisher Scoring iterations: 6
exp(lr1$coefficients)
## (Intercept) SURV5 EMO_WOOD21 ACT_NONE1 TRAN_RECENT21
## 1.20954796 0.07601994 0.68358824 1.12728412 3.15841024
## TRAN_FAM1 AC_WOOD REGIONSOUTH REGIONWEST MAN_PLAN21
## 0.28556790 1.00001251 1.11437298 0.97042084 1.00246378
## OWN1_EDU21
## 1.15601777
hoslem.test(lr1$y,fitted(lr1))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr1$y, fitted(lr1)
## X-squared = 15.288, df = 8, p-value = 0.05378
Tjur(lr1)
## [1] 0.05629825
vif(lr1)
## GVIF Df GVIF^(1/(2*Df))
## SURV5 1.069002 1 1.033926
## EMO_WOOD2 1.076514 1 1.037552
## ACT_NONE 1.086310 1 1.042262
## TRAN_RECENT2 1.055990 1 1.027614
## TRAN_FAM 1.105632 1 1.051490
## AC_WOOD 1.069064 1 1.033956
## REGION 1.068622 2 1.016731
## MAN_PLAN2 1.093428 1 1.045671
## OWN1_EDU2 1.058936 1 1.029046
table(qu$TRAN_FUT2) #number of events
##
## 0 1
## 8929 595
#checking homoscedasticity, region variable
qu$lr1.res <- lr1$residuals[match(rownames(qu),names(lr1$residuals))]
ggplot(qu,aes(y=lr1.res,fill=REGION))+geom_boxplot()+ #compare variances
scale_y_continuous(limits=c(-1.5,-1))
Lastly, we look to see whether survivorship predicts whether a respondent plans to undertake any actions in the next five years.
lr2 <- glm(FUT_ANY~SURV5+
EMO_WOOD2+
ACT_NONE+
TRAN_RECENT2+
TRAN_FAM+
AC_WOOD+
REGION+
MAN_PLAN2+
OWN1_EDU2,family="binomial",data=qu)
summary(lr2)
##
## Call:
## glm(formula = FUT_ANY ~ SURV5 + EMO_WOOD2 + ACT_NONE + TRAN_RECENT2 +
## TRAN_FAM + AC_WOOD + REGION + MAN_PLAN2 + OWN1_EDU2, family = "binomial",
## data = qu)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4329 0.1968 0.3646 0.4625 2.0904
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20764519 0.23124082 0.898 0.36921
## SURV5 1.24610272 0.23757200 5.245 0.000000156148 ***
## EMO_WOOD21 0.47388897 0.07400525 6.403 0.000000000152 ***
## ACT_NONE1 -2.88081175 0.06988042 -41.225 < 0.0000000000000002 ***
## TRAN_RECENT21 -0.14823448 0.12788214 -1.159 0.24640
## TRAN_FAM1 0.30213008 0.07690283 3.929 0.000085397299 ***
## AC_WOOD 0.00049686 0.00008255 6.019 0.000000001752 ***
## REGIONSOUTH 0.02279170 0.07599819 0.300 0.76426
## REGIONWEST 0.51515776 0.10273140 5.015 0.000000531415 ***
## MAN_PLAN21 0.97936571 0.11230754 8.720 < 0.0000000000000002 ***
## OWN1_EDU21 0.26004098 0.07817452 3.326 0.00088 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8864.9 on 9523 degrees of freedom
## Residual deviance: 6011.7 on 9513 degrees of freedom
## AIC: 6033.7
##
## Number of Fisher Scoring iterations: 7
exp(lr2$coefficients)
## (Intercept) SURV5 EMO_WOOD21 ACT_NONE1 TRAN_RECENT21
## 1.23077640 3.47676659 1.60622864 0.05608921 0.86222891
## TRAN_FAM1 AC_WOOD REGIONSOUTH REGIONWEST MAN_PLAN21
## 1.35273718 1.00049699 1.02305341 1.67390256 2.66276673
## OWN1_EDU21
## 1.29698324
hoslem.test(lr2$y,fitted(lr2))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr2$y, fitted(lr2)
## X-squared = 13.067, df = 8, p-value = 0.1096
Tjur(lr2)
## [1] 0.358486
vif(lr2)
## GVIF Df GVIF^(1/(2*Df))
## SURV5 1.030720 1 1.015244
## EMO_WOOD2 1.041607 1 1.020592
## ACT_NONE 1.023869 1 1.011864
## TRAN_RECENT2 1.029515 1 1.014650
## TRAN_FAM 1.070008 1 1.034412
## AC_WOOD 1.081145 1 1.039781
## REGION 1.072565 2 1.017667
## MAN_PLAN2 1.020012 1 1.009957
## OWN1_EDU2 1.043363 1 1.021451
table(qu$FUT_ANY) #number of events
##
## 0 1
## 1677 7847
#checking homoscedasticity, region variable
qu$lr2.res <- lr2$residuals[match(rownames(qu),names(lr2$residuals))]
ggplot(qu,aes(y=lr2.res,fill=REGION))+geom_boxplot()+ #compare variances
scale_y_continuous(limits=c(-2,-1))
The NIFA data are imported. There are two datasets, corresponding to the screener survey (nf1) and preferences survey. We are using only the screener data for this analysis.
These tables only go up to age 99, so age is truncated:
table(nf1$age>99) #how many?
##
## FALSE
## 763
nf1$age <- ifelse(nf1$age>99,99,nf1$age)
The correct life table demographic categories for each respondent are determined by gender only (i.e. the population-wide tables for the appropriate genders are used).
nf1$CAT <- ifelse(nf1$gender=='1','M','F')
nf1 <- nf1[!is.na(nf1$age) & !is.na(nf1$CAT),] #drop nulls
For each respondent, 5-year survivorship (i.e. the probability of surviving another five years) and life expectancy are calculated from the tables based on demographic category.
nf1$SURV5 <- mapply(surv5,nf1$age,nf1$CAT)
nf1$LEX <- mapply(lex,nf1$age,nf1$CAT)
Plot and tabulate demographic category and age.
nf1$CAT <- ordered(nf1$CAT,
levels=c('F','M'))
table(nf1$CAT)
##
## F M
## 221 542
ggplot(data=nf1,aes(x='1',fill=CAT))+geom_bar(color="black")+
coord_flip()+
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.y=element_blank(),
legend.title=element_blank())
ggplot(data=nf1,aes(x=age))+geom_histogram(color="black")+
labs(x="years")
mean(nf1$age) #mean of age
## [1] 63.31324
sd(nf1$age)
## [1] 12.11678
The NIFA data do not have survey weights, BUT we can summarize survivorship and life expectancy for the sample:
mean(nf1$SURV5)
## [1] 0.8857289
sd(nf1$SURV5)
## [1] 0.1266086
mean(nf1$LEX)
## [1] 20.68604
sd(nf1$LEX)
## [1] 9.18862
In addition to producing simple estimates, we can fit statistical models and test hypotheses (Note: these are unweighted models). First, we create some additional variables:
nf1$TTM_FAM2 <- factor(ifelse(nf1$ttm_fam %in% 1:3,1,0)) #box3, TTM_FAM
nf1$TTM_DEC2 <- factor(ifelse(nf1$ttm_dec %in% 1:3,"1","0")) #box3, TTM_DEC
nf1$TTM_WILL2 <- factor(ifelse(nf1$ttm_will %in% 1:3,"1","0")) #box3, TTM_WILL
nf1$TTM_TRUST2 <- factor(ifelse(nf1$ttm_trust %in% 1:3,"1","0")) #box3, TTM_TRUST
nf1$HEIRS2 <- factor(ifelse(nf1$heirs %in% 1,"1","0")) #recode, HEIRS
nf1$EDU2 <- factor(ifelse(nf1$edu %in% 3:6,"1","0")) #recode, EDU
nf1$INT_FUT2 <- factor(ifelse(nf1$intentions_fut %in% 1:3,"1","0")) #recode, intentions_fut
Tjur <- function(x){ #function for calculating Tjur's statistic
pred <- predict(x,newdata=x$data,type='response')
pred <- pred[!is.na(pred)]
ts <- mean(pred[x$y=='1'])-mean(pred[x$y=='0'])
return(ts)
}
Next, we look to see whether survivorship predicts whether a respondent has done or is likely to do within the next year a number of planning actions:
lr3 <- glm(TTM_FAM2~SURV5+ #discuss options with family
HEIRS2+
INT_FUT2+
ac_wood+
EDU2,family="binomial",data=nf1)
summary(lr3)
##
## Call:
## glm(formula = TTM_FAM2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood +
## EDU2, family = "binomial", data = nf1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.295 -1.061 0.670 1.101 1.690
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.318172 0.585784 -0.543 0.587023
## SURV5 -0.879396 0.628180 -1.400 0.161540
## HEIRS21 0.558654 0.165332 3.379 0.000728 ***
## INT_FUT21 0.820497 0.183799 4.464 0.00000804 ***
## ac_wood 0.003637 0.001288 2.824 0.004749 **
## EDU21 0.605656 0.177688 3.409 0.000653 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1046.51 on 755 degrees of freedom
## Residual deviance: 968.49 on 750 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 980.49
##
## Number of Fisher Scoring iterations: 4
exp(lr3$coefficients)
## (Intercept) SURV5 HEIRS21 INT_FUT21 ac_wood EDU21
## 0.7274777 0.4150337 1.7483183 2.2716295 1.0036441 1.8324546
hoslem.test(lr3$y,fitted(lr3))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr3$y, fitted(lr3)
## X-squared = 5.2126, df = 8, p-value = 0.7346
Tjur(lr3)
## [1] 0.09916798
vif(lr3)
## SURV5 HEIRS2 INT_FUT2 ac_wood EDU2
## 1.029690 1.150635 1.131551 1.006363 1.024280
table(nf1$TTM_FAM2) #number of events
##
## 0 1
## 363 400
lr4 <- glm(TTM_DEC2~SURV5+ #decide options
HEIRS2+
INT_FUT2+
ac_wood+
EDU2,family="binomial",data=nf1)
summary(lr4)
##
## Call:
## glm(formula = TTM_DEC2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood +
## EDU2, family = "binomial", data = nf1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9654 -0.9034 -0.8126 1.2248 1.9302
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.099296 0.593576 -1.852 0.064027 .
## SURV5 -0.630910 0.632773 -0.997 0.318737
## HEIRS21 0.140718 0.174495 0.806 0.419996
## INT_FUT21 0.613131 0.180971 3.388 0.000704 ***
## ac_wood 0.003218 0.001106 2.910 0.003611 **
## EDU21 0.769262 0.194562 3.954 0.0000769 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 980.66 on 755 degrees of freedom
## Residual deviance: 934.02 on 750 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 946.02
##
## Number of Fisher Scoring iterations: 4
exp(lr4$coefficients)
## (Intercept) SURV5 HEIRS21 INT_FUT21 ac_wood EDU21
## 0.3331054 0.5321073 1.1510994 1.8462026 1.0032232 2.1581724
hoslem.test(lr4$y,fitted(lr4))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr4$y, fitted(lr4)
## X-squared = 5.637, df = 8, p-value = 0.6878
Tjur(lr4)
## [1] 0.06140125
vif(lr4)
## SURV5 HEIRS2 INT_FUT2 ac_wood EDU2
## 1.030375 1.197865 1.182333 1.012821 1.019572
table(nf1$TTM_DEC2) #number of events
##
## 0 1
## 494 269
lr5 <- glm(TTM_WILL2~SURV5+ #file will
HEIRS2+
INT_FUT2+
ac_wood+
EDU2,family="binomial",data=nf1)
summary(lr5)
##
## Call:
## glm(formula = TTM_WILL2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood +
## EDU2, family = "binomial", data = nf1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3811 -1.2591 0.7055 0.8921 1.3622
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.980266 0.755337 2.622 0.00875 **
## SURV5 -2.495479 0.818045 -3.051 0.00228 **
## HEIRS21 0.559871 0.178639 3.134 0.00172 **
## INT_FUT21 0.083976 0.199585 0.421 0.67394
## ac_wood 0.003187 0.001446 2.204 0.02751 *
## EDU21 0.832112 0.181515 4.584 0.00000456 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 928.99 on 755 degrees of freedom
## Residual deviance: 877.53 on 750 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 889.53
##
## Number of Fisher Scoring iterations: 4
exp(lr5$coefficients)
## (Intercept) SURV5 HEIRS21 INT_FUT21 ac_wood EDU21
## 7.24466980 0.08245693 1.75044602 1.08760246 1.00319245 2.29816811
hoslem.test(lr5$y,fitted(lr5))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr5$y, fitted(lr5)
## X-squared = 16.024, df = 8, p-value = 0.04204
Tjur(lr5)
## [1] 0.06850944
vif(lr5)
## SURV5 HEIRS2 INT_FUT2 ac_wood EDU2
## 1.033227 1.191965 1.176634 1.009609 1.031022
table(nf1$TTM_WILL2) #number of events
##
## 0 1
## 230 533
lr6 <- glm(TTM_TRUST2~SURV5+ #set up trust
HEIRS2+
INT_FUT2+
ac_wood+
EDU2,family="binomial",data=nf1)
summary(lr6)
##
## Call:
## glm(formula = TTM_TRUST2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood +
## EDU2, family = "binomial", data = nf1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5416 -0.7666 -0.6041 -0.3925 2.2855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.211404 0.611846 -0.346 0.729704
## SURV5 -2.447778 0.655635 -3.733 0.000189 ***
## HEIRS21 0.635867 0.202803 3.135 0.001716 **
## INT_FUT21 -0.102192 0.206526 -0.495 0.620732
## ac_wood 0.002145 0.001071 2.002 0.045285 *
## EDU21 0.838027 0.232803 3.600 0.000319 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 808.36 on 755 degrees of freedom
## Residual deviance: 764.50 on 750 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 776.5
##
## Number of Fisher Scoring iterations: 4
exp(lr6$coefficients)
## (Intercept) SURV5 HEIRS21 INT_FUT21 ac_wood EDU21
## 0.80944667 0.08648556 1.88865973 0.90285663 1.00214733 2.31180018
hoslem.test(lr6$y,fitted(lr6))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: lr6$y, fitted(lr6)
## X-squared = 6.1895, df = 8, p-value = 0.626
Tjur(lr6)
## [1] 0.05968041
vif(lr6)
## SURV5 HEIRS2 INT_FUT2 ac_wood EDU2
## 1.034175 1.168660 1.168219 1.022966 1.034299
table(nf1$TTM_TRUST2) #number of events
##
## 0 1
## 591 172