Supplement 1: A survival analysis of family forest landowners in the USA

Introduction

This project is aimed at characterizing the life expectancy and survivorship of FFOs, with the ultimate aim of answering several questions: What is the life expectancy and 5-year survivorship of current family forest owners in the USA? To what extent does expected mortality predict future plans, including plans to transfer land, manage land, or engage in estate planning activities?

Methods

This project uses actuarial life tables in order to estimate life expectancy and survivorship. The tables used come from the National Center for Health Statistics (Arias E, Xu JQ. United States life tables, 2017. National Vital Statistics Reports; vol 68 no 7. Hyattsville, MD: National Center for Health Statistics. 2019.)

First, the NCHS data products (spreadsheets) are imported and functions for filtering by demographic variables are defined:

library(readxl)
library(ggplot2)
library(scales)
library(nwos)
library(car)
library(ResourceSelection)

#read in all individual life tables into a list
files <- dir('..')[grepl("xlsx",dir('..'))] #list of tables
files <- paste('..',files,sep='/')
#labels for the demographic categories associated with each table in files (same order)
cats <- c('M','F','HM','HF','NHWM','NHWF','NHBM','NHBF')
lt <- rep(list(NA),length(files))
for (i in 1:length(lt)){
  table <- read_excel(files[i],sheet=1)[3:102,]
  names(table)[1] <- 'AGE'
  table$TAB <- files[i]
  table$CAT <- cats[i]
  lt[[i]] <- table
  rm(table)
}
lt <- do.call("rbind",lt) #merge list into a dataframe
#reformat age
lt$AGE <- as.numeric(gsub('\\–.*','',lt$AGE))
#calculate survivorship as inverse of mortality
lt$qxINV <- 1-as.numeric(lt[[2]])

#function for determining demographic of a respondent
dcat <- function(SEX='1',HISPANIC='0',BLACK='0',WHITE='1'){
  if (SEX=='1' & HISPANIC=='1'){
    dc <- 'HM'
  } else if (SEX=='2' & HISPANIC=='1'){
    dc <- 'HF'
  } else if (SEX=='1' & HISPANIC=='0' & BLACK=='1'){
    dc <- 'NHBM'
  } else if (SEX=='2' & HISPANIC=='0' & BLACK=='1'){
    dc <- 'NHBF'
  } else if (SEX=='1' & HISPANIC=='0' & WHITE=='1'){
    dc <- 'NHWM'
  } else if (SEX=='2' & HISPANIC=='0' & WHITE=='1'){
    dc <- 'NHWF'
  } else if (SEX=='1'){
    dc <- 'M'
  } else if (SEX=='2'){
    dc <- 'F'
  }
  return(dc)
}

#function for calculating 5-year survival probability of a respondent
surv5 <- function(age=60,demo='M'){
  df <- lt[lt$CAT==demo,] #subset by demographic category
  df <- df[df$AGE %in% age:(age+4),] #subset to five years
  if (nrow(df)!=5){
    message("less than five records in life table")
  }
  total <- prod(df$qxINV) #calculate total survivorship
  total <- ifelse(total>1,1,total)
  return(total)
}

#function for calculating life expectancy of a respondent
lex <- function(age=60,demo='M'){
  df <- lt[lt$CAT==demo,] #subset by demographic category
  df <- df[df$AGE==age,] #subset by age
  ex <- as.numeric(df[[7]][1])
  return(ex)
}

NWOS data (national)

The NWOS data are imported and wide dataframes are created, one for the plots and one for the survey results (with imputed data inserted):

quest <- get_nwos(cycle='2018',study='base')
qu <- nwos_wide(quest,imputations="1")
qu <- qu[qu$OWNCD_NWOS=='45',]
qu$AC_WOOD <- as.numeric(qu$AC_WOOD)
qu$OWN1_AGE <- as.numeric(qu$OWN1_AGE)
qu$FINAL_WEIGHT <- as.numeric(qu$FINAL_WEIGHT)

These tables only go up to age 99, so OWN1_AGE is truncated:

table(qu$OWN1_AGE>99) #how many?

## 
## FALSE  TRUE 
##  9520     4

qu$OWN1_AGE <- ifelse(qu$OWN1_AGE>99,99,qu$OWN1_AGE)

The correct life table demographic categories for each respondent are determined based on sex, ethnicity, and race (OWN1_GENDER, OWN1_ETH, and OWN1_RACE). These categories include Hispanic male (HM), Hispanic female (HF), non-Hispanic black male (NHBM), non-Hispanic black female (NHBF), non-Hispanic white male (NHWM), non-Hispanic white female (NHWF), general male (M), and general female (F).

Any respondent who identifies as Hispanic (i.e. OWN1_ETH=1) is identified as Hispanic. Any respondent who identifies as non-Hispanic (i.e. OWN1_ETH=0) and both black (i.e. OWN1_RACE_BLACK=1) and white (i.e. OWN1_RACE_WHITE=1) is classified as non-Hispanic black. Races other than black and white are not included in these life tables. Consequently, respondents who identify as non-Hispanic and neither black nor white are classified by gender only (i.e. the population-wide tables for the appropriate genders are used).

qu$CAT <- mapply(dcat,qu$OWN1_GENDER,qu$OWN1_ETH,qu$OWN1_RACE_BLACK,qu$OWN1_RACE_WHITE)

For each respondent, 5-year survivorship (i.e. the probability of surviving another five years) and life expectancy are calculated from the tables based on demographic category. A period of 5 years was adopted to complement the 5-year period of future activities and land transfer questions in the NWOS.

qu$SURV5 <- mapply(surv5,qu$OWN1_AGE,qu$CAT)
qu$LEX <- mapply(lex,qu$OWN1_AGE,qu$CAT)

qu$LE_CAT <- ifelse(qu$LEX<5,'<5yr','>5yr') #create categorical grouping variable based on life expectancy
qu$LE_CAT <- ifelse(qu$LEX>=5&qu$LEX<=10,'5-10yr',qu$LE_CAT)
qu$LE_CAT <- ifelse(qu$LEX>10,'>10yr',qu$LE_CAT)
qu$LE_CAT <- ordered(qu$LE_CAT,levels=c('<5yr','5-10yr','>10yr'))

Plot and tabulate demographic category and age.

qu$CAT <- ordered(qu$CAT,
                  levels=c('HF','HM','NHBF','NHBM',
                           'NHWF','NHWM','F','M'))
table(qu$CAT)

## 
##   HF   HM NHBF NHBM NHWF NHWM    F    M 
##   26   80   29   50 1938 7278   32   91

ggplot(data=qu,aes(x='1',fill=CAT))+geom_bar(color="black")+
  coord_flip()+
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank())

sum(qu$CAT=='NHWM')/length(qu$CAT) #proportion NHWM

## [1] 0.7641747

ggplot(data=qu,aes(x=OWN1_AGE))+geom_histogram(color="black")+
    labs(x="years")

mean(qu$OWN1_AGE) #mean of age

## [1] 65.43879

sd(qu$OWN1_AGE)

## [1] 11.67343

Using the survey weights (which represent number of ownerships), we can generate histograms and density plots that represent the entire population.

ggplot(data=qu,aes(SURV5,weight=FINAL_WEIGHT*PLOT_COUNT/1e3))+
  geom_histogram(fill="red",color="black")+
  labs(x="probability of surviving an additional five years",
       y="thousand FFOs")

ggplot(data=qu,aes(LEX,weight=FINAL_WEIGHT*PLOT_COUNT/1e3))+
  geom_histogram(fill="red",color="black")+
  labs(x="years remaining",
       y="thousand FFOs")

We can also feed the mortality variables into the NWOS estimation engine in order to produce population estimates and then examine the results.

#these tables come from the same raw data, from the estimation tool on the Forest Service server
pop.est <- readRDS("../NWOS_2018_FFO_ONE_PLUS_W_MORTALITY.RDS")
pop.est$SE <- sqrt(pop.est$VARIANCE) #calculate standard errors
pop.est$LEVEL <- ordered(pop.est$LEVEL,levels=c('1','<5yr','5-10yr','>10yr')) #order levels
pop.est <- pop.est[order(pop.est$LEVEL),]

For example, mean 5-year survivorship and life expectancy:

#survivorship

pop.est[pop.est$VARIABLE=='SURV5' & pop.est$STATISTIC=='MEAN',c("VALUE","SE")]

## # A tibble: 1 x 2
##   VALUE      SE
##   <dbl>   <dbl>
## 1 0.893 0.00904

#life expectancy

pop.est[pop.est$VARIABLE=='LEX' & pop.est$STATISTIC=='MEAN',c("VALUE","SE")]

## # A tibble: 1 x 2
##   VALUE    SE
##   <dbl> <dbl>
## 1  21.0  1.60

…and also totals, such as the total amount of area by whether life expectancy of the current owner is less than five years, five to 10 years, or greater than 10 years:

pop.est[pop.est$VARIABLE=='LE_CAT' &
          pop.est$STATISTIC=='TOTAL' &
                pop.est$UNITS=='ACRES',
        c("LEVEL","VALUE","SE")]

## # A tibble: 3 x 3
##   LEVEL       VALUE       SE
##   <ord>       <dbl>    <dbl>
## 1 <5yr     6126003.  638622.
## 2 5-10yr  39382164. 6494167.
## 3 >10yr  230283588. 6769608.

…and proportions.

pop.est[pop.est$VARIABLE=='LE_CAT' &
          pop.est$STATISTIC=='PROPORTION' &
                pop.est$UNITS=='ACRES',
        c("LEVEL","VALUE","SE")]

## # A tibble: 3 x 3
##   LEVEL   VALUE      SE
##   <ord>   <dbl>   <dbl>
## 1 <5yr   0.0222 0.00181
## 2 5-10yr 0.143  0.0185 
## 3 >10yr  0.835  0.0183

Testing some statistical models

In addition to producing simple estimates, we can fit statistical models and test hypotheses (Note: these are unweighted models). First, we create some additional variables:

#assign region
west <- c(2.1,2.2,15,8,6,41,16,32,35,30,56,53,49,4)
south <- c(5,12,1,51,13,40.1,40.2,48.1,48.2,37,45,21,28,22,47)
qu$REGION <- ifelse(qu$STATECD_NWOS %in% west,'WEST',
                    ifelse(qu$STATECD_NWOS %in% south,'SOUTH','NORTH'))

qu$TRAN_FUT2 <- ifelse(qu$TRAN_FUT %in% 5,1,0) #box1, TRAN_FUT
qu$EMO_WOOD2 <- ifelse(qu$EMO_WOOD %in% 4:5,"1","0") #box2, EMO_WOOD
qu$FUT_ANY <- ifelse(qu$FUT_NONE==1|qu$FUT_DONTKNOW==1,0,1) #binary, any future actions
qu$TRAN_RECENT2 <- ifelse(qu$TRAN_RECENT==1,"1","0") #recode, TRAN_RECENT
qu$MAN_PLAN2 <- ifelse(qu$MAN_PLAN==1,"1","0") #recode, MAN_PLAN
qu$OWNERS_NUMBER2 <- ifelse(as.numeric(qu$OWNERS_NUMBER)>1,"1","0") #recode, OWNERS_NUMBER
qu$OWN1_EDU2 <- ifelse(qu$OWN1_EDU %in% 3:6,"1","0") #recode, EDUCATION
qu$TRAN_FAM <- ifelse(qu$TRAN_FUT_TO_CHILD=='1'|qu$TRAN_FUT_TO_FAM=='1',"1","0") #binary, whether likely heir is child or family

Tjur <- function(x){ #function for calculating Tjur's statistic
  pred <- predict(x,newdata=x$data,type='response')
  ts <- mean(pred[x$y=='1'])-mean(pred[x$y=='0'])
  return(ts)
}

First, we look to see whether life expectancy and survivorship vary by region.

reg.surv <- aov(log(SURV5)~REGION,qu) #survivorship
summary(reg.surv)

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## REGION         2    0.5 0.25297    6.04 0.00239 **
## Residuals   9521  398.7 0.04188                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(reg.surv)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = log(SURV5) ~ REGION, data = qu)
## 
## $REGION
##                     diff           lwr          upr     p adj
## SOUTH-NORTH -0.015822063 -0.0269212455 -0.004722881 0.0024105
## WEST-NORTH  -0.001627558 -0.0148664666  0.011611351 0.9552556
## WEST-SOUTH   0.014194506  0.0002506734  0.028138338 0.0449032

#checking homoscedasticity, region variable
qu$reg.surv.res <- reg.surv$residuals[match(rownames(qu),names(reg.surv$residuals))]
ggplot(qu,aes(y=reg.surv.res,fill=REGION))+geom_boxplot()+ #compare variances
  scale_y_continuous(limits=c(-0.25,0.25))

reg.lex <- aov(log(LEX)~REGION,qu) #life expectancy
summary(reg.lex)

##               Df Sum Sq Mean Sq F value     Pr(>F)    
## REGION         2    6.4   3.188   12.57 0.00000354 ***
## Residuals   9521 2414.7   0.254                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(reg.lex)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = log(LEX) ~ REGION, data = qu)
## 
## $REGION
##                    diff          lwr         upr     p adj
## SOUTH-NORTH -0.05811619 -0.085429554 -0.03080282 0.0000019
## WEST-NORTH  -0.01789751 -0.050476408  0.01468139 0.4021130
## WEST-SOUTH   0.04021868  0.005905074  0.07453229 0.0165715

#checking homoscedasticity, region variable
qu$reg.lex.res <- reg.lex$residuals[match(rownames(qu),names(reg.lex$residuals))]
ggplot(qu,aes(y=reg.lex.res,fill=REGION))+geom_boxplot()+ #compare variances
  scale_y_continuous(limits=c(-1,1))

Next, we look to see whether survivorship predicts whether a respondent is very likely to transfer land in the next five years.

lr1 <- glm(TRAN_FUT2~SURV5+
             EMO_WOOD2+
             ACT_NONE+
             TRAN_RECENT2+
             TRAN_FAM+
             AC_WOOD+
             REGION+
             MAN_PLAN2+
             OWN1_EDU2,family="binomial",data=qu)
summary(lr1)

## 
## Call:
## glm(formula = TRAN_FUT2 ~ SURV5 + EMO_WOOD2 + ACT_NONE + TRAN_RECENT2 + 
##     TRAN_FAM + AC_WOOD + REGION + MAN_PLAN2 + OWN1_EDU2, family = "binomial", 
##     data = qu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5313  -0.3997  -0.2589  -0.2230   2.7877  
## 
## Coefficients:
##                  Estimate  Std. Error z value             Pr(>|z|)    
## (Intercept)    0.19024670  0.26602530   0.715                0.475    
## SURV5         -2.57675965  0.27036808  -9.531 < 0.0000000000000002 ***
## EMO_WOOD21    -0.38039953  0.09532729  -3.990            0.0000659 ***
## ACT_NONE1      0.11981130  0.11386644   1.052                0.293    
## TRAN_RECENT21  1.15006881  0.11012282  10.444 < 0.0000000000000002 ***
## TRAN_FAM1     -1.25327545  0.09185543 -13.644 < 0.0000000000000002 ***
## AC_WOOD        0.00001251  0.00001330   0.941                0.347    
## REGIONSOUTH    0.10829190  0.09960329   1.087                0.277    
## REGIONWEST    -0.03002545  0.12178896  -0.247                0.805    
## MAN_PLAN21     0.00246075  0.10773037   0.023                0.982    
## OWN1_EDU21     0.14498114  0.11434953   1.268                0.205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4451.9  on 9523  degrees of freedom
## Residual deviance: 4037.3  on 9513  degrees of freedom
## AIC: 4059.3
## 
## Number of Fisher Scoring iterations: 6

exp(lr1$coefficients)

##   (Intercept)         SURV5    EMO_WOOD21     ACT_NONE1 TRAN_RECENT21 
##    1.20954796    0.07601994    0.68358824    1.12728412    3.15841024 
##     TRAN_FAM1       AC_WOOD   REGIONSOUTH    REGIONWEST    MAN_PLAN21 
##    0.28556790    1.00001251    1.11437298    0.97042084    1.00246378 
##    OWN1_EDU21 
##    1.15601777

hoslem.test(lr1$y,fitted(lr1))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr1$y, fitted(lr1)
## X-squared = 15.288, df = 8, p-value = 0.05378

Tjur(lr1)

## [1] 0.05629825

vif(lr1)

##                  GVIF Df GVIF^(1/(2*Df))
## SURV5        1.069002  1        1.033926
## EMO_WOOD2    1.076514  1        1.037552
## ACT_NONE     1.086310  1        1.042262
## TRAN_RECENT2 1.055990  1        1.027614
## TRAN_FAM     1.105632  1        1.051490
## AC_WOOD      1.069064  1        1.033956
## REGION       1.068622  2        1.016731
## MAN_PLAN2    1.093428  1        1.045671
## OWN1_EDU2    1.058936  1        1.029046

table(qu$TRAN_FUT2) #number of events

## 
##    0    1 
## 8929  595

#checking homoscedasticity, region variable
qu$lr1.res <- lr1$residuals[match(rownames(qu),names(lr1$residuals))]
ggplot(qu,aes(y=lr1.res,fill=REGION))+geom_boxplot()+ #compare variances
  scale_y_continuous(limits=c(-1.5,-1))

Lastly, we look to see whether survivorship predicts whether a respondent plans to undertake any actions in the next five years.

lr2 <- glm(FUT_ANY~SURV5+
             EMO_WOOD2+
             ACT_NONE+
             TRAN_RECENT2+
             TRAN_FAM+
             AC_WOOD+
             REGION+
             MAN_PLAN2+
             OWN1_EDU2,family="binomial",data=qu)
summary(lr2)

## 
## Call:
## glm(formula = FUT_ANY ~ SURV5 + EMO_WOOD2 + ACT_NONE + TRAN_RECENT2 + 
##     TRAN_FAM + AC_WOOD + REGION + MAN_PLAN2 + OWN1_EDU2, family = "binomial", 
##     data = qu)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4329   0.1968   0.3646   0.4625   2.0904  
## 
## Coefficients:
##                  Estimate  Std. Error z value             Pr(>|z|)    
## (Intercept)    0.20764519  0.23124082   0.898              0.36921    
## SURV5          1.24610272  0.23757200   5.245       0.000000156148 ***
## EMO_WOOD21     0.47388897  0.07400525   6.403       0.000000000152 ***
## ACT_NONE1     -2.88081175  0.06988042 -41.225 < 0.0000000000000002 ***
## TRAN_RECENT21 -0.14823448  0.12788214  -1.159              0.24640    
## TRAN_FAM1      0.30213008  0.07690283   3.929       0.000085397299 ***
## AC_WOOD        0.00049686  0.00008255   6.019       0.000000001752 ***
## REGIONSOUTH    0.02279170  0.07599819   0.300              0.76426    
## REGIONWEST     0.51515776  0.10273140   5.015       0.000000531415 ***
## MAN_PLAN21     0.97936571  0.11230754   8.720 < 0.0000000000000002 ***
## OWN1_EDU21     0.26004098  0.07817452   3.326              0.00088 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8864.9  on 9523  degrees of freedom
## Residual deviance: 6011.7  on 9513  degrees of freedom
## AIC: 6033.7
## 
## Number of Fisher Scoring iterations: 7

exp(lr2$coefficients)

##   (Intercept)         SURV5    EMO_WOOD21     ACT_NONE1 TRAN_RECENT21 
##    1.23077640    3.47676659    1.60622864    0.05608921    0.86222891 
##     TRAN_FAM1       AC_WOOD   REGIONSOUTH    REGIONWEST    MAN_PLAN21 
##    1.35273718    1.00049699    1.02305341    1.67390256    2.66276673 
##    OWN1_EDU21 
##    1.29698324

hoslem.test(lr2$y,fitted(lr2))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr2$y, fitted(lr2)
## X-squared = 13.067, df = 8, p-value = 0.1096

Tjur(lr2)

## [1] 0.358486

vif(lr2)

##                  GVIF Df GVIF^(1/(2*Df))
## SURV5        1.030720  1        1.015244
## EMO_WOOD2    1.041607  1        1.020592
## ACT_NONE     1.023869  1        1.011864
## TRAN_RECENT2 1.029515  1        1.014650
## TRAN_FAM     1.070008  1        1.034412
## AC_WOOD      1.081145  1        1.039781
## REGION       1.072565  2        1.017667
## MAN_PLAN2    1.020012  1        1.009957
## OWN1_EDU2    1.043363  1        1.021451

table(qu$FUT_ANY) #number of events

## 
##    0    1 
## 1677 7847

#checking homoscedasticity, region variable
qu$lr2.res <- lr2$residuals[match(rownames(qu),names(lr2$residuals))]
ggplot(qu,aes(y=lr2.res,fill=REGION))+geom_boxplot()+ #compare variances
  scale_y_continuous(limits=c(-2,-1))

NIFA data (New England)

The NIFA data are imported. There are two datasets, corresponding to the screener survey (nf1) and preferences survey. We are using only the screener data for this analysis.

These tables only go up to age 99, so age is truncated:

table(nf1$age>99) #how many?

## 
## FALSE 
##   763

nf1$age <- ifelse(nf1$age>99,99,nf1$age)

The correct life table demographic categories for each respondent are determined by gender only (i.e. the population-wide tables for the appropriate genders are used).

nf1$CAT <- ifelse(nf1$gender=='1','M','F')
nf1 <- nf1[!is.na(nf1$age) & !is.na(nf1$CAT),] #drop nulls

For each respondent, 5-year survivorship (i.e. the probability of surviving another five years) and life expectancy are calculated from the tables based on demographic category.

nf1$SURV5 <- mapply(surv5,nf1$age,nf1$CAT)
nf1$LEX <- mapply(lex,nf1$age,nf1$CAT)

Plot and tabulate demographic category and age.

nf1$CAT <- ordered(nf1$CAT,
                  levels=c('F','M'))
table(nf1$CAT)

## 
##   F   M 
## 221 542

ggplot(data=nf1,aes(x='1',fill=CAT))+geom_bar(color="black")+
  coord_flip()+
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank())

ggplot(data=nf1,aes(x=age))+geom_histogram(color="black")+
    labs(x="years")

mean(nf1$age) #mean of age

## [1] 63.31324

sd(nf1$age)

## [1] 12.11678

The NIFA data do not have survey weights, BUT we can summarize survivorship and life expectancy for the sample:

mean(nf1$SURV5)

## [1] 0.8857289

sd(nf1$SURV5)

## [1] 0.1266086

mean(nf1$LEX)

## [1] 20.68604

sd(nf1$LEX)

## [1] 9.18862

Testing some statistical models

In addition to producing simple estimates, we can fit statistical models and test hypotheses (Note: these are unweighted models). First, we create some additional variables:

nf1$TTM_FAM2 <- factor(ifelse(nf1$ttm_fam %in% 1:3,1,0)) #box3, TTM_FAM
nf1$TTM_DEC2 <- factor(ifelse(nf1$ttm_dec %in% 1:3,"1","0")) #box3, TTM_DEC
nf1$TTM_WILL2 <- factor(ifelse(nf1$ttm_will %in% 1:3,"1","0")) #box3, TTM_WILL
nf1$TTM_TRUST2 <- factor(ifelse(nf1$ttm_trust %in% 1:3,"1","0")) #box3, TTM_TRUST

nf1$HEIRS2 <- factor(ifelse(nf1$heirs %in% 1,"1","0")) #recode, HEIRS
nf1$EDU2 <- factor(ifelse(nf1$edu %in% 3:6,"1","0")) #recode, EDU
nf1$INT_FUT2 <- factor(ifelse(nf1$intentions_fut %in% 1:3,"1","0")) #recode, intentions_fut

Tjur <- function(x){ #function for calculating Tjur's statistic
  pred <- predict(x,newdata=x$data,type='response')
  pred <- pred[!is.na(pred)]
  ts <- mean(pred[x$y=='1'])-mean(pred[x$y=='0'])
  return(ts)
}

Next, we look to see whether survivorship predicts whether a respondent has done or is likely to do within the next year a number of planning actions:

lr3 <- glm(TTM_FAM2~SURV5+ #discuss options with family
             HEIRS2+
             INT_FUT2+
             ac_wood+
             EDU2,family="binomial",data=nf1)
summary(lr3)

## 
## Call:
## glm(formula = TTM_FAM2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood + 
##     EDU2, family = "binomial", data = nf1)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.295  -1.061   0.670   1.101   1.690  
## 
## Coefficients:
##              Estimate Std. Error z value   Pr(>|z|)    
## (Intercept) -0.318172   0.585784  -0.543   0.587023    
## SURV5       -0.879396   0.628180  -1.400   0.161540    
## HEIRS21      0.558654   0.165332   3.379   0.000728 ***
## INT_FUT21    0.820497   0.183799   4.464 0.00000804 ***
## ac_wood      0.003637   0.001288   2.824   0.004749 ** 
## EDU21        0.605656   0.177688   3.409   0.000653 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1046.51  on 755  degrees of freedom
## Residual deviance:  968.49  on 750  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 980.49
## 
## Number of Fisher Scoring iterations: 4

exp(lr3$coefficients)

## (Intercept)       SURV5     HEIRS21   INT_FUT21     ac_wood       EDU21 
##   0.7274777   0.4150337   1.7483183   2.2716295   1.0036441   1.8324546

hoslem.test(lr3$y,fitted(lr3))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr3$y, fitted(lr3)
## X-squared = 5.2126, df = 8, p-value = 0.7346

Tjur(lr3)

## [1] 0.09916798

vif(lr3)

##    SURV5   HEIRS2 INT_FUT2  ac_wood     EDU2 
## 1.029690 1.150635 1.131551 1.006363 1.024280

table(nf1$TTM_FAM2) #number of events

## 
##   0   1 
## 363 400

lr4 <- glm(TTM_DEC2~SURV5+ #decide options
             HEIRS2+
             INT_FUT2+
             ac_wood+
             EDU2,family="binomial",data=nf1)
summary(lr4)

## 
## Call:
## glm(formula = TTM_DEC2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood + 
##     EDU2, family = "binomial", data = nf1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9654  -0.9034  -0.8126   1.2248   1.9302  
## 
## Coefficients:
##              Estimate Std. Error z value  Pr(>|z|)    
## (Intercept) -1.099296   0.593576  -1.852  0.064027 .  
## SURV5       -0.630910   0.632773  -0.997  0.318737    
## HEIRS21      0.140718   0.174495   0.806  0.419996    
## INT_FUT21    0.613131   0.180971   3.388  0.000704 ***
## ac_wood      0.003218   0.001106   2.910  0.003611 ** 
## EDU21        0.769262   0.194562   3.954 0.0000769 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 980.66  on 755  degrees of freedom
## Residual deviance: 934.02  on 750  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 946.02
## 
## Number of Fisher Scoring iterations: 4

exp(lr4$coefficients)

## (Intercept)       SURV5     HEIRS21   INT_FUT21     ac_wood       EDU21 
##   0.3331054   0.5321073   1.1510994   1.8462026   1.0032232   2.1581724

hoslem.test(lr4$y,fitted(lr4))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr4$y, fitted(lr4)
## X-squared = 5.637, df = 8, p-value = 0.6878

Tjur(lr4)

## [1] 0.06140125

vif(lr4)

##    SURV5   HEIRS2 INT_FUT2  ac_wood     EDU2 
## 1.030375 1.197865 1.182333 1.012821 1.019572

table(nf1$TTM_DEC2) #number of events

## 
##   0   1 
## 494 269

lr5 <- glm(TTM_WILL2~SURV5+ #file will
             HEIRS2+
             INT_FUT2+
             ac_wood+
             EDU2,family="binomial",data=nf1)
summary(lr5)

## 
## Call:
## glm(formula = TTM_WILL2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood + 
##     EDU2, family = "binomial", data = nf1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3811  -1.2591   0.7055   0.8921   1.3622  
## 
## Coefficients:
##              Estimate Std. Error z value   Pr(>|z|)    
## (Intercept)  1.980266   0.755337   2.622    0.00875 ** 
## SURV5       -2.495479   0.818045  -3.051    0.00228 ** 
## HEIRS21      0.559871   0.178639   3.134    0.00172 ** 
## INT_FUT21    0.083976   0.199585   0.421    0.67394    
## ac_wood      0.003187   0.001446   2.204    0.02751 *  
## EDU21        0.832112   0.181515   4.584 0.00000456 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 928.99  on 755  degrees of freedom
## Residual deviance: 877.53  on 750  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 889.53
## 
## Number of Fisher Scoring iterations: 4

exp(lr5$coefficients)

## (Intercept)       SURV5     HEIRS21   INT_FUT21     ac_wood       EDU21 
##  7.24466980  0.08245693  1.75044602  1.08760246  1.00319245  2.29816811

hoslem.test(lr5$y,fitted(lr5))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr5$y, fitted(lr5)
## X-squared = 16.024, df = 8, p-value = 0.04204

Tjur(lr5)

## [1] 0.06850944

vif(lr5)

##    SURV5   HEIRS2 INT_FUT2  ac_wood     EDU2 
## 1.033227 1.191965 1.176634 1.009609 1.031022

table(nf1$TTM_WILL2) #number of events

## 
##   0   1 
## 230 533

lr6 <- glm(TTM_TRUST2~SURV5+ #set up trust
             HEIRS2+
             INT_FUT2+
             ac_wood+
             EDU2,family="binomial",data=nf1)
summary(lr6)

## 
## Call:
## glm(formula = TTM_TRUST2 ~ SURV5 + HEIRS2 + INT_FUT2 + ac_wood + 
##     EDU2, family = "binomial", data = nf1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5416  -0.7666  -0.6041  -0.3925   2.2855  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.211404   0.611846  -0.346 0.729704    
## SURV5       -2.447778   0.655635  -3.733 0.000189 ***
## HEIRS21      0.635867   0.202803   3.135 0.001716 ** 
## INT_FUT21   -0.102192   0.206526  -0.495 0.620732    
## ac_wood      0.002145   0.001071   2.002 0.045285 *  
## EDU21        0.838027   0.232803   3.600 0.000319 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 808.36  on 755  degrees of freedom
## Residual deviance: 764.50  on 750  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 776.5
## 
## Number of Fisher Scoring iterations: 4

exp(lr6$coefficients)

## (Intercept)       SURV5     HEIRS21   INT_FUT21     ac_wood       EDU21 
##  0.80944667  0.08648556  1.88865973  0.90285663  1.00214733  2.31180018

hoslem.test(lr6$y,fitted(lr6))

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  lr6$y, fitted(lr6)
## X-squared = 6.1895, df = 8, p-value = 0.626

Tjur(lr6)

## [1] 0.05968041

vif(lr6)

##    SURV5   HEIRS2 INT_FUT2  ac_wood     EDU2 
## 1.034175 1.168660 1.168219 1.022966 1.034299

table(nf1$TTM_TRUST2) #number of events

## 
##   0   1 
## 591 172

Supplement 1: A survival analysis of family forest landowners in the USA

Jesse Caputo

March 30, 2023

Introduction

Methods

NWOS data (national)

Testing some statistical models

NIFA data (New England)

Testing some statistical models