---
title: "final_fn_iv_hun_intervention"
output: html_document
date: '2022-08-11'
---

# loading packages
```{r}
library(haven)
library(tidyverse)
library(readxl)
library(emmeans)
library(sjPlot)
library(cowplot)
```

# import database
```{r}
# Locations of the two anonymised SPSS exports: round 1 (the session in which
# participants were allocated to conditions) and round 2 (the follow-up).
round1_path <- "C:/Users/orosz/Dropbox/fakenewsIV/fn_final_analysis/fn_1_1_anonym.sav"
round2_path <- "C:/Users/orosz/Dropbox/fakenewsIV/fn_final_analysis/fn_1_2_anonym.sav"

d1 <- read_sav(round1_path)
d2 <- read_sav(round2_path)
```
## --------------------- preparation for analysis ---------------------

# creating necessary variables
```{r}
# Upper-case the student IDs (Neptun) in both rounds so that the same ID typed
# in different case is treated as one student when de-duplicating and joining.
d1$Neptun <- toupper(d1$Neptun)
d2$Neptun <- toupper(d2$Neptun)

# Round 1 analysis sample:
#   1. keep responses that reached one of the two condition pages
#      (a non-missing first-click timer on Q112 or Q122),
#   2. keep only the earliest response per student ID,
#   3. label the condition from whichever timer is present.
# The grouping is dropped right after de-duplication so that it does not leak
# into d12 and every data frame derived from it.
# NOTE(review): responses sharing a missing/blank Neptun fall into one group and
# are reduced to a single row here - confirm that this is intended.
d12 <- d1 %>%
  filter(Q112_First_Click >= 0 | Q122_First_Click >= 0) %>%
  group_by(Neptun) %>%
  filter(StartDate == min(StartDate)) %>%
  ungroup() %>%
  mutate(cond = case_when(
    Q112_First_Click >= 0 ~ "Treatment",
    Q122_First_Click >= 0 ~ "Control",
    TRUE ~ "other" # defensive only: cannot occur after the first filter
  )) %>%
  filter(cond %in% c("Treatment", "Control"))

# Round 2 (follow-up): earliest response per student ID, ungrouped afterwards.
d22 <- d2 %>%
  group_by(Neptun) %>%
  filter(StartDate == min(StartDate)) %>%
  ungroup()

```

# joining the follow-up one month later along their student ID (Neptun)
```{r}

# Attach each student's follow-up row to their round 1 row. Columns present in
# both rounds get the default suffixes: `.x` for round 1, `.y` for follow-up.
# Students without a follow-up response keep NA in all `.y` columns.
d13 <- left_join(d12, d22, by = "Neptun")

```

# creating a variable that indicates attrition
```{r}

# Attrition flag: "in" when the joined follow-up Progress value is present,
# "out" when the student has no follow-up row.
d <- mutate(
  d13,
  partofr2 = if_else(is.na(Progress.y), "out", "in")
)

d$partofr2 <- as.factor(d$partofr2)

```

# information for the consort diagram
```{r}
# CONSORT step 1: responses that reached one of the two condition pages.
until_cond <- filter(d1, Q112_First_Click >= 0 | Q122_First_Click >= 0)
nrow(until_cond)

# CONSORT step 2: one row per student ID (earliest response only).
without_dupl <- filter(
  group_by(until_cond, Neptun),
  StartDate == min(StartDate)
)
nrow(without_dupl)

# Should match the previous count: participants per condition in round 1.
table(d$cond)

```

# attrition before and after joining the followup
```{r}
# Summary of sample sizes at each stage. The figures in the trailing comments
# are the original author's notes from an earlier run, not recomputed here.
nrow(d1) # 539 - initial number of responses in Qualtrics
nrow(until_cond) # 503 - number of responses that reached one of the conditions
nrow(without_dupl) # 473 - number of non-duplicated participants immediately after the intervention
table(d12$cond) # 218 control, 255 intervention (46.1% / 53.9% of 473, recomputed from these counts; the percentages in the original note were garbled)
table(d$partofr2) # 338 (71.45%) - students whose follow-up data could be joined via their student ID
table(d$partofr2, d$cond) # 163 (48.22%) control, 175 (51.78%) intervention in the follow-up

```

# Demographics of the non-duplicated participants who were allocated to conditions
```{r}

# --- Age ---
# Keep ages below 123 only (removes nonsense entries; filter() also drops
# rows whose Age is missing).
without_dupl <- filter(without_dupl, Age < 123)

summary(without_dupl$Age) # mean
sd(without_dupl$Age) # sd

# --- Gender (percentages) ---
df <- table(without_dupl$Gender)
prop.table(df) * 100

# --- Ethnic minority status (percentages) ---
df <- table(without_dupl$Ethnicity)
prop.table(df) * 100

# --- First-generation status ---
# momedu2 is 1 and dadedu2 is 10 when the respective parent has education
# level 5, so paredu4 is 0 only when neither parent does -> "first-gen".
# NOTE(review): level 5 presumably codes a higher-education degree - confirm
# against the codebook.
without_dupl <- without_dupl %>%
  mutate(
    momedu2 = ifelse(Education_mother == 5, 1, 0),
    dadedu2 = ifelse(Education_father == 5, 10, 0),
    paredu4 = dadedu2 + momedu2,
    firstgen = ifelse(paredu4 == 0, "first-gen", "cont-gen")
  )

without_dupl$firstgen <- as.factor(without_dupl$firstgen)

table(without_dupl$firstgen)

df <- table(without_dupl$firstgen)
prop.table(df) * 100

```

# Demographics of the non-duplicated participants who were allocated to conditions and also made it to the followup
```{r}
# --- Age ---
# Students with a joined follow-up response and an age below 123 (rows with a
# missing Age are dropped by filter() as well).
madefollowup <- filter(d, partofr2 == "in", Age < 123)

summary(madefollowup$Age) # mean
sd(madefollowup$Age) # sd

# --- Gender (percentages) ---
df <- table(madefollowup$Gender)
prop.table(df) * 100

# --- Ethnic minority status (percentages) ---
df <- table(madefollowup$Ethnicity)
prop.table(df) * 100

# --- First-generation status ---
# paredu4 is 0 only when neither parent has education level 5 -> "first-gen".
madefollowup <- madefollowup %>%
  mutate(
    momedu2 = ifelse(Education_mother == 5, 1, 0),
    dadedu2 = ifelse(Education_father == 5, 10, 0),
    paredu4 = dadedu2 + momedu2,
    firstgen = ifelse(paredu4 == 0, "first-gen", "cont-gen")
  )

madefollowup$firstgen <- as.factor(madefollowup$firstgen)

table(madefollowup$firstgen)

df <- table(madefollowup$firstgen)
prop.table(df) * 100

```

# setting up basic demographic variables
```{r}
# Categorical versions of the design and demographic variables used in the
# models below.
d$cond <- as.factor(d$cond) # experimental condition
d$gender <- as.factor(d$Gender) # gender
d$part <- as.factor(d$Partisanship) # partisanship

# Ethnicity code 1 is labelled "Minority", code 2 "Majority".
d$minority <- recode_factor(
  as.factor(d$Ethnicity),
  `1` = "Minority",
  `2` = "Majority"
)

# Settlement type collapsed to a 1/0 indicator (1 = Settlement_type code 1).
d <- d %>%
  mutate(city = ifelse(Settlement_type == 1, 1, 0))
d$city <- as.factor(d$city)

```

# setting up firstgen status
```{r}
# First-generation status on the analysis data set.
# momedu2 is 1 and dadedu2 is 10 when the respective parent has education
# level 5; their sum (paredu4) is 0 only when neither parent does, which is
# coded "first-gen". A missing parental education value yields NA.
d <- d %>%
  mutate(
    momedu2 = ifelse(Education_mother == 5, 1, 0),
    dadedu2 = ifelse(Education_father == 5, 10, 0),
    paredu4 = dadedu2 + momedu2,
    firstgen = ifelse(paredu4 == 0, "first-gen", "cont-gen")
  )

d$firstgen <- as.factor(d$firstgen)

table(d$firstgen)
```

# setting up fake news accuracy and media truth variables - long term (follow-up)
```{r}

# Follow-up item columns: the `.y` suffix marks the round 2 side of the join.
fake_items_fu <- sprintf("Fake%02d_accuracy.y", 1:8)
real_items_fu <- sprintf("Real%02d_accuracy.y", 1:8)

# Mean accuracy rating over the eight fake / eight real items (long term).
# Any missing item makes the participant's score NA.
d$fake_acc_sum <- Reduce("+", lapply(fake_items_fu, function(nm) d[[nm]])) / 8
d$real_acc_sum <- Reduce("+", lapply(real_items_fu, function(nm) d[[nm]])) / 8

# Media truth discernment (long term): real-news mean minus fake-news mean.
d$real_min_fake_acc_sum <- d$real_acc_sum - d$fake_acc_sum
```

# setting up fake news accuracy and media truth variables - immediate (short term)
```{r}
# Round 1 item columns: the `.x` suffix marks the round 1 side of the join.
fake_items_imm <- sprintf("Fake%02d_accuracy.x", 1:8)
real_items_imm <- sprintf("Real%02d_accuracy.x", 1:8)

# Mean accuracy rating over the eight fake / eight real items (short term).
# Any missing item makes the participant's score NA.
d$fake_acc_sum_i <- Reduce("+", lapply(fake_items_imm, function(nm) d[[nm]])) / 8
d$real_acc_sum_i <- Reduce("+", lapply(real_items_imm, function(nm) d[[nm]])) / 8

# Media truth discernment (short term): real-news mean minus fake-news mean.
d$real_min_fake_acc_sum_i <- d$real_acc_sum_i - d$fake_acc_sum_i
```

# Bullshit receptivity - pre- and post-intervention
```{r}
# Pre-intervention bullshit receptivity: mean of the five Bullshit1_* items.
# (The original chunk computed this twice and printed the raw Post_bs1 column
# into the report; both leftovers are removed.)
d$bs_pre <- (d$Bullshit1_1 + d$Bullshit1_2 + d$Bullshit1_3 + d$Bullshit1_4 + d$Bullshit1_5) / 5

# Post-intervention score.
# NOTE(review): only the first term is a post-intervention column (Post_bs1);
# terms 2-5 reuse the PRE-intervention items Bullshit1_2..Bullshit1_5. This
# looks like a copy-paste slip. If post items 2-5 exist in the data, they
# should replace the Bullshit1_* terms here - TODO confirm column names.
d$bs_post <- (d$Post_bs1 + d$Bullshit1_2 + d$Bullshit1_3 + d$Bullshit1_4 + d$Bullshit1_5) / 5

```

# CRT - post-intervention
```{r}
# Score each CRT item 1 when the response equals the keyed answer
# (4, 10, 39, 2 and 8 respectively) and 0 otherwise; a missing response
# stays NA. crt_sum is the number of correct answers (0-5).
d <- d %>%
  mutate(
    crt1 = ifelse(CRT01 == 4, 1, 0),
    crt2 = ifelse(CRT02 == 10, 1, 0),
    crt3 = ifelse(CRT03 == 39, 1, 0),
    crt4 = ifelse(CRT04 == 2, 1, 0),
    crt5 = ifelse(CRT05 == 8, 1, 0),
    crt_sum = crt1 + crt2 + crt3 + crt4 + crt5
  )

# Item-level counts of incorrect (0) and correct (1) answers.
table(d$crt1)
table(d$crt2)
table(d$crt3)
table(d$crt4)

summary(d$crt_sum)

```

# Conspiracy beliefs - follow-up (round 2)
```{r}

# Conspiracy mentality: mean of the five CM items (NA if any item is missing).
d <- d %>%
  mutate(cm = (CM_1 + CM_2 + CM_3 + CM_4 + CM_5) / 5)

# Item-level distributions, as a sanity check on the raw coding.
summary(d$CM_1)
summary(d$CM_2)
summary(d$CM_3)
summary(d$CM_4)
summary(d$CM_5)

# First 100 raw values of item 1.
head(d$CM_1, 100)

```

# need for cognition - post-intervention
```{r}
# Shift every NFC item down by one, then average the five items with items 1
# and 4 reverse-scored as (8 - value).
# NOTE(review): the (8 - value) reversal assumes the shifted items run 1-7 -
# confirm against the raw coding.
d <- d %>%
  mutate(
    nfc1 = NFC1 - 1,
    nfc2 = NFC2 - 1,
    nfc3 = NFC3 - 1,
    nfc4 = NFC4 - 1,
    nfc5 = NFC5 - 1,
    nfc_sum = ((8 - nfc1) + (8 - nfc4) + nfc2 + nfc3 + nfc5) / 5
  )

summary(d$nfc_sum)
```

# digital literacy - follow-up (round 2)
```{r}

# Digital literacy: each DL item is shifted down by 26 before the five items
# are averaged.
# NOTE(review): the offset of 26 presumably undoes the export's value coding -
# confirm against the codebook.
d <- d %>%
  mutate(dl = (DL_1 - 26 + DL_2 - 26 + DL_3 - 26 + DL_4 - 26 + DL_5 - 26) / 5)

summary(d$dl)
table(d$dl)

```

# allocation to conditions
```{r}
# Randomisation check: each characteristic is regressed on condition.
# Continuous measures are z-scored and fitted with lm(); two-level factors are
# fitted with a binomial glm(). Author's note from the original run: no
# significant differences for age, parental education, first-gen status,
# ethnic minority status, Facebook/Instagram use, bullshit receptivity, CRT,
# need for cognition, digital literacy or conspiracy mentality (all ps > 0.13).

# Age
mod <- lm(scale(Age) ~ cond, data = d)
(smy <- summary(mod))

# Gender (restricted to Gender codes below 3)
mod <- glm(gender ~ cond, data = d, subset = Gender < 3, family = binomial)
(smy <- summary(mod))

# Own and parental education
mod <- lm(scale(Education) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(Education_father) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(Education_mother) ~ cond, data = d)
(smy <- summary(mod))

# First-generation and minority status
mod <- glm(firstgen ~ cond, data = d, family = binomial)
(smy <- summary(mod))

mod <- glm(minority ~ cond, data = d, family = binomial)
(smy <- summary(mod))

# Social media use
mod <- lm(scale(Frequency_Facebook_1) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(Frequency_Instagram_1) ~ cond, data = d)
(smy <- summary(mod))

# Psychological measures
mod <- lm(scale(bs_pre) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(crt_sum) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(cm) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(nfc_sum) ~ cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(dl) ~ cond, data = d)
(smy <- summary(mod))

```

# overall attrition
```{r}
# Overall attrition: does follow-up participation (partofr2: "in" vs "out")
# relate to any round 1 characteristic?
# Author's note from the original run: minority participants were less likely
# to return (p = 0.038); no differences for age, gender, place of residence,
# parental education, first-gen status, Facebook/Instagram use, bullshit
# receptivity, CRT or need for cognition (all ps > 0.109).

mod <- lm(scale(Age) ~ partofr2, data = d)
smy <- summary(mod)
smy

# Gender model is restricted to Gender codes below 3.
mod <- glm(gender ~ partofr2, data = d, subset = Gender < 3, family = binomial)
smy <- summary(mod)
smy

# Place of residence. The original call also carried `subset = Gender < 3`,
# apparently copied from the gender model above; no other non-gender model in
# this file uses that restriction, so it is dropped here. Estimates may differ
# marginally from earlier runs.
mod <- glm(city ~ partofr2, data = d, family = binomial)
smy <- summary(mod)
smy

mod <- lm(scale(Education) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(Education_father) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(Education_mother) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- glm(firstgen ~ partofr2, data = d, family = binomial)
smy <- summary(mod)
smy

# There were fewer minority students in the follow-up.
mod <- glm(minority ~ partofr2, data = d, family = binomial)
smy <- summary(mod)
smy

mod <- lm(scale(Frequency_Facebook_1) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(Frequency_Instagram_1) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(bs_pre) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(crt_sum) ~ partofr2, data = d)
smy <- summary(mod)
smy

mod <- lm(scale(nfc_sum) ~ partofr2, data = d)
smy <- summary(mod)
smy

# Conspiracy mentality and digital literacy were measured in round 2, so
# attrition analyses cannot be run on these variables.

```

# differential attrition
```{r}

# Differential attrition: the partofr2 x cond interaction tests whether the
# students who dropped out differ between conditions on each characteristic.
# Author's note from the original run: no interaction for age, gender,
# parental education, first-gen status, Facebook/Instagram use, bullshit
# receptivity, CRT or need for cognition (all ps > 0.119).

mod <- lm(scale(Age) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

# Gender model is restricted to Gender codes below 3.
mod <- glm(
  gender ~ partofr2 * cond,
  data = d, subset = Gender < 3, family = binomial
)
(smy <- summary(mod))

mod <- lm(scale(Education_father) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(Education_mother) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- glm(firstgen ~ partofr2 * cond, data = d, family = binomial)
(smy <- summary(mod))

mod <- glm(minority ~ partofr2 * cond, data = d, family = binomial)
(smy <- summary(mod))

mod <- lm(scale(Frequency_Facebook_1) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(Frequency_Instagram_1) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(bs_pre) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(crt_sum) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

mod <- lm(scale(nfc_sum) ~ partofr2 * cond, data = d)
(smy <- summary(mod))

# Conspiracy mentality and digital literacy were measured in round 2, so
# attrition analyses cannot be run on these variables.


```

# main accuracy effects short term
```{r}

# Effect of condition on perceived accuracy of fake news right after the
# intervention: first on the raw scale, then with a z-scored outcome.
# Estimated marginal means per condition follow each model.

# Raw scale
mod <- lm(fake_acc_sum_i ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

# Standardised outcome
mod <- lm(scale(fake_acc_sum_i) ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

```

# main accuracy effects long term
```{r}

# Effect of condition on perceived accuracy of fake news at follow-up:
# first on the raw scale, then with a z-scored outcome. Estimated marginal
# means per condition follow each model.

# Raw scale
mod <- lm(fake_acc_sum ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

# Standardised outcome
mod <- lm(scale(fake_acc_sum) ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")
```

# main discernment short term
```{r}

# Effect of condition on media truth discernment (real minus fake accuracy)
# right after the intervention. Estimated marginal means follow each model.

# Standardised outcome
mod <- lm(scale(real_min_fake_acc_sum_i) ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

# Raw scale
mod <- lm(real_min_fake_acc_sum_i ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

```

# main discernment long term
```{r}

# Effect of condition on media truth discernment (real minus fake accuracy)
# at follow-up. Estimated marginal means follow each model.

# Standardised outcome
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

# Raw scale
mod <- lm(real_min_fake_acc_sum ~ cond, data = d)
(smy <- summary(mod))
emmeans(mod, specs = "cond")

```


# political party analyses immediate
```{r}

# Does partisanship moderate the immediate effect on discernment?
# Interaction model first, then the condition effect within each partisanship
# group (standardised and raw outcome for each).

# Condition x partisanship interaction
mod <- lm(scale(real_min_fake_acc_sum_i) ~ cond * part, data = d)
(smy <- summary(mod))

# Partisanship group coded 2
mod <- lm(scale(real_min_fake_acc_sum_i) ~ cond, data = d, subset = part == 2)
(smy <- summary(mod))

mod <- lm(real_min_fake_acc_sum_i ~ cond, data = d, subset = part == 2)
(smy <- summary(mod))

# Partisanship group coded 1
mod <- lm(scale(real_min_fake_acc_sum_i) ~ cond, data = d, subset = part == 1)
(smy <- summary(mod))

mod <- lm(real_min_fake_acc_sum_i ~ cond, data = d, subset = part == 1)
(smy <- summary(mod))

```

# long-term political party
```{r}

# Does partisanship moderate the follow-up effect on discernment?
# Interaction model first, then the condition effect within each partisanship
# group (standardised and raw outcome for each).

# Condition x partisanship interaction
mod <- lm(scale(real_min_fake_acc_sum) ~ cond * part, data = d)
(smy <- summary(mod))

# Partisanship group coded 2
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d, subset = part == 2)
(smy <- summary(mod))

mod <- lm(real_min_fake_acc_sum ~ cond, data = d, subset = part == 2)
(smy <- summary(mod))

# Partisanship group coded 1
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d, subset = part == 1)
(smy <- summary(mod))

mod <- lm(real_min_fake_acc_sum ~ cond, data = d, subset = part == 1)
(smy <- summary(mod))

```

# robustness
```{r}

# Robustness checks: the main models re-estimated with the covariate set
# Age, gender, first-gen status, minority status, CRT, conspiracy mentality,
# need for cognition and digital literacy.

# Fake news accuracy - immediate
mod <- lm(
  scale(fake_acc_sum_i) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d
)
(smy <- summary(mod))

# Fake news accuracy - follow-up
mod <- lm(
  scale(fake_acc_sum) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d
)
(smy <- summary(mod))

# Discernment - immediate
mod <- lm(
  scale(real_min_fake_acc_sum_i) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d
)
(smy <- summary(mod))

# Discernment - follow-up
mod <- lm(
  scale(real_min_fake_acc_sum) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d
)
(smy <- summary(mod))

# Discernment - immediate, partisanship group coded 2
mod <- lm(
  scale(real_min_fake_acc_sum_i) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d, subset = part == 2
)
(smy <- summary(mod))

# Discernment - immediate, partisanship group coded 1
mod <- lm(
  scale(real_min_fake_acc_sum_i) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d, subset = part == 1
)
(smy <- summary(mod))

# Discernment - follow-up, partisanship group coded 2
mod <- lm(
  scale(real_min_fake_acc_sum) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d, subset = part == 2
)
(smy <- summary(mod))

# Discernment - follow-up, partisanship group coded 1
mod <- lm(
  scale(real_min_fake_acc_sum) ~ cond + Age + gender + firstgen + minority +
    crt_sum + cm + nfc_sum + dl,
  data = d, subset = part == 1
)
(smy <- summary(mod))

```

```{r}
# Figure: predicted long-term (follow-up) media truth discernment by condition,
# for the full sample and for the two partisanship groups. All three panels
# share the same y-axis limits so they can be compared side by side.

# Full sample
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d)
smy <- summary(mod)
smy

fully <- plot_model(mod, type = "pred", terms = "cond", colors = "gs", legend.title = "Conditions", title = "Full sample", axis.title = c("Conditions", " 
Low Values: Ratings of fake news are correct 
and real news are incorrect 
High Values: Ratings of real news are correct 
and fake news are incorrect")) +
  geom_bar(position = position_dodge(), stat = "identity", width = 0.5, alpha = 0.7) +
  coord_cartesian(ylim = c(-0.8, 0.5)) +
  theme_classic()

# Partisanship group coded 1 - plotted as "Pro-government voters".
# NOTE(review): the original section comments labelled part == 1 as
# anti-government and part == 2 as pro-government, contradicting the plot
# titles and object names. The comments now follow the titles; confirm the
# Partisanship coding against the codebook.
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d, subset = part == 1)
smy <- summary(mod)
smy

progovy <- plot_model(mod, type = "pred", terms = "cond", colors = "gs", legend.title = "Conditions", title = "Pro-government voters", axis.title = c("", "")) +
  geom_bar(position = position_dodge(), stat = "identity", width = 0.5, alpha = 0.7) +
  coord_cartesian(ylim = c(-0.8, 0.5)) +
  theme_classic()

# Partisanship group coded 2 - plotted as "Anti-government voters".
mod <- lm(scale(real_min_fake_acc_sum) ~ cond, data = d, subset = part == 2)
smy <- summary(mod)
smy

antigovy <- plot_model(mod, type = "pred", terms = "cond", colors = "gs", legend.title = "Conditions", title = "Anti-government voters", axis.title = c("", "")) +
  geom_bar(position = position_dodge(), stat = "identity", width = 0.5, alpha = 0.7) +
  coord_cartesian(ylim = c(-0.8, 0.5)) +
  theme_classic()

# Three panels in one row, aligned horizontally and vertically. The original
# call passed "hv" positionally, so it was treated as a fourth plot rather
# than as the alignment setting; it must be given by name.
plot_grid(fully, antigovy, progovy, align = "hv", ncol = 3)


```

# descriptive raw data
```{r}
# Fake news accuracy (fake_acc_sum_i) - short term, immediately after the
# intervention: mean and SD per condition, ignoring missing scores.
# (The original chunk evaluated the bare name `fake_acc_sum_i`, which is a
# column of `d` and not an object, so the chunk stopped with an error.)
d %>%
  group_by(cond) %>%
  summarise(
    mean_fake_acc_sum_i = mean(fake_acc_sum_i, na.rm = TRUE),
    sd_fake_acc_sum_i = sd(fake_acc_sum_i, na.rm = TRUE)
  )

```

```{r}
# Real news accuracy (real_acc_sum_i) - short term, immediately after the
# intervention: mean and SD per condition, ignoring missing scores.
# (The original chunk evaluated the bare name `real_acc_sum_i`, which is a
# column of `d` and not an object, so the chunk stopped with an error.)
d %>%
  group_by(cond) %>%
  summarise(
    m_real_acc_sum_i = mean(real_acc_sum_i, na.rm = TRUE),
    sd_real_acc_sum_i = sd(real_acc_sum_i, na.rm = TRUE)
  )

```

```{r}
# Fake news accuracy at follow-up (long term): mean and SD per condition,
# ignoring missing scores.
summarise(
  group_by(d, cond),
  mean_fake_acc_sum = mean(fake_acc_sum, na.rm = TRUE),
  sd_fake_acc_sum = sd(fake_acc_sum, na.rm = TRUE)
)

```

```{r}
# Real news accuracy at follow-up (long term): mean and SD per condition,
# ignoring missing scores.
summarise(
  group_by(d, cond),
  mean_real_acc_sum = mean(real_acc_sum, na.rm = TRUE),
  sd_real_acc_sum = sd(real_acc_sum, na.rm = TRUE)
)

```

```{r}
# Bullshit receptivity at pretest: mean and SD per condition, ignoring
# missing scores.
summarise(
  group_by(d, cond),
  m_bs_pre = mean(bs_pre, na.rm = TRUE),
  sd_bs_pre = sd(bs_pre, na.rm = TRUE)
)

```

```{r}
# Bullshit receptivity at post-test: mean and SD per condition, ignoring
# missing scores.
summarise(
  group_by(d, cond),
  m_bs_post = mean(bs_post, na.rm = TRUE),
  sd_bs_post = sd(bs_post, na.rm = TRUE)
)

```

```{r}
# Bullshit receptivity at follow-up: participant score is the mean of the ten
# BSR items (NA if any item is missing), then mean and SD per condition.
d <- d %>%
  mutate(
    bs_follow_up = (BSR_1 + BSR_2 + BSR_3 + BSR_4 + BSR_5 +
      BSR_6 + BSR_7 + BSR_8 + BSR_9 + BSR_10) / 10
  )

summarise(
  group_by(d, cond),
  m_bsr_fu = mean(bs_follow_up, na.rm = TRUE),
  sd_bsr_fu = sd(bs_follow_up, na.rm = TRUE)
)

```

```{r}
# Cognitive reflection (number of correct CRT answers), immediately
# post-intervention: mean and SD per condition, ignoring missing scores.
summarise(
  group_by(d, cond),
  m_crt_sum = mean(crt_sum, na.rm = TRUE),
  sd_crt_sum = sd(crt_sum, na.rm = TRUE)
)
```

```{r}
# Need for cognition, immediately post-intervention: mean and SD per
# condition, ignoring missing scores.
summarise(
  group_by(d, cond),
  m_nfc_sum = mean(nfc_sum, na.rm = TRUE),
  sd_nfc_sum = sd(nfc_sum, na.rm = TRUE)
)
```

```{r}
# Digital literacy at follow-up: mean and SD per condition, ignoring missing
# scores.
summarise(
  group_by(d, cond),
  m_dl = mean(dl, na.rm = TRUE),
  sd_dl = sd(dl, na.rm = TRUE)
)

```

```{r}
# Conspiracy mentality at follow-up: mean and SD per condition, ignoring
# missing scores.
summarise(
  group_by(d, cond),
  m_cm = mean(cm, na.rm = TRUE),
  sd_cm = sd(cm, na.rm = TRUE)
)

```