This data analysis provides an overview of Ecological Momentary Assessment (EMA) responses, including cleaning, flagging, and visualization. All steps and parameters are documented below.
This file was generated using the EMA-CleanR package from the University of Michigan. See README file for instructions, more documentation and full license information.
Refer to each package for their individual license
# Load required packages
# Refer to each package for their individual license
library(dplyr)
library(psych)
library(tidyverse)
library(lubridate)
library(table1)
library(corrplot)
library(ggplot2)
library(patchwork)
library(rlang)
params:
input_file: "EMA-Data.csv"
Refer to README.md and the knowledge base article for documentation on each parameter.
# Assign each parameter from YAML (top section of this script) as a variable in the parent environment
for (nm in names(params)) {
# Unlist if the parameter is a named mapping
if (nm %in% c("ema_item_labels", "participant_group_map", "plot_colors")) {
assign(nm, unlist(params[[nm]]), envir = .GlobalEnv)
} else {
assign(nm, params[[nm]], envir = .GlobalEnv)
}
}
# Print all parameter names and values that will be used by this script
for (nm in names(params)) {
val <- if (nm == "ema_item_labels") unlist(params[[nm]]) else params[[nm]]
if (is.null(val)) {
cat(nm, "= NULL\n")
} else if (is.atomic(val) && length(val) > 1 && is.null(names(val))) {
cat(nm, "=", paste(val, collapse = ", "), "\n")
} else if (is.list(val) || (is.atomic(val) && !is.null(names(val)))) {
val_vec <- unlist(val)
if (!is.null(names(val_vec))) {
cat(nm, "=\n ")
cat(paste(names(val_vec), val_vec, sep=": ", collapse = "; "), "\n")
} else {
cat(nm, "=", paste(val_vec, collapse = ", "), "\n")
}
} else {
cat(nm, "=", val, "\n")
}
}
## input_file = EMA-Data.csv
## input_file_has_headers = TRUE
## output_dir = output
## late_survey_cutoff_hour = 9
## ignore_surveys = Practice Daily Survey, Practice, Practica, Prueba, Test, Temp, Trial
## surveys_per_day = 4
## total_surveys_in_study = 112
## ema_item_prefix = EMA_
## ema_item_labels =
## EMA_01: Nervous/Anxious; EMA_02: Sad/Blue; EMA_03: Irritable/Angry; EMA_04: Happy/Joyful; EMA_05: Excited/Enthusiastic; EMA_06: Energy Level; EMA_07: Situation Stressful; EMA_08: Emotions Out of Control; EMA_09: Attention to Feelings; EMA_10: Confused About Feelings; EMA_11: Emotion Regulation; EMA_12: Impulsivity; EMA_13: Acting on Feelings
## participant_group_map =
## 7: HC; 6: SBD; 5: BD
## plot_colors =
## um_maize: #FFCB05; um_blue: #00274C; um_ligth_blue: #537BAE; um_dark_blue: #012D5F
# Ensure output directory is writable:
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)
Global functions for common tasks throughout the code, such as setting color gradients.
# Global functions for common tasks throughout the code, such as setting color gradients.
# get_color_gradient(): Reusable function for color gradient, from U-M Blue to Maize
# usage: survey_colors <- get_color_gradient(survey_names)
get_color_gradient <- function(categories, from_col = plot_colors["um_blue"], to_col = plot_colors["um_maize"]) {
colorRampPalette(c(from_col, to_col))(length(categories)) |> setNames(categories)
}
# plot_ema_by_survey(): Reusable plotting function with optional friendly item labels
plot_ema_by_survey <- function(df, item, survey_colors, item_labels = NULL) {
ylabel <- if (!is.null(item_labels) && item %in% names(item_labels)) item_labels[[item]] else item
ggplot(df, aes(x = surveyname, y = .data[[item]], color = surveyname, fill = surveyname)) +
geom_jitter(width = 0.2, alpha = 0.5) +
geom_boxplot(width = 0.2, color = "black", alpha = 0.5) +
scale_fill_manual(values = survey_colors) +
scale_color_manual(values = survey_colors) +
labs(
title = paste(ylabel, "by Survey"),
x = "Survey Name",
y = ylabel
) +
theme_minimal()
}
# Read in data set
EMAwide.merge <- read.csv(input_file, sep = ",", header=input_file_has_headers)
cat("Number of rows loaded:", nrow(EMAwide.merge), "\n")
## Number of rows loaded: 9412
## Get rid of practice and test surveys
EMAwide.merge2 <- EMAwide.merge[!EMAwide.merge$surveyname %in% ignore_surveys, ]
cat(
"Excluding surveys with these names:",
paste(ignore_surveys, collapse = ", "),
"\n",
"Remaining rows after exclusion:",
nrow(EMAwide.merge2),
"\n"
)
## Excluding surveys with these names: Practice Daily Survey, Practice, Practica, Prueba, Test, Temp, Trial
## Remaining rows after exclusion: 9309
# Sort dataset and filter invalid participant IDs (R NA, empty string, "na", "n/a", "null", "NULL")
EMAwide.merge2 <- EMAwide.merge2 %>%
arrange(participantidentifier, start_datetime) %>% #sort the data by participant ID and date
filter(
!is.na(participantidentifier), # removes actual R NA
participantidentifier != "", # removes empty string
!tolower(participantidentifier) %in% c("na", "n/a", "null") # removes "NA", "N/A", "null", "NULL"
)
cat("Remaining rows after participantidentifier cleaning:", nrow(EMAwide.merge2), "\n")
## Remaining rows after participantidentifier cleaning: 9309
# Take day time variables and make sure they are a time class
EMAwide.merge2$start_datetime <- mdy_hm(EMAwide.merge2$start_datetime, tz = "UTC")
EMAwide.merge2$end_datetime <- mdy_hm(EMAwide.merge2$end_datetime, tz = "UTC")
# Create a variable that is date alone
EMAwide.merge2<- EMAwide.merge2 %>%
mutate(start_day = (as.Date(format(start_datetime, "%Y-%m-%d")))) %>%
relocate(start_day, .before = start_datetime)
# Identify duplicates based on participantidentifier, start_day, and enddate
duplicates <- EMAwide.merge2 %>%
group_by(participantidentifier,
start_datetime,
end_datetime) %>%
filter(n() > 1) %>%
ungroup()
# Combine duplicate rows by participantidentifier, start_day, and enddate
EMAwide.merge2_combined <- EMAwide.merge2 %>%
group_by(participantidentifier,
start_datetime,
end_datetime) %>%
summarise(across(everything(), ~ coalesce(.[1], .[2])), .groups = "drop") # Merge partial responses
# Resort and rename df
EMA.DIGIT <- EMAwide.merge2_combined %>%
arrange(participantidentifier, start_datetime)
cat("Remaining rows after parsing dates and removing duplicates:", nrow(EMA.DIGIT), "\n")
## Remaining rows after parsing dates and removing duplicates: 9296
# Create a new variable participant_group based on 'participant identifier'
# In this example, the participant IDs are setup so that
# anything starting with 5 indicates a bowel disease diagnosis, etc.
# This approach could also be used to group by other characteristics or cohorts.
EMA.DIGIT <- EMA.DIGIT %>%
mutate(
participant_group = unname(participant_group_map[substr(participantidentifier, 1, 1)])
) %>%
relocate(participant_group, .after = participantidentifier)
# Identify EMA item columns
ema_vars <- EMA.DIGIT %>%
select(starts_with(ema_item_prefix)) %>% # all items in our study begin with EMA_
names()
cat(
"Identify EMA item columns (prefix =", ema_item_prefix, ")...\n",
"EMA item vars found:", paste(ema_vars, collapse = ", "), "\n"
)
## Identify EMA item columns (prefix = EMA_ )...
## EMA item vars found: EMA_01, EMA_02, EMA_03, EMA_04, EMA_05, EMA_06, EMA_07, EMA_08, EMA_09, EMA_10, EMA_11, EMA_12, EMA_13
# Remove rows where all EMA items are NA
EMA.DIGIT <- EMA.DIGIT %>%
filter(!if_all(all_of(ema_vars), is.na))
cat("Rows remaining in cleaned data:", nrow(EMA.DIGIT), "\n")
## Rows remaining in cleaned data: 9280
# Calculate days since start date
calculate_day_in_study <- function(data) {
data <- data %>%
arrange(participantidentifier, start_datetime) %>%
group_by(participantidentifier) %>%
mutate(dayinstudy_uncorrected = as.integer(difftime(start_day, first(start_day), units = "days")) + 1) %>% # for each participant, calculate the number of days since their first start_day, treating the earliest day as Day 1
ungroup()
return(data)
}
# apply calculate_day_in_study function
EMA.DIGIT2 <- calculate_day_in_study(EMA.DIGIT) %>% relocate(dayinstudy_uncorrected, .before = start_day)
# Updated code without hard-coded survey name
# When surveys are completed late, after midnight, count them towards the previous day
# as long as they were completed before the specified cut-off time (late_survey_cutoff_hour)
cat("Surveys completed between midnight and", late_survey_cutoff_hour, " will be counted as the previous study day.\n")
## Surveys completed between midnight and 9 will be counted as the previous study day.
EMA.DIGIT2 <- EMA.DIGIT2 %>%
group_by(participantidentifier) %>% # Ensures correct lag in multi-participant data
mutate(
dayinstudy = case_when(
hour(start_datetime) < late_survey_cutoff_hour ~ lag(dayinstudy_uncorrected), # previous day
TRUE ~ dayinstudy_uncorrected
)
) %>%
ungroup() %>%
relocate(dayinstudy, .before = start_datetime)
# Check to make sure this worked by creating a new dataframe showing whether dayinstudy_uncorrected matches dayinstudy
EMAwide_day_check <- EMA.DIGIT2 %>%
mutate(match = dayinstudy_uncorrected == dayinstudy)
# Delete dayinstudy_uncorrected variable
EMA.DIGIT2$dayinstudy_uncorrected <- NULL
# Create a Survey number per participant
EMA.DIGIT2 <- EMA.DIGIT2 %>%
arrange(participantidentifier, start_day) %>% # Ensure data are sorted as desired
group_by(participantidentifier) %>% # Group by each participant
mutate(surveynum = row_number()) %>% # Create a sequence variable within each group
ungroup() %>%
# move our surveynum sequence before start_day variable
relocate(surveynum, .before = start_day)
# Add a weekday column
EMA.DIGIT2<- EMA.DIGIT2 %>%
mutate(weekday = weekdays(start_day),
day_type = ifelse(weekday %in% c("Saturday", "Sunday"), "Weekend", "Weekday")) %>%
relocate(day_type, .before = start_day)
Find total time it took to complete survey and identify flags
# Calculate the time difference in mins
EMA.DIGIT2<- EMA.DIGIT2 %>%
mutate(time_diff =as.integer(difftime(EMA.DIGIT2$end_datetime, EMA.DIGIT2$start_datetime, units = "secs"))) %>% relocate(time_diff, .before = start_day)
## Create a cutoff time for surveys that are taking longer, where SDcuthigh == mean + 2SD
mean <- mean(EMA.DIGIT2$time_diff, na.rm = TRUE)
mean
## [1] 178.9655
sd <- sd(EMA.DIGIT2$time_diff, na.rm = TRUE)
sd
## [1] 1429.678
SDcuthigh <- mean + (2*sd)
SDcuthigh
## [1] 3038.321
# Create a binary flag variable based on SDcuthigh condition
EMA.DIGIT2$TTCFlag_High <- ifelse(EMA.DIGIT2$time_diff > SDcuthigh, 1, 0)
## Visualize time_diff in a histogram, where red line shows our SDcuthigh threshold
ggplot(EMA.DIGIT2, aes(x = time_diff)) +
geom_histogram(binwidth = 5, fill = plot_colors["um_blue"], color = "black") +
geom_vline(xintercept = SDcuthigh, color = "red", linetype = "dashed", size = 1) + # Add vertical red line
labs(title = "Histogram of Time Difference",
x = "Time Difference (secs)",
y = "Frequency") +
theme_minimal()
## Identify the timediff low. Theoretically, we choose the minimum as someone who completes all possible items in less than 1 second per item. In the EBI study, we had 23 items, so the code below originally used 23 seconds, even though not all 23 items are included in our deidentified data file. To generalize it, the code was modified to use length(ema_vars) which would yield the number of items in the actual data file. One item = 1 second minimum to complete.
EMA.DIGIT2$TTCFlag_Low <- ifelse(EMA.DIGIT2$time_diff < length(ema_vars), 1, 0)
Calculate SD for items with Likert Scale and create flags
## If there is zero variance across all ema items, SD_Flag == 1
last_ema_var <- tail(ema_vars, 1)
EMA.DIGIT3 <- EMA.DIGIT2 %>%
mutate(
sd_EMA = apply(select(., all_of(ema_vars)), 1, sd, na.rm = TRUE),
SD_Flag = if_else(sd_EMA == 0, 1, 0)
) %>%
relocate(sd_EMA, SD_Flag, .after = last_ema_var)
## Create a df with IDs and the counts and percentages of data that are flagged
summary_df <- EMA.DIGIT3 %>%
group_by(participantidentifier) %>%
summarise(
TTCFlag_High = sum(TTCFlag_High, na.rm = TRUE),
TTCFlag_Low = sum(TTCFlag_Low, na.rm = TRUE),
SD_Flag = sum(SD_Flag, na.rm = TRUE),
Percent_TTCFlag_High = round((TTCFlag_High / total_surveys_in_study) * 100, 0),
Percent_TTCFlag_Low = round((TTCFlag_Low / total_surveys_in_study) * 100, 0),
Percent_SD_Flag = round((SD_Flag / total_surveys_in_study) * 100, 0)
)
summary_df
# Identify IDs that have 2 or more flags
SuspectIDs <- summary_df %>%
rowwise() %>%
filter(sum(c_across(Percent_TTCFlag_High:Percent_SD_Flag) > 0) >= 2) %>% # row-wise sum across
ungroup() %>% # Ungroup after row-wise operation
select(participantidentifier, Percent_TTCFlag_High, Percent_TTCFlag_Low, Percent_SD_Flag)
SuspectIDs
# Filter the dataset to drop rows where any of the conditions are met
EMA.DIGIT3_filtered <- EMA.DIGIT3 %>%
filter(!(TTCFlag_High == 1 | TTCFlag_Low == 1))
## round to nearest 0.5 minutes for visualization
EMA.DIGIT3_filtered <- EMA.DIGIT3_filtered %>%
mutate(
minutes = time_diff / 60,
minutes_rounded = floor(minutes * 2 + 0.5) / 2 )
ggplot(EMA.DIGIT3_filtered, aes(x = minutes_rounded)) +
geom_histogram(binwidth = 5, fill = "#00274C", color = "black") +
facet_wrap(~ participantidentifier) +
labs(title = "Histogram of Time Difference per Participant",
x = "Time Difference (mins)",
y = "Frequency") +
theme_minimal() +
theme(
panel.spacing = unit(1, "lines") #increase spacing between plots
)
# SD FLAGS
df_counts <- EMA.DIGIT3 %>%
count(participantidentifier, SD_Flag)
ggplot(df_counts, aes(x = factor(SD_Flag), y = n, fill = factor(SD_Flag))) +
geom_col(color = "black") +
facet_wrap(~ participantidentifier) +
labs(title = "Frequency of SD Flags",
x = "Response",
y = "Count") +
scale_fill_manual(values = c("0" = unname(plot_colors["um_maize"]), "1" = unname(plot_colors["um_blue"]))) +
theme_minimal()
# Remove duplicates and keep the first occurrence based on participantidentifier, dayinstudy, and surveyname
EMA.DIGIT4 <- EMA.DIGIT3 %>%
arrange(participantidentifier, dayinstudy, surveyname) %>% # Arrange by the relevant columns
distinct(participantidentifier, dayinstudy, surveyname, .keep_all = TRUE) # Keep first occurrence
# Define all possible survey names
survey_names <- EMA.DIGIT4$surveyname %>% unique()
# Create a complete dataset with all combinations of participant, day, and surveyname. Our current dataset only has completed surveys, so it's important we know what's really missing
expected_surveys <- EMA.DIGIT4 %>%
distinct(participantidentifier, dayinstudy) %>%
tidyr::expand(participantidentifier, dayinstudy, surveyname = survey_names)
# Count actual completed surveys per participant per day per surveyname
actual_surveys <- EMA.DIGIT4 %>%
group_by(participantidentifier, dayinstudy, surveyname) %>%
summarise(completed = n(), .groups = "drop")
# Merge expected and actual surveys
missing_surveys <- expected_surveys %>%
left_join(actual_surveys, by = c("participantidentifier", "dayinstudy", "surveyname")) %>%
mutate(completed = ifelse(is.na(completed), 0, completed), # Replace NA with 0
missing_count = 1 - completed) # 1 if missing, 0 if completed
## Plot missingness count by survey_name
# Aggregate data to sum completed and missing_count for each surveyname
survey_summary <- missing_surveys %>%
group_by(surveyname) %>%
summarise(completed = sum(completed, na.rm = TRUE),
missing_count = sum(missing_count, na.rm = TRUE)) %>%
ungroup()
# Reshape the data to long format
long_data <- survey_summary %>%
pivot_longer(cols = c(completed, missing_count),
names_to = "survey_status",
values_to = "count") # Convert to long format
# Create the grouped bar chart
ggplot(long_data, aes(x = surveyname, y = count, fill = survey_status)) +
geom_bar(stat = "identity", position = "dodge") + # Side-by-side bars
labs(x = "Survey Name", y = "Count", title = "Completed vs Missing Count by Survey Name") + # Labels & title
theme_minimal() + # Clean theme
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Rotate x-axis labels
scale_fill_manual(values = c("completed" = unname(plot_colors["um_blue"]), "missing_count" = unname(plot_colors["um_maize"])))
## Plot missingness count by weekday vs. weekend
## Get other variables we want to add to missingsurvey
Vars <- EMA.DIGIT4 %>%
select(participantidentifier, dayinstudy, surveyname,
day_type, deviceplatform, participant_group)
missing_surveys2 <- merge(missing_surveys, Vars, by = c("participantidentifier", "dayinstudy", "surveyname"), all.x = TRUE)
missing_surveys2 <- missing_surveys2 %>%
group_by(participantidentifier) %>%
fill(day_type, deviceplatform, participant_group, .direction = "downup") %>% # Specify .direction only once
ungroup()
# Aggregate data to sum completed and missing_count for each surveyname
survey_summary <- missing_surveys2 %>%
group_by(day_type) %>%
summarise(completed = sum(completed, na.rm = TRUE),
missing_count = sum(missing_count, na.rm = TRUE)) %>%
ungroup()
# Reshape the data to long format
long_data <- survey_summary %>%
pivot_longer(cols = c(completed, missing_count),
names_to = "survey_status",
values_to = "count") # Convert to long format
# Create the grouped bar chart
ggplot(long_data, aes(x = day_type, y = count, fill = survey_status)) +
geom_bar(stat = "identity", position = "dodge") + # Side-by-side bars
labs(x = "Day Type", y = "Count", title = "Completed vs Missing Count by Day Type") + # Labels & title
theme_minimal() + # Clean theme
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Rotate x-axis labels
scale_fill_manual(values = c("completed" = unname(plot_colors["um_blue"]), "missing_count" = unname(plot_colors["um_maize"])))
## Now plot completion v. deviceplatform
# Aggregate data to sum completed and missing_count for each surveyname
survey_summary <- missing_surveys2 %>%
group_by(deviceplatform) %>%
summarise(completed = sum(completed, na.rm = TRUE),
missing_count = sum(missing_count, na.rm = TRUE)) %>%
ungroup()
# Reshape the data to long format
long_data <- survey_summary %>%
pivot_longer(cols = c(completed, missing_count),
names_to = "survey_status",
values_to = "count") # Convert to long format
# Create the grouped bar chart
ggplot(long_data, aes(x = deviceplatform, y = count, fill = survey_status)) +
geom_bar(stat = "identity", position = "dodge") + # Side-by-side bars
labs(x = "Device", y = "Count", title = "Completed vs Missing Count by Device") + # Labels & title
theme_minimal() + # Clean theme
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Rotate x-axis labels
scale_fill_manual(values = c("completed" = unname(plot_colors["um_blue"]), "missing_count" = unname(plot_colors["um_maize"])))
## Now do it for participant groups (diagnosis, cohort, etc.)
# Aggregate data to sum completed and missing_count for each participant group
survey_summary <- missing_surveys2 %>%
group_by(participant_group) %>%
summarise(completed = sum(completed, na.rm = TRUE),
missing_count = sum(missing_count, na.rm = TRUE)) %>%
ungroup()
# Reshape the data to long format
long_data <- survey_summary %>%
pivot_longer(cols = c(completed, missing_count),
names_to = "survey_status",
values_to = "count") # Convert to long format
# Create the grouped bar chart
ggplot(long_data, aes(x = participant_group, y = count, fill = survey_status)) +
geom_bar(stat = "identity", position = "dodge") + # Side-by-side bars
labs(x = "Participant Group", y = "Count", title = "Completed vs Missing Count by Participant Group") + # Labels & title
theme_minimal() + # Clean theme
theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # Rotate x-axis labels
scale_fill_manual(values = c("completed" = unname(plot_colors["um_blue"]), "missing_count" = unname(plot_colors["um_maize"])))
# Compliance summary
df_compliance <- EMA.DIGIT4 %>%
group_by(participantidentifier) %>%
summarise(
total_completed = sum(!is.na(surveyname)),
total_expected = max(dayinstudy, na.rm = TRUE) * surveys_per_day,
percent_compliance = (total_completed / total_expected) * 100,
participant_group = first(participant_group)
)
df_compliance$participant_group <- as.factor(df_compliance$participant_group)
# Get all unique groups & set chunk size
group_levels <- unique(df_compliance$participant_group)
groups_per_plot <- 4
# Split groups into chunks of 4
group_slices <- split(group_levels, ceiling(seq_along(group_levels) / groups_per_plot))
# Loop and create compliance by participant violin plots, with max 4 groups at a time
for (i in seq_along(group_slices)) {
current_groups <- group_slices[[i]]
# Subset data
df_sub <- df_compliance %>%
filter(participant_group %in% current_groups)
# Dynamic color palette for each group
current_colors <- get_color_gradient(
as.character(current_groups),
from_col = plot_colors["um_blue"],
to_col = plot_colors["um_maize"]
)
# Make and print the plot
p <- ggplot(df_sub, aes(x = participant_group, y = percent_compliance, fill = participant_group)) +
geom_violin(trim = TRUE, alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.5) +
scale_fill_manual(values = current_colors) +
labs(
title = paste("Survey Completion % for Groups:", paste(current_groups, collapse = ", ")),
x = "Participant Group",
y = "Percent Complete"
) +
theme_minimal() +
theme(legend.position = "none") +
ylim(0,101)
print(p)
}
table1(~ percent_compliance | participant_group, data=df_compliance)
| BD (N=36) |
HC (N=34) |
SBD (N=33) |
Overall (N=103) |
|
|---|---|---|---|---|
| percent_compliance | ||||
| Mean (SD) | 79.3 (19.2) | 89.4 (10.8) | 80.2 (20.0) | 82.9 (17.6) |
| Median [Min, Max] | 87.9 [35.2, 100] | 93.3 [55.2, 99.1] | 88.4 [21.1, 99.1] | 89.3 [21.1, 100] |
# Function for plotting histogram
plot_list <- lapply(ema_vars, function(var){ # use previously defined ema_vars
ggplot(EMA.DIGIT4, aes(x = .data[[var]])) +
geom_histogram(bins = 10, fill = "skyblue", color = "black") +
labs(
title = ema_item_labels[[var]],
x = var,
y = "Frequency"
) +
theme_minimal()
})
# Plot on a grid
combined_histograms <- wrap_plots(plot_list, ncol = 3)
combined_histograms
survey_names <- unique(EMA.DIGIT4$surveyname)
num_surveys <- length(survey_names)
survey_colors <- get_color_gradient(survey_names)
names(survey_colors) <- survey_names
ema_plots <- lapply(ema_vars, function(item) {
plot_ema_by_survey(EMA.DIGIT4, item, survey_colors, ema_item_labels)
})
for (i in seq_along(ema_vars)) {
print(ema_plots[[i]])
}
# Define the corrmatrix
corrmatrix <- EMA.DIGIT4 %>%
select(participantidentifier, starts_with(ema_item_prefix))
# For ema item labels, retrieve the ones corresponding to the actual columns in the correlation matrices
current_ema_items <- names(corrmatrix)[names(corrmatrix) != "participantidentifier"]
current_item_labels <- ema_item_labels[current_ema_items]
StatsBetween <- statsBy(corrmatrix, group = "participantidentifier", cors = FALSE, method="spearman", na.rm = TRUE)
Betweencorr <- as.matrix (StatsBetween$rbg) ## this is between person r value
betweenp <- as.matrix(StatsBetween$pbg) ## this is between person p value
betweenm <- as.matrix(StatsBetween$mean) ## This is person centered mean
Withincorr <- as.matrix (StatsBetween$rwg) ## this is within person r value
withinp <- as.matrix(StatsBetween$pwg) ## this is within person p value
withinsd <- as.matrix(StatsBetween$sd) ## this is the within-person SD
# define color palette
my_col <- colorRampPalette(c(plot_colors["um_maize"], "white", plot_colors["um_dark_blue"]))(100)
# Set readable labels for corrplot using your labels param
rownames(Betweencorr) <- paste0(seq_along(current_item_labels), ". ", current_item_labels)
colnames(Betweencorr) <- rep("", length(current_item_labels))
rownames(Withincorr) <- rep("", length(current_item_labels))
colnames(Withincorr) <- paste0(seq_along(current_item_labels), ".")
# Plot within correlations on the upper diagonal
corrplot(Withincorr,
type = "upper",
method = "color",
col = my_col,
p.mat = withinp,
sig.level = 0.05,
insig = "pch",
addCoef.col = "black",
tl.col = "black",
na.label = "--",
pch.cex = 15,
tl.srt = 0,
tl.cex = 3,
number.cex = 3,
cl.cex = 3,
number.digits = 2)
# Plot between person correlations on the lower diagonal
corrplot(Betweencorr,
type = "lower",
method = "color",
col = my_col,
p.mat = betweenp,
sig.level = 0.05,
insig = "pch",
addCoef.col = "black",
tl.col = "black",
pch.cex = 15,
tl.cex = 3,
number.cex = 3,
cl.pos = "n",
add = TRUE) # plots between and within together on the same plot!
The correlation tables are saved as individual CSVs in the output_dir directory.
# Create csv files of correlation tables
write.csv(Betweencorr, file = file.path(output_dir, "Betweencorr.csv"))
write.csv(Withincorr, file = file.path(output_dir, "Withincorr.csv"))
write.csv(betweenm, file = file.path(output_dir, "betweenm.csv"))
write.csv(betweenp, file = file.path(output_dir, "betweenp.csv"))
write.csv(withinp, file = file.path(output_dir, "withinp.csv"))
write.csv(withinsd, file = file.path(output_dir, "withinsd.csv"))
# For HTML report (from R console or script)
#rmarkdown::render("EMA-CleanR.Rmd", output_dir = output_dir)
For instructions and more information on this program, please visit our GitHub page at: https://github.com/DepressionCenter/EMA-CleanR
Copyright © 2025 The Regents of the University of Michigan
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/gpl-3.0-standalone.html.
Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. You should have received a copy of the license included in the section entitled “GNU Free Documentation License”. If not, see https://www.gnu.org/licenses/fdl-1.3-standalone.html
If you find this repository, code or paper useful for your research, please cite it.
Sperry, Sarah; Murphy, Victoria (2025). EMA-CleanR. University of Michigan. Software. https://github.com/DepressionCenter/EMA-CleanR
DOI: 10.5281/zenodo.17982076
Copyright © 2025 The Regents of the University of Michigan