library(igraph)
library(qgraph)
library(casnet)
library(plyr)
library(dplyr)
library(tidyr)
library(caret)
library(psych)
library(lme4)
library(reshape2)
library(lubridate)
library(parallel)
library(wesanderson)
library(stringr)
library(matrixStats)
library(ggthemes)
library(gridExtra)
library(cowplot)
# save the original plotting parameters (can be restored later with par(og_par))
og_par <- par(no.readonly = TRUE)
devtools::session_info()
─ Session info ────────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value
 version  R version 3.6.3 (2020-02-29)
 os       Ubuntu 18.04.4 LTS
 system   x86_64, linux-gnu
 ui       RStudio
 language (EN)
 collate  C.UTF-8
 ctype    C.UTF-8
 tz       Etc/UTC
 date     2020-10-06
─ Packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────
package * version date lib source
abind 1.4-5 2016-07-21 [1] CRAN (R 3.5.1)
acepack 1.4.1 2016-10-29 [1] CRAN (R 3.6.1)
assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.5.1)
backports 1.1.4 2019-04-10 [1] CRAN (R 3.5.1)
base64enc 0.1-3 2015-07-28 [1] CRAN (R 3.5.1)
BDgraph 2.58 2019-04-15 [1] CRAN (R 3.5.1)
boot 1.3-25 2020-04-26 [3] CRAN (R 3.6.3)
broom 0.5.2 2019-04-07 [1] CRAN (R 3.5.1)
callr 3.3.0 2019-07-04 [1] CRAN (R 3.5.1)
caret * 6.0-83 2019-04-18 [1] CRAN (R 3.5.1)
casnet * 0.1.4 2019-07-10 [1] Github (FredHasselman/casnet@2e6f024)
checkmate 1.9.4 2019-07-04 [1] CRAN (R 3.6.1)
class 7.3-17 2020-04-26 [3] CRAN (R 3.6.3)
cli 1.1.0 2019-03-19 [1] CRAN (R 3.5.1)
cluster 2.1.0 2019-06-19 [3] CRAN (R 3.6.1)
codetools 0.2-16 2018-12-24 [3] CRAN (R 3.6.3)
colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.5.1)
corpcor 1.6.9 2017-04-01 [1] CRAN (R 3.5.1)
cowplot * 0.9.4 2019-01-08 [1] CRAN (R 3.5.1)
crayon 1.3.4 2017-09-16 [1] CRAN (R 3.5.1)
d3Network 0.5.2.1 2015-01-31 [1] CRAN (R 3.5.1)
data.table 1.12.2 2019-04-07 [1] CRAN (R 3.6.1)
desc 1.2.0 2018-05-01 [1] CRAN (R 3.5.1)
devtools 2.0.1 2018-10-26 [1] CRAN (R 3.5.1)
digest 0.6.20 2019-07-04 [1] CRAN (R 3.5.1)
dplyr * 0.8.3 2019-07-04 [1] CRAN (R 3.5.1)
fdrtool 1.2.15 2015-07-08 [1] CRAN (R 3.5.1)
foreach 1.4.4 2017-12-12 [1] CRAN (R 3.5.1)
foreign 0.8-76 2020-03-03 [3] CRAN (R 3.6.3)
Formula 1.2-3 2018-05-03 [1] CRAN (R 3.6.1)
fs 1.2.6 2018-08-23 [1] CRAN (R 3.5.1)
generics 0.0.2 2018-11-29 [1] CRAN (R 3.5.1)
ggm 2.3 2015-01-21 [1] CRAN (R 3.5.1)
ggplot2 * 3.2.0 2019-06-16 [1] CRAN (R 3.5.1)
ggthemes * 4.2.0 2019-05-13 [1] CRAN (R 3.5.1)
glasso 1.10 2018-07-13 [1] CRAN (R 3.5.1)
glue 1.3.1 2019-03-12 [1] CRAN (R 3.5.1)
gower 0.2.0 2019-03-07 [1] CRAN (R 3.5.1)
gridExtra * 2.3 2017-09-09 [1] CRAN (R 3.6.1)
gtable 0.3.0 2019-03-25 [1] CRAN (R 3.5.1)
gtools 3.8.1 2018-06-26 [1] CRAN (R 3.5.1)
Hmisc 4.2-0 2019-01-26 [1] CRAN (R 3.5.1)
htmlTable 1.13.1 2019-01-07 [1] CRAN (R 3.6.1)
htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.5.1)
htmlwidgets 1.3 2018-09-30 [1] CRAN (R 3.5.1)
huge 1.3.2 2019-04-08 [1] CRAN (R 3.5.1)
igraph * 1.2.4.1 2019-04-22 [1] CRAN (R 3.5.1)
invctr 0.1.0 2019-03-07 [1] CRAN (R 3.5.1)
ipred 0.9-8 2018-11-05 [1] CRAN (R 3.5.1)
iterators 1.0.10 2018-07-13 [1] CRAN (R 3.5.1)
jpeg 0.1-8 2014-01-23 [1] CRAN (R 3.5.1)
knitr 1.23 2019-05-18 [1] CRAN (R 3.5.1)
lattice * 0.20-41 2020-04-02 [3] CRAN (R 3.6.3)
latticeExtra 0.6-28 2016-02-09 [1] CRAN (R 3.6.1)
lava 1.6.5 2019-02-12 [1] CRAN (R 3.5.1)
lavaan 0.6-3 2018-09-22 [1] CRAN (R 3.5.1)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 3.5.1)
lme4 * 1.1-21 2019-03-05 [1] CRAN (R 3.5.1)
lubridate * 1.7.4 2018-04-11 [1] CRAN (R 3.5.1)
magrittr 1.5 2014-11-22 [1] CRAN (R 3.5.1)
MASS 7.3-51.3 2019-03-31 [1] CRAN (R 3.5.1)
Matrix * 1.2-17 2019-03-22 [1] CRAN (R 3.5.1)
matrixStats * 0.54.0 2018-07-23 [1] CRAN (R 3.5.1)
memoise 1.1.0 2017-04-21 [1] CRAN (R 3.5.1)
minqa 1.2.4 2014-10-09 [1] CRAN (R 3.5.1)
mnormt 1.5-5 2016-10-15 [1] CRAN (R 3.5.1)
ModelMetrics 1.2.2 2018-11-03 [1] CRAN (R 3.5.1)
munsell 0.5.0 2018-06-12 [1] CRAN (R 3.5.1)
nlme 3.1-147 2020-04-13 [3] CRAN (R 3.6.3)
nloptr 1.2.1 2018-10-03 [1] CRAN (R 3.6.1)
nnet 7.3-14 2020-04-26 [3] CRAN (R 3.6.3)
pbapply 1.4-0 2019-02-05 [1] CRAN (R 3.5.1)
pbivnorm 0.6.0 2015-01-23 [1] CRAN (R 3.6.1)
pillar 1.4.2 2019-06-29 [1] CRAN (R 3.5.1)
pkgbuild 1.0.2 2018-10-16 [1] CRAN (R 3.5.1)
pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.5.1)
pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.5.1)
plyr * 1.8.4 2016-06-08 [1] CRAN (R 3.5.1)
png 0.1-7 2013-12-03 [1] CRAN (R 3.5.1)
prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.5.1)
processx 3.4.0 2019-07-03 [1] CRAN (R 3.5.1)
prodlim 2018.04.18 2018-04-18 [1] CRAN (R 3.5.1)
ps 1.3.0 2018-12-21 [1] CRAN (R 3.5.1)
psych * 1.8.12 2019-01-12 [1] CRAN (R 3.5.1)
purrr 0.3.4 2020-04-17 [1] CRAN (R 3.6.3)
qgraph * 1.6.1 2019-02-13 [1] CRAN (R 3.5.1)
R6 2.4.0 2019-02-14 [1] CRAN (R 3.5.1)
RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 3.5.1)
Rcpp 1.0.2 2019-07-25 [1] CRAN (R 3.6.1)
recipes 0.1.5 2019-03-21 [1] CRAN (R 3.5.1)
remotes 2.0.2 2018-10-30 [1] CRAN (R 3.5.1)
reshape2 * 1.4.3 2017-12-11 [1] CRAN (R 3.5.1)
rjson 0.2.20 2018-06-08 [1] CRAN (R 3.6.1)
rlang 0.4.0 2019-06-25 [1] CRAN (R 3.5.1)
rpart 4.1-15 2019-04-12 [3] CRAN (R 3.6.1)
rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.5.1)
rstudioapi 0.10 2019-03-19 [1] CRAN (R 3.5.1)
scales 1.0.0 2018-08-09 [1] CRAN (R 3.5.1)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.5.1)
stringi 1.4.3 2019-03-12 [1] CRAN (R 3.5.1)
stringr * 1.4.0 2019-02-10 [1] CRAN (R 3.5.1)
survival 3.1-12 2020-04-10 [3] CRAN (R 3.6.3)
testthat 2.1.1 2019-04-23 [1] CRAN (R 3.5.1)
tibble 2.1.3 2019-06-06 [1] CRAN (R 3.5.1)
tidyr * 0.8.3 2019-03-01 [1] CRAN (R 3.5.1)
tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.5.1)
timeDate 3043.102 2018-02-21 [1] CRAN (R 3.5.1)
usethis 1.4.0 2018-08-14 [1] CRAN (R 3.5.1)
wesanderson * 0.3.6 2018-04-20 [1] CRAN (R 3.6.3)
whisker 0.3-2 2013-04-28 [1] CRAN (R 3.5.1)
withr 2.1.2 2018-03-15 [1] CRAN (R 3.5.1)
xfun 0.8 2019-06-25 [1] CRAN (R 3.5.1)
[1] /usr/local/lib/R/site-library
[2] /usr/lib/R/site-library
[3] /usr/lib/R/library
for (timepoint in c('m00', 'm03', 'm06', 'm09', 'm12')) {
aumc_df[, paste0(timepoint, "_NHPT_D_avg")] <- rowMeans(aumc_df[, c(paste0(timepoint, "_NHPT_D1"), paste0(timepoint,"_NHPT_D2"))], na.rm=T)
aumc_df[, paste0(timepoint, "_NHPT_ND_avg")] <- rowMeans(aumc_df[, c(paste0(timepoint, "_NHPT_ND1"), paste0(timepoint,"_NHPT_ND2"))], na.rm=T)
aumc_df[, paste0(timepoint, "_NHPT_BH_avg")] <- rowMeans(aumc_df[, c(paste0(timepoint, "_NHPT_D_avg"), paste0(timepoint,"_NHPT_ND_avg"))], na.rm=T)
aumc_df[, paste0(timepoint, "_TWT_avg")] <- rowMeans(aumc_df[, c(paste0(timepoint, "_TWT_1"), paste0(timepoint,"_TWT_2"))], na.rm=T)
}
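# A minimal check of the rowMeans(..., na.rm = TRUE) behaviour assumed above: when both
# columns of a pair are missing, the result is NaN; is.na() still treats NaN as missing,
# so downstream na.rm handling is unaffected. Toy example (hypothetical values, not study data):
# rowMeans(data.frame(a = c(1, NA), b = c(3, NA)), na.rm = TRUE)  # returns 2 and NaN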
outcomes_of_interest_ls <- c()
for (outcome in outcomes_list) {
outcomes_of_interest_ls <- append(outcomes_of_interest_ls, colnames(aumc_df %>% select(matches(outcome))))
}
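# A more compact equivalent of the loop above (a sketch; matches() inside select() is
# case-insensitive by default, hence ignore.case = TRUE here):
# outcomes_of_interest_ls <- unlist(lapply(outcomes_list, function(pattern) {
#   grep(pattern, colnames(aumc_df), value = TRUE, ignore.case = TRUE)
# }))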
# features with highest rqa measures (most interesting NLTSA wise)
feature_selection <- c('post_correction_slowing_MEAN_2CD',
'pre_correction_slowing_MEAN_2CD',
'hold_time_STD',
'flight_time_MEDIAN',
'flight_time_MEAN_ABS_CHANGE',
'hold_time_SKEW',
'correction_duration_MEAN_2CD',
'post_correction_slowing_P_AUTO_CORR',
'after_punctuation_pause_MEAN_CHANGE',
'after_punctuation_pause_MEAN_2CD',
'session_duration_MEAN_2CD',
'after_punctuation_pause_MEDIAN',
'flight_time_MAX')
feature_selection
[1] "post_correction_slowing_MEAN_2CD" "pre_correction_slowing_MEAN_2CD" "hold_time_STD"
[4] "flight_time_MEDIAN" "flight_time_MEAN_ABS_CHANGE" "hold_time_SKEW"
[7] "correction_duration_MEAN_2CD" "post_correction_slowing_P_AUTO_CORR" "after_punctuation_pause_MEAN_CHANGE"
[10] "after_punctuation_pause_MEAN_2CD" "session_duration_MEAN_2CD" "after_punctuation_pause_MEDIAN"
[13] "flight_time_MAX"
mri_cols <- colnames(aumc_df %>% select(matches("mri_enhancing")))
diff_df <- NA
for (i in seq(1, length(mri_cols)-1)) {
diff_df <- cbind(diff_df, aumc_df[mri_cols[i+1]] - aumc_df[mri_cols[i]])
}
diff_df <- diff_df[, 2:ncol(diff_df)]
diff_df <- diff_df[rowSums(is.na(diff_df)) < 3, ]
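# A more compact equivalent of the difference construction above (a sketch; assumes mri_cols
# is ordered chronologically, as the loop does; diff_mat is a hypothetical name):
# diff_mat <- t(apply(aumc_df[, mri_cols], 1, diff))
# diff_df  <- as.data.frame(diff_mat)[rowSums(is.na(diff_mat)) < 3, ]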
## SPECIFY THE INDICES YOU WISH TO INVESTIGATE
# minimum number of keystroke rows a user must have to be included
row_amount <- 58
# user indices which have a change in mri
mri_change_index <- as.integer(names(which(rowSums(abs(diff_df), na.rm=TRUE) > 0)))
mri_change_ls <- aumc_df$user_id[mri_change_index]
short_mri_change_ls <- c()
for (user in mri_change_ls) {
if (nrow(key_df[key_df$user_id == user, ]) > row_amount) {
short_mri_change_ls <- c(short_mri_change_ls, user)
}
}
mri_change_index <- which(aumc_df$user_id %in% short_mri_change_ls)
# user indices which don't have a change in mri
mri_no_change_index <- as.integer(names(which(rowSums(abs(diff_df), na.rm=TRUE) == 0)))
mri_no_change_ls <- aumc_df$user_id[mri_no_change_index]
short_mri_no_change_ls <- c()
for (user in mri_no_change_ls) {
if (nrow(key_df[key_df$user_id == user, ]) > row_amount) {
short_mri_no_change_ls <- c(short_mri_no_change_ls, user)
}
}
mri_no_change_index <- which(aumc_df$user_id %in% short_mri_no_change_ls)
# healthy controls
hc_users_index <- which(aumc_df$diagnosis_ms == 0)
hc_users_ls <- aumc_df$user_id[hc_users_index]
short_hc_users_ls <- c()
for (user in hc_users_ls) {
if (nrow(key_df[key_df$user_id == user, ]) > row_amount) {
short_hc_users_ls <- c(short_hc_users_ls, user)
}
}
hc_users_index <- which(aumc_df$user_id %in% short_hc_users_ls)
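# The three filtering blocks above repeat the same pattern (keep only users with more than
# row_amount keystroke rows); a small helper could replace them (a sketch; keep_long_users
# is a hypothetical name, key_df and aumc_df as above):
# keep_long_users <- function(user_ids, min_rows = row_amount) {
#   counts <- table(key_df$user_id)
#   user_ids[user_ids %in% names(counts)[counts > min_rows]]
# }
# e.g. mri_change_index <- which(aumc_df$user_id %in% keep_long_users(mri_change_ls))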
list_of_user_index_lists <- list(mri_change = mri_change_index,
                                 mri_no_change = mri_no_change_index,
                                 hc_users = hc_users_index)
prepped_dfs_list <- list()
for (users_list_number in 1:length(list_of_user_index_lists)){
user_list_name<-names(list_of_user_index_lists[users_list_number])
users_selection_index<-list_of_user_index_lists[[user_list_name]]
print(user_list_name)
# use the chosen index to get a list of users
user_selection <- aumc_df$user_id[users_selection_index]
print(user_selection)
# count non-missing MRI and visit-date entries among the MS patients (overview only; not used further in this loop)
missing_df <- data.frame("sample size" = colSums(!is.na(aumc_df[aumc_df$diagnosis_ms == 1, c("user_id", mri_cols, visitdate_cols)])))
# subset relevant users' keystroke data
key_sub_df <- key_df[key_df$user_id %in% user_selection, ]
# order based on user id and timestamp
key_sub_df <- key_sub_df[order(key_sub_df$user_id, key_sub_df$timestamp), ]
## Creating a data frame of the complexity measures per user
## Writing code which will parallelize the EWS calculations
dfs_list <- df_user_list(df = key_sub_df, features = c("user_id", "timestamp", feature_selection),
user_column_name = "user_id", users = user_selection)
complexity_dfs_list <- mclapply(dfs_list, EWS_calc) # mclapply is the parallelized version of lapply
# filter only the data frames in the list (drop where there was not enough data)
complexity_dfs_list <- Filter(is.data.frame, complexity_dfs_list)
complex_df <- rbind.fill(complexity_dfs_list)
# merge the complex df back to the keystroke df
key_complex_df <- merge(key_sub_df[, c("user_id", "timestamp", feature_selection)],
complex_df, by = c("user_id", "timestamp"))
# relabel the 10's to 1's in the complexity_peaks factor to avoid issues later
key_complex_df$complexity_peaks[key_complex_df$complexity_peaks == 10] <- 1
# aumc_outcome_selector <- mri_cols
aumc_outcome_selector <- outcomes_of_interest_ls
# create a melted version of the aumc_df with only the variables of interest
aumc_df_m_1 <- melt(aumc_df[c("user_id", aumc_outcome_selector)], id.vars="user_id",
variable.name = "PRO_name", value.name = "PRO_value")
aumc_df[c(date_cols)] <- apply(aumc_df[c(date_cols)], 2, as.character)
aumc_df_m_2 <- melt(aumc_df[c("user_id", date_cols)], id.vars="user_id",
variable.name = "visitdate_cat", value.name = "date")
aumc_df_m_2$date <- as.Date(aumc_df_m_2$date)
aumc_df_m_1 <- aumc_df_m_1 %>% mutate(time_code = substr(PRO_name,1L,4L))
aumc_df_m_2 <- aumc_df_m_2 %>% mutate(time_code = substr(visitdate_cat,1L,4L))
aumc_df_m <- merge(aumc_df_m_1, aumc_df_m_2, by = c("user_id", "time_code"))
# select the outcome measures and their corresponding questionnaire/ visitdates then reassign
aumc_df_m_vs <- aumc_df_m %>%
  filter(str_detect(PRO_name, paste(visitdate_outcomes, collapse = "|")) &
           str_detect(visitdate_cat, paste(visitdate_cols, collapse = "|")))
aumc_df_m_qs <- aumc_df_m %>%
  filter(str_detect(PRO_name, paste(questionnaire_date_outcomes, collapse = "|")) &
           str_detect(visitdate_cat, paste(questionnare_date_cols, collapse = "|")))
aumc_df_m <- rbind(aumc_df_m_vs, aumc_df_m_qs)
# merge the melted aumc_df with the key_complex_df
key_complex_df$date <- as.Date(key_complex_df$timestamp)
df.mod <- merge(key_complex_df, aumc_df_m, by = c("user_id", "date"), all.x=TRUE, all.y=FALSE)
prepped_dfs_list[[user_list_name]] <- df.mod
}
[1] "mri_change"
[1] 368 377 380 383 389 393 398 409 410 424 433 438 444
[1] "mri_no_change"
[1] 364 365 367 369 382 386 387 388 390 396 400 406 408 411 413 416 429 430 435 449
[1] "hc_users"
[1] 372 373 376 381 384 391 402 403 404 405 412 415 421 423 425 427 431 432 437 439 448 463
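# The EWS_calc step above uses parallel::mclapply with its default worker count; a hedged
# variant sets the core count explicitly (mclapply relies on forking, so on Windows
# mc.cores must stay at 1):
# n_cores <- max(1, parallel::detectCores() - 1)
# complexity_dfs_list <- mclapply(dfs_list, EWS_calc, mc.cores = n_cores)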
user_failed_list <- c()
for (users_list_number in 1:length(list_of_user_index_lists)){
user_list_name<-names(list_of_user_index_lists[users_list_number])
users_selection_index<-list_of_user_index_lists[[user_list_name]]
print(user_list_name)
prepped_df <- prepped_dfs_list[[user_list_name]]
# use the chosen index to get a list of users
user_selection <- aumc_df$user_id[users_selection_index]
for (user in user_selection) {
user_dc_df <- prepped_df[prepped_df$user_id == user, ]
sub_aumc_df_m <- aumc_df_m[aumc_df_m$user_id == user, ]
temp <- aumc_df_m[aumc_df_m$user_id %in% user_selection, ]  # group-level subset (not used further below)
selected_outcomes_list <- c()
for (i in 1:length(outcomes_list)) {
current_outcome_vector <- na.omit(sub_aumc_df_m[grep(outcomes_list[i], sub_aumc_df_m$PRO_name), "PRO_value"])
# consecutive visit-to-visit differences
diff_list <- diff(current_outcome_vector)
if (outcomes_list[i] %in% c("NHPT_ND_avg", "NHPT_D_avg", "TWT_avg")){
if (any(abs(diff_list) > diff_list[1] * outcome_plus_cutoffs_df[outcome_plus_cutoffs_df$outcome == outcomes_list[i], "cutoffs"], na.rm=T)){
selected_outcomes_list <- append(selected_outcomes_list, outcomes_list[i])
}
} else{
if (any(abs(diff_list) >= outcome_plus_cutoffs_df[outcome_plus_cutoffs_df$outcome == outcomes_list[i], "cutoffs"], na.rm=T)){
selected_outcomes_list <- append(selected_outcomes_list, outcomes_list[i])
}
}
}
clin_matches <- unique(grep(paste(selected_outcomes_list,collapse="|"),
sub_aumc_df_m$PRO_name, value=TRUE))
clin_sub_aumc_df_m <- sub_aumc_df_m
clin_sub_aumc_df_m[clin_sub_aumc_df_m$PRO_name %notin% clin_matches, "PRO_value"] <- NA
clin_sub_aumc_df_m$m_date <- str_split_fixed(clin_sub_aumc_df_m$PRO_name, "_", 2)[,1]
clin_sub_aumc_df_m$m_date <- factor(clin_sub_aumc_df_m$m_date)
clin_sub_aumc_df_m$PRO_group <- str_split_fixed(clin_sub_aumc_df_m$PRO_name, "_", 2)[,2]
# Two scaling options - 1) scale within each outcome measure
# sort data frame on PRO_group and PRO_name otherwise merge of scaled column will be incorrect
clin_sub_aumc_df_m <- clin_sub_aumc_df_m[order(clin_sub_aumc_df_m$PRO_group, clin_sub_aumc_df_m$PRO_name), ]
tst <- split(clin_sub_aumc_df_m$PRO_value, clin_sub_aumc_df_m$PRO_group)
tst_ls <- lapply(tst, elascer)
clin_sub_aumc_df_m<-cbind(clin_sub_aumc_df_m, data.frame(PRO_value_scaled=unlist(tst_ls)))
# Two scaling options - 2) scale all outcome measures without grouping prior
# clin_sub_aumc_df_m$PRO_value_scaled <- elascer(clin_sub_aumc_df_m$PRO_value)
user_dc_df$CCP_dates <- user_dc_df$date
if (all(is.na(user_dc_df$complexity_peaks))) {
print(paste("For user ==", user, "there were ZERO complexity peaks"))
user_failed_list <- c(user_failed_list, user)
}else{
user_dc_df[user_dc_df$complexity_peaks < 0.5, "CCP_dates"] <- NA
}
# make sure outcomes_list and the user's outcomes data frame are sorted the same way, otherwise the boolean column built below will not align with the rows
clin_sub_aumc_df_m <- clin_sub_aumc_df_m[order(clin_sub_aumc_df_m$PRO_group, clin_sub_aumc_df_m$PRO_name), ]
outcomes_list <- sort(outcomes_list)
clinically_relevant_bool_col <- c()
for (i in 1:length(outcomes_list)) {
current_outcome_vector <- clin_sub_aumc_df_m[grep(outcomes_list[i], clin_sub_aumc_df_m$PRO_name), "PRO_value"]
# consecutive visit-to-visit differences
diff_list <- diff(current_outcome_vector)
if (outcomes_list[i] %in% c("NHPT_ND_avg", "NHPT_D_avg", "TWT_avg")){
clinically_relevant_bool <- abs(diff_list) > diff_list[1] * outcome_plus_cutoffs_df[outcome_plus_cutoffs_df$outcome == outcomes_list[i], "cutoffs"]
clinically_relevant_bool <- c(FALSE, clinically_relevant_bool)
for (j in seq_along(clinically_relevant_bool)[-1]) {
if (clinically_relevant_bool[j] & !is.na(clinically_relevant_bool[j])) {
clinically_relevant_bool[j - 1] <- TRUE
}
}
clinically_relevant_bool_col <- append(clinically_relevant_bool_col, clinically_relevant_bool)
} else{
clinically_relevant_bool <- abs(diff_list) >= outcome_plus_cutoffs_df[outcome_plus_cutoffs_df$outcome == outcomes_list[i], "cutoffs"]
clinically_relevant_bool <- c(FALSE, clinically_relevant_bool)
for (j in seq_along(clinically_relevant_bool)[-1]) {
if (clinically_relevant_bool[j] & !is.na(clinically_relevant_bool[j])) {
clinically_relevant_bool[j - 1] <- TRUE
}
}
clinically_relevant_bool_col <- append(clinically_relevant_bool_col, clinically_relevant_bool)
}
}
clin_sub_aumc_df_m$clinically_relevant_boolean <- clinically_relevant_bool_col
# tiff(paste0("/home/james/Data_Science/data-science-project/research-papers/2020-NLTSA/latex/Images/", user, "_main_results.tiff"), units="in", width=10, height=4, res=400)
# png(paste0("/home/james/Data_Science/data-science-project/research-papers/2020-NLTSA/latex/Images/", user, "_main_results.png"), units="in", width=10, height=4, res=400)
date_selection <- unique(clin_sub_aumc_df_m[as.character(clin_sub_aumc_df_m$visitdate_cat) %in% questionnare_date_cols, "date"])
plot <- ggplot(clin_sub_aumc_df_m) +
geom_point(data=clin_sub_aumc_df_m[clin_sub_aumc_df_m$clinically_relevant_boolean, ],
aes(x=date, y=PRO_value_scaled, color=PRO_group, group = PRO_group)) +
geom_line(data=clin_sub_aumc_df_m[clin_sub_aumc_df_m$clinically_relevant_boolean, ],
aes(x=date, y=PRO_value_scaled, color=PRO_group, group = PRO_group#, size = PRO_group
)) +
geom_point(data=na.omit(clin_sub_aumc_df_m[!clin_sub_aumc_df_m$clinically_relevant_boolean, ]),
aes(x=date, y=PRO_value_scaled, color=PRO_group, group = PRO_group), alpha=0.1) +
geom_line(data=na.omit(clin_sub_aumc_df_m[!clin_sub_aumc_df_m$clinically_relevant_boolean, ]),
aes(x=date, y=PRO_value_scaled, color=PRO_group, group = PRO_group), alpha=0.1) +
geom_line(data = user_dc_df, aes(date, dynamic_complexity_sum), size=0.5) +
geom_point(data = user_dc_df, aes(date, dynamic_complexity_sum), shape = 16, size=2) +
xlim(min(aumc_df_m[aumc_df_m$user_id == user, "date"], na.rm = TRUE),
max(aumc_df_m[aumc_df_m$user_id == user, "date"], na.rm = TRUE)) +
ylim(0, max(elascer(user_dc_df[user_dc_df$user_id == user, "PRO_value"]), na.rm = TRUE)) +
geom_vline(xintercept = user_dc_df[user_dc_df$complexity_peaks > 0, "date"], col="red", lty=2) +
scale_x_date(breaks = date_selection,
labels = c("m00", "m002", "m03", "m06", "m09", "m12")[1:length(date_selection)]) +
labs(x = "Date", y = "Measure Change (Scaled)") +
scale_color_manual("Outcome Measures",
                   breaks = names(custom_cmap),
                   values = custom_cmap) +
# scale_size_manual(values = c(rep(0.3, 4), 1.5, rep(0.3, 7))) +
theme_minimal() +
# theme(axis.text.x = element_text(angle = 0), legend.title = element_blank())
theme(axis.text.x = element_text(angle = 60, hjust = 0.9, size=10),
axis.text.y = element_text(size=10), legend.title = element_blank(),
plot.title = element_text(hjust = 0.5)) +
ggtitle(paste("Change in time series data for user ==", user))
print(plot)
ggsave(paste0("/home/james/Data_Science/data-science-project/research-papers/2020-NLTSA/latex/Images/", user, "_main_results.eps"))
# dev.off()
}
}
[1] "mri_change"
[1] "mri_no_change"
[1] "hc_users"
[1] "For user == 381 there were ZERO complexity peaks"
[1] "For user == 391 there were ZERO complexity peaks"
[1] "For user == 403 there were ZERO complexity peaks"
[1] "For user == 404 there were ZERO complexity peaks"
[1] "For user == 405 there were ZERO complexity peaks"
[1] "For user == 423 there were ZERO complexity peaks"
[1] "For user == 439 there were ZERO complexity peaks"
[1] "For user == 448 there were ZERO complexity peaks"
for (users_list_number in 1:length(list_of_user_index_lists)) {
user_list_name<-names(list_of_user_index_lists[users_list_number])
users_selection_index<-list_of_user_index_lists[[user_list_name]]
print(user_list_name)
# use the chosen index to get a list of users
user_selection <- aumc_df$user_id[users_selection_index]
## Show missingness percentages
all_missing_per_user <- data.frame()
for (user in user_selection) {
current_user_missing <- data.frame(user_id = user, t(colMeans(is.na(key_df[key_df$user_id == user, feature_selection]))*100))
all_missing_per_user <- rbind(all_missing_per_user, current_user_missing)
}
print("All percentage missingness per user per feature")
print(all_missing_per_user)
print("Median percentage missingness per user")
median_percent_miss <- data.frame(user_id = user_selection, median_percentage_missing = rowMedians(as.matrix(all_missing_per_user[, feature_selection])))
print(median_percent_miss)
print("Median of the Median percentage missingness per user")
print(median(median_percent_miss$median_percentage_missing, na.rm=TRUE))
cat("\n")
}
[1] "mri_change"
[1] "All percentage missingness per user per feature"
| user_id | post_correction_slowing_MEAN_2CD | pre_correction_slowing_MEAN_2CD | hold_time_STD |
|---|---|---|---|
| 368 | 2.1447721 | 2.1447721 | 0.5361930 |
| 377 | 0.2710027 | 0.2710027 | 0.0000000 |
| 380 | 1.2944984 | 1.2944984 | 0.3236246 |
| 383 | 0.2597403 | 0.2597403 | 0.0000000 |
| 389 | 0.3164557 | 0.3164557 | 0.0000000 |
| 393 | 24.8603352 | 24.3016760 | 1.9553073 |
| 398 | 4.8710602 | 4.8710602 | 0.2865330 |
| 409 | 2.6058632 | 2.2801303 | 0.0000000 |
| 410 | 0.0000000 | 0.0000000 | 0.0000000 |
| 424 | 0.0000000 | 0.0000000 | 0.0000000 |
[1] "Median percentage missingness per user"
| user_id | median_percentage_missing |
|---|---|
| 368 | 0.5361930 |
| 377 | 0.2710027 |
| 380 | 1.2944984 |
| 383 | 0.2597403 |
| 389 | 0.0000000 |
| 393 | 10.8938547 |
| 398 | 1.4326648 |
| 409 | 1.9543974 |
| 410 | 0.0000000 |
| 424 | 0.0000000 |
[1] "Median of the Median percentage missingness per user"
[1] 0.536193
[1] "mri_no_change"
[1] "All percentage missingness per user per feature"
| user_id | post_correction_slowing_MEAN_2CD | pre_correction_slowing_MEAN_2CD | hold_time_STD |
|---|---|---|---|
| 364 | 7.0938215 | 7.3226545 | 0.0000000 |
| 365 | 4.9645390 | 4.9645390 | 0.3546099 |
| 367 | 79.7752809 | 79.4943820 | 14.8876404 |
| 369 | 3.4120735 | 3.4120735 | 0.0000000 |
| 382 | 2.2522523 | 2.2522523 | 0.0000000 |
| 386 | 0.0000000 | 0.0000000 | 0.0000000 |
| 387 | 4.6783626 | 4.6783626 | 1.4619883 |
| 388 | 12.6050420 | 12.6050420 | 0.2801120 |
| 390 | 0.0000000 | 0.0000000 | 0.0000000 |
| 396 | 8.0691643 | 8.6455331 | 0.5763689 |
[1] "Median percentage missingness per user"
| user_id | median_percentage_missing |
|---|---|
| 364 | 5.0343249 |
| 365 | 3.1914894 |
| 367 | 54.4943820 |
| 369 | 0.0000000 |
| 382 | 0.4504505 |
| 386 | 0.0000000 |
| 387 | 3.5087719 |
| 388 | 8.4033613 |
| 390 | 0.0000000 |
| 396 | 5.4755043 |
[1] "Median of the Median percentage missingness per user"
[1] 2.036246
[1] "hc_users"
[1] "All percentage missingness per user per feature"
| user_id | post_correction_slowing_MEAN_2CD | pre_correction_slowing_MEAN_2CD | hold_time_STD |
|---|---|---|---|
| 372 | 4.000000 | 4.000000 | 1.000000 |
| 373 | 14.000000 | 14.000000 | 0.000000 |
| 376 | 0.000000 | 0.000000 | 0.000000 |
| 381 | 1.219512 | 1.219512 | 0.000000 |
| 384 | 3.125000 | 3.125000 | 0.000000 |
| 391 | 3.750000 | 3.750000 | 0.000000 |
| 402 | 2.752294 | 2.752294 | 0.000000 |
| 403 | 0.000000 | 0.000000 | 0.000000 |
| 404 | 6.862745 | 6.862745 | 0.000000 |
| 405 | 35.955056 | 35.955056 | 4.494382 |
[1] "Median percentage missingness per user"
| user_id | median_percentage_missing |
|---|---|
| 372 | 3.0000000 |
| 373 | 7.0000000 |
| 376 | 0.0000000 |
| 381 | 0.0000000 |
| 384 | 1.0416667 |
| 391 | 1.2500000 |
| 402 | 0.9174312 |
| 403 | 0.0000000 |
| 404 | 0.9803922 |
| 405 | 25.8426966 |
[1] "Median of the Median percentage missingness per user"
[1] 1.011029
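# As a sketch, the per-user missingness summary above could also be built in a single
# pipeline (dplyr 0.8 syntax; per_user_missing is a hypothetical name, user_selection as
# left by the loop above):
# per_user_missing <- key_df %>%
#   filter(user_id %in% user_selection) %>%
#   group_by(user_id) %>%
#   summarise_at(vars(one_of(feature_selection)), ~ mean(is.na(.)) * 100) %>%
#   as.data.frame()
# per_user_missing$median_percentage_missing <-
#   rowMedians(as.matrix(per_user_missing[, feature_selection]))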