# This script contains the code that was used to create the sankey diagram
# illustrating latent class transitions.
# See https://github.com/davidsjoberg/ggsankey
# Script author: Carolina Wannheden, carolina.wannheden@ki.se
# Updated: 2022-07-01

# Set working directory to the directory of this script, so that the relative
# path to the input CSV resolves. The RStudio API only works inside a running
# RStudio session and only returns a path for a saved document; in any other
# situation (e.g. Rscript, unsaved file) the working directory is left as is
# instead of aborting the script.
in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
  rstudioapi::isAvailable()
if (in_rstudio) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}

# Install the ggsankey package from github
# install.packages("devtools")
# devtools::install_github("davidsjoberg/ggsankey")

# Libraries
library(ggplot2)
library(tidyverse)
library(dplyr)
library(viridis)
library(ggsankey)

# Helpers ----

# Keep the rows of one time point, attach the profile label for each class
# and build a node id of the form "<node>.<time point>".
#   long_dat:     long-format data as returned by make_long()
#   time_point:   value of column `x` to keep, e.g. "T1"
#   class_labels: data frame with columns `class` and `label`
label_time_point <- function(long_dat, time_point, class_labels) {
  long_dat %>%
    filter(x == time_point) %>%
    left_join(class_labels, by = c("node" = "class")) %>%
    mutate(node_id = as.factor(paste0(node, ".", x)))
}

# Count the rows per node at one time point and build the "n=.. (..%)" suffix
# that is later appended to the profile label.
#   labelled_dat: output of label_time_point()
#   time_point:   stored in column `x` of the result, e.g. "T1"
#   n_total:      denominator used for the proportions
describe_time_point <- function(labelled_dat, time_point, n_total) {
  labelled_dat %>%
    count(node, name = "c_count") %>%
    mutate(
      x = time_point,
      c_prop = formattable::percent(c_count / n_total),
      label_stats = paste0(
        "\n n=", c_count,
        " (", formattable::percent(c_count / n_total, digits = 0), ")"
      )
    )
}

# Data ----

# Import LPA results
df <- read.csv("lpa-results_docca.csv")

# Create transition dataframe
df_transition <- df %>%
  select(T1 = Class.t1, T2 = Class.t2)

# Recode to add a dummy variable at T1 - this way the colors will match for
# profiles with the same name at T1 and T2. Every T1 class is shifted up by
# one so that class "1" is reserved for the dummy.
df_transition$T1 <- recode(
  df_transition$T1,
  "5" = "1", # dummy variable
  "1" = "2",
  "2" = "3",
  "3" = "4",
  "4" = "5"
)

class_labels_t1 <- data.frame(
  class = as.character(1:5),
  label = c(
    "Dummy 1", # dummy variable
    "Strained",
    "Neutral",
    "Supportive",
    "Optimal"
  )
)

class_labels_t2 <- data.frame(
  class = as.character(1:5),
  label = c(
    "Unsupportive",
    "Strained",
    "Neutral",
    "Supportive",
    "Optimal"
  )
)

# Create sankey plot data
sankey_dat <- df_transition %>%
  make_long(T1, T2)

# Add profile labels for T1 and T2
sankey_dat_t1 <- label_time_point(sankey_dat, "T1", class_labels_t1)
sankey_dat_t2 <- label_time_point(sankey_dat, "T2", class_labels_t2)

# Bind sankey data for T1 and T2
sankey_dat <- bind_rows(sankey_dat_t1, sankey_dat_t2)

# Create profile descriptions for T1 and T2
n_total <- nrow(df_transition)
class_descr_t1 <- describe_time_point(sankey_dat_t1, "T1", n_total)
class_descr_t2 <- describe_time_point(sankey_dat_t2, "T2", n_total)

# Bind profile descriptions for T1 and T2. `.keep = "unused"` drops `node`
# and `x`, which would otherwise clash with the same columns in sankey_dat
# when joining below.
class_descr <- rbind(class_descr_t1, class_descr_t2) %>%
  mutate(node_id = paste0(node, ".", x), .keep = "unused")

# Join class descriptions with the sankey data and merge the profile label
# and its statistics into a single label
sankey_dat <- sankey_dat %>%
  left_join(class_descr, by = "node_id") %>%
  unite("long_label", c(label, label_stats), sep = "")

# Plot ----

# Set the positions for the labels (horizontal nudge).
# NOTE(review): nine values, presumably one per plotted node - confirm if the
# number of profiles changes.
hjust <- c(.1, -.1, .1, -.1, .1, -.1, .1, -.1, .1)

# Sankey plot
plot_sankey <- ggplot(
  sankey_dat,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    fill = factor(node),
    label = long_label
  )
) +
  geom_sankey(flow.alpha = .6) +
  geom_sankey_text(
    size = 3,
    color = "black",
    hjust = "outward",
    position = position_nudge(x = hjust)
  ) +
  scale_fill_viridis_d() +
  theme_sankey(base_size = 10) +
  labs(x = NULL) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .5),
    plot.margin = margin(0, 0, 0, 0, "cm")
  )

plot_sankey