Hot questions for Using Ggplot2 in gghighlight

Top 10 R Open Source / Ggplot2 / gghighlight

Question:

Sample of my dataset:

# Sample data: three CCGs, each with one value for metrics "a" to "d".
test <- structure(
  list(
    CCG = rep(
      c(
        "NHS DARLINGTON CCG",
        "NHS DURHAM DALES, EASINGTON AND SEDGEFIELD CCG",
        "NHS GATESHEAD CCG"
      ),
      each = 4L
    ),
    value = c(
      0.98, 0.97, 0.97, 0.94,
      0.96, 0.96, 0.96, 0.94,
      0.93, 0.92, 0.93, 0.94
    ),
    metric = rep(c("a", "b", "c", "d"), times = 3L)
  ),
  row.names = c(NA, -12L),
  class = c("tbl_df", "tbl", "data.frame")
)

I'm trying to use the gghighlight package to highlight selected lines in my plot, as featured on the creator's site.

# Baseline plot: every CCG drawn as its own coloured line; legend hidden.
testplot <- ggplot(
  data = test,
  mapping = aes(x = metric, y = value, group = CCG, colour = CCG)
) +
  geom_line() +
  theme(legend.position = "none")
testplot

This works fine if I want to colour all of my lines; however, when I try to incorporate the gghighlight function into my graph, I get the following message:

# Same plot with a gghighlight() layer added so that only the
# "NHS DARLINGTON CCG" line stays highlighted, with direct labels switched off.
# This is the version that produces the geom_path message quoted below.
testplot <- test %>% 
  ggplot(aes(x=metric, y=value, group=CCG, colour=CCG)) + 
  geom_line() + 
  gghighlight(CCG == "NHS DARLINGTON CCG", use_direct_label = FALSE) +
  theme(legend.position="none")
testplot

geom_path: Each group consist of only one observation. Do you need to adjust the group aesthetic?

The selected line from the gghighlight call shows up but the rest have now disappeared instead of being faintly grey. I've already specified the group argument so I don't know what's going wrong.


Answer:

I guess the issue is related to the discrete scale in your plot and my attempt doesn't solve that but offers a workaround.

library(tidyverse)
library(gghighlight)
# Workaround for the discrete x axis: plot a numeric position for each metric
# and relabel the axis with the metric names.
metric_levels <- unique(test$metric)

test %>%
  # Derive the position from the metric value itself rather than from the row
  # order within each group, so shuffled or incomplete groups still line up.
  mutate(idx = match(metric, metric_levels)) %>%
  ggplot(aes(x = idx, y = value, group = CCG, colour = CCG)) +
  geom_line() +
  gghighlight(CCG == "NHS DARLINGTON CCG", use_direct_label = FALSE) +
  # Pin the breaks to the labels: labels supplied without matching breaks fail
  # whenever the automatically chosen breaks differ in number from the labels.
  scale_x_continuous(
    breaks = seq_along(metric_levels),
    labels = metric_levels
  ) +
  # Keep the original axis title instead of the helper column name.
  labs(x = "metric") +
  theme(legend.position = "none")

I simply created a numeric variable, idx, plotted it on the x-axis and used unique(test$metric) as axis labels in scale_x_continuous. Hope this helps.

Question:

I have a data frame that looks like this:

 rowname  Class Sec    ES.2um Mean_WPBs   ES.2um_ZS   Mean_ES  VWF_Sec    name
       1 Formin HAI 113.37340  147.1792  0.16078492 131.69309 162.5219  DIAPH1
       2 Formin HAI  43.90661  121.9017 -0.11594028  75.37296 137.4212    FMN2
       3 Septin HAI  64.32138  132.7591 -0.16218581  66.23765 195.9011 SEPTIN5
       4 Septin HAI  53.15791  145.7871 -0.86969449  81.92690 187.2647   LRCH3
       5 Arp2/3 HAI  68.67222  161.0516 -0.05404113  82.51804 158.2623   ARPC3
       6 Arp2/3 HAI  71.00643  149.0704 -0.38119473  82.91458 220.5494   WASF3

and am currently using gghighlight to identify/highlight a class of proteins; look at the code below:

# Scatter of secretion against exit-site Z-score, highlighting the Formin
# class and labelling the highlighted points by protein name.
plot_ESZ_lab <- ggplot(
  data = df,
  mapping = aes(x = ES.2um_ZS, y = VWF_Sec, color = Sec, shape = Sec)
) +
  geom_point(mapping = aes(size = Mean_ES)) +
  scale_size_continuous(range = c(0.5, 10)) +
  scale_color_manual(values = c(HAI = "blue", PMA = "red")) +
  gghighlight(
    Class == "Formin",
    use_direct_label = TRUE,
    label_key = name,
    label_params = list(size = 2)
  ) +
  labs(
    x = "Mean Exit Site Z-Score",
    y = "Secretion",
    title = "Formin Highlighted"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

I would also like to highlight just 2 or 3 proteins using their names; this is what I have tried:

# Attempt to highlight two proteins by name.
# NOTE: "DIAPH1" is passed to gghighlight() as a separate positional argument
# here; it is not part of the `name == "FMN2"` comparison.
plot_ESZ_lab <-ggplot(df, aes(ES.2um_ZS, VWF_Sec, color = Sec, shape = Sec)) + 
               geom_point(aes(size = Mean_ES)) + 
               scale_size_continuous(range=c(0.5,10))+ 
               scale_color_manual(values=c("HAI" = "blue", "PMA" = "red")) + 
               gghighlight(Class == "Formin", name == "FMN2", "DIAPH1", 
                           use_direct_label = TRUE, label_key = name,
                           label_params = list(size=2)) + 
               xlab("Mean Exit Site Z-Score") + ylab("Secretion") +
               ggtitle("Formin Highlighted") + 
               theme_bw() + theme(plot.title = element_text(hjust =0.5))

but only the first name provided to gghighlight (i.e. FMN2) is ever plotted. How can I get more than 1 point to be plotted, i.e. in this case FMN2 and DIAPH1?


Answer:

In ggplot2, as in almost all R functions, a comma (,) separates different arguments; you cannot use it to supply several values to the same comparison. Instead, write name %in% c("FMN2", "DIAPH1"), which means "name is either FMN2 or DIAPH1". The code below works:

# Highlight only the Formin proteins named FMN2 or DIAPH1 and label them by
# name; all predicates passed to gghighlight() must hold for a point.
ggplot(
  data = df,
  mapping = aes(x = ES.2um_ZS, y = VWF_Sec, color = Sec, shape = Sec)
) +
  geom_point(mapping = aes(size = Mean_ES)) +
  scale_size_continuous(range = c(0.5, 10)) +
  scale_color_manual(values = c(HAI = "blue", PMA = "red")) +
  gghighlight(
    Class == "Formin",
    name %in% c("FMN2", "DIAPH1"),
    use_direct_label = TRUE,
    label_key = name,
    label_params = list(size = 2)
  ) +
  labs(
    x = "Mean Exit Site Z-Score",
    y = "Secretion",
    title = "Formin Highlighted"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

            

Data:

    # Example data: six proteins, two per class, all with Sec level "HAI".
    df <- structure(
      list(
        rowname = 1:6,
        Class = factor(
          rep(c("Formin", "Septin", "Arp2/3"), each = 2L),
          levels = c("Arp2/3", "Formin", "Septin")
        ),
        Sec = factor(rep("HAI", 6L), levels = "HAI"),
        ES.2um = c(
          113.3734, 43.90661, 64.32138, 53.15791, 68.67222, 71.00643
        ),
        Mean_WPBs = c(
          147.1792, 121.9017, 132.7591, 145.7871, 161.0516, 149.0704
        ),
        ES.2um_ZS = c(
          0.16078492, -0.11594028, -0.16218581,
          -0.86969449, -0.05404113, -0.38119473
        ),
        Mean_ES = c(
          131.69309, 75.37296, 66.23765, 81.9269, 82.51804, 82.91458
        ),
        VWF_Sec = c(
          162.5219, 137.4212, 195.9011, 187.2647, 158.2623, 220.5494
        ),
        name = factor(
          c("DIAPH1", "FMN2", "SEPTIN5", "LRCH3", "ARPC3", "WASF3"),
          levels = c("ARPC3", "DIAPH1", "FMN2", "LRCH3", "SEPTIN5", "WASF3")
        )
      ),
      class = "data.frame",
      row.names = c("1", "2", "3", "4", "5", "6")
    )

Question:

I'm very new to R and I'm looking to highlight certain groups in my plot using gghighlight. For example, I want to highlight in red only groups 16 and 32 while leaving the rest of the lines in gray. My current code and output are below. I can't seem to highlight specific groups, I can only add color to all lines. I've tried gghighlight(grp==16) to try and highlight 16 but it says "Tried to calculate with group_by(), but the calculation failed".

library(readxl)
library(ggplot2)
library(gghighlight)

# Read the first sheet of the workbook into `guidelines`.
guidelines <- read_excel("data.xlsx", sheet=1)
# Fix the order of the steps on the x axis: First, Highest, Final.
guidelines$step <- factor(guidelines$step, levels=c("First", "Highest", "Final"))

# One line per `grp`, coloured by group. Two line layers are added: a plain
# one and a second one jittered vertically by 0.05.
map <- ggplot(guidelines,
              aes(x = step, y = type, group = grp, color = factor(grp))) +
scale_color_hue(l=45) +
geom_line(linetype = 1) +
geom_line(position=position_jitter(w=0, h=0.05)) 


# Print the plot with y breaks at 1 to 4 ...
map + scale_y_continuous(breaks = c(1,2,3,4))
# ... and print it again with those breaks labelled "Method 1" to "Method 4".
map + scale_y_continuous(breaks = c(1,2,3,4),
                         labels = c("Method 1", "Method 2", "Method 3", "Method 4"))


Answer:

I tried to follow your approach and did the following. I am not familiar with gghighlight(); from a quick glance, I think you need to supply logical checks to highlight certain groups, so if you can set up such checks you should be able to use the function. In your question, you simply mentioned that you want to highlight specific groups. One way to do that is to assign the colors yourself and use scale_color_identity(). I created sample data below and assigned red to three groups (i.e., 1, 3, 5) in mutate(); the other groups get gray50 in the column color.

library(tidyverse)

# Example data: five groups, each observed at the three steps.
guidelines <- tibble(
  step = rep(c("First", "Highest", "Final"), times = 5),
  type = c(2, 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 1, 3, 3),
  grp = rep(1:5, each = 3)
)

# Order the steps for the x axis and store a literal colour per row: red for
# the groups to emphasise (1, 3, 5), gray50 for everything else.
guidelines <- mutate(
  guidelines,
  step = factor(step, levels = c("First", "Highest", "Final")),
  color = if_else(grp %in% c(1, 3, 5), "red", "gray50")
)

ggplot(guidelines, aes(x = step, y = type, color = color, group = grp)) +
  # Small vertical jitter so overlapping lines can be told apart.
  geom_line(
    linetype = 1,
    position = position_jitter(width = 0, height = 0.05)
  ) +
  # scale_color_identity() draws the values of `color` as they are. Naming
  # the breaks ties each legend label to its colour instead of relying on the
  # order in which the scale happens to sort the values.
  scale_color_identity(
    guide = "legend",
    breaks = c("gray50", "red"),
    labels = c("Other groups", "Group 1, 3, 5")
  ) +
  scale_y_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Method 1", "Method 2", "Method 3", "Method 4")
  )

Potential solution with gghighlight

I do not have your data, so this idea may or may not work. I modified guidelines once more and created a new continuous variable, dummy, which I use for the x-axis. This is a bit of a twist, since the axis is supposed to show a categorical variable, but it is necessary if you want to use gghighlight. As far as I understand gghighlight, it needs one or more logical checks. Here, I wanted to highlight the lines whose maximum value is 3, which highlights group 5. Since you want both axes to look categorical, we have to modify the labels on the x- and y-axes. If you can set up logical checks for the two groups in your data (16 and 32), for example something like grp %in% c(16, 32), I believe you can use this idea.

library(gghighlight)

# Workaround from the answer: put a numeric position (`dummy`) on the x axis
# and relabel it with the step names further down.
guidelines <- mutate(
  guidelines,
  step = factor(step, levels = c("First", "Highest", "Final")),
  # Integer code of each step in the level order above: 1, 2, 3.
  dummy = as.integer(step),
  color = if_else(grp %in% c(1, 3, 5), "red", "gray50")
)

ggplot(guidelines, aes(x = dummy, y = type, color = color, group = grp)) +
  geom_line(size = 2, linetype = 1) +
  # Keep the groups whose largest `type` is 3 (group 5 in this sample data)
  # and label them with their group id.
  gghighlight(max(as.numeric(type)) == 3, label_key = grp) +
  labs(x = "Step", y = "Type") +
  # Show the step names and method names instead of the numeric positions.
  scale_x_continuous(
    breaks = c(1, 2, 3),
    labels = c("First", "Highest", "Final")
  ) +
  scale_y_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Method 1", "Method 2", "Method 3", "Method 4")
  )

Question:

I am plotting a bar graph using ggplot2 and highlighting particular bars using gghighlight.

But using gghighlight prints some label_key also in the output.

I want to remove the label_key printed on top plot.

Please help.

# Horizontal bar chart of `mean` per subdomain. Bars whose name contains
# "Domain" stay highlighted; the remaining bars are drawn in red.
# `plot` (the data) and `max` (a number used for the axis limit) are objects
# defined outside this snippet.
ggplot(data = plot, aes(x = subdomain_name, y = mean)) +
  geom_bar(
    stat = "identity", color = "blue", fill = "blue",
    width = nrow(plot) / 10
  ) +
  geom_text(aes(label = format(round(mean, 2))), hjust = 0) +
  coord_flip() +
  theme(
    axis.line = element_blank(), axis.line.x = element_blank(),
    axis.line.y = element_blank(), plot.margin = unit(c(-0.6, 1, 1, 1), "cm"),
    panel.background = element_blank(), panel.border = element_blank(),
    axis.title.x = element_blank(), axis.title.y = element_blank()
  ) +
  # The `+` has to end the previous line: a line that starts with `+` is
  # parsed as a separate expression and its layers never reach the plot.
  ylim(0, max + (0.05 * max)) +
  gghighlight(
    grepl("Domain", subdomain_name),
    unhighlighted_colour = alpha("red", 1),
    # Attempted fix from the question; the labels are still drawn with this.
    label_key = NULL
  )


Answer:

In this case, you need use_direct_label = FALSE.

library(ggplot2)
library(gghighlight)

# Toy data: three subdomains whose names contain "Domain" plus one that does
# not, so the highlight predicate has something to exclude.
# Named `bar_data` / `max_mean` so the base functions plot() and max() are
# not shadowed.
bar_data <- data.frame(
  # paste() already inserts a space, so the prefix carries no trailing one.
  subdomain_name = c(paste("Domain", letters[1:3]), "foo"),
  mean = 1:4
)

# Largest bar value, taken from the data; used to leave 5% headroom on the
# value axis for the text labels.
max_mean <- max(bar_data$mean)

ggplot(data = bar_data, aes(x = subdomain_name, y = mean)) +
  geom_col(color = "blue", fill = "blue", width = nrow(bar_data) / 10) +
  geom_text(aes(label = format(round(mean, 2))), hjust = 0) +
  coord_flip() +
  theme(
    axis.line = element_blank(), axis.line.x = element_blank(),
    axis.line.y = element_blank(), plot.margin = unit(c(-0.6, 1, 1, 1), "cm"),
    panel.background = element_blank(), panel.border = element_blank(),
    axis.title.x = element_blank(), axis.title.y = element_blank()
  ) +
  ylim(0, max_mean + (0.05 * max_mean)) +
  gghighlight(grepl("Domain", subdomain_name),
    unhighlighted_colour = alpha("red", 1),
    # Switch off the direct labels that gghighlight adds by default.
    use_direct_label = FALSE
  )

Created on 2018-12-23 by the reprex package (v0.2.1)