Hot questions for Using Ggplot2 in ggpubr

Question:

I'm working with 4 different plots and I'm using ggarrange() from the ggpubr-package to put them in a single plot. I've prepared an example:

library(ggpubr)
library(ggplot2)

p1 <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point() + ggtitle("Plot 1")
p2 <- ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) + geom_point() + ggtitle("Plot 2")
p3 <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Width)) + geom_point() + ggtitle("Plot 3")
p4 <- ggplot(iris, aes(x = Petal.Length, y = Sepal.Width)) + geom_point() + ggtitle("Plot 4") +
  facet_wrap(~Species)

plot.list <- list(p1, p2, p3, p4)

ggarrange(plotlist = plot.list)

Output:

I would like to draw a border around the single plots, like so:

Is there any way to draw this border? Thanks!


Answer:

grid.polygon() is quite manual but I think it can do the trick:

Using RStudio

library("ggpubr")
library(ggplot2)
library(gridExtra)
library(grid)
p1 <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point() + ggtitle("Plot 1")
p2 <- ggplot(iris, aes(x = Petal.Length, y = Petal.Width)) + geom_point() + ggtitle("Plot 2")
p3 <- ggplot(iris, aes(x = Sepal.Length, y = Petal.Width)) + geom_point() + ggtitle("Plot 3")
p4 <- ggplot(iris, aes(x = Petal.Length, y = Sepal.Width)) + geom_point() + ggtitle("Plot 4") +
  facet_wrap(~Species)

plot.list <- list(p1, p2, p3, p4)

ggarrange(plotlist = plot.list)
# Draw a cross through the centre of the page on top of the arranged plots.
# Each "polygon" below is degenerate (its three points are collinear), so it
# is rendered as a straight line: id 1 runs left-to-right at y = 0.5 and
# id 2 runs bottom-to-top at x = 0.5.
x <- c(0, 0.5, 1, 0.5, 0.5, 0.5)
y <- c(0.5, 0.5, 0.5, 0, 0.5, 1)
id <- rep(c(1, 2), each = 3)
grid.polygon(x = x, y = y, id = id)

Using Shiny (Edit)

When doing it within a Shiny app, one needs to add the grid using annotation_custom(), as follows:

    # Add the divider lines to the arranged figure as a custom annotation.
    # annotation_custom() expects a grob, so the lines are built with
    # polygonGrob(), which only constructs the object. grid.polygon() would
    # additionally draw it straight onto the current device as a side effect.
    # The two id groups are collinear point triples, i.e. one horizontal and
    # one vertical line through the centre of the figure.
    ggarrange(plotlist = plot.list) + 
    annotation_custom(
             polygonGrob(x = c(0, 0.5, 1, 0.5, 0.5, 0.5),
                         y = c(0.5, 0.5, 0.5, 0, 0.5, 1),
                         id = c(1, 1, 1, 2, 2, 2),
                         gp = gpar(lwd = 1.5)))

Question:

I am trying to add significance levels to my boxplots in the form of asterisks using ggplot2 and the ggpubr package, but I have many comparisons and I only want to show the significant ones.

I try to use the option hide.ns=TRUE in stat_compare_means, but it clearly does not work, it might be a bug in the ggpubr package.

Besides, you see that I leave out group "PGMC4" from the pairwise wilcox.test comparisons; how can I leave this group out also for the kruskal.test?

The last question I have is how the significance level works? As in * is significant below 0.05, ** below 0.025, *** below 0.01? what is the convention ggpubr uses? Is it showing p-values or adjusted p-values? If the latter, what's the adjusting method? BH?

Please check my MWE below and this link and this other one for reference

##############################
##MWE
# Fix the RNG seed so the sampled IDs and values are reproducible.
set.seed(5)
# Test data frame with 163 rows: a random letter+number ID, a Group label
# (nine groups of unequal size; 'PGMC4' has a single row) and a
# standard-normal Value. Note that 'NA' here is a literal group name
# (a string), not a missing value.
mydf <- data.frame(ID=paste(sample(LETTERS, 163, replace=TRUE), sample(1:1000, 163, replace=FALSE), sep=''),
                   Group=c(rep('C',10),rep('FH',10),rep('I',19),rep('IF',42),rep('NA',14),rep('NF',42),rep('NI',15),rep('NS',10),rep('PGMC4',1)),
                   Value=rnorm(n=163))
# PGMC4 is left out of the pairwise comparisons because it has only one sample.
groups <- as.character(unique(mydf$Group[which(mydf$Group!="PGMC4")]))
#function to make combinations of groups without repeating pairs, and avoiding self-combinations
expand.grid.unique <- function(x, y, include.equals=FALSE){
    # Build a two-column matrix of unordered pairs (x[i], y[j]) such that a
    # pair never appears twice in swapped order.
    #   x, y            vectors of values; duplicates are dropped first
    #   include.equals  if TRUE, pairs of a value with itself are kept
    # Returns the row-bound matrix of pairs, or NULL when no pair exists.
    left <- unique(x)
    right <- unique(y)
    pairs_for <- function(idx){
        # Entries of `left` handled at earlier positions (and the current
        # one, unless self-pairs are requested) are removed from the
        # candidate partners so that each pair is produced only once.
        already_used <- left[seq_len(idx - include.equals)]
        partners <- setdiff(right, already_used)
        if (length(partners) == 0) {
            return(NULL)
        }
        cbind(left[idx], partners, deparse.level=0)
    }
    per_index <- lapply(seq_along(left), pairs_for)
    do.call(rbind, per_index)
}
# All pairs of groups to compare, one pair per row (two columns).
combs <- as.data.frame(expand.grid.unique(groups, groups), stringsAsFactors=FALSE)
head(combs)
# Transpose so that each pair becomes one column, then strip the dimnames so
# that as.list() below yields an unnamed list of length-2 character vectors.
my.comps <- as.data.frame(t(combs), stringsAsFactors=FALSE)
colnames(my.comps) <- NULL
rownames(my.comps) <- NULL
# Pairs to compare, in the list format passed to stat_compare_means(comparisons=).
my.comps <- as.list(my.comps)
head(my.comps)
# Render the plot into test.pdf; the device is closed by dev.off() below.
pdf(file="test.pdf", height=20, width=25)
# NOTE(review): `myPal` (used in scale_fill_manual) is not defined in the
# visible snippet; it must exist before this runs.
# NOTE(review): recent ggplot2 releases deprecate stat_summary(fun.y=) in
# favour of fun= -- confirm against the installed version.
print(#or ggsave()
  ggplot(mydf, aes(x=Group, y=Value, fill=Group)) + geom_boxplot() +
    stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
    scale_fill_manual(values=myPal) +
    ggtitle("TEST TITLE") +
    theme(plot.title = element_text(size=30),
      axis.text=element_text(size=12),
      axis.text.x = element_text(angle=45, hjust=1),
      axis.ticks = element_blank(),
      axis.title=element_text(size=20,face="bold"),
      legend.text=element_text(size=16)) +
  stat_compare_means(comparisons=my.comps, method="wilcox.test", label="p.signif", size=14) + #WHY DOES hide.ns=TRUE NOT WORK??? WHY DOES size=14 NOT WORK???
  stat_compare_means(method="kruskal.test", size=14) #GLOBAL COMPARISON ACROSS GROUPS (HOW TO LEAVE PGMC4 OUT OF THIS??)
)
dev.off()
##############################

The MWE will produce the following boxplots:

The questions would be:

1- How to make hide.ns=TRUE work?

2- How to increase the size of the *?

3- How to exclude a group from the kruskal.test comparison?

4- What is the * convention used by ggpubr, and are the p-values shown adjusted or not?

Many thanks!!

EDIT

Besides, when doing

stat_compare_means(comparisons=my.comps, method="wilcox.test", p.adjust.method="BH")

I do not obtain the same p-values as when doing

wilcox.test(Value ~ Group, data=mydf.sub)$p.value

where mydf.sub is a subset() of mydf for a given comparison of 2 groups.

What is ggpubr doing here? How does it calculate the p.values?

EDIT 2

Please help, the solution does not have to be with ggpubr (but it has to be with ggplot2), I just need to be able to hide the NS and make the size of the asterisks bigger, as well as a p-value calculation identical to wilcox.test() + p.adjust(method = "BH").

Thanks!


Answer:

You can try the following. The idea is that you calculate the stats on your own using pairwise.wilcox.test. Then you use the ggsignif function geom_signif to add the precalculated p-values. With y_position you can place the brackets so they don't overlap.

library(tidyverse)
library(ggsignif)
library(broom)
# List of all pairwise combinations to compare, excluding "PGMC4".
# factor() is applied explicitly: if Group is a character column (the
# data.frame() default since R 4.0), levels() on it would return NULL and no
# combinations would be produced. Excluding the group by name, rather than by
# position, also avoids relying on "PGMC4" being the 9th level.
CN <- combn(setdiff(levels(factor(mydf$Group)), "PGMC4"), 2, simplify = FALSE)
# The p-values. broom::tidy() turns the pairwise test result into a data frame
# with columns group1, group2 and p.value. Note that the adjustment of the
# p-values is turned off here.
pv <- tidy(with(mydf[ mydf$Group != "PGMC4", ], pairwise.wilcox.test(Value, Group, p.adjust.method = "none")))
# Data preparation: one row per comparison, with the same group column names
# as `pv` so the two can be merged.
CN2 <- do.call(rbind.data.frame, CN)
colnames(CN2) <- colnames(pv)[-3]
# Subset the p-values by merging with the comparison list. The key columns
# are swapped because the pairs in CN2 list the two groups in the opposite
# order to the group1/group2 columns of `pv`.
pv_final <- merge(CN2, pv, by.x = c("group2", "group1"), by.y = c("group1", "group2"))
# fix ordering
pv_final <- pv_final[order(pv_final$group1), ] 
# Set the significance label: "" for p > 0.05, "*" for 0.01 < p <= 0.05,
# "**" for p <= 0.01.
pv_final$map_signif <- ifelse(pv_final$p.value > 0.05, "", ifelse(pv_final$p.value > 0.01,"*", "**"))  

# the plot
ggplot(mydf, aes(x=Group, y=Value, fill=Group)) + geom_boxplot() +
  stat_compare_means(data=mydf[ mydf$Group != "PGMC4", ], aes(x=Group, y=Value, fill=Group), size=5) + 
  ylim(-4,30)+
  geom_signif(comparisons=CN,
              y_position = 3:30, annotation= pv_final$map_signif) + 
  theme_bw(base_size = 16)

The arguments vjust, textsize, and size are not properly working. Seems to be a bug in the latest version ggsignif_0.3.0.


Edit: When you want to show only the significant comparisons, you can easily subset the dataset CN. Since I updated to ggsignif_0.4.0 and R version 3.4.1, vjust and textsize are working now as expected. Instead of y_position you can try step_increase.

# subset 
gr <- pv_final$p.value <= 0.05
CN[gr]

ggplot(mydf, aes(x=Group, y=Value, fill=Group)) + 
  geom_boxplot() +
  stat_compare_means(data=mydf[ mydf$Group != "PGMC4", ], aes(x=Group, y=Value, fill=Group), size=5) + 
  geom_signif(comparisons=CN[gr], textsize = 12, vjust = 0.7, 
             step_increase=0.12, annotation= pv_final$map_signif[gr]) + 
  theme_bw(base_size = 16)

You can use ggpubr as well. Add:

stat_compare_means(comparisons=CN[gr], method="wilcox.test", label="p.signif", color="red")

Question:

I would like to change the point denoting the mean in ggerrorplot to a horizontal line (similar to the line used to denote a median in a boxplot). I would like this line to be slightly thicker than the error bars.

I do not see an option to do so in the ggerrorplot documentation. Will I need to do some hacking and perhaps overlay a line outside of ggerrorplot?

# ToothGrowth data set available in R datasets
df <- ToothGrowth

# Examine first 10 rows
head(df, 10)
# len supp dose
# 1   4.2   VC  0.5
# 2  11.5   VC  0.5
# 3   7.3   VC  0.5
# 4   5.8   VC  0.5
# 5   6.4   VC  0.5
# 6  10.0   VC  0.5
# 7  11.2   VC  0.5
# 8  11.2   VC  0.5
# 9   5.2   VC  0.5
# 10  7.0   VC  0.5

require(ggpubr)

# Add mean, jitter points and error bars
ggerrorplot(df, x = "dose", y = "len",
            add = c("mean","jitter"), error.plot= "errorbar")

Answer:

Add a point layer with argument shape = 95 as shown by @hrbrmstr here: https://stackoverflow.com/a/39601572/8583393

p <- ggerrorplot(df, x = "dose", y = "len", 
            add = "jitter", # 'mean' and c() removed in this line
            error.plot = "errorbar")

p + stat_summary(
    geom = "point",
    shape = 95,
    size = 30,
    col = "red",
    fun.y = "mean")

I removed the pointrange layer which does not seem to be needed when you add the horizontal lines / bars.


In case you need control of the width of the horizontal lines, here is an option that uses geom_segment.

We calculate the y-axis values first

df_segment <- aggregate(len ~ dose, p$data, FUN = mean)

Then plot

p +
  geom_segment(
    data = transform(df_segment, dose = as.numeric(dose)),
    aes(
      x = dose - 0.1,
      xend = dose + 0.1,
      y = len,
      yend = len
    ),
    col = "red",
    size = 1
  )

Question:

How can I change the font size of stat_compare_means on the plot below? I.e, change the "Kruskal-Wallis, p = 1.5e-09" and the other p-values font size? I would like to use a smaller font size than the default one...

Following the data example...

library(ggpubr)
data("ToothGrowth")
compare_means(len ~ dose,  data = ToothGrowth)

# Visualize: Specify the comparisons I want
my_comparisons <- list( c("0.5", "1"), c("1", "2"), c("0.5", "2") )

# Plotting
ggboxplot(ToothGrowth, x = "dose", y = "len",
          color = "dose", palette = "jco")+ 
stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
stat_compare_means(label.y = 50)     # Add global p-value

Plot:


Answer:

your_font_size <- 2

p <- ggboxplot(ToothGrowth, x = "dose", y = "len", color = "dose", palette = "jco") + 
 stat_compare_means(comparisons = my_comparisons) + 
 stat_compare_means(label.y = 50, size = your_font_size)

p$layers[[2]]$aes_params$textsize <- your_font_size
p

The solution is a bit cumbersome but works. I couldn't find another way to overwrite the textsize parameter of the geom_signif layer that is created after the first call to stat_compare_means.

The parameter is stored here: p$layers[[2]]$aes_params$textsize and can manually be modified.

If you need this manipulation for another plot in which the order of the layers might differ from this example, you can use the which_layers function from the gginnards package to detect this layer (or any other) using the following code.

Thanks to @KGee for pointing out that the which_layers function was moved from the ggpmisc package to gginnards as of version 0.3.0.

library(gginnards)
which_layers(p, "GeomSignif")
## [1] 2

Change the textsize argument like shown above.

p$layers[[which_layers(p, "GeomSignif")]]$aes_params$textsize <- your_font_size

Question:

I have a dataframe df with the following data. I want to plot the logCPM expression of the gene between two groups A and B.

Samples  Type   GeneA
Sample1    B    14.82995162
Sample2    B    12.90512275
Sample3    B    9.196524783
Sample4    A    19.42866012
Sample5    A    19.70386922
Sample6    A    16.22906914
Sample7    A    12.48966785
Sample8    B    15.53280377
Sample9    A    9.345795955
Sample10    B   9.196524783
Sample11    B   9.196524783
Sample12    B   9.196524783
Sample13    A   9.434355615
Sample14    A   15.27604692
Sample15    A   18.90867329
Sample16    B   11.71503095
Sample17    B   13.7632545
Sample18    A   9.793864295
Sample19    B   9.196524783
Sample20    A   14.52562066
Sample21    A   13.85116605
Sample22    A   9.958492229
Sample23    A   17.57075876
Sample24    B   13.04499079
Sample25    B   15.33577937
Sample26    A   13.95849295
Sample27    B   9.196524783
Sample28    A   18.20524388
Sample29    B   17.7058873
Sample30    B   14.0199393
Sample31    A   16.21499069
Sample32    A   14.171432
Sample33    B   9.196524783
Sample34    B   9.196524783
Sample35    B   15.16648035
Sample36    B   12.9435081
Sample37    B   13.81971106
Sample38    B   15.82901231

I tried making a violin plot using ggviolin.

library("ggpubr")
pdf("eg.pdf", width = 5, height = 5)
p <- ggviolin(df, x = "Type", y = "GeneA", fill = "Type",
          color = "Type", palette = c("#00AFBB", "#FC4E07"),
          add="boxplot",add.params = list(fill="white"),
          order = c("A", "B"),
          ylab = "GeneA (logCPM)", xlab = "Groups")
ggpar(p, ylim = c(5,25))
dev.off()

I got the violin plot like this .

1) In this I don't see any whiskers and any points on violin.

2) Is there a way to show which point is which sample? like giving a different color to the point (for eg: I'm interested in Sample 10. I want to give different color to that point because I'm interested to see the expression of that)

Thank you


Answer:

May I suggest using elephant/raincloud or hybrid boxplot plots instead?

From the blog post linked above:

Violin plots mirror the data density in a totally uninteresting/uninformative way, simply repeating the same exact information for the sake of visual aesthetic.

In raincloud plot, we get basically everything we need: eyeballed statistical inference, assessment of data distributions (useful to check assumptions), and the raw data itself showing outliers and underlying patterns.

library(tidyverse)
library(ggrepel)

df <- read_table2(txt)

# create new variable for coloring & labeling `Sample10` pts
df <- df %>% 
  mutate(colSel = ifelse(Samples == 'Sample10', '#10', 'dummy'),
         labSel = ifelse(Samples == 'Sample10', '#10', ''))

# create summary statistics
sumld <- df %>%
  group_by(Type) %>%
  summarise(
    mean     = mean(GeneA, na.rm = TRUE),
    median   = median(GeneA, na.rm = TRUE),
    sd       = sd(GeneA, na.rm = TRUE),
    N        = n(),
    ci       = 1.96 * sd/sqrt(N),
    lower95  = mean - ci,
    upper95  = mean + ci,
    lower    = mean - sd,
    upper    = mean + sd) %>% 
  ungroup()
sumld
#> # A tibble: 2 x 10
#>   Type   mean median    sd     N    ci lower95 upper95 lower upper
#>   <chr> <dbl>  <dbl> <dbl> <int> <dbl>   <dbl>   <dbl> <dbl> <dbl>
#> 1 A      14.7   14.5  3.54    17  1.68    13.0    16.3 11.1   18.2
#> 2 B      12.4   12.9  2.85    21  1.22    11.2    13.6  9.54  15.2

raincloud plot

## get geom_flat_violin function
## https://gist.github.com/benmarwick/b7dc863d53e0eabc272f4aad909773d2
## mirror: https://pastebin.com/J9AzSxtF 
devtools::source_gist("2a1bb0133ff568cbe28d", filename = "geom_flat_violin.R")

pos <- position_jitter(width = 0.15, seed = 1)

p0 <- ggplot(data = df, aes(x = Type, y = GeneA, fill = Type)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
  guides(fill = FALSE) +
  guides(color = FALSE) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic()

# raincloud plot
p1 <- p0 + 
  geom_point(aes(color = Type), 
             position = pos, size = 3, alpha = 0.8) +
  geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA, alpha = 0.5)
p1

# coloring Sample10
p0 +
  geom_point(aes(color = colSel), 
             position = pos, size = 3, alpha = 0.8) +
  geom_text_repel(aes(label = labSel),
                  point.padding = 0.25,
                  direction = 'y',
                  position = pos) +
  geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA, alpha = 0.5) +
  scale_color_manual(values = c('dummy' = 'grey50', '#10' = 'red')) 

# errorbar instead of boxplot
p0 + 
  geom_point(aes(color = colSel), 
             position = pos, size = 3, alpha = 0.8) +
  geom_point(data = sumld, aes(x = Type, y = mean), 
             position = position_nudge(x = 0.3), size = 3.5) +
  geom_text_repel(aes(label = labSel),
                  point.padding = 0.25,
                  direction = 'y',
                  position = pos) +
  geom_errorbar(data = sumld, aes(ymin = lower95, ymax = upper95, y = mean), 
                position = position_nudge(x = 0.3), width = 0) +
  guides(fill = FALSE) +
  guides(color = FALSE) +
  scale_color_manual(values = c('dummy' = 'grey50', '#10' = 'red')) +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic()

hybrid boxplot using geom_boxjitter() from the ggpol package

## https://stackoverflow.com/a/49338481/ 
library(ggpol)

half_box <- ggplot(df) + geom_boxjitter(aes(x = Type, y = GeneA, 
                                            fill = Type, color = Type),
                                        jitter.shape = 21, jitter.color = NA, 
                                        jitter.height = 0, jitter.width = 0.04,
                                        outlier.color = NA, errorbar.draw = TRUE) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic()
half_box

Bonus: you can also replace geom_point() with geom_quasirandom() from the ggbeeswarm package. Here is one example.

. . . Created on 2018-10-03 by the reprex package (v0.2.1.9000)

Question:

I want to replace one of my grouped boxplots (below) to before-after kind, but keep it grouped. This one was made using ggboxplot() from ggpubr. I know there's also ggpaired() but I couldn't manage to make it grouped like this one.

Thanks to this question I was able to create grouped before-after graph like this one. I would now like to change the axis from 4 marks to just 2 (just "yes" and "no", since "before" and "after" are still in the legend.

Here's my code with dummy data:

library(tidyverse)

set.seed(123)
data.frame(ID = rep(LETTERS[1:10], 2),
           consent = rep(sample(c("Yes", "No"), 10, replace = T), 2),
           height = sample(rnorm(20, 170, sd = 10)),
           ind = rep(c("before", "after"), each = 2)
           ) %>%
  ggplot(aes(x = interaction(ind, consent), y = height, color = ind))+
  geom_point()+
  geom_line(aes(group = interaction(ID, consent)), color = "black")+
  scale_x_discrete("response")

Is it even possible to reduce number of categories on axis? Or can I create grouped plot using ggpaired(), but without using facets?


Answer:

A solution can be to create a dummy numeric variable (in between before and after) and put it on the x-axis. Then you can change its labels.

# Generate OP data
library(tidyverse)
# Fix the RNG seed so the sampled consent answers and heights are reproducible.
set.seed(123)
# Twenty rows: each of the ten IDs appears twice, with the same consent answer
# in both rows. `replace = TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): `ind` has length 4 and is recycled to 20 rows; each ID still
# ends up with exactly one "before" and one "after" row, but only because of
# how the recycling happens to line up with the ID layout.
df <- data.frame(ID = rep(LETTERS[1:10], 2),
           consent = rep(sample(c("Yes", "No"), 10, replace = TRUE), 2),
           height = sample(rnorm(20, 170, sd = 10)),
           ind = rep(c("before", "after"), each = 2)
           )
# Combined label, e.g. "Yes before", used as the key to look up the x position.
df$name <- paste(df$consent, df$ind)

# Generate dummy numeric variable for `name` combinations. The bare "Yes" and
# "No" entries sit between their before/after positions and are the values
# intended to carry the axis labels.
foo <- data.frame(name = c("Yes before", "Yes", "Yes after", 
                           "No before", "No", "No after"),
                  X = 1:6)
#         name X
# 1 Yes before 1
# 2        Yes 2
# 3  Yes after 3
# 4  No before 4
# 5         No 5
# 6   No after 6

And now we just need to map name to X and put it on x-axis:

df <- merge(foo, df)
ggplot(df, aes(X, height))+
    geom_point(aes(color = ind)) +
    geom_line(aes(group = interaction(ID, consent))) +
    scale_x_continuous(breaks = c(2, 5), labels = foo$name[c(2, 5)])

Question:

I would like to show the significance levels (*** or n.s.) as labels in my linear regression using ggpubr in R. This seems to be done by using aes(label = ..p.signif..) as posted here: https://www.r-bloggers.com/add-p-values-and-significance-levels-to-ggplots/

However, when I simply replace ..p.label.. with ..p.signif.. in my stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~"))) call, i.e. stat_cor(aes(label = paste(..rr.label.., ..p.signif.., sep = "~`,`~"))), nothing on my plot changes; I just get an error:

Error in paste(rr.label, p.signif, sep = "~`,`~") : 
  object 'p.signif' not found 

Please, how can I plot the stars (*, **, ***) or n.s. values instead of exact p-values on my plot? Thank you very much.

My dummy data: (borrowed from http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/78-perfect-scatter-plots-with-correlation-and-marginal-histograms/)


library(ggpubr)
data("mtcars")
df <- mtcars
df$cyl <- as.factor(df$cyl)

ggscatter(df, x = "wt", y = "mpg",
          add = "reg.line",                         # Add regression line
          conf.int = TRUE,                          # Add confidence interval
          color = "cyl", palette = "jco",           # Color by groups "cyl"
          shape = "cyl"                             # Change point shape by groups "cyl"
)+
  stat_cor(aes(color = cyl,
               label =paste(..rr.label.., ..p.label.., sep = "~`,`~")), # HOW TO CHANGE p.label to show stars???
           label.x = 3)           # Add correlation coefficient


Answer:

You can use cut:

ggscatter(df, x = "wt", y = "mpg",
          add = "reg.line",                         # Add regression line
          conf.int = TRUE,                          # Add confidence interval
          color = "cyl", palette = "jco",           # Color by groups "cyl"
          shape = "cyl"                             # Change point shape by groups "cyl"
)+
  stat_cor(aes(color = cyl,
               label =paste(..rr.label.., cut(..p.., 
                                              breaks = c(-Inf, 0.0001, 0.001, 0.01, 0.05, Inf),
                                              labels = c("'****'", "'***'", "'**'", "'*'", "'ns'")), 
                            sep = "~")), 
           label.x = 3)   

Needless to say that showing the p-values (or, even better, the confidence intervals) is much better.

Question:

Utilizing the example package code in ggpubr, the ggdotchart function does not create separate segments as is shown in the example, instead there is only a single segment, though the dots seem to be placed in the correct orientation. Does anyone have any tips on what the problem may be? I've thought it may be due to factors, tibbles vs. df, but I haven't been able to determine the problem.

Code:

df <- diamonds %>%
  filter(color %in% c("J", "D")) %>%
  group_by(cut, color) %>%
  summarise(counts = n()) 

ggdotchart(df, x = "cut", y ="counts",
           color = "color", palette = "jco", size = 3, 
           add = "segment", 
           add.params = list(color = "lightgray", size = 1.5),
           position = position_dodge(0.3),
           ggtheme = theme_pubclean()
           )

With the expected output of:

But instead I am getting:


Answer:

Here is a way to get your desired plot without ggpubr::ggdotchart. The issue seems to be that geom_segment does not allow dodging, as discussed here: R - ggplot dodging geom_lines and here: how to jitter/dodge geom_segments so they remain parallel?.

# your data
df <- diamonds %>%
  filter(color %in% c("J", "D")) %>%
  group_by(cut, color) %>%
  summarise(counts = n())

The first step is to expand your data. We will need this when we call geom_line which allows for dodging. I took this idea from @Stibu's answer. We create a copy of df and change the counts column to be 0 in df2. Finally we use bind_rows to create a single data frame from df and df2.

# Copy of the summarised data with every count set to 0; together with the
# original rows this gives each (cut, color) combination two points, a start
# at 0 and an end at the observed count, which geom_line can connect.
df2 <- df
df2$counts <- 0

# bind_rows() is exported by dplyr, not purrr, so `purrr::bind_rows` fails
# with a "not an exported object" error.
df_out <- dplyr::bind_rows(df, df2)
df_out

Then I use ggplot to create / replicate your desired output.

ggplot(df_out, aes(x = cut, y = counts)) +
  geom_line(
    aes(col = color), # needed for dodging, we'll later change colors to "lightgrey"
    position = position_dodge(width = 0.3),
    show.legend = FALSE,
    size = 1.5
  ) +
  geom_point(
    aes(fill = color),
    data = subset(df_out, counts > 0),
    col = "transparent",
    shape = 21,
    size = 3,
    position = position_dodge(width = 0.3)
  ) +
  scale_color_manual(values = c("lightgray", "lightgray")) + #change line colors
  ggpubr::fill_palette(palette = "jco") +
  ggpubr::theme_pubclean()

Question:

I'm new to R and trying to show graphs and images on the same page with R. I tried to use library(ggpubr) and ggarrange() function.

to import the images I used library(png) and readPNG() to import my images.

The end result i'm aiming for is something like this:

The code i used to create the panel is:

library(ggpubr)
library(png)

data("ToothGrowth")

bxp <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                 color = "dose", palette = "jco")
dp <- ggdotplot(ToothGrowth, x = "dose", y = "len",
                color = "dose", palette = "jco", binwidth = 1)

img1 <- readPNG("image1.png")
img2 <- readPNG("image2.png")

im_A <- ggplot() + background_image(img1) # tried to insert the image as background.. there must be a better way
im_B <- ggplot() + background_image(img2) 

ggarrange(im_A, im_B, dp, bxp, 
          labels = c("A", "B", "C", "D"),
          ncol = 2, nrow = 2)

Of course, I inserted the images manually using PowerPoint.

Thanks,


Answer:

I think your code is almost there. If you use the theme function to add some margins you can get something like this:

Code below. The only addition is theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm")) for both images.

library(ggpubr)
library(png)

data("ToothGrowth")

bxp <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                 color = "dose", palette = "jco")
dp <- ggdotplot(ToothGrowth, x = "dose", y = "len",
                color = "dose", palette = "jco", binwidth = 1)

img1 <- readPNG("~/Personal/Wallpapers/375501.png")
img2 <- readPNG("~/Personal/Wallpapers/665150.png")

im_A <- ggplot() + 
    background_image(img1) +
    # This ensures that the image leaves some space at the edges
    theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm"))

im_B <- ggplot() + background_image(img2) + 
    theme(plot.margin = margin(t=1, l=1, r=1, b=1, unit = "cm"))

ggarrange(im_A, im_B, dp, bxp, 
          labels = c("A", "B", "C", "D"),
          ncol = 2, nrow = 2)

Question:

I want to plot the p value of Kruskal-Wallis test to my ggplot using the R function stat_compare_means from the package ggpubr.

However, the plotted value is different from the value if I simply run the function:

kruskal.test(value ~ type, data = Profile_melt)

my code to plot the p value is:

ggplot(Profile_melt, aes(type, value)) + 
  geom_boxplot(aes(fill = factor(type), alpha = 0.5), 
               outlier.shape = NA, show.legend = FALSE) +
  geom_jitter(width = 0.2, size = 2, show.legend = FALSE,
              aes(colour = factor(type)), alpha = 0.5) +
  theme_bw() +
  facet_grid(Case ~ Marker, scales = 'free') +
  stat_compare_means(comparison = list(c("Real", "Binomial")),method = 'kruskal.test')+
  background_grid(major = 'y', minor = "none") + # add thin horizontal lines 
  xlab('Category') +
  ylab('Cell counts (Frequencies)')+
  theme(axis.text = element_text(size = 15), 
        axis.title = element_text(size = 20), 
        legend.text = element_text(size = 38),
        legend.title = element_text(size = 30), 
        strip.background = element_rect(colour="black", fill="white"),
        strip.text = element_text(margin = margin(10, 10, 10, 10), size = 25)) +
  panel_border()

Here is my data sample data


Answer:

There are many code lines which may not be relevant to the question. Perhaps, your question could be:

why does

kruskal.test(value ~ type, data = Profile_melt)

#Kruskal-Wallis chi-squared = 4.9673, df = 1, p-value = 0.02583

produce a different p value from

ggboxplot(Profile_melt, x="type", y = "value") + 
  stat_compare_means(comparison = list(c("Real", "Binomial")), method = 'kruskal.test')

# p-value = 0.49

You could work out the reason by checking the original code. The developer of ggpubr may be able to explain this better, and perhaps fix it there if it is an issue. To get a correct and consistent p-value, remove comparison = list(c("Real", "Binomial")):

ggboxplot(Profile_melt, x="type", y = "value") + 
  stat_compare_means(method = 'kruskal.test')

or

Edit
ggboxplot(Profile_melt, x="type", y = "value") + 
  stat_compare_means(comparison = list(c("Real", "Binomial")))

With your other code, the graph looks like this:

Question:

I would like to compute an anova only including gr1-gr3 excluding gr4. Is this possible using stat_compare_means()?

x   value
gr1 3.543
gr1 2.12
gr1 4.56
gr2 3.3
gr2 2.12
gr2 4.1
gr3 3.32
gr3 2.3
gr3 3.3
gr4 3.325
gr4 2.355
gr4 3.34

library(ggpubr)
# Box plot of `value` for each group in `x`, annotated with the default
# stat_compare_means() test. The data frame and the aesthetic mapping are
# two separate arguments of ggplot().
ggplot(df, aes(x, value)) +
  geom_boxplot() +
  stat_compare_means()

Answer:

This might help. There is a comparisons argument which you can use for this.

# Box plot with comparison brackets added by stat_compare_means().
# Each element of `comparisons` names the groups to compare; presumably the
# integers index the groups along the x axis — confirm.
# NOTE(review): ggpubr documents `comparisons` as a list of length-2 vectors,
# so the c(1, 2, 3) entry probably does not produce a three-group test; the
# ANOVA variant given in the EDIT is the approach for comparing gr1-gr3 jointly.
ggplot(dta, aes(x, value)) + 
    geom_boxplot() + 
    stat_compare_means(comparisons = list(1:2, 2:3, c(1, 3), c(1, 2, 3)))

EDIT: For ANOVA

# One-way ANOVA restricted to gr1-gr3: the stat layer gets its own data with
# the gr4 rows removed, while the box plots still show every group.
# subset() is base R, so the snippet does not depend on dplyr being attached
# (a bare filter() would otherwise resolve to stats::filter() and fail).
ggplot(dta, aes(x, value)) +
  geom_boxplot() +
  stat_compare_means(
    method = "anova",
    label.y = 4.8,
    data = subset(dta, x != "gr4")
  )

Question:

I have a following graph:

My data looks like this:

structure(list(Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("30 (min)", "60 (min)", 
"90 (min)"), class = "factor"), CC = c(48L, 48L, 48L, 48L, 48L, 
48L, 48L, 48L, 48L, 48L, 48L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 
48L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 48L, 48L, 48L, 48L, 48L, 
48L, 48L, 48L, 48L, 48L, 48L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L), logSG = c(-2.33741545784603, -1.18658485472471, 0, 0, 
-0.491782643217124, 3.69448372083377, 4.5407591512423, 2.61268192437219, 
3.86060133588356, 0, 0, 0, 0, -1.96969067772072, 1.14869526758596, 
0, 0, 0.415146522977509, -0.493822991230427, 4.41801522125664, 
0.77745996724697, 0, 3.45614515898878, 5.0294054937422, 4.11407093388758, 
5.89718763629734, 0, 5.33908355911139, 5.07344324140438, 6.31055955347468, 
6.26791733683611, 6.25584328404724, 5.07460803880627, -1.14240744866778, 
6.77709746094989, 5.83911536618483, 2.67579059651909, -0.0581215539811307, 
7.58033720832469, 6.76794295683794, 6.93585146776912, 5.73947045554567, 
0, 6.14529708204681, 5.36852954870684, 6.37769853018143, 6.69908148274872, 
6.85439184110933, 5.92974818275192, 8.00503455012466, 6.90732500418721, 
7.72752338652543, 3.69803617369367, 3.94835058665366, 0.196314345332914, 
5.14495801055901, 7.13859356791309, 8.10996479358517, 5.69942374886411, 
6.97130919360524, 7.76540068911542, 7.40190399204928)), row.names = c(NA, 
-62L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x7faf890130e0>)

My code looks like this:

# Rotated box plots of logSG by CC, one panel per Time, with jittered points.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
p <- ggboxplot(
  test,
  x = "CC", y = "logSG",
  color = "CC", palette = c("#E69F00", "#56B4E9"),
  alpha = .9, add = "jitter", shape = "CC",
  facet.by = "Time", short.panel.labs = FALSE,
  outlier.shape = NA, rotate = TRUE
)

# Compare CC = 48 against CC = 0 and label the result with significance
# symbols (label = "p.signif").
p <- p + stat_compare_means(
  comparisons = list(c("48", "0")),
  label = "p.signif", size = 8
)

# Stack the Time panels in a single column and apply the final theme.
facet(p, facet.by = "Time", ncol = 1) +
  theme_classic(base_size = 23) +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(),
    panel.grid.minor.x = element_line(color = "gray", size = 0.1)
  )

I would like:

  1. the significance bar further to the left (to have more space between the bar and the highest value in the 90 (min) condition).
  2. the significance label (ns, *, **) to the right of the significance bar and centered on it.
  3. the jitter colors changed to match the fill color (while the boxplots keep their black borders).

These are references that I've been using: Link Link Link

Thank you for your help.


Current progress:

Whole data:

structure(list(Time = c("30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", "30 (min)", 
"30 (min)", "30 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", "60 (min)", 
"60 (min)", "60 (min)", "60 (min)", "60 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", "90 (min)", 
"90 (min)", "90 (min)", "90 (min)", "90 (min)"), CC = c("48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "48", "48", "48", 
"48", "48", "48", "48", "48", "48", "48", "48", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"
), logSG = c(-2.33741545784603, -1.5359551356743, -0.706559805935718, 
0.708497262914779, -1.18658485472471, 0, 0.215424357929406, 0, 
0, -0.669164283172473, -1.05979396415459, -2.31882739909547, 
0, 0.42055843809296, 0.0345207061138516, 0.490214669861868, -0.491782643217124, 
-0.40943698558059, 5.29576170328759, 3.00786490791837, 3.69448372083377, 
0, 1.13692090507591, 3.90397709096783, 4.5407591512423, 0.959448191357494, 
0.812996456139986, 0.387663144815093, 2.61268192437219, 5.70801342338959, 
0, 4.06129455674501, 3.86060133588356, 2.96113466195081, 4.31811283391345, 
6.02174904012545, 0, 0, 3.63890951803723, 4.15432961681392, 0, 
5.62437201872284, 0, -0.172668794962532, 0, 0.885684238138353, 
0.458386726057045, 0.93033451187609, 0, -0.916540763129364, 0.672683864147678, 
0, -1.96969067772072, -0.211359094917249, 0, 0, 1.14869526758596, 
0.855485393729508, 1.51470800852286, 1.25969589114081, 0, 5.55360595405316, 
0, 0, 0, 0.608132989683934, 0.766711429029875, 0.0230319883972058, 
0.415146522977509, 0.989351045425203, 0.764590061654141, 0, -0.493822991230427, 
-3.04352293739034, 0.113154350383661, 0.477535633527187, 4.41801522125664, 
0, 0.790376357072563, 5.33363551675867, 0.77745996724697, -1.39120640548091, 
2.81038881541196, 0.909530012962848, 0, 0, 0, -1.32462975739634, 
3.45614515898878, 2.87635990847717, 6.14131074674228, 4.94317084553412, 
5.0294054937422, 3.6735726222342, 0, 0, 4.11407093388758, 0, 
0, 0, 5.89718763629734, 0, -2.83568067112007, -1.08417160451332, 
0, 0, 0, -1.79669470441478, 5.33908355911139, 4.34702414757653, 
4.97832646498993, 5.68601129052879, 5.07344324140438, 7.32969721447871, 
5.88124362908576, 3.95375517761101, 6.31055955347468, 6.68105022814714, 
0, 0, 6.26791733683611, 6.6552388904731, 6.1554345645557, 6.16783228738057, 
6.25584328404724, 7.40421295863169, 4.42098910134338, 4.34381971828829, 
5.07460803880627, 4.05138376423435, 5.66021948971494, 6.4161744214807, 
-1.14240744866778, -1.59148885378724, 4.96717393543566, 4.05185180788696, 
6.77709746094989, 7.33240191526984, 4.40167592777489, 5.00079217967616, 
5.83911536618483, 4.87041813930411, 5.4798591602912, 5.8840457498263, 
2.67579059651909, 0, 4.55901232476737, -1.60364223983752, -0.0581215539811307, 
2.82972718294857, 6.25577118949695, 6.72479834475368, 7.58033720832469, 
6.73711432644271, 6.00469556652024, 4.37286341669336, 6.76794295683794, 
6.95437999496909, 5.54915990369101, 7.19992007355504, 6.93585146776912, 
6.31917969823023, 6.88303863646214, 5.80298414483425, 5.73947045554567, 
5.42427157341921, 6.02067813111897, 7.06513961445597, 0, 5.14873092690417, 
4.99432854142356, 4.97613488925218, 6.14529708204681, 0.241208512880646, 
5.61581002162685, -0.428836094431404, 5.36852954870684, 6.969161017026, 
6.4311112305739, -0.488846717047216, 6.37769853018143, 6.06472694568864, 
5.91882885723854, 6.75183187908138, 6.69908148274872, 6.09707860117223, 
7.51181134639969, 7.70390188945798, 6.85439184110933, 7.06474253499617, 
6.06324707449019, 7.01225437796713, 5.92974818275192, 4.38046495874971, 
4.93237626658016, 6.42814116590149, 8.00503455012466, 6.52016300282035, 
5.631222408333, 6.00091375144581, 6.90732500418721, 7.06335795773428, 
6.5320791891557, 7.61232026906213, 7.72752338652543, 6.5686929033117, 
2.95760357016318, 0.536708150079365, 3.69803617369367, -0.762215947784048, 
0.813262309586256, 0.09473936757137, 3.94835058665366, 4.8306952746183, 
6.08818108540523, -2.28999797748857, 0.196314345332914, 4.89645977132752, 
4.92651109236787, 0, 5.14495801055901, 5.61854564628758, 4.6634157508477, 
5.61755666066751, 7.13859356791309, 7.53514925442216, 8.07256779525966, 
8.35325245369136, 8.10996479358517, 7.4087861988897, 7.39596849037868, 
7.59049483086555, 5.69942374886411, 6.51587453730492, 6.3129579179017, 
7.5183813575123, 6.97130919360524, 7.76889903023342, 7.95470129243914, 
7.2816428879755, 7.76540068911542, 7.66318176453257, 6.74009737270994, 
6.68085256625662, 7.40190399204928, 8.60372201882463, 6.62079617953904, 
8.22461607745356)), row.names = c(NA, -248L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x7faf890130e0>)

Current code:

# Horizontal box plots of logSG for each CC level, one stacked panel per Time,
# with a 48-vs-0 significance bracket positioned relative to the data range.
ggplot(
  result,
  aes(
    x = as.factor(CC),
    y = logSG,
    color = as.factor(CC),
    shape = as.factor(CC),
    fill = as.factor(CC)
  )
) +
  # Black box outlines; outlier.shape = NA suppresses the box plot's own
  # outlier markers (the jitter layer draws the observations).
  geom_boxplot(color = "black", alpha = 0.2, outlier.shape = NA) +
  geom_jitter(width = 0.2) +
  # Bracket sits one unit above the largest observed logSG.
  geom_signif(
    comparisons = list(c("48", "0")),
    y_position = max(result$logSG, na.rm = TRUE) + 1,
    hjust = -0.75,
    vjust = 1.5,
    color = "black",
    show.legend = FALSE,
    textsize = 8,
    map_signif_level = TRUE
  ) +
  scale_fill_manual(values = c("#E69F00", "#56B4E9")) +
  scale_color_manual(values = c("#E69F00", "#56B4E9")) +
  facet_wrap(Time ~ ., ncol = 1) +
  # Axis limits padded beyond the data so the bracket and label fit.
  ylim(
    min(result$logSG, na.rm = TRUE) - 0.25,
    max(result$logSG, na.rm = TRUE) + 3.5
  ) +
  coord_flip() +
  labs(color = "CC", fill = "CC", shape = "CC", x = "CC") +
  theme_classic(base_size = 23) +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(),
    panel.grid.minor.x = element_line(color = "gray", size = 0.1)
  )

I think coord_flip is doing something funky to hjust and vjust. Is there any way to manipulate this? If not, I'm going to use Illustrator or something.


Answer:

Using stat_compare_means, I did not find a way to adjust the position of the significance labels the way you want (I think the faceting interferes with the label.y argument), so I used the geom_signif function from the ggsignif package and played a little with hjust, vjust and y_position.

I also use ggplot to do the boxplot instead of ggboxplot because you can thus manipulate color and filling of the boxplot and of the jitter points separately.

Here is the code that I used, based on your example:

library(ggplot2)
library(ggsignif)
# Flipped box plots of logSG per CC level, one panel per Time, with jittered
# points and a 48-vs-0 significance bracket at a fixed position.
ggplot(
  df,
  aes(
    x = as.factor(CC), y = logSG,
    color = as.factor(CC), shape = as.factor(CC), fill = as.factor(CC)
  )
) +
  # Boxes are outlined in black regardless of the mapped colour.
  geom_boxplot(color = "black", alpha = 0.2) +
  geom_jitter(width = 0.2) +
  # Bracket drawn at y = 9; hjust/vjust nudge the significance label.
  geom_signif(
    comparisons = list(c("48", "0")),
    y_position = 9, hjust = -0.75, vjust = 1.5,
    color = "black", show.legend = FALSE,
    textsize = 8, map_signif_level = TRUE
  ) +
  # Same two-colour palette for point colour and box fill.
  scale_fill_manual(values = c("#E69F00", "#56B4E9")) +
  scale_color_manual(values = c("#E69F00", "#56B4E9")) +
  facet_wrap(Time ~ ., ncol = 1) +
  ylim(-3, 10) +
  coord_flip() +
  labs(color = "CC", fill = "CC", shape = "CC", x = "CC") +
  theme_classic(base_size = 23) +
  theme(
    legend.position = "top",
    panel.grid.major = element_line(),
    panel.grid.minor.x = element_line(color = "gray", size = 0.1)
  )

Does it look like what you are trying to achieve?

Question:

How do I edit a common legend title (make it bold font and enlarge the font size) using ggarrange?

Based on the six plots I have (p1 to p6), I thought the following would work:

# Make the legend title of p6 bold and larger (closing parenthesis of theme()
# restored so the statement parses).
p6 <- p6 + theme(legend.title = element_text(size = 15, face = "bold"))

The below ggarrange was used to combine the six plots:

# Lay out the six plots on a 2 x 4 grid with a single shared legend at the
# bottom, tagging the panels "1" to "6".
p <- ggarrange(
  p1, p2, p3, p4, p5, p6,
  labels = as.character(1:6),
  # font.label = list(size = 10, color = "green"),
  nrow = 2,
  ncol = 4,
  legend = "bottom",
  common.legend = TRUE
)

However, this does not change the common legend at all.


Answer:

You can extract the legend from your plot of interest and then arrange that legend alongside of your plots.

#libraries:

library(ggplot2)
library(ggpubr)
library(gridExtra)
library(grid)
#example plots:

# Two mtcars scatter plots coloured by cylinder count. Only p2 carries the
# bold, enlarged legend title and the bottom legend position.
p1 <- ggplot(mtcars) +
  geom_point(aes(x = mpg, y = qsec, color = factor(cyl)))

p2 <- ggplot(mtcars) +
  geom_point(aes(x = mpg, y = 4 * drat, color = factor(cyl))) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 15, face = "bold")
  )
# Extract the legend (the grob named "guide-box") from a ggplot so it can be
# arranged separately from the plot panels. Adapted from:
# https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
#
# a.gplot: a ggplot object that draws a legend.
# Returns the legend grob; stops with a clear error if the plot has no legend.

get_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  # vapply() always yields a character vector, even for an empty grob list,
  # whereas sapply() would silently return a list in that case.
  grob_names <- vapply(tmp$grobs, function(x) x$name, character(1))
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0) {
    stop("No legend (\"guide-box\" grob) found in the plot.", call. = FALSE)
  }
  # Use the first match: `[[` with several indices would index recursively.
  tmp$grobs[[leg[1]]]
}
#arranging the legend and plots in a grid:

# Take the legend from p2, then stack the two legend-free plots and place that
# legend in a short row underneath (height ratio 10:1).
p2_legend <- get_legend(p2)

grid.arrange(
  arrangeGrob(
    p1 + theme(legend.position = "none"),
    p2 + theme(legend.position = "none"),
    nrow = 2
  ),
  p2_legend,
  heights = c(10, 1),
  nrow = 2
)

Question:

I'm trying to reproduce this graph from ggpaired (ggpubr) but with violin plot instead of boxplot:

# Ten before/after measurements, drawn as a paired box plot with ggpaired().
before <- c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
after <- c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)

d <- data.frame(before = before, after = after)
ggpaired(
  d,
  cond1 = "before", cond2 = "after",
  fill = "condition", palette = "jco"
)

I have tried with geom_violin from ggplot2, but with no success at pairing the samples like ggpaired does


Answer:

You need to reshape your data first:

library(ggplot2)
# Ten before/after measurements.
before <- c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
after <- c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)

d <- data.frame(before = before, after = after)
# Row index identifying each pair; seq_len() stays correct for zero rows,
# unlike 1:nrow(d).
d$obs <- seq_len(nrow(d))
# Long format: one row per (obs, time) with the measurement in `value`.
# pivot_longer() replaces the superseded gather().
d2 <- tidyr::pivot_longer(
  d,
  cols = -obs,
  names_to = "time",
  values_to = "value"
)

ggplot(d2, aes(time, value)) +
  geom_violin() +
  geom_point() +
  # One line per observation joins its "before" and "after" points.
  geom_line(aes(group = obs)) +
  # Put "before" left of "after" (the default alphabetical order reverses them).
  scale_x_discrete(limits = c("before", "after"))

Question:

I would like to align a ggtexttable {ggpubr} and a graphic {ggplot} by ggarrange {ggpubr} on the left side of the plot.

Is there a way to do this?

I tried the align argument but the objects are still centered

library(ggpubr)
library(ggplot2)
library(tidyverse)

# Three-row example data: an index column and three random normal draws.
df <- tibble(
  Col1 = 1:3,
  Col2 = rnorm(3)
)

# Line plot of Col2 against Col1.
plot <- df %>%
  ggplot(aes(x = Col1, y = Col2)) +
  geom_line()

# The same data rendered as a text table without row names.
table <- ggtexttable(df, rows = NULL)

# Plot on top, table below at half the plot's height.
ggarrange(
  plot, table,
  heights = c(1, 0.5),
  nrow = 2, ncol = 1
)

Answer:

There are several packages you can try. See more here

library(tidyverse)
library(ggpubr)

# Example data: an index column plus three random normal draws.
df <- tibble(
  Col1 = 1:3,
  Col2 = rnorm(3)
)

# Line plot of Col2 against Col1.
plot1 <- ggplot(df, aes(x = Col1, y = Col2)) +
  geom_line()

# Text-table rendering of the same data, without row names.
table1 <- ggtexttable(df, rows = NULL)


# Option 1 - cowplot: build a bottom row holding the table plus three NULL
# entries, then stack the plot above that row.
# NOTE(review): presumably the NULL entries become empty cells that keep the
# table in the first cell — confirm against plot_grid()'s default layout.
library(cowplot)
bottom <- plot_grid(table1, NULL, NULL, NULL)
plot_grid(plot1, bottom, 
          nrow = 2)

# Option 2 - patchwork: the plot combined with a group made of the table and
# three spacers.
# NOTE(review): presumably `/` stacks vertically and `|` places side by side,
# so the table ends up in the leftmost quarter of the lower row — confirm.
library(patchwork)
plot1 / (table1 | plot_spacer() | plot_spacer() |  plot_spacer())

# Option 3 - multipanelfigure: a 3-column, 2-row figure without panel labels.
library(magrittr)
library(multipanelfigure)
figure1 <- multi_panel_figure(columns = 3, rows = 2, panel_label_type = c("none"))

# The plot spans columns 1-3 of row 1; the table takes column 1 of row 2 and
# spacers fill columns 2 and 3. %<>% assigns the piped result back to figure1.
figure1 %<>%
  fill_panel(plot1, column = 1:3, row = 1) %<>%
  fill_panel(table1, column = 1, row = 2) %<>%
  fill_panel(plot_spacer(), column = 2, row = 2) %<>%
  fill_panel(plot_spacer(), column = 3, row = 2)
figure1

Created on 2019-02-19 by the reprex package (v0.2.1.9000)