Hot questions for Using Ggplot2 in ggtree

Question:

I have performed statistical tests on many genera that are hierarchically structured in a tree, so I have a p-value for each genus in the tree.

I want to visualize both the tree and the p-value in a panel plot, as it's possible to do with ggtree.

Packages and tree data:

library(ape)
# source("https://bioconductor.org/biocLite.R"); biocLite("ggtree")
library(ggtree)

tree <- structure(list(
  edge = structure(c(102L, 103L, 104L, 105L, 106L, 
                     107L, 103L, 108L, 109L, 110L, 111L, 111L, 109L, 112L, 113L, 109L, 
                     114L, 115L, 115L, 115L, 115L, 115L, 115L, 114L, 116L, 109L, 117L, 
                     118L, 108L, 119L, 120L, 121L, 119L, 122L, 123L, 123L, 108L, 124L, 
                     125L, 126L, 125L, 127L, 127L, 108L, 128L, 129L, 130L, 130L, 129L, 
                     131L, 103L, 132L, 133L, 134L, 135L, 134L, 136L, 136L, 134L, 137L, 
                     137L, 134L, 138L, 138L, 134L, 139L, 139L, 134L, 140L, 134L, 141L, 
                     141L, 103L, 142L, 143L, 144L, 145L, 103L, 146L, 147L, 148L, 149L, 
                     146L, 150L, 151L, 152L, 103L, 153L, 154L, 155L, 156L, 153L, 157L, 
                     158L, 159L, 159L, 159L, 159L, 159L, 157L, 160L, 161L, 160L, 162L, 
                     103L, 163L, 164L, 165L, 166L, 165L, 167L, 167L, 165L, 168L, 168L, 
                     165L, 169L, 164L, 170L, 171L, 170L, 172L, 163L, 173L, 174L, 175L, 
                     175L, 175L, 173L, 176L, 177L, 177L, 177L, 173L, 178L, 179L, 179L, 
                     163L, 180L, 181L, 182L, 181L, 183L, 183L, 183L, 181L, 184L, 184L, 
                     184L, 184L, 184L, 184L, 184L, 184L, 184L, 184L, 181L, 185L, 185L, 
                     185L, 181L, 186L, 181L, 187L, 187L, 181L, 188L, 188L, 188L, 188L, 
                     188L, 163L, 189L, 190L, 191L, 191L, 163L, 192L, 193L, 194L, 194L, 
                     194L, 194L, 194L, 194L, 194L, 103L, 195L, 196L, 197L, 198L, 196L, 
                     199L, 200L, 103L, 201L, 202L, 203L, 204L, 204L, 102L, 205L, 206L, 
                     207L, 208L, 209L, 206L, 210L, 211L, 212L, 206L, 213L, 214L, 215L, 
                     103L, 104L, 105L, 106L, 107L, 1L, 108L, 109L, 110L, 111L, 2L, 
                     3L, 112L, 113L, 4L, 114L, 115L, 5L, 6L, 7L, 8L, 9L, 10L, 116L, 
                     11L, 117L, 118L, 12L, 119L, 120L, 121L, 13L, 122L, 123L, 14L, 
                     15L, 124L, 125L, 126L, 16L, 127L, 17L, 18L, 128L, 129L, 130L, 
                     19L, 20L, 131L, 21L, 132L, 133L, 134L, 135L, 22L, 136L, 23L, 
                     24L, 137L, 25L, 26L, 138L, 27L, 28L, 139L, 29L, 30L, 140L, 31L, 
                     141L, 32L, 33L, 142L, 143L, 144L, 145L, 34L, 146L, 147L, 148L, 
                     149L, 35L, 150L, 151L, 152L, 36L, 153L, 154L, 155L, 156L, 37L, 
                     157L, 158L, 159L, 38L, 39L, 40L, 41L, 42L, 160L, 161L, 43L, 162L, 
                     44L, 163L, 164L, 165L, 166L, 45L, 167L, 46L, 47L, 168L, 48L, 
                     49L, 169L, 50L, 170L, 171L, 51L, 172L, 52L, 173L, 174L, 175L, 
                     53L, 54L, 55L, 176L, 177L, 56L, 57L, 58L, 178L, 179L, 59L, 60L, 
                     180L, 181L, 182L, 61L, 183L, 62L, 63L, 64L, 184L, 65L, 66L, 67L, 
                     68L, 69L, 70L, 71L, 72L, 73L, 74L, 185L, 75L, 76L, 77L, 186L, 
                     78L, 187L, 79L, 80L, 188L, 81L, 82L, 83L, 84L, 85L, 189L, 190L, 
                     191L, 86L, 87L, 192L, 193L, 194L, 88L, 89L, 90L, 91L, 92L, 93L, 
                     94L, 195L, 196L, 197L, 198L, 95L, 199L, 200L, 96L, 201L, 202L, 
                     203L, 204L, 97L, 98L, 205L, 206L, 207L, 208L, 209L, 99L, 210L, 
                     211L, 212L, 100L, 213L, 214L, 215L, 101L), .Dim = c(214L, 2L)), 
  edge.length = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
  Nnode = 114L, 
  tip.label = c("Brachyspira", 
                "Haemophilus", "Aggregatibacter", "Acinetobacter", "Klebsiella", 
                "Salmonella", "Escherichia", "Enterobacter", "Shigella", 
                "Citrobacter", "Hafnia", "Succinatimonas", "Corallococcus", 
                "Bilophila", "Desulfovibrio", "Azospirillum", "Acidiphilium", 
                "Acetobacter", "Sutterella", "Parasutterella", "Oxalobacter", 
                "Porphyromonas", "Paraprevotella", "Prevotella", "Alistipes", 
                "Rikenella", "Tannerella", "Parabacteroides", "Odoribacter", 
                "Butyricimonas", "Bacteroides", "Coprobacter", "Barnesiella", 
                "Fusobacterium", "Coraliomargarita", "Akkermansia", "Bifidobacterium", 
                "Gordonibacter", "Eggerthella", "Cryptobacterium", "Adlercreutzia", 
                "Enterorhabdus", "Collinsella", "Olsenella", "Lactobacillus", 
                "Weissella", "Oenococcus", "Lactococcus", "Streptococcus", 
                "Enterococcus", "Staphylococcus", "Bacillus", "Dialister", 
                "Veillonella", "Megasphaera", "Megamonas", "Mitsuokella", 
                "Selenomonas", "Phascolarctobacterium", "Acidaminococcus", 
                "Oscillibacter", "Intestinibacter", "Peptoclostridium", "Peptostreptococcus", 
                "Dorea", "Roseburia", "Anaerostipes", "Tyzzerella", "Coprococcus", 
                "Blautia", "Butyrivibrio", "Marvinbryantia", "Lachnoclostridium", 
                "Oribacterium", "Flavonifractor", "Intestinimonas", "Pseudoflavonifractor", 
                "Eubacterium", "Clostridium", "Butyricicoccus", "Faecalibacterium", 
                "Ruminococcus", "Anaerotruncus", "Subdoligranulum", "Ruminiclostridium", 
                "Parvimonas", "Peptoniphilus", "Catenibacterium", "Solobacterium", 
                "Coprobacillus", "Holdemania", "Erysipelatoclostridium", 
                "Turicibacter", "Stoquefichus", "Mycoplasma", "Acholeplasma", 
                "Pyramidobacter", "Synergistes", "Methanobrevibacter", "Methanomethylophilus", 
                "Methanoculleus"), root.edge = 1), 
  .Names = c("edge", "edge.length", 
             "Nnode", "tip.label", "root.edge"), class = "phylo", order = "cladewise"
)

Code:

df <- data.frame(id = tree$tip.label, p = runif(length(tree$tip.label)))

p1 <- ggtree(tree) +
  geom_tiplab()

facet_plot(p1, panel = "p-value", data = df, geom = geom_point, aes(x = p))

But here, the names of the genera are truncated, so I modified the xlim parameter to see them completely.

p2 <- 
  ggtree(tree) + 
  geom_tiplab() + 
  xlim(c(0,7))

facet_plot(p2, panel = "p-value", data = df, geom = geom_point, aes(x = p))

It works! However, xlim spreads to the second panel... How can I fix that?

I tried to add xlim(0:1) or xlim = 0:1 in facet_plot() but that doesn't work...


Edit after F. Privé's answer:

I need to keep my labels at the right of the branches because I have to add some labels/statistics on them. And I want them left-aligned.


Answer:

ggtree provides the xlim_expand function specifically for that purpose.

You need to specify the limits and the panel to which you want to apply them. In your case, you want to apply them to the Tree panel:

# Draw the tree with small tip labels and set the x-axis limits to c(0, 15)
# for the "Tree" panel only, so the second panel keeps its own range.
p1 <- ggtree(tree) +
  geom_tiplab(size = 2) + 
  xlim_expand(c(0,15), panel = "Tree")

# Add a second panel named "p-value" that draws one point per row of df at x = p.
facet_plot(p1, panel = "p-value", data = df, geom = geom_point, aes(x = p)) 

I exaggerated the limits for the example, but you can adjust them to your preference.

Question:

I'm building a tree with ggtree on R.

library(ggtree)

here is the newick file content:

((Tribolium_castaneum:1.00000,Anoplophora_glabripennis:1.00000):1.23589,(((((Ichneumoninae_B:1.00000,Ichneumoninae_A:1.00000)1:1.27089,((Tryphoninae_A:1.00000,Tryphoninae_B:1.00000)1:1.34418,((Ophioninae_A:1.00000,Campopleginae:1.00000)1:0.14225,(Cremastinae_B:1.00000,Cremastinae_A:1.00000)1:0.66653)1:0.19416)1:0.08638)1:0.63371,(((Microplitis_demolitor:1.00000,Cotesia_vestalis:1.00000)1:1.02258,(Macrocentrus_cingulum:1.00000,(Meteorus_colon_F:1.00000,Meteorus_cinctellus:1.00000)1:1.63033)1:0.04483)1:0.17281,(Aphidius_colemani:1.00000,(Fopius_arisanus:1.00000,(Diachasma_alloeum:1.00000,(Psyttalia_lounsbury:1.00000,Psyttalia_concolor:1.00000)1:0.70706)1:0.12291)1:1.20999)1:0.07801)1:0.39656)1:0.33771,((Orussus_abietinus:1.00000,(Cephus_cinctus:1.00000,(Neodiprion_lecontei:1.00000,Athalia_rosae:1.00000)1:1.17186)1:0.06599)1:0.09879,(Goniozus_legneri:1.00000,((Polistes_canadensis:1.00000,Polistes_dominulam:1.00000)1:1.90810,(((Dufourea_novaeangliae:1.00000,(Lasioglossum_albipes:1.00000,Nomia_melanderi:1.00000)1:0.20141)1:0.39884,(Megachile_rotundata:1.00000,(Ceratina_calcarata:1.00000,(Habropoda_laboriosa:1.00000,((Euglossa_dilemma:1.00000,Eufriesea_mexicana:1.00000)1:0.83283,(((Lepidotrigona_ventralis:1.00000,Melipona_quadrifasciata:1.00000)1:0.60748,(Bombus_impatiens:1.00000,Bombus_terrestris:1.00000)1:0.95254)1:0.18916,(Apis_mellifera:1.00000,(Apis_florea:1.00000,(Apis_dorsata:1.00000,Apis_cerana:1.00000)1:0.14919)1:0.04191)1:1.01881)1:0.03751)1:0.19407)1:0.05417)1:0.12994)1:0.16387)1:0.77078,((Dinoponera_quadriceps:1.00000,Harpegnathos_saltator:1.00000)1:0.80142,(Pseudomyrmex_gracilis:1.00000,(Ooceraea_biroi:1.00000,((Linepithema_humile:1.00000,(Lasius_niger:1.00000,(Formica_exsecta:1.00000,Camponotus_floridanus:1.00000)1:0.12539)1:0.61781)0.98:0.03658,(Pogonomyrmex_barbatus:1.00000,(Vollenhovia_emeryi:1.00000,((Monomorium_pharaonis:1.00000,(Solenopsis_fugax:1.00000,Solenopsis_invicta:1.00000)1:0.46257)1:0.21934,((Wasmannia_auropunctata:1.00000,(Cyphomyrmex_costatu
s:1.00000,(Trachymyrmex_zeteki:1.00000,(Trachymyrmex_septentrionalis:1.00000,(Trachymyrmex_cornetzi:1.00000,(Acromyrmex_echinatior:1.00000,(Atta_cephalotes:1.00000,Atta_colombica:1.00000)1:0.41172)1:0.15915)1:0.05554)1:0.28594)1:0.13185)1:0.55280)1:0.03648,(Temnothorax_curvispinosus:1.00000,(Aphaenogaster_rudis:1.00000,(Aphaenogaster_picea:1.00000,(Aphaenogaster_miamiana:1.00000,(Aphaenogaster_fulva:1.00000,(Aphaenogaster_ashmeadi:1.00000,Aphaenogaster_floridana:1.00000)1:0.18360)1:0.28954)1:0.34182)1:0.32515)1:1.17754)1:0.04502)0.64:0.01460)0.89:0.02079)1:0.12106)1:0.31960)1:0.08063)1:0.05751)1:0.17326)1:0.76734)1:0.06023)1:0.14466)1:0.25842)0.97:0.03545)0.99:0.03857,((((Platygaster_equestris:1.00000,Platygaster_orseoliae:1.00000)1:0.69930,(Trissolcus_brochymenae:1.00000,Trissolcus_japonicus:1.00000)1:1.79298)1:0.22798,((Trichopria_sp_970989:1.00000,Trichopria_drosophilae:1.00000)1:0.85941,((Trichogramma_brassicae:1.00000,Trichogramma_pretiosum:1.00000)1:2.32841,(Eretmocerus_eremicus:1.00000,(Encarsia_formosa:1.00000,((Anagyrus_pseudococci:1.00000,(Copidosoma_sp:1.00000,Copidosoma_floridanum:1.00000)1:1.04986)1:0.46123,((Ceratosolen_corneri:1.00000,(Ceratosolen_marchali:1.00000,Ceratosolen_solmsi:1.00000)1:0.66285)1:1.36401,((Ormyrus_nitidulus:1.00000,Ormyrus_pomaceus:1.00000)1:1.57330,((Torymus_sinensis:1.00000,(Torymus_geranii:1.00000,Torymus_flavipes:1.00000)1:0.17300)1:1.18566,(((Eupelmus_urozonus:1.00000,Eupelmus_kiefferif:1.00000)1:0.22452,(Eupelmus_annulatus:1.00000,Eupelmus_azureus:1.00000)1:0.47943)1:1.14280,((Megastigmus_dorsalis:1.00000,Megastigmus_stigmatizans:1.00000)1:1.33147,((Cecidostiba_fungosa:1.00000,Cecidostiba_semifascia:1.00000)1:0.13650,(Trichomalopsis_sarcophagae:1.00000,(Nasonia_vitripennis:1.00000,(Nasonia_giraulti:1.00000,Nasonia_longicornis:1.00000)1:0.21751)1:0.23007)1:0.53980)1:0.92370)0.48:0.00548)1:0.05788)0.84:0.02621)0.96:0.02843)1:0.07843)0.77:0.01624)0.8:0.02331)0.92:0.04228)1:0.66512)1:0.21996)1:0.04466,(((Ganaspis_sp:1.00000,Ga
naspis_brasiliensis:1.00000)1:0.27165,(Lep_boulardi:1.00000,(Lep_heterotoma:1.00000,Lep_clavipes:1.00000)1:0.14791)1:0.64772)1:0.52394,(Synergus_japonicus:1.00000,Synergus_umbraculus:1.00000)1:1.41758)1:0.94088)1:0.08418):0.06505)1; 

tree = read.newick("newick_tree")

Here is the code I use:

colfunc <- colorRampPalette(c("red", "blue"))
col=colfunc(110)


for (i in 1:length(tree$node.label)){
  if(as.vector(tree$node.label[i])>0.99){
    col[i]<-'#CB1414'
  } else if(as.vector(tree$node.label[i])>0.90 & (as.vector(tree$node.label[i]) < 0.99)){
    col[i]<-'#F0F014'
  } else if(as.vector(tree$node.label[i])>0.76 & (as.vector(tree$node.label[i]) < 0.90)){
    col[i]<-'#098527'
  } else if(as.vector(tree$node.label[i])>0.51 & (as.vector(tree$node.label[i]) < 0.76)){
    col[i]<-'#18D8F7'
  } else if(as.vector(tree$node.label[i])>0 & (as.vector(tree$node.label[i]) < 0.50)){
    col[i]<-'#1B72DB'
  }
}
p<-ggtree(tree,size=0.5,branch.length="none")   +geom_tiplab(size = 2, col ="black")  + geom_nodepoint(color=col, alpha=1, size=1.5, show.legend = TRUE)+ 
  geom_treescale(x=30, y=1)

In order to get point color depending on the bootstrap level I used this part: geom_nodepoint(color=col, alpha=1, size=1.5, show.legend = TRUE) where col is a variable such as:

> col
  [1] "#CB1414" "#FC0002" "#FA0004" "#F70007" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414"
 [15] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#F0F014" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414"
 [29] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414"
 [43] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#F0F014" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#098527" "#CB1414"
 [57] "#CB1414" "#18D8F7" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414"
 [71] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#F0F014" "#098527" "#098527" "#CB1414"
 [85] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#F0F014" "#CB1414" "#098527" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#1B72DB"
 [99] "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414" "#CB1414"

That I made depending on the number in this variable:

> tree$node.labe
  [1] "1"    ""     ""     "0.99" "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   
 [21] "1"    "0.97" "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   
 [41] "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "0.98" "1"    "1"    "1"    "1"    "0.89" "1"    "1"    "0.64" "1"    "1"   
 [61] "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   
 [81] "0.92" "0.8"  "0.77" "1"    "1"    "1"    "1"    "1"    "0.96" "1"    "0.84" "1"    "1"    "1"    "1"    "1"    "1"    "0.48" "1"    "1"   
[101] "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   

And now I would just like to display a color legend for the 5 ranges next to the tree, such as:

but the option show.legend = TRUE does not work for that. Does anyone have an idea? I also tried to add p + theme(legend.position="right") but there is still no legend.

Thank you for your help.

Here is the output of dput(tree):

> dput(tree)
structure(list(edge = structure(c(112L, 113L, 113L, 112L, 114L, 
115L, 116L, 117L, 118L, 118L, 117L, 119L, 120L, 120L, 119L, 121L, 
122L, 122L, 121L, 123L, 123L, 116L, 124L, 125L, 126L, 126L, 125L, 
127L, 127L, 128L, 128L, 124L, 129L, 129L, 130L, 130L, 131L, 131L, 
132L, 132L, 115L, 133L, 134L, 134L, 135L, 135L, 136L, 136L, 133L, 
137L, 137L, 138L, 139L, 139L, 138L, 140L, 141L, 142L, 142L, 143L, 
143L, 141L, 144L, 144L, 145L, 145L, 146L, 146L, 147L, 148L, 148L, 
147L, 149L, 150L, 151L, 151L, 150L, 152L, 152L, 149L, 153L, 153L, 
154L, 154L, 155L, 155L, 140L, 156L, 157L, 157L, 156L, 158L, 158L, 
159L, 159L, 160L, 161L, 161L, 162L, 162L, 163L, 163L, 160L, 164L, 
164L, 165L, 165L, 166L, 167L, 167L, 168L, 168L, 166L, 169L, 170L, 
170L, 171L, 171L, 172L, 172L, 173L, 173L, 174L, 174L, 175L, 175L, 
176L, 176L, 169L, 177L, 177L, 178L, 178L, 179L, 179L, 180L, 180L, 
181L, 181L, 182L, 182L, 114L, 183L, 184L, 185L, 186L, 186L, 185L, 
187L, 187L, 184L, 188L, 189L, 189L, 188L, 190L, 191L, 191L, 190L, 
192L, 192L, 193L, 193L, 194L, 195L, 195L, 196L, 196L, 194L, 197L, 
198L, 198L, 199L, 199L, 197L, 200L, 201L, 201L, 200L, 202L, 203L, 
203L, 204L, 204L, 202L, 205L, 206L, 207L, 207L, 206L, 208L, 208L, 
205L, 209L, 210L, 210L, 209L, 211L, 212L, 212L, 211L, 213L, 213L, 
214L, 214L, 215L, 215L, 183L, 216L, 217L, 218L, 218L, 217L, 219L, 
219L, 220L, 220L, 216L, 221L, 221L, 113L, 1L, 2L, 114L, 115L, 
116L, 117L, 118L, 3L, 4L, 119L, 120L, 5L, 6L, 121L, 122L, 7L, 
8L, 123L, 9L, 10L, 124L, 125L, 126L, 11L, 12L, 127L, 13L, 128L, 
14L, 15L, 129L, 16L, 130L, 17L, 131L, 18L, 132L, 19L, 20L, 133L, 
134L, 21L, 135L, 22L, 136L, 23L, 24L, 137L, 25L, 138L, 139L, 
26L, 27L, 140L, 141L, 142L, 28L, 143L, 29L, 30L, 144L, 31L, 145L, 
32L, 146L, 33L, 147L, 148L, 34L, 35L, 149L, 150L, 151L, 36L, 
37L, 152L, 38L, 39L, 153L, 40L, 154L, 41L, 155L, 42L, 43L, 156L, 
157L, 44L, 45L, 158L, 46L, 159L, 47L, 160L, 161L, 48L, 162L, 
49L, 163L, 50L, 51L, 164L, 52L, 165L, 53L, 166L, 167L, 54L, 168L, 
55L, 56L, 169L, 170L, 57L, 171L, 58L, 172L, 59L, 173L, 60L, 174L, 
61L, 175L, 62L, 176L, 63L, 64L, 177L, 65L, 178L, 66L, 179L, 67L, 
180L, 68L, 181L, 69L, 182L, 70L, 71L, 183L, 184L, 185L, 186L, 
72L, 73L, 187L, 74L, 75L, 188L, 189L, 76L, 77L, 190L, 191L, 78L, 
79L, 192L, 80L, 193L, 81L, 194L, 195L, 82L, 196L, 83L, 84L, 197L, 
198L, 85L, 199L, 86L, 87L, 200L, 201L, 88L, 89L, 202L, 203L, 
90L, 204L, 91L, 92L, 205L, 206L, 207L, 93L, 94L, 208L, 95L, 96L, 
209L, 210L, 97L, 98L, 211L, 212L, 99L, 100L, 213L, 101L, 214L, 
102L, 215L, 103L, 104L, 216L, 217L, 218L, 105L, 106L, 219L, 107L, 
220L, 108L, 109L, 221L, 110L, 111L), .Dim = c(220L, 2L)), edge.length = c(1.23589, 
1, 1, 0.06505, 0.03857, 0.33771, 0.63371, 1.27089, 1, 1, 0.08638, 
1.34418, 1, 1, 0.19416, 0.14225, 1, 1, 0.66653, 1, 1, 0.39656, 
0.17281, 1.02258, 1, 1, 0.04483, 1, 1.63033, 1, 1, 0.07801, 1, 
1.20999, 1, 0.12291, 1, 0.70706, 1, 1, 0.03545, 0.09879, 1, 0.06599, 
1, 1.17186, 1, 1, 0.25842, 1, 0.14466, 1.9081, 1, 1, 0.06023, 
0.77078, 0.39884, 1, 0.20141, 1, 1, 0.16387, 1, 0.12994, 1, 0.05417, 
1, 0.19407, 0.83283, 1, 1, 0.03751, 0.18916, 0.60748, 1, 1, 0.95254, 
1, 1, 1.01881, 1, 0.04191, 1, 0.14919, 1, 1, 0.76734, 0.80142, 
1, 1, 0.17326, 1, 0.05751, 1, 0.08063, 0.03658, 1, 0.61781, 1, 
0.12539, 1, 1, 0.3196, 1, 0.12106, 1, 0.02079, 0.21934, 1, 0.46257, 
1, 1, 0.0146, 0.03648, 1, 0.5528, 1, 0.13185, 1, 0.28594, 1, 
0.05554, 1, 0.15915, 1, 0.41172, 1, 1, 0.04502, 1, 1.17754, 1, 
0.32515, 1, 0.34182, 1, 0.28954, 1, 0.1836, 1, 1, 0.08418, 0.04466, 
0.22798, 0.6993, 1, 1, 1.79298, 1, 1, 0.21996, 0.85941, 1, 1, 
0.66512, 2.32841, 1, 1, 0.04228, 1, 0.02331, 1, 0.01624, 0.46123, 
1, 1.04986, 1, 1, 0.07843, 1.36401, 1, 0.66285, 1, 1, 0.02843, 
1.5733, 1, 1, 0.02621, 1.18566, 1, 0.173, 1, 1, 0.05788, 1.1428, 
0.22452, 1, 1, 0.47943, 1, 1, 0.00548, 1.33147, 1, 1, 0.9237, 
0.1365, 1, 1, 0.5398, 1, 0.23007, 1, 0.21751, 1, 1, 0.94088, 
0.52394, 0.27165, 1, 1, 0.64772, 1, 0.14791, 1, 1, 1.41758, 1, 
1), Nnode = 110L, node.label = c("1", "", "", "0.99", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "0.97", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "0.98", "1", "1", "1", "1", "0.89", 
"1", "1", "0.64", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"0.92", "0.8", "0.77", "1", "1", "1", "1", "1", "0.96", "1", 
"0.84", "1", "1", "1", "1", "1", "1", "0.48", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1"), tip.label = c("Tribolium_castaneum", 
"Anoplophora_glabripennis", "Ichneumoninae_B", "Ichneumoninae_A", 
"Tryphoninae_A", "Tryphoninae_B", "Ophioninae_A", "Campopleginae", 
"Cremastinae_B", "Cremastinae_A", "Microplitis_demolitor", "Cotesia_vestalis", 
"Macrocentrus_cingulum", "Meteorus_colon_F", "Meteorus_cinctellus", 
"Aphidius_colemani", "Fopius_arisanus", "Diachasma_alloeum", 
"Psyttalia_lounsbury", "Psyttalia_concolor", "Orussus_abietinus", 
"Cephus_cinctus", "Neodiprion_lecontei", "Athalia_rosae", "Goniozus_legneri", 
"Polistes_canadensis", "Polistes_dominulam", "Dufourea_novaeangliae", 
"Lasioglossum_albipes", "Nomia_melanderi", "Megachile_rotundata", 
"Ceratina_calcarata", "Habropoda_laboriosa", "Euglossa_dilemma", 
"Eufriesea_mexicana", "Lepidotrigona_ventralis", "Melipona_quadrifasciata", 
"Bombus_impatiens", "Bombus_terrestris", "Apis_mellifera", "Apis_florea", 
"Apis_dorsata", "Apis_cerana", "Dinoponera_quadriceps", "Harpegnathos_saltator", 
"Pseudomyrmex_gracilis", "Ooceraea_biroi", "Linepithema_humile", 
"Lasius_niger", "Formica_exsecta", "Camponotus_floridanus", "Pogonomyrmex_barbatus", 
"Vollenhovia_emeryi", "Monomorium_pharaonis", "Solenopsis_fugax", 
"Solenopsis_invicta", "Wasmannia_auropunctata", "Cyphomyrmex_costatus", 
"Trachymyrmex_zeteki", "Trachymyrmex_septentrionalis", "Trachymyrmex_cornetzi", 
"Acromyrmex_echinatior", "Atta_cephalotes", "Atta_colombica", 
"Temnothorax_curvispinosus", "Aphaenogaster_rudis", "Aphaenogaster_picea", 
"Aphaenogaster_miamiana", "Aphaenogaster_fulva", "Aphaenogaster_ashmeadi", 
"Aphaenogaster_floridana", "Platygaster_equestris", "Platygaster_orseoliae", 
"Trissolcus_brochymenae", "Trissolcus_japonicus", "Trichopria_sp_970989", 
"Trichopria_drosophilae", "Trichogramma_brassicae", "Trichogramma_pretiosum", 
"Eretmocerus_eremicus", "Encarsia_formosa", "Anagyrus_pseudococci", 
"Copidosoma_sp", "Copidosoma_floridanum", "Ceratosolen_corneri", 
"Ceratosolen_marchali", "Ceratosolen_solmsi", "Ormyrus_nitidulus", 
"Ormyrus_pomaceus", "Torymus_sinensis", "Torymus_geranii", "Torymus_flavipes", 
"Eupelmus_urozonus", "Eupelmus_kiefferif", "Eupelmus_annulatus", 
"Eupelmus_azureus", "Megastigmus_dorsalis", "Megastigmus_stigmatizans", 
"Cecidostiba_fungosa", "Cecidostiba_semifascia", "Trichomalopsis_sarcophagae", 
"Nasonia_vitripennis", "Nasonia_giraulti", "Nasonia_longicornis", 
"Ganaspis_sp", "Ganaspis_brasiliensis", "Lep_boulardi", "Lep_heterotoma", 
"Lep_clavipes", "Synergus_japonicus", "Synergus_umbraculus")), class = "phylo", order = "cladewise")

Answer:

The number of node points in your tree is 221, of which 111 are final points (tips). Thus, your col vector needs to have 221 elements; the first 111 specify the colors of the tips. Here is a way to create the proper col vector starting from tree$node.label:

# One entry per node: NA for the 111 tips, then the node labels converted to
# percentages (empty labels become NA under as.numeric()).
x <- c(rep(NA,111), as.numeric(tree$node.label)*100)
# Bin the percentages into (0,50], (50,75], (75,90], (90,99] and (99,100].
col <- cut(x, breaks=c(0,50,75,90,99,100))
# Reverse the level order so that the highest support class comes first.
col <- factor(col, levels=rev(levels(col)))
# Relabel the levels (highest class first) with the text shown in the legend.
col <- factor(col, labels=c("100%","91-99%","76-90%","51-75%","0-50%"))

Node points can be colored using the color aesthetic:

library(ggtree)
library(ggplot2)
# Tree drawn without branch lengths; node points are coloured by the
# bootstrap class stored in the factor col, and the classes get a legend.
p <- ggtree(tree, size = 0.5, branch.length = "none") +
    geom_tiplab(size = 2, col = "black") +
    # Mapping col inside aes() (instead of passing fixed colours) is what
    # makes ggplot2 build a legend for the classes.
    geom_point(aes(color = col), alpha = 1, size = 1.5, show.legend = TRUE) +
    geom_treescale(x = 30, y = 1) +
    theme(legend.position = "right") +
    # na.translate = FALSE keeps the NA class (the tips) out of the scale;
    # spelled out as FALSE because F can be reassigned.
    scale_colour_manual(na.translate = FALSE, name = "Bootstrap support",
                        values = c("#CB1414", "#F0F014", "#098527",
                                   "#18D8F7", "#1B72DB")) +
    # Enlarge the legend keys only; the points on the tree keep size 1.5.
    guides(color = guide_legend(override.aes = list(size = 5)))
p

Question:

I have the following data and plot:

Data:

structure(list(type = c("mut", "mut", "mut", "mut", "mut", "mut", 
"mut", "mut", "gene", "gene", "gene", "gene"), gene = c("gyrA", 
"gyrA", "gyrB", "gyrB", "parC", "parC", "parE", "parE", "qnrA1", 
"qnrA1", "sul3", "sul3"), type2 = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 
2, 2, 2), id = c("2014-01-7234-1-S", "2015-01-3004-1-S", "2014-01-2992-1-S", 
"2016-17-299-1-S", "2015-01-2166-1-S", "2014-01-4651-1-S", "2016-02-514-2-S", 
"2016-02-402-2-S", "2016-02-425-2-S", "2015-01-5140-1-S", "2016-02-522-2-S", 
"2016-02-739-2-S"), result = c("1", "0", "0", "0", "0", "0", 
"1", "1", "0", "0", "0", "1"), species = c("Broiler", "Pig", 
"Broiler", "Red fox", "Pig", "Broiler", "Wild bird", "Wild bird", 
"Wild bird", "Pig", "Wild bird", "Wild bird"), fillcol = c("Broiler_1", 
"Pig_0", "Broiler_0", "Red fox_0", "Pig_0", "Broiler_0", "Wild bird_1", 
"Wild bird_1", "Wild bird_0", "Pig_0", "Wild bird_0", "Wild bird_1"
)), row.names = c(NA, -12L), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), vars = "gene", drop = TRUE, indices = list(
    0:1, 2:3, 4:5, 6:7, 8:9, 10:11), group_sizes = c(2L, 2L, 
2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
    gene = c("gyrA", "gyrB", "parC", "parE", "qnrA1", "sul3")), row.names = c(NA, 
-6L), class = "data.frame", vars = "gene", drop = TRUE, indices = list(
    0:1, 2:3, 4:5, 6:7, 8:9, 10:11), group_sizes = c(2L, 2L, 
2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
    gene = c("gyrA", "gyrB", "parC", "parE", "qnrA1", "sul3")), row.names = c(NA, 
-6L), class = "data.frame", vars = "gene", drop = TRUE)))

Plot:

library(ggplot2)

p1 <- ggplot(test_df, aes(fct_reorder(gene, type2),
             factor(id),
             fill = fillcol,
             alpha = result)) +
  geom_tile(color = "white")+
  theme_minimal()+
  labs(fill = NULL)+
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.3,
                                   size = 7),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        legend.position = "right")+
  guides(alpha = FALSE)+
  coord_fixed()

Additionally, I have the following tree object:

structure(list(edge = structure(c(23L, 23L, 22L, 22L, 21L, 21L, 
20L, 20L, 19L, 19L, 18L, 18L, 17L, 17L, 16L, 16L, 15L, 15L, 14L, 
14L, 13L, 13L, 1L, 3L, 2L, 9L, 22L, 23L, 4L, 5L, 20L, 21L, 11L, 
12L, 18L, 19L, 10L, 17L, 8L, 16L, 6L, 7L, 14L, 15L), .Dim = c(22L, 
2L)), edge.length = c(2, 2, 0, 0, 2.5, 0.5, 2, 2, 0.75, 0.25, 
0.5, 0.5, 2.41666666666667, 0.166666666666667, 3.0625, 0.145833333333333, 
3.38888888888889, 0.326388888888889, 3, 3, 0.5, 0.111111111111111
), tip.label = c("2016-02-425-2-S", "2016-02-522-2-S", "2015-01-2166-1-S", 
"2016-02-402-2-S", "2016-02-514-2-S", "2016-17-299-1-S", "2016-02-739-2-S", 
"2015-01-5140-1-S", "2014-01-2992-1-S", "2014-01-7234-1-S", "2014-01-4651-1-S", 
"2015-01-3004-1-S"), Nnode = 11L), class = "phylo", order = "postorder")

Which is plotted like this:

library(ggtree)

p2 <- ggtree(tree)+
  geom_treescale()+
  geom_tiplab(align = TRUE, linesize = 0, size = 1)+
  xlim(0, 4.2)

What I want to do is to combine the tree and the first plot, and order the y-axis of the first plot according to the order in the tree, so that they match. I have tried to use some of the solutions here, but I can't seem to produce the same plot with the facet_plot function. Is there a way to identify matching values on the y-axis of both plots, and then combine them?

This is how I want it to look (approximately):


Answer:

We need to arrange the tile plot in the same order as the tree plot and then we need to lay the two plots out so they correspond. The first task is relatively straightforward, but I'm not sure how to do the second without some manual tweaking of the layout.

library(tidyverse)
library(ggtree)
library(grid)
library(gridExtra)

# Tree plot with a scale bar and aligned tip labels (no connecting line, since
# linesize = 0); the x-axis runs to 4.2, presumably to leave room for the labels.
p2 <- ggtree(tree)+
  geom_treescale()+
  geom_tiplab(align = TRUE, linesize = 0, size = 3)+
  xlim(0, 4.2)

Now that we've created the tree plot, let's get the ordering of the y axis programmatically. We can do that using ggplot_build to get the plot structure.

# Build the plot so that the computed per-layer data can be inspected.
p2b <- ggplot_build(p2)

We can look at the data for the plot layout by running p2b$data in the console. This outputs a list with the various data frames that represent the plot structure. Looking these over, we can see that the fifth and sixth data frames have the node labels. We'll use the fifth one (p2b$data[[5]]) and order its rows based on the y column to get a vector of node labels (p2b$data[[5]] %>% arrange(y) %>% pull(label)). Then we'll convert test_df$id to a factor variable with this node ordering.

# Recode id as a factor whose levels are the tip labels of the built tree
# plot, sorted by their y position.
test_df <- test_df %>%
  mutate(
    id = factor(
      id,
      levels = p2b$data[[5]] %>% arrange(y) %>% pull(label)
    )
  )

(As another option, you can get the ordering of the nodes directly from p2 with p2$data %>% filter(isTip) %>% arrange(parent) %>% pull(label))

Now we can generate the tile plot p1 with a node order that corresponds to that of the tree plot.

# Tile plot of test_df: genes on the x-axis (ordered by type2) and ids on the
# y-axis in the level order of the id factor; fill and alpha come from
# fillcol and result.
p1 <- ggplot(test_df, aes(fct_reorder(gene, type2),
                          factor(id),
                          fill = fillcol,
                          alpha = result)) +
  geom_tile(color = "white") +
  theme_minimal() +
  labs(fill = NULL) +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.3,
                                   size = 7),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        legend.position = "right") +
  # "none" hides the alpha legend; guides(alpha = FALSE) is deprecated in
  # current ggplot2 releases.
  guides(alpha = "none") +
  coord_fixed()

We can see in the plot below that the labels correspond.

grid.arrange(p2, p1, ncol=2)

Now we need to lay out the two plots with only one set of labels and with the node lines matching up vertically with the tiles. I've done this with some manual tweaking below by creating a nullGrob() (basically a blank space below p1) and adjusting the heights argument to get the alignment. The layout can probably be done programmatically, but that would take some additional grob (graphical object) manipulation.

# Left column: the tree, with a negative right margin (-20) to pull it towards
# the tiles. Right column: the tile plot without y-axis labels, stacked on top
# of an empty nullGrob() that takes 2% of the height; these values were tuned
# by hand to line the tiles up with the tips.
grid.arrange(p2 + theme(plot.margin=margin(0,-20,0,0)),
             arrangeGrob(p1 + theme(axis.text.y=element_blank()), 
                         nullGrob(), 
                         heights=c(0.98,0.02)), 
             ncol=2)

Question:

I am creating tanglegrams with the following code:

library(ggtree)
library(ape)

tree1 <- read.tree(text='(((A:4.2,B:4.2):3.1,C:7.3):6.3,D:13.6);')
tree2 <- read.tree(text='(((B:4.2,A:4.2):3.1,C:7.3):6.3,D:13.6);')

p1 <- ggtree(tree1)
p2 <- ggtree(tree2)

d1 <- p1$data
d2 <- p2$data

d2$x <- max(d2$x) - d2$x + max(d1$x) + 1

pp <- p1 + geom_tree(data=d2)

dd <- bind_rows(d1, d2) %>% 
  filter(!is.na(label))

final_plot <- pp + geom_line(aes(x, y, group=label), data=dd, color='grey')

What I want to do is to color the lines based on the position of the nodes. In other words, if the line is straight, meaning that they have the same position in both trees, the color should be x, while if they have changed, it should be y.

Something like this:

It would also be nice to get a legend for this to explain the colors.


Answer:

You can construct a column in dd that checks whether the line will be horizontal. Here I grouped by label and checked whether the number of unique node ids is 1. Then you map that column to the color aesthetic in the aes of the line.

# For each label, flag whether both endpoints (one per tree) carry the same
# node number; the answer uses this as the test for a horizontal line.
dd <- dd %>%
  group_by(label) %>%
  mutate(is.horiz = n_distinct(node) == 1) %>%
  ungroup()  # drop the grouping so later operations on dd are not per-label

# Connect matching labels across the two trees, coloured by the flag, and
# give the colour scale a legend inside the plot area.
pp +
  geom_line(aes(x, y, group = label, color = is.horiz), data = dd) +
  scale_color_manual(values = c('TRUE' = "lightblue", 'FALSE' = "purple")) +
  theme(legend.position = c(.9, .9)) +
  labs(color = 'Horizontal Nodes')

You can play around with the colors of the lines and the names of everything.

Question:

I am using facet_plot() to add a barplot with trait values next to the tips of my tree. I need there to be a legend for the barplot, but could not find in the documentation or in a similar question how to do that. It seems that facet_plot() makes this a bit trickier.

Here is my code:

library(ggtree)
library(tidyverse)
library(ggstance) # for horizontal versions of geoms

# Create a random 5-tip tree and a long-format trait table with 20 rows:
# tip names "t1".."t5" cycle four times while traits "A".."D" cycle five
# times, so every tip/trait combination appears once. Values are random
# draws, so the plot differs between runs.
# NOTE(review): assumes rtree(5) labels its tips t1..t5 -- confirm against ape.
tree <- rtree(5)
traits <- tibble(
  node  = paste0("t", rep(1:5, 4)),
  trait = rep(LETTERS[1:4], 5),
  value = rnorm(n = 20, mean = 10, sd = 2))

# Tree plot with aligned tip labels. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
treeplot <- ggtree(tree) + geom_tiplab(align = TRUE)

# Add a "Trait" panel with horizontal bars, filled by trait.
facet_plot(treeplot,
           panel = "Trait",
           data = traits,
           geom = geom_barh,
           mapping = aes(x = value, fill = trait),
           stat = "identity")

I've tried to add + guides(fill = guide_legend()) or + scale_fill_discrete(), but to no avail.

How can I add a legend to the Trait facet? (And, in extension, to any additional facet?)


Answer:

We can pass show.legend = TRUE through facet_plot() and add theme(legend.position="bottom") to get the desired plot.

# Same "Trait" bar panel as in the question, but with the legend requested
# explicitly via show.legend and placed underneath the plot.
facet_plot(
  treeplot,
  panel = "Trait",
  data = traits,
  geom = geom_barh,
  mapping = aes(x = value, fill = trait),
  stat = "identity",
  show.legend = TRUE
) +
  theme(legend.position = "bottom")

Question:

I have aligned some amino acid sequences in R and imported the distance matrix (dist_mat) for use in ggtree using tree <- ape::nj(dist_mat). It looks something like this:

    node parent branch.length          x         y     label isTip      branch    angle
1     1     14   0.000000000 0.00000000  3.000000  GAS05134  TRUE 0.000000000  90.0000
2     2     13   0.000000000 0.00000000  2.000000  GAS12252  TRUE 0.000000000  60.0000
3     3     13   0.000000000 0.00000000  1.000000  GAS12271  TRUE 0.000000000  30.0000
4     4     15   0.004565217 0.02000000  4.000000  GAS06216  TRUE 0.017717391 120.0000
5     5     18   0.060110914 0.85012362  7.000000 GAS131472  TRUE 0.820068164 210.0000
6     6     19   0.000000000 0.84990179  8.000000  GAS13399  TRUE 0.849901793 240.0000
7     7     19   0.000000000 0.84990179  9.000000  GAS11282  TRUE 0.849901793 270.0000
8     8     21   0.000000000 0.92485325 11.000000  GAS03101  TRUE 0.924853253 330.0000
9     9     21   0.000000000 0.92485325 12.000000   GAS0354  TRUE 0.924853253 360.0000
10   10     20   0.000000000 0.92485325 10.000000  GAS09426  TRUE 0.924853253 300.0000
11   11     22   0.000000000 0.91032609  5.000000  14GA0305  TRUE 0.910326087 150.0000
12   12     22   0.000000000 0.91032609  6.000000  14GA0286  TRUE 0.910326087 180.0000
13   13     13   0.000000000 0.00000000  2.447917      <NA> FALSE 0.000000000  73.4375
14   14     13   0.000000000 0.00000000  4.343750      <NA> FALSE 0.000000000 130.3125
15   15     14   0.015434783 0.01543478  5.687500      <NA> FALSE 0.007717391 170.6250
16   16     15   0.454136361 0.46957114  7.375000      <NA> FALSE 0.242502963 221.2500
17   17     16   0.031992271 0.50156341  9.250000      <NA> FALSE 0.485567279 277.5000
18   18     17   0.288449292 0.79001271  7.750000      <NA> FALSE 0.645788061 232.5000
19   19     18   0.059889086 0.84990179  8.500000      <NA> FALSE 0.819957250 255.0000
20   20     17   0.423289838 0.92485325 10.750000      <NA> FALSE 0.713208334 322.5000
21   21     20   0.000000000 0.92485325 11.500000      <NA> FALSE 0.924853253 345.0000
22   22     16   0.440754944 0.91032609  5.500000      <NA> FALSE 0.689948615 165.0000

A basic representation in gg_tree looks like this:

> # Circular layout drawn with branch.length = "none", tip labels in blue
> gg_tree <- ggtree(tree, size = 0.2, layout = "circular", branch.length = "none") + geom_tiplab2(color = "blue", size = 3)

I then append some data to add aesthetics from the original data frame:

> gg_tree <- gg_tree %<+% DF  # append DF's columns to the plot so they can be used as aesthetics
> head(DF, 12)  # print the first 12 rows of DF
# A tibble: 12 x 4
   id        emm      tee     `50aa_HVR_peptide`                                
   <chr>     <chr>    <chr>   <chr>                                             
 1 GAS05134  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 2 GAS12252  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 3 GAS12271  emm1.0   tee1    NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 4 GAS06216  emm1.19  tee1    NGDGNLREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA
 5 GAS131472 emm100.0 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSDLKTENSKLT
 6 GAS13399  emm100.5 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT
 7 GAS11282  emm100.5 tee28.1 RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT
 8 GAS03101  emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
 9 GAS0354   emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
10 GAS09426  emm101.0 tee14.2 ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ
11 14GA0305  emm103.0 tee8    DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL
12 14GA0286  emm103.0 tee8    DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL

I would then like to add the information in the "tee" column as coloured text on the outside of the tree, my attempt is as follows:

> gg_tree + geom_text(aes(label = tee, color = tee, angle = angle), size = 3, hjust = -2) +
  theme(legend.position = "right")

As you can see I have tried to use "hjust" to get the "geom_text" layer to not overlap with the "geom_tiplab2" layer, but the distance for each "tee" text to the tip label seems to be dramatically different.

Can anyone suggest how to get the "tee" text to run smoothly around the outside of the tree, following on from the tip labels? Note: this happens with rectangular trees too, not just circular ones.

> sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ggtree_1.10.2       treeio_1.2.1        ggplot2_2.2.1       readxl_1.0.0       
 [5] readr_1.1.1         DECIPHER_2.6.0      RSQLite_2.0         Biostrings_2.46.0  
 [9] XVector_0.18.0      IRanges_2.12.0      S4Vectors_0.16.0    BiocGenerics_0.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14     pillar_1.0.1     compiler_3.4.3   cellranger_1.1.0 plyr_1.8.4      
 [6] tools_3.4.3      zlibbioc_1.24.0  digest_0.6.13    bit_1.1-12       jsonlite_1.5    
[11] memoise_1.1.0    tibble_1.4.1     gtable_0.2.0     nlme_3.1-131     lattice_0.20-35 
[16] pkgconfig_2.0.1  rlang_0.1.6      cli_1.0.0        rstudioapi_0.7   DBI_0.7         
[21] rvcheck_0.0.9    hms_0.4.0        bit64_0.9-7      grid_3.4.3       glue_1.2.0      
[26] R6_2.2.2         purrr_0.2.4      tidyr_0.7.2      blob_1.1.0       magrittr_1.5    
[31] scales_0.5.0     assertthat_0.2.0 colorspace_1.3-2 ape_5.0          labeling_0.3    
[36] utf8_1.1.3       lazyeval_0.2.1   munsell_0.4.3    crayon_1.3.4 

Answer:

hjust and vjust don't work well with coord_polar. One trick to move the geom_text labels away from the center is to add a value to the x coordinate:

library(ggtree)
# Place the tee labels by shifting x outward by 0.4 rather than relying on a
# large hjust; hjust = 0 anchors each label at its start.
gg_tree +
  geom_text(
    aes(x = x + 0.4, label = tee, color = tee, angle = angle),
    size = 3,
    hjust = 0
  ) +
  theme(legend.position = "right")

To install ggtree:

# Run the Bioconductor installer script from the web, then use biocLite()
# to install ggtree.
# NOTE(review): biocLite appears to be the legacy installer; newer
# Bioconductor releases use BiocManager::install("ggtree") -- confirm for
# your R version before running.
source("https://bioconductor.org/biocLite.R")
biocLite("ggtree")

used data:

> dput(DF)
structure(list(id = structure(c(5L, 9L, 10L, 6L, 11L, 12L, 8L, 
3L, 4L, 7L, 2L, 1L), .Label = c("14GA0286", "14GA0305", "GAS03101", 
"GAS0354", "GAS05134", "GAS06216", "GAS09426", "GAS11282", "GAS12252", 
"GAS12271", "GAS131472", "GAS13399"), class = "factor"), emm = structure(c(1L, 
1L, 1L, 2L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L), .Label = c("emm1.0", 
"emm1.19", "emm100.0", "emm100.5", "emm101.0", "emm103.0"), class = "factor"), 
    tee = structure(c(1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 
    4L, 4L), .Label = c("tee1", "tee14.2", "tee28.1", "tee8"), class = "factor"), 
    X.50aa_HVR_peptide. = structure(c(4L, 4L, 4L, 3L, 5L, 6L, 
    6L, 1L, 1L, 1L, 2L, 2L), .Label = c("ADHPSYTAAKDEVLSKFSVPGHVWAHEREKNDKLSSENEGLKAGLQEKEQ", 
    "DSPRDVTSDLTTSMWKKKAEEAEAKASKFEKQLEDYKKAQKDYYEIEEKL", "NGDGNLREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA", 
    "NGDGNPREVIEDLAANNPAIQNIRLRHENKDLKARLENAMEVAGRDFKRA", "RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSDLKTENSKLT", 
    "RVTTRSQAQDAAGLKEKADKYEVRNHELEHNNEKLKTENSKLTSEKEELT"), class = "factor")), .Names = c("id", 
"emm", "tee", "X.50aa_HVR_peptide."), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))

> dput(tree)
structure(list(node = 1:22, parent = c(14L, 13L, 13L, 15L, 18L, 
19L, 19L, 21L, 21L, 20L, 22L, 22L, 13L, 13L, 14L, 15L, 16L, 17L, 
18L, 17L, 20L, 16L), branch.length = c(0, 0, 0, 0.004565217, 
0.060110914, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015434783, 0.454136361, 
0.031992271, 0.288449292, 0.059889086, 0.423289838, 0, 0.440754944
), x = c(0, 0, 0, 0.02, 0.85012362, 0.84990179, 0.84990179, 0.92485325, 
0.92485325, 0.92485325, 0.91032609, 0.91032609, 0, 0, 0.01543478, 
0.46957114, 0.50156341, 0.79001271, 0.84990179, 0.92485325, 0.92485325, 
0.91032609), y = c(3, 2, 1, 4, 7, 8, 9, 11, 12, 10, 5, 6, 2.447917, 
4.34375, 5.6875, 7.375, 9.25, 7.75, 8.5, 10.75, 11.5, 5.5), label = structure(c(6L, 
10L, 11L, 7L, 12L, 13L, 9L, 4L, 5L, 8L, 3L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("<NA>", "14GA0286", "14GA0305", 
"GAS03101", "GAS0354", "GAS05134", "GAS06216", "GAS09426", "GAS11282", 
"GAS12252", "GAS12271", "GAS131472", "GAS13399"), class = "factor"), 
    isTip = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE), branch = c(0, 0, 0, 0.017717391, 
    0.820068164, 0.849901793, 0.849901793, 0.924853253, 0.924853253, 
    0.924853253, 0.910326087, 0.910326087, 0, 0, 0.007717391, 
    0.242502963, 0.485567279, 0.645788061, 0.81995725, 0.713208334, 
    0.924853253, 0.689948615), angle = c(90, 60, 30, 120, 210, 
    240, 270, 330, 360, 300, 150, 180, 73.4375, 130.3125, 170.625, 
    221.25, 277.5, 232.5, 255, 322.5, 345, 165)), .Names = c("node", 
"parent", "branch.length", "x", "y", "label", "isTip", "branch", 
"angle"), class = "data.frame", row.names = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22"))

Question:

I am trying to plot a circular phylogenetic tree with bootstrap labeled nodes and user defined/colored tip labels. I got the bootstrap results and labels to work properly, but somehow I just couldn't fully draw the plot in my graphic device. There were always labels (which radiated away from the center of the circular phylo tree) missing portions of themselves. The only way was to set the tip label size to be very very small.

The DNAbin file, on which the distance matrix and bootstrap were based, consists of 82 strains of microbial data (812aa). Tip label was colored based on a separate attribute file.

library(ape)     # read.dna(), dist.dna(), nj(), boot.phylo()
library(ggtree)  # ggtree(), apeBoot(), geom_text2(), geom_tiplab()

# Read the FASTA file, compute pairwise distances and build a
# neighbour-joining tree from them.
dna_b <- read.dna("filename.txt", format = "fasta")
dist.b <- dist.dna(dna_b)
tree_b <- nj(dist.b)

# Bootstrap the tree with 1000 replicates. FUN must rebuild a tree from the
# resampled sequences the same way tree_b was built: distances, then NJ.
bp_b <- boot.phylo(tree_b, dna_b, FUN = function(xx) nj(dist.dna(xx)), B = 1000)

# Combine the tree with its bootstrap values so they can be plotted.
tree_b2 <- apeBoot(tree_b, bp_b)

# Circular tree without branch lengths, bootstrap values as node text and
# tip labels rotated by the layout's angle column.
phylo_b <- ggtree(tree_b2, layout = "circular", size = 0.02, branch.length = "none") +
  geom_text2(aes(label = bootstrap), size = 2) +
  geom_tiplab(aes(angle = angle))
# I changed the tiplab to original so using user defined attribute data was not required (the next line).
#phylo_b<-phylo_b %<+% isolate_attr +geom_tiplab(aes(color=tr_level,angle=angle),size=1)
phylo_b +
  theme(legend.position = "right", legend.text = element_text(size = 3)) +
  guides(colour = guide_legend(override.aes = list(size = 3)))

I tried setting new graphic devices with bigger width and height but to no avail. Adjusting the margin didn't work either. By setting the tiplab size to smaller than 0.05 I could get the full plot, but that's really not optimal. Could really appreciate some help here.

Session info:

R version 3.3.3 (2017-03-06)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.936 
[2] LC_CTYPE=Chinese (Simplified)_China.936   
[3] LC_MONETARY=Chinese (Simplified)_China.936
[4] LC_NUMERIC=C                              
[5] LC_TIME=Chinese (Simplified)_China.936    

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
[1] ggtree_1.6.11       ggplot2_2.2.1       S4Vectors_0.12.2   
[4] BiocGenerics_0.20.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.11     lattice_0.20-35  ape_4.1          tidyr_0.6.3     
 [5] digest_0.6.12    grid_3.3.3       plyr_1.8.4       jsonlite_1.5    
 [9] nlme_3.1-131     gtable_0.2.0     magrittr_1.5     scales_0.4.1    
[13] rlang_0.1.1      lazyeval_0.2.0   labeling_0.3     tools_3.3.3     
[17] munsell_0.4.3    colorspace_1.3-2 tibble_1.3.3   

Rstudio version 1.0.136

Revised: As pointed out earlier, data should have been provided to reproduce the problem. I am attaching part of the data here, full data is too big. I tried using stringi command to randomly generate dna sequences, but random sequences often result in NaNs in the dist file. The following data was directly copied from the txt file referenced in the first line of code, with part of the name redacted~

Fasta Data (b):

>Bacillus_somenameherethatsverylone_R2_1_anothernumber  
AACTACGCTCGCGTATCGGATTATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTATGGAGCCAGCCGCCTAAGTGGGACAGATGATTGGGGTGAAGTCGTACAAGGTACCCAAA

>Bacillus_sp._R2_8_anothernumber    
TTACTATGATAGCGTGTCGGATTATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCTTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCACACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGTGAATACGTTCCCGGGCCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAGCACCCGAAGTCGTGACGTAACCTTCATGGAGCCAGTCAGCCTATGTGGGACAGATGATTGGGGTGAGTCGTACCAAAGTAAACACA

>Bacillus_sp._R2_9_anothernumber
GTTACCCCTGACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCTTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTCATGTAGTCAGCCGCCTATGTGGGACAGATGATTGGGGTGAGTCGTACCAAGGAAAGCCAGC

>Bacillus_someotherlongname_R2_21_anothernumber
TTACTTTTCTACGTATCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTATGGAGCCAGCCGCCTATGTGGGACAGATGATTGGGGTGAAGTCGTACCAAGGTAGCCCATA

>Bacillus_sp._R2_42_anothernumber   
ATTCTTTCTCGCGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCTTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAAGTGGGACAGATGATTGGGGTGAAGTCGTACCACAGGTTAACCA

>Bacillus_sp._R2_75_anothernumber   
TCTTCAACCGCGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTTTTGCCCTCCCTAGAGATAGGGACTTCCCTTCGGGGACAAAAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTTAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGAGCTGCAAACCCGCGAGGGTAAGCGAATCTCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGTGGATCAGCATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCGCAAGGAGCCAGCCGCCTAAAGTGGGACAGATGATTGGGGTGAGTCGACACAGGGGAAACCCA

>Bacillus_sp._R2_106_anothernumber
GCGGATATGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTTAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGGATGGTACAAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCCGGGCCTTGTACACACCCGCCCCGTCACACCACGAGAAGTTTGTAACACCCGAAAGTCGGGTGAGGTAACCCTTTTATGG

>Bacillus_somelongnameagain_R2_115_anothernumber
GTCGAATTATGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCTTGTACAT

>Bacillus_sp._R2_116_anothernumber  
AAAACAGTGTTTGCTTTTTATTCAGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACCCTAGAGATAGGGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACTTTATGGAGCCAGCCGCCTAAGTGGGACAGATGATTGGGGTGAAGTCGTACCAAGGTAAGCCAT

>Bacillus_sp._R3_1_anothernumber    
GTAGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAAACTCGCCTACATGAAAGCCGGAAATCGCTAGTTATTCGCGGATCAGCATGCCGCGGTTGAATACGTTCCCCGGGCCTTGTAACACACCCGCCCCGTCACACCACGAGAGTTTTGTAACACCCGAAAGTCGGTGGAGGGTAACCCTTTTTATTG

>Bacillus_cereus_R3_2_anothernumber
GTAAGCATTTGGATGCATCTGGAATCGGTATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACCCTAGAGATAGGGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTTATGGAGCCAGCCGCCTAAAGTGGGACAGATGATTGGGGGTGAAGTCGTAACAAGGGTAGCC

>Bacillus_sp._R3_19_anothernumber
AACGGTTCAGCCACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGTGGGACAGATGATTGGGGTGAAGTCGTACAACAGGAGGTAACA

>Bacillus_sp._R3_20_anothernumber
GGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAAATAACGTTCCCGGGGCCTTGTACACACCGCCCCGTCACACCACGAGAGTTTGTAAACAACCCGAAAGTCGGGTGAGGTAACCCTTTATGGGAGCCAGCCGCCTTAAAGGTGGAACAGATGAATTGGGGTGAAAGTCGTAACAAGGGTAAGCCAA

>Bacillus_sp._R3_22_anothernumber
TCGGTACACTCACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGGGGGGACAGATGATTGGGGTGAAGTCGACAACAAAGGGTATAACA

>Bacillus_sp._R3_74_anothernumber
AGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGACCTTGTACACACCGCTCCGTCACACCACGAGAGTTTTGTTACTACCCGAAAGTCGGTGGGGGTAACCTTTATGGGAGTCAGGCCGCCTTAAGGTGGGACAGAATGATTGGGGGGTGAAAGTTCGTTAACCAAGGGTTAGCCAAATT

>Bacillus_sp._R3_76_anothernumber
AACACGTGACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGTGGACAGATGATGGGGTGAGCTAC

>Bacillus_sp._R3_89_anothernumber
TCGTCTACCGCACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGGTGGGACAGATGATTGGGGTGAAGTCGACACA

>Bacillus_sp._R4_18_anothernumber
GGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTTCGGATTGTGGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGAGACCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTACACCCGAAGTCGGTGGAGTAACCCTTTATGGAGTCCAGCGCCTAAGTGGGACAGATGATTGGGGGTGAAGTCGTACCAGTACCCATCT

>Bacillus_someotherlongname_R4_23_anothernumber
AGGTACAGTAGACGTATCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACTTTATGGAGCAGCCGCCTAAGGTGGGACAGATGATTGGGGTGAAGTCGTAACAAGGTAGCCA

>Bacillus_sp._R4_38_anothernumber
AAACTCTTTGTTTACTTATACCACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTTTTGCCCTCCCTAGAGATAGGGACTTCCCTTCGGGGACAAAAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTTAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGAGCTGCGAACCCGCGAGGGTAAGCGAATCTCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCGCAAGGAGTCAGCCGCCTAAGTGGGACAGATGATTGGGGTGAAGTCGTACACAAGTAAAACT

>Bacillus_somelongname_R5_1_anothernumber
CGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACCCTAGAGATAGGGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGGAACGCTAGTAATCGCGGATCACATGCCGCGGGGAATACGTTCCCGGGCCTTGTACACACCCGCCCGTCACACCACGAGAGTTTTGTTACACCCGAAATTCGGTGGGGTAACCTTCATGGGAGCCAGCCCGCCTAAGGGGGGACAGAATGATTGGGTGAAGTC

>Bacillus_sp._R5_6_anothernumber
TGAATAACAAACGACGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCTCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGGTGGGACAGATGATTGGGGTGAAGTCGTACAC

>Bacillus_sp._R5_8_anothernumber
TTAGGATATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCGTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAGCCATTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGA

>Bacillus_sp._R5_10_anothernumber
TTACTTATGTGACGTGTCGGATTATTGGGCGTAAGCGCGCGCAGGTGGTTCCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGACAACCCTAGAGATAGGGCTTTCCCCTTCGGGGGACAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCAGCATTCAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGATGGTACAAAGGGCTGCAAACCTGCGAAGGTAAGCGAATCCCATAAGCCATTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTATGGAGCCAGCCGCCTAAGGGGGGACAGATGATTGGGGTGAAGTCGTACACAAGGTAAACTT

>Bacillus_somelongnamehere_R5_11_anothernumber
ATACTAACTTCTCACGTACGGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTAGAGGGTTTCCGCCCTTTAGTGCTGAAGTTAACGCATTAAGCACTCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTCTGAAAACTCTAGAGATAGAGCTTCTCCTTCGGGAGCAGAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCTTAGTTGCCATCATTAAGTTGGGCACTCTAAGGTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAAGAGCTGCAAGACCGCGAGGTGGAGCTAATCTCATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCTGGAATCGCTAGTAATCGCGGATCATCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTATGGAGCCAGCCGCCTAAGTGGGACAGATGATTGGGGTGAAGTCGTACAAGGGTAGCCATAAC

>Bacillus_longnamehere_R5_12_anothernumber
TACTTTCCCGCGTGTCGGATATTGGGCGTAAGCGCGCGCAGGTGGTTTCTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGGAAACTTGAGTGCAGAAGAGGATAGTGGAATTCCAAGTGTAGCGGTGAAATGCGTAGAGATTTGGAGGAACACCAGTGGCGAAGGCGACTATCTGGTCTGTAACTGACACTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGGGGGTTTCCGCCCCTCAGTGCTGCAGCTAACGCATTAAGCACTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCATTGACCACTGTAGAGATACAGTTTTCCCTTCGGGGACAACGGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATTCTTAGTTGCCATCATTTAGTTGGGCACTCTAAGGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGACGGTACAAACGGTTGCCAACCCGCGAGGGGGAGCTAATCCGATAAAACCGTTCTCAGTTCGGATTGTAGGCTGCAACTCGCCTACATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGGGGTAACCTTTATGGAGCCAGCCGCCGAAGTGGGATAGATGATTGGGGTGAAGTCGTACCAAGGTACTAA


Answer:

Have you tried manually adjusting with xlim(xmin, xmax)?

Example:

# Set the x-axis limits manually; adjust the two values to suit your tree.
tree <- rtree(10)
ggtree(tree) + xlim(-10, 10)

Question:

My objective is to pass lists as arguments to the function geom_point2 using lapply or, analogously, mapply. In similar situations, I had success passing a list (or lists) to geom_cladelabel as in:

mapply(function (x,y,z,w,v,u,t,s) geom_cladelabel(node=x, label=y,
align=F, etc. # Where x y z etc are lists. 

The problem is related to the use of aes inside geom_point2 (not in geom_cladelabel):

In the case of geom_point2, the node info is inside aes, and I couldn't do it. Normally I do not get any error message, but it does not work.

The objective is to make this example work, but using mapply instead of writing geom_point2 two times.

# Installation commands, left commented out:
# source("https://bioconductor.org/biocLite.R")
# biocLite("ggtree")
library(ggtree)
library(ape)

# Standard code: one explicit geom_point2() layer per highlighted node.
newstree <- rtree(10)
node1 <- getMRCA(newstree, c(1, 2))
node2 <- getMRCA(newstree, c(3, 4))

ggtree(newstree) +
  geom_point2(aes(subset = node == node1), fill = "black", size = 3, shape = 23) +
  geom_point2(aes(subset = node == node2), fill = "green", size = 3, shape = 23)

# Desired: replace the two geom_point2() layers above with a single
# mapply()/lapply() call. The attempt below is kept exactly as tried:
#lapply(c(node1,node2), function (x) geom_point2(aes(subset=(node=x)))))


Answer:

Here is a solution calling geom_point2 using mapply:

library(ggtree)

# Build one geom_point2() layer per (node, fill, shape) triple and add the
# resulting list of layers to the tree plot in a single `+`.
ggtree(rtree(10)) +
  mapply(
    function(x, y, z) {
      geom_point2(
        # paste() puts the value of x into the expression text (e.g.
        # "node == 11"), so the aesthetic does not refer to the symbol x.
        aes_string(subset = paste("node ==", x)),
        fill = y,
        size = 10,
        shape = z
      )
    },
    x = c(11, 12),
    y = c("green", "firebrick"),
    z = c(23, 24),
    # Always return a plain list of layers instead of relying on mapply()'s
    # default simplification leaving the result untouched.
    SIMPLIFY = FALSE
  ) +
  geom_text2(aes(subset = !isTip, label = node))

The solution is in the aes_string(), which writes the value of x directly in the aesthetics. The default aes() does not pass on the value of x, but just the string "x". When plotting, ggtree then looks for a node called "x", and ends with an empty node list. I guess this has to do with the variable being stored in the mapply-environment and not being passed on to the plot.

PS: Sorry for my too quick answer with do.call() earlier. It is useful, but off-topic here.

Question:

I have a phylogenetic tree of the class phylo with 24 tips and 23 internal nodes. I ran a bootstrap analysis on this tree and the data using boot.phylo, which returned a vector of 23 bootstrap values. I created a ggtree object from my original tree and am now trying to add the bootstrap values to the nodes. I am doing something wrong but I don't know what.

Here is what I did:

# Build the ggtree plot object from the original tree.
gg.tr <- ggtree(mp.tree)
# This is the call that raises the error quoted below; label is given the
# vector bphylo$BP directly.
gg.tr + geom_label2(aes(subset=!isTip, label=bphylo$BP))

bphylo$BP is the vector of 23 bootstrap values. When I run this code, I get the following error:

Error: Aesthetics must be either length 1 or the same as the data (47): subset, label, x, y

I don't understand this error, because I only want to put the bootstrap values on 23 of the possible 47 locations.

When I call the following function, I get a value of 23:

length(which(gg.tr$data$isTip==FALSE))

If the length of gg.tr$data$isTip==FALSE is 23 and I have 23 bootstrap values, why am I getting an error telling me that my label is the wrong length?


Answer:

You can annotate your tree with geom_text. Without seeing your data, it's hard to know what is happening, but here is an example with a reproducible dataset.

# Installation of ggtree from GitHub, left commented out so that the script
# does not reinstall the package on every run:
# devtools::install_github("GuangchuangYu/ggtree")
library(ape)
library(ggtree)

# Seed the RNG before the first random draw so that the tree and all simulated
# support values are reproducible. The tree is drawn first, so it is the same
# tree that set.seed(0) followed by rtree(30) always produces.
set.seed(0)
tree <- rtree(30)

# Simulated bootstrap values, one per tip label.
bs <- data.frame(
  nodename = c(
    "t30", "t12", "t22", "t26", "t6", "t17", "t4", "t7", "t9", "t1",
    "t8", "t25", "t23", "t28", "t10", "t20", "t3", "t11", "t19", "t29",
    "t2", "t18", "t24", "t27", "t15", "t13", "t14", "t16", "t5", "t21"
  ),
  bootstrap = round(runif(30, 85, 98), digits = 0),
  stringsAsFactors = FALSE
)

p <- ggtree(tree)

# One simulated support value per row of the plot data, so the label vector
# has the same length as the data it is mapped onto.
nodesupport <- round(runif(nrow(p$data), 65, 80), digits = 0)

# Attach the bootstrap table to the plot, then label the tips.
p <- p %<+% bs + geom_tiplab()
p <- p +
  geom_text(aes(label = bootstrap), hjust = 1, vjust = -0.4, size = 3) +
  geom_nodelab(aes(label = nodesupport)) # specify your node label here, looks like BP
p