2013-02-11 71 views
2

我有这个数据框(问题标题:在 ggplot2 中增加图例项):

head(x)

 Date Company Region Units 
1 1/1/2012 Gateway America  0 
2 1/1/2012 Gateway Europe  0 
3 1/1/2012 Gateway America  0 
4 1/1/2012 Gateway Americas  0 
5 1/1/2012 Gateway Europe  0 
6 1/1/2012 Gateway Pacific  0 

dput(x)

structure(list(Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1/1/2012", 
"1/12/2012", "1/2/2012"), class = "factor"), Company = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Gateway", "HP", "IBM"), class = "factor"), 
    Region = structure(c(1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 
    1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 
    2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 
    3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 
    4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 
    2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L, 1L, 3L, 1L, 2L, 3L, 4L, 2L 
    ), .Label = c("America", "Americas", "Europe", "Pacific"), class = "factor"), 
    Units = c(1L, 3L, 1L, 6L, 20L, 2L, 2L, 10L, 2L, 1L, 2L, 4L, 
    6L, 30L, 2L, 15L, 10L, 3L, 4L, 7L, 9L, 12L, 34L, 50L, 3L, 
    2L, 4L, 3L, 1L, 3L, 3L, 1L, 4L, 0L, 1L, 0L, 0L, 1L, 0L, 4L, 
    0L, 0L, 0L, 0L, 5L, 0L, 8L, 0L, 0L, 0L, 0L, 0L, 9L, 0L, 56L, 
    10L, 0L, 0L, 5L, 7L, 0L, 0L, 8L, 0L, 2L, 0L, 4L, 0L, 5L, 
    7L, 0L, 0L, 8L, 10L, 0L, 6L, 0L, 4L, 4L, 0L, 2L, 0L, 5L, 
    0L)), .Names = c("Date", "Company", "Region", "Units"), class = "data.frame", row.names = c(NA, 
-84L)) 

我想绘制一张热图(heat map):

# Heat map of Units for each Date/Company tile, one panel per Region,
# shaded on a continuous gradient from white (low) to red (high).
ggplot(x, aes(x = Date, y = Company, fill = Units)) +
  geom_tile() +
  facet_grid(~ Region) +
  scale_fill_gradient(low = "white", high = "red")

这条命令可以运行,但我希望能使用白色和红色以外的其他颜色,并增加图例中的级别数量。目前默认有 5 个图例项,我想增加到 10 个。0 应显示为白色,其他值的颜色应与白色有明显区别,以便用户能够注意到。

我该如何使用ggplot增加图例值的数量并为每个图例分配不同的颜色?

+0

几乎所有的值都是零。我不认为,使用这个数据集,任何数量的颜色都会产生变化。或者这是一个测试数据集? – Arun 2013-02-11 15:14:22

+0

@阿伦,我刚刚更新了dput。它是实际数据的一部分。 – user1471980 2013-02-11 15:17:08

+0

是的,不过 `table(x$Units)` 仍然给出 `c(75, 4, 1, 4)`,分别是 0、2、4 和 10 的个数。0 还是太多了。 – Arun 2013-02-11 15:26:44

回答

3

我发现使用分位数(quantiles)来绘制热图非常有用,正如这篇博客中所做的那样。这有助于生成倾斜的(skewed)颜色集(如博客所示)。假设你的数据与示例类似(0 的占比相当高),那么通过计算合适的分位数,我们可以创建一个带有合适标签的倾斜色图,视觉效果好且信息丰富。我修改了该博客中的代码以适用于这个问题,并添加了更多解释。所有的想法和实现都应归功于那篇博客文章。

在进入代码之前,我们必须对您的数据使用quantiles进行一些分析,以查看要使用的分位数。通过这样做:

# Inspect 25 evenly spaced quantiles of Units (0% to 100%) to see where the
# run of zeros ends before choosing the class boundaries.
quantile(x$Units, probs = seq(0, 1, length.out = 25))

#  0% 4.166667% 8.333333%  12.5% 16.66667% 20.83333%  25% 29.16667% 33.33333% 
# 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 
# 37.5% 41.66667% 45.83333%  50% 54.16667% 58.33333%  62.5% 66.66667% 70.83333% 
# 1.00000 1.00000 2.00000 2.00000 3.00000 3.00000 4.00000 4.00000 5.00000 
#  75% 79.16667% 83.33333%  87.5% 91.66667% 95.83333%  100% 
# 6.00000 7.00000 8.00000 9.62500 10.16667 25.41667 56.00000 

可以看到,0% 分位数对应 Units = 0,并且一直到 33%(准确地说是 33.33%)分位数的值都是 0。所以,我们可以选择 38% 作为下一个分位点,然后依次取 60%、75%、90%,最后以 100% 结束。这样我们就有了足够多的级别,而且这些级别对你的数据来说是有意义的。

我们将需要zoo包来完成此操作。现在让我们来构建数据:

# library() stops with a clear error if a package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(zoo)      # for rollapply
library(ggplot2)

# Quantile cut points used to sort Units into classes.
qtiles <- quantile(x$Units, probs = c(0, 38, 60, 75, 90, 100) / 100)
# Number of classes = number of intervals between consecutive cut points.
n_classes <- length(qtiles) - 1

# One colour per class, interpolated from #3794bf through white to #df8640.
c_pal <- colorRampPalette(c("#3794bf", "#FFFFFF", "#df8640"))(n_classes)

# Because the fill uses quantile classes, build one "lower : upper" legend
# label for each consecutive pair of (rounded) cut points.
labels <- rollapply(round(qtiles, 2), width = 2, by = 1,
                    FUN = function(i) paste(i, collapse = " : "))

# Index of the quantile interval each Units value falls into.
# all.inside = TRUE keeps every index within 1..n_classes, so the maximum
# lands in the last class instead of a class of its own.
x$q.units <- findInterval(x$Units, qtiles, all.inside = TRUE)

# Now plot. The fill factor is given all class levels explicitly and the
# scale uses drop = FALSE, so c_pal and labels stay aligned with their
# classes even when some class has no observations.
p <- ggplot(
  data = x,
  aes(x = Date, y = Company,
      fill = factor(q.units, levels = seq_len(n_classes)))
)
p <- p + geom_tile(color = "black")
p <- p + scale_fill_manual(values = c_pal, name = "", labels = labels,
                           drop = FALSE)
p <- p + facet_grid(~ Region)
# Rotate the date labels so they do not overlap.
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p

你会得到这样的图:(图片:ggplot2_heatmap_skewed)

希望这对你有帮助。

编辑:您还可以访问colorbrewer2.org以获得不错的调色板和自己设置颜色。例如:

# Alternative five-colour palettes to try: keep whichever one you prefer.
# Run as written, the second assignment is the one that stays in effect.
c_pal <- c(
  "#EDF8FB", "#B3CDE3", "#8C96C6", "#8856A7", "#810F7C"
)
c_pal <- c(
  "#FFFFB2", "#FECC5C", "#FD8D3C", "#F03B20", "#BD0026"
)

另外,可以尝试在代码中通过 geom_tile(color = "black", alpha = 0.5) 设置 alpha(透明度)。

+0

哇,这真的很棒。非常感谢。 – user1471980 2013-02-11 16:38:47

+0

您可能希望先执行 `x$Date <- factor(x$Date, levels = c("1/1/2012", "1/2/2012", "1/12/2012"), ordered = TRUE)`,以便日期按正确顺序排列(或者将日期转换为真正的日期类型)。 – Arun 2013-02-11 16:42:36

+0

这很好。我有一个问题。如果你通过R脚本来做这件事,你会如何选择分位数? – user1471980 2013-02-11 17:20:45