本文目录一览:
R语言1----桑基(sankey diagram)图的绘制--sankeyD3
实例分析:
### 安装与加载包
# Install devtools and sankeyD3 only when they are missing, so re-running the
# script does not reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("sankeyD3", quietly = TRUE)) {
  devtools::install_github("fbreitwieser/sankeyD3")
}
library(sankeyD3)
第一个为链接数据框 links(起点、靶点、权重、链接的特征1、链接的特征2……);
然后根据links构建第二个数据框,即节点数据框nodes(起点与靶点、点的特征1、点的特征2……)
# Node table: one row per distinct name appearing as a link source or target.
# Base unique() is used so no pipe operator has to be attached.
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)
然后基于nodes数据框构建links中节点的唯一标识符ID,而非根据节点的name
# Zero-based row index of each endpoint within nodes$name (match() is 1-based,
# hence the "- 1").
links$IDsource <- match(links$source, nodes$name) - 1
links$IDtarget <- match(links$target, nodes$name) - 1
# Basic Sankey diagram coloured with the d3 category-10 ordinal scale.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "weight",
  NodeID = "name",
  units = 'TWh',
  numberFormat = ".0f",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);"),
  nodeWidth = 10,
  fontSize = 8,
  width = 300,
  height = 300
)
# Assign each node a random colour from a fixed palette; replace this with any
# custom per-node colour vector as needed.
nodes$color <- sample(
  c("red", "orange", "blue", "green"),
  nrow(nodes),
  replace = TRUE
)
# Sankey diagram whose node colours are read from the "color" column of nodes.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "weight",
  NodeID = "name",
  NodeColor = "color",
  units = 'TWh',
  numberFormat = ".0f",
  nodeWidth = 10,
  fontSize = 8,
  width = 300,
  height = 300
)
也可以根据节点自定义的分类对节点进行颜色的绘制
# Classify nodes into three groups: every node starts in the default group,
# then the listed city and occupation names are reassigned.
nodes$group <- rep("水果", nrow(nodes))
nodes$group[nodes$name %in% c("上海", "深圳", "北京", "南京")] <- "城市"
nodes$group[
  nodes$name %in% c("律师", "老师", "白领", "公务员", "记者", "化妆师")
] <- "职业"
# Sankey diagram with nodes coloured by their "group" column.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "weight",
  NodeID = "name",
  NodeGroup = "group",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);"),
  units = 'TWh',
  numberFormat = ".0f",
  nodeWidth = 10,
  fontSize = 8,
  width = 300,
  height = 300
)
对于缎带的颜色设置同理也可以对其进行分组颜色设置(这里按照其统计量进行分组设置,当然也可以按照其他进行分组)
# Bin links by weight: "A" by default, "B" for 100 <= weight < 500,
# "C" for weight < 100.
# NOTE(review): the comparison operators were lost from the original text;
# the thresholds below are reconstructed -- confirm against the intended bins.
links$group <- rep("A", nrow(links))
links$group[links$weight < 500 & links$weight >= 100] <- "B"
links$group[links$weight < 100] <- "C"
# Sankey diagram with both nodes and links coloured by their "group" columns.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "weight",
  NodeID = "name",
  NodeGroup = "group",
  LinkGroup = "group",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);"),
  units = 'TWh',
  numberFormat = ".0f",
  nodeWidth = 10,
  fontSize = 8,
  width = 300,
  height = 300
)
有时候想要缎带根据其宽度进行一定透明度的变化,可以使用 linkType="path1"参数进行设置
# webshot is used below to capture the saved HTML page as a PDF.
# Install it (and PhantomJS) only when missing, and attach it once.
if (!requireNamespace("webshot", quietly = TRUE)) {
  install.packages("webshot")
}
library(webshot)
if (!is_phantomjs_installed()) {
  install_phantomjs()
}
# Build the Sankey widget and keep it in `p` so it can be written to disk.
p <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "weight",
  NodeID = "name",
  nodeWidth = 10,
  units = "TWh",
  numberFormat = ".0f",
  fontSize = 8,
  height = 300,
  width = 300,
  NodeGroup = "group",
  LinkGroup = "group",
  colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);")
)

### Save the result as a PDF
# Write the widget to an HTML file first, then capture that page as a PDF.
saveNetwork(p, "sankey.html")
webshot("sankey.html", "sankey.pdf")
在线绘制桑基图
现在许多文章都利用桑基图来展示ceRNA网络、功能富集等图,也有很多R包专门可以用来画桑基图,不过我在这里发现ggplot2包非常强大,也可以画桑基图,现在来给大家介绍一下
首先加载需要的R包
###########加载
library(ggplot2)
library(ggalluvial)
library(RColorBrewer)
这里以miRNA-mRNA的关系为例,数据格式如下
########## Reshape
# Value plotted on the y axis for each row; usually left at 1.
miRNA_mRNA$Freq <- 1
# Convert to long (lodes) form, which is easier to plot.
miRNA_mRNA_long <- to_lodes_form(
  miRNA_mRNA,
  axes = 1:2, # columns 1 and 2 (miRNA and mRNA) become the axes
  id = "Cohort" # column that identifies each original row
)
下面就可以开始画桑基图了,主要需要ggplot的geom_flow和geom_stratum两个函数
geom_flow--画流动图
我们看一下只画geom_flow的效果
########### geom_flow
# Flows only, with stratum labels. `levels` is spelled out in full instead of
# relying on partial argument matching (`level`).
ggplot(
  miRNA_mRNA_long,
  aes(
    x = factor(x, levels = c("miRNA", "SYMBOL")),
    y = Freq,
    stratum = stratum,
    alluvium = Cohort,
    fill = stratum,
    label = stratum
  )
) +
  geom_flow(width = 1 / 3) + # draw the flows
  geom_text(stat = "stratum", size = 3) + # add the stratum names
  scale_x_discrete(limits = c()) + # c() is NULL, so default limits are kept
  theme_bw() + # base theme
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  ) +
  # 20 fill colours interpolated from the 8-colour "Accent" palette
  scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(20))
geom_stratum--画冲击图
我们看一下只画geom_stratum的效果
########### geom_stratum
# Strata only, with stratum labels. `levels` is spelled out in full instead of
# relying on partial argument matching (`level`).
ggplot(
  miRNA_mRNA_long,
  aes(
    x = factor(x, levels = c("miRNA", "SYMBOL")),
    y = Freq,
    stratum = stratum,
    alluvium = Cohort,
    fill = stratum,
    label = stratum
  )
) +
  # draw the strata boxes with a solid black outline
  geom_stratum(
    width = 1 / 3,
    linetype = 1,
    size = 0.5,
    alpha = 0.5,
    color = "black"
  ) +
  geom_text(stat = "stratum", size = 3) + # add the stratum names
  scale_x_discrete(limits = c()) + # c() is NULL, so default limits are kept
  theme_bw() + # base theme
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  ) +
  # 20 fill colours interpolated from the 8-colour "Accent" palette
  scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(20))
整合后
########### Plot
# Flows and strata combined. `levels` is spelled out in full instead of
# relying on partial argument matching (`level`).
ggplot(
  miRNA_mRNA_long,
  aes(
    x = factor(x, levels = c("miRNA", "SYMBOL")),
    y = Freq,
    stratum = stratum,
    alluvium = Cohort,
    fill = stratum,
    label = stratum
  )
) +
  geom_flow(width = 1 / 3) + # draw the flows
  # draw the strata boxes with a solid black outline
  geom_stratum(
    width = 1 / 3,
    linetype = 1,
    size = 0.5,
    alpha = 0.5,
    color = "black"
  ) +
  geom_text(stat = "stratum", size = 3) + # add the stratum names
  scale_x_discrete(limits = c()) + # c() is NULL, so default limits are kept
  theme_bw() + # base theme
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank() # remove the panel border
  ) +
  # 20 fill colours interpolated from the 8-colour "Accent" palette
  scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(20))
改变边框模式linetype
########### Plot
# Same combined plot, but with linetype = 0 (blank) for the strata outline.
# `levels` is spelled out in full instead of relying on partial argument
# matching (`level`).
ggplot(
  miRNA_mRNA_long,
  aes(
    x = factor(x, levels = c("miRNA", "SYMBOL")),
    y = Freq,
    stratum = stratum,
    alluvium = Cohort,
    fill = stratum,
    label = stratum
  )
) +
  geom_flow(width = 1 / 3) + # draw the flows
  # draw the strata boxes; linetype 0 is the blank line type
  geom_stratum(
    width = 1 / 3,
    linetype = 0,
    size = 0.5,
    alpha = 0.5,
    color = "black"
  ) +
  geom_text(stat = "stratum", size = 3) + # add the stratum names
  scale_x_discrete(limits = c()) + # c() is NULL, so default limits are kept
  theme_bw() + # base theme
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank() # remove the panel border
  ) +
  # 20 fill colours interpolated from the 8-colour "Accent" palette
  scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Accent"))(20))
echarts图表——桑基图&路径图
桑基图(Sankey diagram),即桑基能量分流图,也叫桑基能量平衡图。它是一种特定类型的流程图,延伸的分支的宽度对应数据流量的大小,通常应用于能源、材料成分、金融等数据的可视化分析。
桑基图最明显的特征就是,始末端的分支宽度总和相等,即所有主支宽度的总和应与所有分出去的分支宽度的总和相等,保持能量的平衡。
echarts中的路径图不是指学习路径图这种,而是基于地图。个人感觉更偏向道路规划图
-