Category Archives: datavisual

Linkedin Network Analysis with ggplot2

In this part, we will demonstrate how to create a network graph like the ones you see in LinkedIn InMaps (example: http://community.tradeking.com/upload/0002/3315/linkedin-inmap-jude.jpg).

You can achieve the same result using igraph, but in this post we will focus on how to do it with ggplot2. We will not cover how to download the LinkedIn data or how to format it into an appropriate data structure. We assume that you can get your data into the appropriate format, like the LinkedinData file (available for download as an example): an adjacency matrix like the following:


Jeff Grif Cody Bolh Curtis Blag Eric Wood
Jeff Grif 0 0 0 0
Cody Bolh 1 0 0 0
Curtis Blag 1 0 0 0
Eric Wood 1 0 0 0

Our output will look like the image below:

Linkedin InMap with R

Linkedin InMap with R

We will use the following code to draw the graph from that data:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# NOTE(review): no library() calls are visible in this script. The functions
# used below presumably come from sna (gplot), reshape or reshape2 (melt) and
# igraph (graph.adjacency, leading.eigenvector.community) - confirm and attach
# them before running.

# Read the adjacency table. The first column holds the node names; it is
# removed from the data and reused as both the row and the column labels.
LinkedinData <- read.table("LinkedinData", header = TRUE)
names <- as.character(LinkedinData[, 1])
LinkedinData <- LinkedinData[, -1]
rownames(LinkedinData) <- names
colnames(LinkedinData) <- names

LinkedinData <- data.matrix(LinkedinData)
# Get graph layout coordinates (one row per node)
layoutCoordinates <- gplot(LinkedinData, displaylabels = TRUE)

# Reshape the matrix to long form and keep only cells with a positive value,
# i.e. the ties that actually exist.
LinkedinList <- melt(LinkedinData)
LinkedinList <- LinkedinList[LinkedinList$value > 0, ]

## Community detection
# TRUE is spelled out: `T` is an ordinary variable and can be reassigned.
g <- graph.adjacency(LinkedinData, weighted = TRUE, mode = "undirected")
### Cluster
lead <- leading.eigenvector.community(g)

# Node table used by edgeMaker() and by the plot:
#   x, y  - layout coordinates
#   name  - vertex name reported by the community object
#   group - community membership
#   size  - column sum + row sum of the adjacency matrix for that node
# stringsAsFactors = FALSE keeps `name` a character column on every R version.
temp <- data.frame(layoutCoordinates, lead$names, lead$membership,
                   colSums(LinkedinData) + rowSums(LinkedinData),
                   stringsAsFactors = FALSE)
colnames(temp) <- c("x", "y", "name", "group", "size")
 
 
 
# Generate the path between the two nodes of one edge.
#
# Reads three objects from the enclosing environment:
#   LinkedinList      - edge list; columns 1 and 2 hold the endpoint names
#   temp              - node table; column 3 = name, 4 = group, 5 = size
#   layoutCoordinates - node coordinates, indexed by the same row numbers
#                       as temp
# and calls bezier() (NOTE(review): presumably Hmisc::bezier, given the
# `evaluation` argument - confirm).
#
# Args:
#   whichRow: row of LinkedinList identifying the edge.
#   len:      number of points to generate along the path.
#   curved:   when FALSE the control point is the midpoint of the two
#             endpoints, which removes the bend.
#
# Returns:
#   A data frame built from the bezier() result, plus the columns
#   Sequence (1..len), Group ("<endpoint 1>><endpoint 2>" label shared by
#   every point of the edge) and index (group of the terminus node).
edgeMaker <- function(whichRow, len = 100, curved = TRUE) {
  # Work with the names as character strings. Comparing factors with `==`
  # fails when their level sets differ, and paste() on a data-frame row
  # with factor columns produces the integer codes instead of the labels.
  endpointNames <- c(as.character(LinkedinList[whichRow, 1]),
                     as.character(LinkedinList[whichRow, 2]))
  nodeRows <- match(endpointNames, as.character(temp[, 3]))
  if (anyNA(nodeRows)) {
    stop("edge endpoint not found in node table: ",
         paste(endpointNames, collapse = ">"), call. = FALSE)
  }

  # The edge runs from the node with the larger size to the smaller one;
  # on a tie the first endpoint stays the origin.
  if (temp[nodeRows[1], 5] >= temp[nodeRows[2], 5]) {
    index <- nodeRows
  } else {
    index <- rev(nodeRows)
  }

  fromC <- layoutCoordinates[index[1], ]  # Origin
  toC <- layoutCoordinates[index[2], ]  # Terminus

  if (curved) {
    # Two candidate corner points for the bend; the one that is NOT
    # closer to the centre of the overall graph is used.
    graphCenter <- colMeans(layoutCoordinates)
    bezierMid <- c(fromC[1], toC[2])
    otherMid <- c(toC[1], fromC[2])
    if (sum((graphCenter - bezierMid)^2) < sum((graphCenter - otherMid)^2)) {
      bezierMid <- otherMid
    }
    bezierMid <- (fromC + toC + bezierMid) / 3  # Moderate the Bezier midpoint
  } else {
    bezierMid <- (fromC + toC) / 2  # Remove the curve
  }

  # Bezier path coordinates (x and y) through origin, control point, terminus
  edge <- data.frame(bezier(c(fromC[1], bezierMid[1], toC[1]),
                            c(fromC[2], bezierMid[2], toC[2]),
                            evaluation = len))
  edge$Sequence <- seq_len(len)  # For size and colour weighting in plot
  edge$Group <- paste(endpointNames, collapse = ">")
  edge$index <- rep(temp[index[2], 4], len)
  edge
}
 
# Generate a (curved) edge path for each pair of connected nodes.
# seq_len() is used instead of 1:nrow(): with an empty edge list 1:0 would
# still visit rows 1 and 0.
allEdges <- lapply(seq_len(nrow(LinkedinList)), edgeMaker,
                   len = 500, curved = TRUE)
allEdges <- do.call(rbind, allEdges)  # One data frame holding every path

# Cleaning plot: start from theme_bw() and blank out lines, rectangles,
# strip/axis text and titles. The margin is built with grid::unit() rather
# than a hand-assembled "unit" structure, which depends on grid internals.
new_theme_empty <- theme_bw() +
  theme(
    line = element_blank(),
    rect = element_blank(),
    strip.text = element_blank(),
    axis.text = element_blank(),
    plot.title = element_blank(),
    axis.title = element_blank(),
    plot.margin = grid::unit(c(0, 0, -1, -1), "lines")
  )

# Edges are drawn first, coloured by the `index` column set in edgeMaker();
# nodes are filled by community and sized by `size`; labels go on top.
zp1 <- ggplot(allEdges) +
  geom_path(aes(x = x, y = y, group = Group, colour = factor(index))) +
  geom_point(data = temp,
             aes(x = x, y = y, fill = factor(group), size = factor(size)),
             shape = 21) +
  geom_text(data = temp, aes(x = x, y = y, label = name),
            hjust = 0, vjust = 0) +
  new_theme_empty +  # Clean up plot
  theme(legend.position = "none")

# Explicit print so the plot is also drawn when the script is source()d
print(zp1)