R-bloggers Blogosphere

R-bloggers.com is a blog aggregator that collects content from bloggers who write about R. It currently publishes the posts of 114 blogs.
An analysis of the inlink structure of this blogosphere (using Yahoo Site Explorer) shows almost 5,000 interconnected websites: exactly 4,608 nodes (vertices) and 6,700 unique connections (edges).

[ to be continued... ]

Download Rcode.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
################################################
#   IMPORT URLS (the R-bloggers blogosphere)
#   The file RBlogList.csv contains the URLs.
################################################

# Strip the leading scheme ("http://", "https://", "ftp://", "ftps://",
# "~/" or "/") and then a literal leading "www." from each URL.
# The dot after "www" is escaped so that only the exact "www." prefix is
# removed; a host such as "www2.example.org" is left intact instead of being
# truncated to ".example.org". NA inputs stay NA.
strip_url_prefix <- function(url) {
  url <- sub("^((ht|f)tp(s?)://|~/|/)", "", url, perl = TRUE)
  sub("^www\\.", "", url, perl = TRUE)
}

# Placeholder: replace with the directory that holds RBlogList.csv and the
# *_inlinks.txt files before running the script.
setwd(" ***working path*** ")
RBlog <- read.csv("RBlogList.csv", sep = ",", header = TRUE)
RBlog <- RBlog[, c("RBLOGID", "RBLOGLABEL", "RBLOGURL")]
RBlogURL <- RBlog[, c("RBLOGID", "RBLOGURL")]
RBlogURL$RBLOGURL <- strip_url_prefix(RBlogURL$RBLOGURL)
# Every remaining "/" (together with a "~" that directly follows it) is
# replaced by ".".
RBlogURL$RBLOGURL <- gsub("[/][~]?", ".", RBlogURL$RBLOGURL)

################################################
#   BASE EDGELIST
################################################

# One edge from the hub "r-bloggers.com" to every listed blog URL.
RBlogEdges <- cbind(
  rep("r-bloggers.com", length(RBlogURL$RBLOGURL)),
  RBlogURL$RBLOGURL
)
# Drop the first row (presumably the hub's own entry in the CSV -- confirm
# against RBlogList.csv). Negative indexing with drop = FALSE keeps a
# two-column matrix even when the CSV has only one or two rows.
RBlogEdges <- RBlogEdges[-1, , drop = FALSE]

################################################
#  IMPORT INLINKS from Yahoo Site Explorer
#  The inlinks are saved in *_inlinks.txt files.
################################################

LinksFile <- Sys.glob("*_inlinks.txt")
if (length(LinksFile) == 0) {
  stop("no '*_inlinks.txt' files found in ", getwd(), call. = FALSE)
}

# Read each tab-separated export (the first line of the file is skipped, the
# next one is used as the header) and tag every row with the file-name
# prefix, which is later matched against RBLOGID.
LinksDB <- lapply(LinksFile, function(.file) {
  .input <- read.delim(.file, sep = "\t", skip = 1, header = TRUE,
                       quote = "", fill = TRUE)
  .url <- sub("_inlinks\\.txt$", "", .file)
  # rep() keeps the assignment valid for an export with zero data rows.
  .input$FONTE <- rep(.url, nrow(.input))
  .input
})
LinksData <- do.call(rbind, LinksDB)

RBlogLinks <- data.frame(
  RBLOGID = LinksData$FONTE,
  INLINKS = as.character(LinksData$URL),
  stringsAsFactors = FALSE
)
# all.x = TRUE keeps blogs without any inlink file; their NA rows are
# removed by na.omit() below.
RBlogInLink <- merge(RBlogURL, RBlogLinks, by = "RBLOGID", all.x = TRUE)

RBlogInLink$INLINKS <- strip_url_prefix(RBlogInLink$INLINKS)
# Keep only the part before the first "/" of each linking URL.
RBlogInLink$AdjINLINKS <- sub("[/][^[:space:]]*", "", RBlogInLink$INLINKS)
# Append one edge per inlink: linking site -> blog URL.
RBlogEdges <- rbind(
  RBlogEdges,
  cbind(RBlogInLink$AdjINLINKS, RBlogInLink$RBLOGURL)
)
RBlogEdges <- na.omit(RBlogEdges)
 
################################################
# NETWORK GRAPH
################################################

# library() stops immediately if igraph is missing; require() would only
# warn and let the script fail later on the first igraph call.
library(igraph)

# Build the graph from the two-column edge list and drop self-loops while
# keeping multiple edges.
net.graph <- graph.edgelist(RBlogEdges)
net.graph <- simplify(net.graph, remove.loops = TRUE, remove.multiple = FALSE)

# Vertex attributes are assigned by position: the first vertex is styled as
# the hub, the next nrow(RBlog) - 1 as R-bloggers websites, and all remaining
# vertices as other websites.
# NOTE(review): this relies on the vertex order produced by graph.edgelist()
# matching the row order of RBlog -- confirm.
n.blogs <- nrow(RBlog)
n.other <- vcount(net.graph) - n.blogs
V(net.graph)$label <- c(as.character(RBlog$RBLOGLABEL), rep("", n.other))
V(net.graph)$size <- c(5, rep(3.5, n.blogs - 1), rep(1.5, n.other))
V(net.graph)$color <- c(
  "#FFFF00",
  rep("#FF000080", n.blogs - 1),
  rep("#7FFFD480", n.other)
)

net.layout <- layout.fruchterman.reingold(net.graph)
png("Blogosphere_all2.png", height = 2000, width = 2000, res = 100,
    pointsize = 33)
# par() only affects the current graphics device, so the margins must be set
# after png() has opened the device the plot is drawn on.
par(mar = c(1, 1, 2, 1), oma = c(1, 1, 1, 1))
plot(net.graph, layout = net.layout, vertex.label = NA, edge.arrow.size = 0,
     edge.color = "#E0EEEE")
title("Social Network Analysis on all Blog's inlinks\n", cex.main = 1,
      col.main = "black")
legend("topleft",
       c("R-bloggers Hub", "R-bloggers Websites", "Other Websites"),
       cex = 0.8, col = c("#FFFF00", "#FF0000", "#7FFFD4"), pch = 19)
dev.off()