我们的R和你的Excel

这次是上次"利用R语言抓取网页表格数据"实例的修改版。上次的版本虽然已经可以正常显示,但在项目过多时排版会非常乱,而且把全部项目都列出来意义不大,所以这次仅列出下载量前5的Tweak。

画出的统计图大概像这样

Code

# Scrape a developer's download statistics table from the BigBoss repository
# and draw a pie chart of the five most-downloaded tweaks.

# Fetching and parsing the HTML page
library(RCurl)
library(XML)
# Drawing
library(datasets)
library(grDevices)
library(graphics)
library(showtext)

# Developer name as it appears on BigBoss
Developer <- "YOUR_DEVELOPER_NAME_ON_BIGBOSS"

# Fetch the HTML content. The name is URL-encoded so that spaces and other
# reserved characters survive in the query string (names that are already
# percent-encoded are left untouched by URLencode's default `repeated = FALSE`).
stats_url <- paste0(
  "http://apt.thebigboss.org/stats.php?dev=",
  utils::URLencode(Developer, reserved = TRUE)
)
htmlCode <- getURL(stats_url)
tmp <- textConnection(htmlCode)
htmlCode <- readLines(tmp)
close(tmp)

# Parse the HTML tree, silently ignoring parser complaints about bad markup
HTMLDOM <- htmlTreeParse(
  htmlCode,
  error = function(...) {},
  useInternalNodes = TRUE
)

# XPath query: the text of every table cell, in document order
download <- unlist(xpathSApply(HTMLDOM, "//table//td", xmlValue))

# The cells are read as rows of four, with the tweak name in the 2nd cell and
# the download count in the 3rd (same positions the index arithmetic
# 2, 6, 10, ... / 3, 7, 11, ... selects). Only complete rows are used, so the
# two vectors always have the same length.
cells_per_row <- 4L
n_rows <- length(download) %/% cells_per_row
if (n_rows < 1L) {
  stop("No statistics table cells found for developer: ", Developer,
       call. = FALSE)
}
row_start <- (seq_len(n_rows) - 1L) * cells_per_row

# Thousands separators (if the page uses any) are stripped before conversion;
# cells that are not numbers become NA and are dropped by order() below.
tweak_count <- as.numeric(
  gsub(",", "", download[row_start + 3L], fixed = TRUE)
)
# Label = tweak name, a line break, then the count. format() is used so that
# round counts are not printed in scientific notation (e.g. "1e+05").
tweak_name <- paste(
  download[row_start + 2L],
  format(tweak_count, scientific = FALSE, trim = TRUE),
  sep = "\n  "
)

# Bind both columns (a data frame keeps the counts numeric) and sort by
# download count, highest first; na.last = NA removes non-numeric rows.
tweak_map <- data.frame(
  tweak_count = tweak_count,
  tweak_name = tweak_name,
  stringsAsFactors = FALSE
)
stat <- order(tweak_count, tweak_name, na.last = NA, decreasing = TRUE)
if (length(stat) < 1L) {
  stop("No numeric download counts found for developer: ", Developer,
       call. = FALSE)
}

# Keep at most the top 5; head() copes with developers that have fewer tweaks
top <- head(stat, 5L)
pie.sales <- tweak_map[top, "tweak_count"]
names(pie.sales) <- tweak_map[top, "tweak_name"]

# Use the Kaiti font for drawing (Kaiti.ttc must be findable by font.add)
font.add("Kaiti", "Kaiti.ttc")
plot.new()
showtext.begin()

# Draw!
pie(pie.sales, col = rainbow(length(pie.sales)), edges = 400, radius = 0.7)
title(main = Developer, cex.main = 1.4, font.main = 3, family = "Kaiti")
title(
  xlab = "Bigboss Downloads Count (Top 5)",
  cex.lab = 0.8,
  font.lab = 3,
  family = "Kaiti"
)

showtext.end()
# End

Leave a Reply

Your email address will not be published. Required fields are marked *

4 × 4 =