《R语言实验四.docx》由会员分享,可在线阅读,更多相关《R语言实验四.docx(48页珍藏版)》请在三一办公上搜索。
1、精选优质文档-倾情为你奉上实验四 绘制常用统计图形、对图形进行参数设置【实验类型】验证性【实验学时】2 学时【实验目的】1、掌握基本统计图形的绘制方法和所表达的意义;2、掌握图形参数的设置与多图环境设置;3、了解 ggplot2 绘图包和其它绘图包的使用方法。【实验内容】1、绘制基本的统计图形,包括散点图、箱线图、Q-Q 图和协同图等;2、对图形进行参数设置,包括添加图题、标签、点、线和颜色等;3、使用 ggplot2 和其它绘图包绘制常见统计图形。【实验方法或步骤】第一部分、课件例题:4.1data(cars) #调取数据集,用data()可查看R所有数据集cars #车速与制动距离的数据
2、(R自带)attach(cars) #连接数据集par(mai=c(0.9, 0.9, 0.6, 0.3) #图形边缘空白(边距)的宽度for (i in c(p, l, b, c, o, h, s, S, n) #9种类型 plot(speed, dist, type=i, main = paste(type = , i, , sep = ) # 为双引号detach() #取消连接数据集4.2dfdata.frame(Age=c(13,13,14,12,12,15,11,15,14,14,14,15,12,13,12,16,12,11,15), Height=c(144,166,163,1
3、43,152,169,130,159,160,175,161,170,146,159,150,183,165,146,169), Weight=c(38.1,44.5,40.8,34.9,38.3,50.8,22.9,51.0,46.5,51.0,46.5,60.3,37.7,38.1,45.1,68.0,58.1,38.6,50.8) #数据框pairs(df) #多组图pairs( Age + Height + Weight, data=df) #与上述结果相同4.3coplot(WeightHeight|Age, data=df) #年龄条件下的协同图4.4点图VADeaths #Virg
4、inia州在1940年的人口死亡率数据(R自带)me1- apply(VADeaths, 1, mean) #矩阵的行向量的均值me2- apply(VADeaths, 2, mean) #矩阵的列向量的均值dotchart(VADeaths, gdata=me2, gpch=19, #按类型分类 main = Death Rates in Virginia - 1940)dotchart(t(VADeaths), gdata=me1, gpch=19, #按年龄分类 main = Death Rates in Virginia - 1940)4.5饼图pie.sales-c(39, 200,
5、 42, 15, 67, 276, 27, 66);names(pie.sales)-c(EUL,PES,EFA,EDD,ELDR,EPP,UNE,other) #各候选人的得票结果# figure1,默认色彩,逆时针pie(pie.sales,radius = 0.9,main =Ordinary chart)# figure2,彩虹色彩,顺时针pie(pie.sales,radius=0.9,col=rainbow(8),clockwise =TRUE,main=Rainbow colours)# figure3,灰度色彩,顺时针pie(pie.sales,radius =0.9,cloc
6、kwise =TRUE,col =gray(seq(0.4,1.0,length=8),main=Grey colours)# figure4,阴影色彩,逆时针pie(pie.sales,radius=0.9,density=10,angle=15+15*1:8,main=The density of shading lines)4.6条形图pie.sales-c(39, 200, 42, 15, 67, 276, 27, 66);par(mai=c(0.9, 0.9, 0.3, 0.3) #定义图像边距# figure1, 添加一条线rbarplot(pie.sales,space=1,co
7、l=rainbow(8);lines(r,pie.sales,type=h,col=1,lwd=2)# figure2,用text()添加平均值mp - barplot(VADeaths);tot-colMeans(VADeaths); text(mp, tot+ 3, format(tot), xpd = TRUE, col = blue) # figure3, 添加条形的颜色barplot(VADeaths, space = 0.5, col = c(lightblue, mistyrose, lightcyan, lavender, cornsilk)# figure4, 条形平行排列b
8、arplot(VADeaths, beside = TRUE, col = c(lightblue, mistyrose,lightcyan, lavender, cornsilk), legend = rownames(VADeaths),ylim = c(0, 100)4.7直方图dfdata.frame(Age=c(13,13,14,12,12,15,11,15,14,14,14,15,12,13,12,16,12,11,15), Height=c(144,166,163,143,152,169,130,159,160,175,161,170,146,159,150,183,165,14
9、6,169), Weight=c(38.1,44.5,40.8,34.9,38.3,50.8,22.9,51.0,46.5,51.0,46.5,60.3,37.7,38.1,45.1,68.0,58.1,38.6,50.8) #数据框par(mai=c(0.9, 0.9, 0.6, 0.3) #图形边距attach(df) #连接数据框# figure1,增加直方图和外框的颜色,以及相应的频数hist(Height, col=lightblue, border=red, labels = TRUE, ylim=c(0, 7.2)# figure2,使用线条阴影并利用text()标出频数,用li
10、nes()绘出数据的密度曲线(蓝色)和正态分布密度曲线(红色)r-hist(Height,breaks=12,freq=FALSE,density=10,angle = 15+30*1:6);text(r$mids, 0, r$counts, adj=c(.5, -.5),cex=1.2 );lines(density(Height),col=blue,lwd=2);x-seq(from=130, to=190, by=0.5);lines(x, dnorm(x,mean(Height), sd(Height), col=red, lwd=2)detach() #取消连接数据框4.8箱线图(1
11、)xc(25,45,50,54,55,61,64,68,72,75,75,78,79,81,83,84,84,84,85,86,86,86,87,89,89,89,90,91,91,92,100)fivenum(x) #上、下四分位数,中位数, 最大和最小值boxplot(x) #绘制箱线图(2)InsectSprays #数据框,其中count为昆虫数目,spray为杀虫剂的类型boxplot(countspray,data =InsectSprays,col=lightgray)#矩形箱线图boxplot(countspray,data=InsectSprays,notch=TRUE,co
12、l=2:7,add=TRUE)4.9 QQ图df-data.frame(Age=c(13,13,14,12,12,15,11,15,14,14,14,15,12,13,12,16,12,11,15),Height=c(144,166,163,143,152,169,130,159,160,175,161,170,146,159,150,183,165,146,169),Weight=c(38.1,44.5,40.8,34.9,38.3,50.8,22.9,51.0,46.5,51.0,46.5,60.3,37.7,38.1,45.1,68.0,58.1,38.6,50.8) #数据框par(m
13、ai=c(0.9, 0.9, 0.6, 0.3)attach(df)qqnorm(Weight) #数据的正态Q-Q图qqline(Weight) #在Q-Q图上增加一条理论直线y =x +qqnorm(Height)qqline(Height)detach()4.10 三维透视图perspy - x - seq(-7.5, 7.5, by = 0.5) #定义域f-function(x,y)r-sqrt(x2+y2) + 2-52 #加上一个很小的量2-52是为了避免在下一行运算时分母为零z-sin(r)/r;z-outer(x,y,f) #对f作外积运算形成网格par(mai=c(0.0,
14、0.2,0.0,0.1) #图像边距persp(x,y,z,theta=30,phi=15,expand=.7,col=lightblue,xlab=X,ylab=Y,zlab=Z) #绘制三维图4.11 等值线contoury-x - seq(-3, 3, by = 0.125) #定义域f-function(x,y)z-3*(1-x)2*exp(-x2-(y+1)2)-10*(x/5-x3-y5)*exp(-x2-y2)-1/3*exp(-(x+1)2 -y2);z - outer(x, y, f) #对函数f作外积运算形成网格par(mai=c(0.8, 0.8, 0.2, 0.2) #
15、图像边距contour(x,y,z,levels=seq(-6.5,8,by=0.75),xlab=X,ylab=Y,col=blue) #绘制等值线4.12 添加点、线、文字或符号data(iris) #调用数据op-par(mai=c(1,1,0.3,0.3),cex=1.1) #定义图形参数x-iris$Petal.Length;y-iris$Petal.Widthplot(x,y,type=n,xlab=Petal Length,ylab=Petal Width,cex.lab=1.3)Species-c(setosa,versicolor,virginica)pch-c(24,22,
16、25) #图中点的形状for(i in 1:3)index-iris$Species=Speciesi;points(xindex,yindex,pc=pchi,col=i+1,bg=i+1) #添加点par(op) #访问当前图形参数设置text(c(3, 2.5, 4),c(0.25, 1.5, 2.25),labels=Species,font=2,col=c(2,3,4),cex=1.5) #添加文字说明4.13 添加直线、线段和图例data(cars)Q1-function(beta,data) sum(abs(data,2-beta1-beta2*data,1) #偏差的绝对值之和
17、Qinf-function(beta,data) max(abs(data,2-beta1-beta2*data,1)z1-optim(c(-17,4),Q1,data=cars);zinf-optim(c(-17, 4),Qinf,data =cars);lm.sol-lm(distspeed,data=cars) #线性回归op-par(mai=c(.9,.9,.5,0.1),cex=1.1) #绘图参数plot(cars,main=Stopping Distance versus Speed,ylim=c(0,140),xlab=Speed (mph),ylab=Distance (ft
18、),pch=19,col=magenta,cex.lab=1.2)abline(lm.sol,lwd=2,col=blue) #加线abline(a = z1$par1, b = z1$par2, lty = 4, lwd=2, col=red)abline(a = zinf$par1, b = zinf$par2, lty=5, lwd=2, col=green)pre-predict(lm.sol); x0 - cars$speed23; y0 - cars$dist23segments(x0, y0, x1 = x0, y1 = pre23, col= 1, lwd=2) #加线段和符号
19、expr-expression(paste(, xi, yi, ); text(x0+1.5, y0, expr);expr1-expression(min=sum(beta0+beta1*xi-yi)2,i=1,n);expr2-expression(min=sum(abs(beta0+beta1*xi-yi),i=1,n);expr3-expression(min=max(abs(beta0+beta1*xi-yi),i)legend(4, 140, legend=c(expr1, expr2, expr3),col=c(blue, red, green),lty=c(1,4,5),lwd
20、=2);par(op) #加图例4.14 添加图题、坐标轴与边框plot(cars,main =, axes = F) # 散点图,不含图题、坐标轴title(main = 制动距离与车速 ) # 添加图题axis(side = 1); axis(side = 2) # 添加坐标轴box(lty = 2, lwd = 2, col = 2) # 添加边框4.15绘制多边形和阴影区域。#绘制多边形op - par(mai=c(0.9, 0.9, 0.6, 0.3)x - c(1, 15, 20, 30, 15); y - c(10, 1, 20, 15, 30)plot(x, y, type=n
21、, main = Polygon)polygon(x,y,density=5,angle=15,lwd=2,border=red,lty=2,col=yellow2)#绘制正态分布的上侧分位数x-seq(-4,4,by=0.1);plot(x,dnorm(x),type=l,lwd=2,col=4,xlim=c(-3,3),ylim=c(-0.01,0.4),ylab=Normal Density,main=Shadow);abline(h=0,v=0)z-qnorm(1-0.05);xx-seq(z,4,by=0.1)polygon(c(xx,z),c(dnorm(xx),dnorm(4),
22、col=yellow1)text(z,-0.015,expression(Zalpha),adj=0.4,cex=1.1)text(2,0.02,expression(alpha),adj=0.5,cex=1.5)legend(-3,0.4,expression(alpha=0.05),adj=0.2)par(op)4.16par(omi=c(.5,.5,.5,.5);par(mfrow=c(3, 2)par(mar=c(3,2,2,1) # figure 1plot(c(0,10),c(0,10),type=n,axes=F,xlab=,ylab=)text(5,5,labels=图1,ce
23、x=1.5);box(which=figure,lwd=2);box(lwd=2,lty=2)par(mar=c(3,3,2,1) # figure 2boxplot(countspray,data=InsectSprays,col=lightgray)boxplot(countspray,data=InsectSprays,notch=TRUE,col=2:7,add=TRUE);box(which =figure,lwd=2)Height-c(144,166,163,143,152,169,130,159,160,175,161,170,146,159,150,183,165,146,16
24、9)par(mar=c(4.5, 4.5, 2, 1) # figure 3hist(Height, col=lightblue, border=red, labels = TRUE, ylim=c(0, 7.2);box(which = figure, lwd=2)plot(c(0,10),c(0,10), type=n, axes=F, xlab=, ylab=)text(5,5, labels=图4, cex=1.5)par(mar=c(3, 2, 2, 1) # figure 4box(lwd=2, lty=2); box(which = figure,lwd=2)par(mar=c(
25、3, 3, 2, 1) # figure 5plot(cars); box(which = figure,lwd=2)par(mar=c(2, 2, 1, 1) # figure 6plot(c(0,10),c(0,10), type=n, axes=F, xlab=, ylab=)box(); text(5,5, labels=mfg=c(3,2,3,2), cex=1.5)box(which = figure, lwd=2); box(which = outer, lwd=2)mtext(总图题, line=1, outer=T, cex=1.5)4.17op-par(lwd=2, omi
26、 = c(.1, .1, .1, .1) # alayout(matrix(1:4, 2, 2)layout.show(4)layout(matrix(1:6, 3, 2, byrow=TRUE) # blayout.show(6)layout(matrix(c(1,2,3,3), 2, 2, byrow=TRUE) # clayout.show(3)layout(matrix(1:4, 2, 2, byrow=TRUE), widths=c(3,1), heights=c(1,3) # dlayout.show(4)layout(matrix(c(1,1,2,1), 2, 2), width
27、s=c(2,1), heights=c(1,2)layout.show(2) # elayout(matrix(c(0,1,2,3), 2, 2), widths=c(1,3), heights=c(1,3)layout.show(3) # fpar(op) #恢复原来的图形参数par(mfrow = c(1,1) #取消一页多图4.18library(ggplot2)library(gridExtra) # 加载包 ( 需先安装 )# 散点图p1-ggplot(iris,aes(x=Petal.Length,y=Petal.Width)+(base_family=STKaiti,base_s
28、ize=9)+geom_point(aes(colour=Species)+labs(title=散点图);p1# 箱线图p2-ggplot(iris,aes(x = Species,y = Sepal.Length)+theme_gray(base_family = STKaiti,base_size = 9)+geom_violin(aes(fill = Species),show.legend = F)+labs(title = 箱线图 )+theme(plot.title = element_text(hjust = 0.5);p2#19 # 融合汽缸数 (cyl) 和档位数 (gea
29、r) 这两个变量library(reshape2)mtcars.m-melt(mtcars, id = c(mpg, disp, hp, drat,wt, qsec, am ,vs, carb) #id 中不含 cyl 和 gearhead(mtcars)mtcars.m#20p1-ggplot(data = mtcars); summary(p1)p2 - ggplot(data = mtcars, mapping = aes(x = wt, y = hp, color =gear); summary(p2) #aes() 指定了横纵坐标分别为 wt 和 hp, 颜色为gear 这三种图形属
30、性p - ggplot(mtcars, aes(x = mpg, y = wt, color = factor(gear)#设定默认的映射关系p + geom_point()#沿用默认的映射关系来绘制散点图p + geom_point(aes(shape = factor(carb)#添加图层中的shape的映射关系p + geom_point(aes(y = carb)#修改默认的y的映射关系, 注意图中y轴名称仍然是默认的wtp + geom_point(aes(color = NULL)#删除默认的color映射关系#21#矩阵散点图和平行坐标图分析 iris 中变量间的关系#GGall
31、y包中的ggscatmat()可绘制矩阵散点图library(GGally)ggscatmat(data = iris,1:5,columns = 1:4,color = Species , alpha = 0.8)+theme_bw(base_family = STKaiti , base_size = 10)+theme(plot.title = element_text(hjust = 0.5)+ggtitle(矩阵散点图) #columns表示绘制矩阵散点图的变量, color 为指定数据中的分组变量#使用平行坐标图分析每个样本在各个特征上的变化情况ggparcoord(data =
32、iris,1:5,columns = 1:4,groupColumn = Species,scale = center)+theme_bw(base_family = STKaiti,base_size = 10)+theme(plot.title = element_text(hjust = 0.5),legend.position = bottom)+ggtitle(平行坐标图)+labs(x = )#22 例4.6:直方图探索 120 年来奥运会运动员数据集的信息# 读取数据,数据融合library(readr); library(dplyr)athlete_events - rea
33、d_csv(F:/文档/大学课程/R语言/ch04/athlete_events.csv)noc_regions - read_csv(F:/文档/大学课程/R语言/ch04/noc_regions.csv)athletedata - inner_join(athlete_events,noc_regions,1:2,by=c(NOC=NOC)summary(athletedata); head(athletedata); str(athletedata) # 查看数据# 查看每个国家参与奥运会运动员人数plotdata %group_by(region)%summarise(number=n
34、()% arrange(desc(number)# 可视化前30个人数多的国家的参与人数ggplot(plotdata1:30,aes(x=reorder(region,number),y=number)+ theme_bw(base_family = STKaiti)+ geom_bar(aes(fill=number),stat = identity,show.legend = F)+ coord_flip()+scale_fill_gradient(low = #56B1F7, high = #132B43)+ labs(x=地区,y=运动员人数,title=不同地区奥运会运动员人数)+
35、 theme(axis.text.x = element_text(vjust = 0.5), plot.title = element_text(hjust = 0.5)#23 例4.7:热力图探索 120 年来奥运会数据集男女运动员变化# 可视化数据,分析参赛运动员男女人数的变化library(RColorBrewer)# 人数最多的30个地区,不同年份运动员人数变化region30 %group_by(region)% summarise(number=n()% arrange(desc(number)region30 - region30$region1:30# 不同性别下的,可视
36、化人数最多的15个地区,不同年份运动员人数变化plotdata % group_by(region,Year,Sex)% summarise(number=n()#绘热力图ggplot(data=plotdata, aes(x=Year,y=region) + theme_bw(base_family = STKaiti) + geom_tile(aes(fill = number),colour = white)+ scale_fill_gradientn(colours=rev(brewer.pal(10,RdYlGn)+ scale_x_continuous(breaks=unique(
37、 plotdata$Year) + theme(axis.text.x = element_text(angle = 90,vjust = 0.5)+ facet_wrap(Sex,nrow = 2)#24 例4.8:表情图探索奥运会数据集各地区奖牌数量# “USA”,“Germany”,“France” ,“UK”,“Russia”,“China”6个地区获奖情况library(ggChernoff)# 查看不同季节举办的奥运会运动员人数变化region6 - c(USA,Germany,France ,UK,Russia,China)index - (athletedata$regi
38、on %in% region6)&(!is.na(athletedata$Medal)&(athletedata$Season=Summer)plotdata - athletedataindex,plotdata2 %group_by(Year,region)% summarise(Medalnum=n()# 绘制表情图ggplot(plotdata2,aes(x=Year,y=Medalnum)+ theme_bw(base_family = STKaiti)+ geom_line()+ geom_chernoff(fill = goldenrod1)+ facet_wrap(region
39、,ncol = 2)+ labs(x=举办时间,y=奖牌数)#26 例4.10:使用韦恩图分析集合之间的关系#分析几个向量之间的交集library(VennDiagram) #VennDiagram包最多可以绘制5个集合的韦恩图library(grid)library(futile.logger)#绘制4个数组的韦恩图vcol - c(red,blue,green,DeepPink)T-venn.diagram(list(First =c(1:30), Second=seq(1,50,by = 2), Third =seq(2,50,by = 2), Four = c(20,70), fi
40、lename = NULL,lwd = 0.5, fill = vcol,alpha = 0.5,margin = 0.1)grid.draw(T)#27 例4.11:使用奥运会 120 年的运动员数据集树形图可视化# 树图可视化数据library(treemap); library(readr); library(dplyr)athlete_events - read_csv(F:/文档/大学课程/R语言/ch04/athlete_events.csv)noc_regions - read_csv(F:/文档/大学课程/R语言/ch04/noc_regions.csv)athleteda
41、ta - inner_join(athlete_events,noc_regions,1:2,by=c(NOC=NOC)plotdata % group_by(region,Sex)% summarise(number=n()# 计算奖牌数量plotdata2 % group_by(region,Sex)% summarise(Medalnum=n()# 合并数据plotdata3 - inner_join(plotdata2,plotdata,by=c(region, Sex)# 使用treemap 可视化数据treemap(plotdata3,index = c(Sex,region),v
42、Size = number, vColor = Medalnum,type=value,palette=RdYlGn, title = 不同性别下每个国家的运动员人数,fontfamily.title = STKaiti, title.legend = 奖牌数量,fontfamily.legend=STKaiti)#28 例4.12:可视化美国的各个飞机场之间的航班联系#使用飞机航线数据可视化library(maps); library(geosphere)# 读取飞机航线的数据usaairline - read.csv(F:/文档/大学课程/R语言/ch04/usaairline.csv
43、)airportusa - read.csv(F:/文档/大学课程/R语言/ch04/airportusa.csv)map(state,col=palegreen, fill=TRUE, bg=black, lwd=0.1)# 添加起点的位置points(x=airportusa$Longitude, y=airportusa$Latitude, pch=19, cex=0.4,col=tomato)col.1 - adjustcolor(orange, alpha=0.4)# 添加边for(i in 1:nrow(usaairline) node1 - usaairlinei,c(Latitude.x,Longitude.x) node2 - usaairlinei,c(Latitude.y,Longitude.y) arc - gcIntermediate( c(node1$Longitude.x, node1$Latitude.x), c(node2$Longitude.y, node2$Latitude.y), n=1000,