##1
savehistory('delete')
loadhistory('myfile')
save.image('myfile')
save(objectlist,file = 'myfile')
load('myfile')
dir.create()
##2
cells <- c(1,26,24,68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
mymatrix <- matrix(cells, nrow=2, ncol=2, byrow=TRUE,
dimnames=list(rnames, cnames))
mymatrix <- matrix(cells, nrow=2, ncol=2, byrow=FALSE,
dimnames=list(rnames, cnames))
z <- array(1:24, c(2,3,4), dimnames=list(dim1, dim2, dim3))
str()
summary()
class()
dim()
range()
mydata<-data.frame(age=numeric(0),gender=character(0),weight=numeric(0))
mydata<-edit(mydata)#等价于:
fix(data)
attach()
detach()
with(mtcars,{
stats<-summary(mtcars)
wai<<-summary(mtcars)
})
grades <- read.table("studentgrades.csv", header=TRUE,
row.names="StudentID", sep=",",
colClasses=c("character", "character", "character",
"numeric", "numeric", "numeric"))
status<-c('poor','improved','excellent','poor')
status<-factor(status,order=T,levels = c('poor','improved','excellent'))
sex<-factor(sex,levels = c(1,2),labels = c('male','female'))
mydatatxt <- "
age gender weight
25 m 166
30 f 115
18 f 120
"
mydata <- read.table(header=TRUE, text=mydatatxt)
read.table()#的一些选项:
header
sep
row.names()
colnames()
na.strings#na.strings('-9','?')
colClasses()
skip
stringsAsFactor
text
gzfile()
unz()
##3
abline(lm(mpg~wt))#加了一条最优拟合曲线
pdf('xx.pdf')
plot()
dev.off()
par(no.readonly = T)
par(lty=2,pch=17)
pch#符号
cex#文本大小缩放倍数
lty#线条类型
lwd#线宽
col
col.axis#坐标轴刻度
col.lab
col.main
col.sub
fg
bg
font#字体族
pin#以英寸表示的图形尺寸
mar#边界大小c(5,4,4,2)下左上右
mycolors<-rainbow(n)
pie(rep(1,10),labels = mycolors,col=mycolors)
axis()#坐标轴选项
axis(2, at=x, labels=x, col.axis="red", las=2)
#1234分别表示下左上右,画坐标轴的方位
x <- c(1:10)
y <- x
z <- 10/x
opar <- par(no.readonly=TRUE)
par(mar=c(5, 4, 4, 8) + 0.1)
plot(x, y, type="b",
pch=21, col="red",
yaxt="n", lty=3, ann=FALSE)
#ann=FALSE是移除默认标签
lines(x, z, type="b", pch=22, col="blue", lty=2)
axis(2, at=x, labels=x, col.axis="red", las=2)
axis(4, at=z, labels=round(z, digits=2),
col.axis="blue", las=2, cex.axis=0.7, tck=-.01)
mtext("y=1/x", side=4, line=3, cex.lab=1, las=2, col="blue")
title("An Example of Creative Axes",
xlab="X values",
ylab="Y=X")
par(opar)
dose <- c(20, 30, 40, 45, 60)
drugA <- c(16, 20, 27, 40, 60)
drugB <- c(15, 18, 25, 31, 40)
opar <- par(no.readonly=TRUE)
par(lwd=2, cex=1.5, font.lab=2)
plot(dose, drugA, type="b",
pch=15, lty=1, col="red", ylim=c(0, 60),
main="Drug A vs. Drug B",
xlab="Drug Dosage", ylab="Drug Response")
lines(dose, drugB, type="b",
pch=17, lty=2, col="blue")
abline(h=c(30), lwd=1.5, lty=2, col="gray")
library(Hmisc)
#添加次要刻度线
minor.tick(nx=3, ny=3, tick.ratio=0.5)
legend("topleft", inset=.05, title="Drug Type", c("A","B"),
lty=c(1, 2), pch=c(15, 17), col=c("red", "blue"))
par(opar)
id.method='identify'
plotmath()#数学标注
par(mfrow=c(2,2))
layout(matrix(1,1,2,3),2,2,byrow=T,weights=c(1,3),heights=c(1,2))
par(fig=c(x1,x2,y1,y2))
#用来精确摆放图片位置
opar <- par(no.readonly=TRUE)
par(fig=c(0, 0.8, 0, 0.8))
plot(mtcars$mpg, mtcars$wt,
xlab="Miles Per Gallon",
ylab="Car Weight")
par(fig=c(0, 0.8, 0.55, 1), new=TRUE)
boxplot(mtcars$mpg, horizontal=TRUE, axes=FALSE)
par(fig=c(0.65, 1, 0, 0.8), new=TRUE)
boxplot(mtcars$wt, axes=T)
mtext("Enhanced Scatterplot", side=3, outer=TRUE, line=-3)
par(opar)
title()
axis()
abline()
lines()
text()
mtext(plotmath())
##4
x%%y#余数
x%/%y#整除
mydata<-transform(mydata,sumx=x1+x2,
meanx=(x1+x2)/2)
isTRUE()
leadership$agecat[leadership$age >= 55 &
leadership$age <= 75] <- "Middle Aged"
names(leadership)[6:10]<-c('','','')
rename(data,c(odname='newname',odname='newname',...))
#plyr包对数据集操作很强大,试着学习
is.na()
is.infinite()
na.omit()
leadership$age[leadership$age==99]<-NA
myformat<-'%m/%d/%y'
as.Date(leadership$date,myformat)
Sys.Date()
dob <- as.Date("1956-10-12")
difftime(today, dob, units="weeks")
as.logical()
as.numeric()
as.character()
as.data.frame()
as.matrix()
as.vector()
newdata<-leadership[order(leadership$age),]
#各行依照gender的age进行排序
attach(eadership)
neadata<-leadership[order(gender,age),]
neadata<-leadership[order(gender,-age),]
detach(leadership)
merge(data1,data2,by=c('ID','',...))
myvars<-paste('q',1:10,sep = '')
mtcars[!names(mtcars) %in% c('cyl','hp')]
#把一些变量给剔除了
newdata <- leadership[leadership$gender=="M" &
leadership$age > 30,]
newdata<-subset(mtcars,cyl>2 & cyl<8,select=c(mpg,am,hp,cyl))
mysample<-mtcars[sample(1:nrow(mtcars),3,replace = F),]
#从mtcars的行中,无放回抽取3个
##5
sqrt()
ceiling()
floor()
trunc()
round()
signif(x,digits = n)
exp(x)
log(x,base = n)
z<-mean(x,trim=0.05,na.rm=T)
#丢弃最大5%和最小5%后算平均数
sd()#标准差
var()#方差
quantile()
range()
scale(x,center = T,scale = T)
#一般scale()进行均值为0标准,差为1的标准化
newdata<-transform(mydata,myvar=scale(myvar)*10+50)#均值为50,标准差为10
#的标准化
mvrnorm(n,mean,sigma)
#sigma是方差-协方差矩阵
library(MASS)
options(digits = 3)
set.seed(1234)
mean <- c(230.7, 146.7, 3.6)
sigma <- matrix( c(15360.8, 6721.2, -47.1,
6721.2, 4700.9, -16.5,
-47.1, -16.5, 0.3), nrow=3, ncol=3)
set.seed(1234)
mydata<-mvrnorm(500,mean,sigma)
mydata<-as.data.frame(mydata)
names(mydata)<-c('y','x1','x2')
nchar()#计算x中字符数量
x<-'abcde'
substr(x,2,4)
grep('A',c('b','A','c'),fixed = TRUE)
#这个还可以匹配正则表达式,可以仔细查一下,P93
sub("\\s",'.',"hello there")
#"\\s"表示空格
toupper()
tolower()
y<-strsplit('abc','')
unlist(y)[2]
sapply(y, '[',2)#和上面返回的是相同结果
#注意:grep(),sub(),strsplit()都可以匹配正则表达式,文本的时候:fixed=T,正则的时候:fixed=F
seq(from,to,by)
rep()
cut()
pretty()
cat()
name<='han'
cat('Hello',name,'\n')
#'\b'表示退格
#apply可以用到数组的某个维度,sapply和lapply可以用于列表
z <- scale(roster[,2:4])
score <- apply(z, 1, mean)
roster <- cbind(roster, score)
y <- quantile(score, c(.8,.6,.4,.2))
roster$grade[score >= y[1]] <- "A"
roster$grade[score < y[1] & score >= y[2]] <- "B"
#按照四分位数划分等级
name <- strsplit((roster$Student), " ")
Lastname <- sapply(name, "[", 2)
if (!is.factor(grade)) grade<-as.factor(grade) else print('Grade already is a factor')
ifelse(cond,statement1,statement2)
ifelse(score>0.5,print('passed'),print('failed'))
outcome<-ifelse(score>0.5,'passed','passed')
mydate <- function(type="long") {
switch(type,
long = format(Sys.time(), "%A %B %d %Y"),
short = format(Sys.time(), "%m-%d-%y"),
cat(type, "is not a recognized type\n"))
}
data<-aggregate(data,by=list(cyl,mp),FUN=mean,na.rm=TRUE)
#深学一下reshape2
library(reshape2)
md<-melt(mtcars,id=c('am','cyl'))
dcast(md,cyl~variable,mean)
#各变量按照cyl的分类,计算平均值
dcast(md,am~variable,mean)
#各变量按照mpg的分类,计算平均值
dcast(md,am~cyl,mean)
dcast(md,am+cyl~variable)
dcast(md,am+variable~cyl)
dcast(md,cyl_variable~am)
##6
means<-mean[order(means$x),]
barplot(counts,
main="Grouped Bar Plot",
xlab="Treatment", ylab="Frequency",
col=c("red", "yellow", "green"),
legend=rownames(counts), beside=TRUE)
par(las=2)
par(mar=c(5,8,4,2))
#棘状图
library(vcd)
spine(counts)
#饼图
par(mfrow=c(2,2))
slices <- c(10, 12,4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
pie(slices,labels = lbls,main = 'simple')
pct <- round(slices/sum(slices)*100)
lbls <- paste(lbls, pct)
lbls <- paste(lbls,"%",sep="")
pie(slices,labels = lbls, col=rainbow(length(lbls)),
main="Pie Chart with Percentages")
library(plotrix)
library(plotrix)
pie3D(slices, labels=lbls,explode=0.1,
main="3D Pie Chart ")
mytable <- table(state.region)
lbls <- paste(names(mytable), "\n", mytable, sep="")
pie(mytable, labels = lbls,
main="Pie Chart from a dataframe\n (with sample sizes)")
fan.plot(slices,labels = lbls)
par(opar)
hist()
plot(density())
attach(mtcars)
cyl.f<-factor(cyl,levels = c(4,6,8),labels = c("4 cylinder", "6 cylinder", "8 cylinder"))
length(levels(cyl.f))
colfill<-c(2:(2+length(levels(cyl.f))))
legend(locator(1),levels(cyl.f))
boxplot(mpg~cyl)
vioplot()
dotchart()
x <- mtcars[order(mtcars$mpg),]
x$cyl <- factor(x$cyl)
x$color[x$cyl==4] <- "red"
x$color[x$cyl==6] <- "blue"
x$color[x$cyl==8] <- "darkgreen"
dotchart(x$mpg,
labels = row.names(x),
cex=.7,
pch=19,
groups = x$cyl,
gcolor = "black",
color = x$color,
main = "Gas Mileage for Car Models\ngrouped by cylinder",
xlab = "Miles Per Gallon")
##7
mystats<-function(x,na.omit=F){
if (na.omit)
x<-x[!is.na(x)]
m<-mean(x)
n<-length(x)
s <- sd(x)
skew <- sum((x-m)^3/s^3)/n
kurt <- sum((x-m)^4/s^4)/n - 3
return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
}
myvars <- c("mpg", "hp", "wt")
sapply(mtcars[myvars], mystats)
sapply(mtcars[myvars], mystats,na.omit=T)
library(psych)
describe(mtcars[myvars])
dstats<-function(x)sapply(x, mystats)
by(mtcars[myvars],mtcars$am,dstats)
library(doBy)
summaryBy(mpg+hp+wt~am,data = mtcars,FUN=mystats)
library(psych)
describeBy(mtcars[myvars],list(am=mtcars$am))
table()
xtabs()
ftable()
prop.table()
margin.table()
library(vcd)
mytable<-xtabs(~Treatment+Improved, data=Arthritis)
#独立检验,原假设是独立的,P值小的话,会拒绝原假设
chisq.test(mytable)
fisher.test(mytable)
mantelhaen.test(mytable)
#相关检验,较大值,大相关
assocstats(mytable)
cor(x,use,method)
use:遇到缺失数据的处理方式
method:pearson,spearman,kendall
cov()
#协方差
pcor(u,cov(x))
#偏相关
library(psych)
corr.test()
pcor.test()
#独立t检验
t.test(y~x,data)
#x是二分变量
t.test(y1,y2)
#非独检验
tt.test(y1,y2,paired = T)
sapply(UScrime[c('U1','U2')], function(x)(c(mean=mean(x),sd=sd(x))))
wilcox.test()
kruskal.test()
friedman.test()
source("http://www.statmethods.net/RiA/wmc.txt")
states <- data.frame(state.region, state.x77)
wmc(Illiteracy ~ state.region, data=states, method="holm")
#这个方法可以很大程度控制总体一误差率