- Create vectors with c() or 1:4 or sample() or numeric() etc.
- Create logicals with comparisons like a < 5 & a > 1 or c('A','B') %in% c('A','C','D')
- Check types with the is. functions or change them with the as. functions
- Use help() to figure out how to use new functions you don't know yet!
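To make these concrete, here is a minimal sketch of the tools above; the vectors and values are made up purely for illustration:

a <- c(1, 3, 7, 10)              # build a vector by hand
b <- 1:4                         # or as a sequence
a < 5 & a > 1                    # logical vector: FALSE TRUE FALSE FALSE
c('A','B') %in% c('A','C','D')   # TRUE FALSE
is.numeric(a)                    # check the type: TRUE
as.character(a)                  # convert to another type: "1" "3" "7" "10"
help(sample)                     # or ?sample, to read the documentation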
Practice:
- Use sample() to generate a vector of 1000 names from Jack, Jill, and Mary.
- Use %in% to count how many are Jill or Mary.
- Use help() to figure out how to use the substr function to get the first letter of the name. Then, use that to count how many names are Jack or Jill.
- Turn the vector of names into a factor.
- Create a vector of the numbers from 63 to 302, and count how many are below 99 or above 266.
Answers:
names <- sample(c('Jack','Jill','Mary'),1000,replace=T)
sum(names %in% c('Jill','Mary'))
firstletter <- substr(names,1,1)
sum(firstletter == "J")
names <- factor(names)
numbers <- 63:302
sum(numbers < 99 | numbers > 266)
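As an optional check on the same vectors, table() (which comes up again below) gives all the counts at once; this is just an alternative to summing logicals, not part of the answer above:

table(names)         # counts of Jack, Jill, and Mary
table(firstletter)   # counts of each first letter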
- Get data with read.csv() or data(), or create it with data.frame() or tibble()
- filter() to pick a subset of observations
- select() to pick a subset of variables
- rename() to rename variables
- mutate() to create new variables
- %>% to chain together commands
- Use a for (i in 1:10) {} loop to repeat commands
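A minimal sketch of how these pieces chain together; the data frame and variable names here are made up for illustration only:

library(tidyverse)
df <- tibble(name = c('A','B','C'), x = c(1, 5, 9), unused = c(0, 0, 0))
df <- df %>%
  filter(x > 2) %>%          # keep only rows with x above 2
  select(-unused) %>%        # drop a variable
  rename(score = x) %>%      # rename x to score
  mutate(double = score*2)   # create a new variable
for (i in 1:3) {
  print(i)                   # a for loop just repeats its body
}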
Practice:
- Load the Ecdat library and get the Computers data set
- Create bigHD, which is TRUE if the hd is above median
- Drop the ads and trend variables
- Keep only the computers where premium is "yes"
- Use a for loop to print out the median price for each level of ram (unique() may help)
Answers:
library(tidyverse)
library(Ecdat)
data(Computers)
Computers <- Computers %>%
mutate(bigHD = hd > median(hd)) %>%
select(-ads,-trend) %>%
filter(premium == "yes")
for (i in unique(Computers$ram)) {
print(median(filter(Computers,ram==i)$price))
}
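An alternative to the for loop uses group_by() and summarize() from the same tidyverse toolkit; this is just a comparison sketch (the column name median.price is arbitrary), giving the same medians as a data frame rather than printed one at a time:

Computers %>%
  group_by(ram) %>%
  summarize(median.price = median(price))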
- Summarize a variable with table(), mean(), sd(), quantile(), and functions for the 0, 50, 100% percentiles: min(), median(), max()
- stargazer() to get a bunch of summary stats at once
- Plot distributions with plot(density(x)), hist(), or barplot(table())
- Add to an existing plot with points(), lines(), abline()
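A small sketch of these summary and plotting functions on a generic numeric vector; the vector x here is just simulated for illustration:

x <- rnorm(100, mean = 10, sd = 2)    # any numeric vector would do
mean(x)
sd(x)
quantile(x)                           # 0, 25, 50, 75, 100% percentiles by default
plot(density(x), main = 'Distribution of x')
abline(v = median(x), col = 'red')    # add a vertical line at the median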
Practice:
- Use stargazer to get summary statistics for the data set
- Use table to look at the distribution of ram, then make a barplot of it
- Make a density plot of price, then use abline(v=) to overlay the 0, 10, 20, …, 100% percentiles on it as blue vertical lines
Answers:
library(stargazer)
stargazer(Computers,type='text')
table(Computers$ram)
barplot(table(Computers$ram))
plot(density(Computers$price),xlab='Price',main='Distribution of Computer Price')
abline(v=quantile(Computers$price,0:10/10),col='blue')
- Get conditional proportions with prop.table(table(x,y),margin=)
- Get correlation with cor()
- group_by(x) %>% summarize(mean(y)) to get the mean of y within values of x
- cut(x,breaks=10) to put x into "bins" to explain y with
- 1-var(residuals)/var(y) to get the proportion of the variance of y explained
- Look at relationships with plot(x,y) or overlaid density plots
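A sketch of the "variance explained" idea on made-up data, assuming the tidyverse is loaded as above; the same pattern is applied to the Computers data below:

df <- tibble(x = runif(1000), y = 2*x + rnorm(1000))
df <- df %>%
  mutate(xbins = cut(x, breaks = 10)) %>%   # bin x
  group_by(xbins) %>%
  mutate(residuals = y - mean(y))           # y minus its within-bin mean
cor(df$x, df$y)
1 - var(df$residuals)/var(df$y)             # share of var(y) explained by the bins of x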
Practice:
- Use prop.table with both margins to see if cd and multi look dependent
- Use cut to make 10 bins of hd
- Get the mean of price within the bins of hd, and the residuals
- Calculate how much of the variance of price is explained by hd, and calculate the correlation between price and hd
- Plot price (y-axis) against hd (x-axis)
Answers:
prop.table(table(Computers$cd,Computers$multi),margin=1)
prop.table(table(Computers$cd,Computers$multi),margin=2)
# Bin hd, get the mean price within each bin, and the residual from that mean
Computers <- Computers %>%
mutate(hdbins = cut(hd,breaks=10)) %>%
group_by(hdbins) %>%
mutate(priceav = mean(price)) %>%
mutate(res = price - priceav)
#variance explained
1 - var(Computers$res)/var(Computers$price)
plot(Computers$hd,Computers$price,xlab="Size of Hard Drive",ylab='Price')
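The cheat sheet above also mentions overlaid density plots as a way to look at a relationship. A quick sketch of that approach for this data, comparing the price distribution with and without a CD drive (the color choice is arbitrary, and this is not part of the original answer):

plot(density(Computers$price[Computers$cd == "yes"]), main = 'Price by CD Drive', xlab = 'Price')
lines(density(Computers$price[Computers$cd == "no"]), col = 'blue')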
- Generate random data with rnorm(), runif(), sample()
- Use a for loop to make data and analyze it, storing the results in a vector
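A minimal sketch of the simulate-analyze-store pattern; the sample size, number of repetitions, and the built-in relationship between x and y are arbitrary choices for illustration:

results <- c()
for (i in 1:100) {
  x <- runif(200)                  # make some data
  y <- 2*x + rnorm(200)            # a built-in relationship plus noise
  results[i] <- cor(x, y)          # analyze it and store the result
}
mean(results)                      # summarize across the simulations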
Practice:
- Create At.War (logical, equal to 1 10% of the time)
- Create Net.Exports (uniform, min -1, max 1, then subtract 3*At.War)
- Create GDP.Growth (normal, mean 0, sd 3, then add Net.Exports + At.War)
- Get the cor() between GDP.Growth and Net.Exports, and between GDP.Growth and the residual
Answers:
library(stargazer)
# Vectors to store the correlations from each of the 1000 simulated samples
GDPcor <- c()
rescor <- c()
for (i in 1:1000) {
# Simulate 500 observations: At.War is 1 with 10% probability,
# and Net.Exports and GDP.Growth are built from it
df <- tibble(At.War = sample(0:1,500,replace=T,prob=c(.9,.1))) %>%
mutate(Net.Exports = runif(500,-1,1)-3*At.War) %>%
mutate(GDP.Growth = rnorm(500,0,3)+Net.Exports+At.War) %>%
group_by(At.War) %>%
mutate(residual = GDP.Growth - mean(GDP.Growth))
# Correlation with GDP.Growth itself, and with GDP.Growth net of the At.War group means
GDPcor[i] <- cor(df$GDP.Growth,df$Net.Exports)
rescor[i] <- cor(df$residual,df$Net.Exports)
}
stargazer(data.frame(rescor,GDPcor),type='text')
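As a follow-up, the two sets of simulated correlations could also be compared visually with the overlaid-density approach from earlier; this is a sketch, not part of the original answer:

plot(density(GDPcor), main = 'Correlation with Net.Exports', xlab = 'Correlation')
lines(density(rescor), col = 'blue')
abline(v = 0, col = 'red')    # reference line at zero correlation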
Reminders: