Reminders:

- Create vectors with c() or 1:4 or sample() or numeric(), etc.
- Logical checks like a < 5 & a > 1 or c('A','B') %in% c('A','C','D')
- Check variable types with the is. functions or change them with the as. functions
- Use help() to figure out how to use new functions you don't know yet!

Practice:

- Use sample() to generate a vector of 1000 names from Jack, Jill, and Mary.
- Use %in% to count how many are Jill or Mary.
- Use help() to figure out how to use the substr function to get the first letter of the name. Then, use that to count how many names are Jack or Jill.

names <- sample(c('Jack','Jill','Mary'),1000,replace=T)
sum(names %in% c('Jill','Mary'))
firstletter <- substr(names,1,1)
sum(firstletter == "J")
names <- factor(names)
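# Optional sketch of the is./as. type functions from the reminders, applied to
# the names vector above (not one of the practice questions):
is.factor(names)          # TRUE after the factor() call above
is.numeric(names)         # FALSE
head(as.character(names)) # convert back to a character vector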
numbers <- 63:302
sum(numbers < 99 | numbers > 266)

Reminders:

- Read in data with read.csv() or data(), or create it with data.frame() or tibble()
- filter() to pick a subset of observations
- select() to pick a subset of variables
- rename() to rename variables
- mutate() to create new variables
- %>% to chain together commands
- for (i in 1:10) {} loop

Practice:

- Load the Ecdat library and get the Computers data set
- Create bigHD indicating if hd is above the median
- Drop the ads and trend variables
- Use a for loop to print out the median price for each level of ram
- unique() to get the distinct values of a variable

library(Ecdat)
data(Computers)
Computers <- Computers %>%
mutate(bigHD = hd > median(hd)) %>%
select(-ads,-trend) %>%
filter(premium == "yes")
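# Optional sketch of rename() from the reminders, which isn't needed for these
# answers; the new name is arbitrary and the result goes in a throwaway copy:
renamed_copy <- Computers %>% rename(screen_inches = screen)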
for (i in unique(Computers$ram)) {
print(median(filter(Computers,ram==i)$price))
}

Reminders:

- table(), mean(), sd(), quantile(), and the functions for the 0, 50, 100% percentiles: min(), median(), max()
- stargazer() to get a bunch of summary stats at once
- plot(density(x)), hist(), barplot(table())
- points(), lines(), abline()

Practice:

- Use table to look at the distribution of ram, then make a barplot of it
- Plot the density of price, then use abline(v=) to overlay the 0, 10, 20, …, 100% percentiles on it as blue vertical lines

library(stargazer)
stargazer(Computers,type='text')
table(Computers$ram)
barplot(table(Computers$ram))
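# Optional: hist() from the reminders is another quick look at a distribution
hist(Computers$price, main = 'Histogram of Computer Price', xlab = 'Price')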
plot(density(Computers$price),xlab='Price',main='Distribution of Computer Price')
abline(v=quantile(Computers$price,0:10/10),col='blue')

Reminders:

- prop.table(table(x,y),margin=)
- cor()
- group_by(x) %>% summarize(mean(y)) to get the mean of y within values of x
- cut(x,breaks=10) to put x into "bins" to explain y with
- 1-var(residuals)/var(y) to get the proportion of variance explained
- plot(x,y) or overlaid density plots

Practice:

- Use prop.table with both margins to see if cd and multi look dependent
- Use cut to make 10 bins of hd
- Get the mean of price within the bins of hd, and the residuals
- Calculate the proportion of variance in price explained by hd, and calculate the correlation
- Plot price (y-axis) against hd (x-axis)

prop.table(table(Computers$cd,Computers$multi),margin=1)
prop.table(table(Computers$cd,Computers$multi),margin=2)
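# Optional sketch of the group_by() %>% summarize() pattern from the reminders,
# which gives one row of mean price per hd bin (the answer below uses mutate()
# instead, so every observation keeps its bin's mean):
Computers %>%
  mutate(hdbins = cut(hd, breaks = 10)) %>%
  group_by(hdbins) %>%
  summarize(mean.price = mean(price))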
Computers <- Computers %>%
mutate(hdbins = cut(hd,breaks=10)) %>%
group_by(hdbins) %>%
mutate(priceav = mean(price)) %>%
mutate(res = price - priceav)
#variance explained
1 - var(Computers$res)/var(Computers$price)
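# One way to get the correlation asked for in the practice list (a sketch,
# using the raw hd and price columns):
cor(Computers$hd, Computers$price)
# And an optional overlaid-density version of the reminders' suggestion,
# comparing price for computers with and without a CD drive:
plot(density(filter(Computers, cd == 'yes')$price), main = 'Price by CD Drive', xlab = 'Price')
lines(density(filter(Computers, cd == 'no')$price), col = 'blue')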
plot(Computers$hd,Computers$price,xlab="Size of Hard Drive",ylab='Price')

Reminders:

- rnorm(), runif(), sample()
- Use a for loop to make data and analyze it. Store each result in a vector.

Practice:

- Create At.War (logical, equal to 1 10% of the time)
- Create Net.Exports (uniform, min -1, max 1, then subtract 3*At.War)
- Create GDP.Growth (normal, mean 0, sd 3, then add Net.Exports + At.War)
- Get the cor() between GDP.Growth and Net.Exports, and between Net.Exports and the GDP.Growth residual

library(stargazer)
GDPcor <- c()
rescor <- c()
for (i in 1:1000) {
df <- tibble(At.War = sample(0:1,500,replace=T,prob=c(.9,.1))) %>%
mutate(Net.Exports = runif(500,-1,1)-3*At.War) %>%
mutate(GDP.Growth = rnorm(500,0,3)+Net.Exports+At.War) %>%
group_by(At.War) %>%
mutate(residual = GDP.Growth - mean(GDP.Growth))
GDPcor[i] <- cor(df$GDP.Growth,df$Net.Exports)
rescor[i] <- cor(df$residual,df$Net.Exports)
}
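# Optional: a quick picture of the two sets of correlations before summarizing
plot(density(GDPcor), main = 'Correlations across 1000 simulations', xlab = 'Correlation')
lines(density(rescor), col = 'blue')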
stargazer(data.frame(rescor,GDPcor),type='text')

Reminders: