Chapter 9 Exercise class

9.1 Exercise 7.3

연습문제 7.3

# Grouping factor built from the Month column of airquality.
g <- factor(airquality[["Month"]])
# List of data frames, one per level of g.
airq_split <- split(x = airquality, f = g)
# First group only, kept for trying a summary function on a single subset.
df <- airq_split[[1L]]

#' Mean of the Ozone column, ignoring missing values.
#'
#' @param df A data frame (or list) with a numeric `Ozone` element.
#' @return A length-one numeric: the mean of `df$Ozone` after dropping `NA`s.
ozone_func <- function(df) {
  # TRUE is written out: `T` is an ordinary variable and can be reassigned.
  mean(df$Ozone, na.rm = TRUE)
}

# Try the helper on the single-group data frame first.
ozone_func(df)
# Apply it to every element of the split list. vapply() checks that each call
# returns exactly one numeric value and keeps the list names on the result.
ozone_means <- vapply(airq_split, ozone_func, FUN.VALUE = numeric(1))

9.2 Exercise 9.1

airquality 데이터의 모든 변수에 대한 월별 평균 및 표준편차 구하기 (dplyr 함수 이용)

# Inspect the column types before summarising.
airquality %>% str()

# Monthly means of two selected variables, with explicit output names.
airquality %>%
  group_by(Month) %>%
  summarise(
    ozone_mean = mean(Ozone, na.rm = TRUE),
    temp_mean = mean(Temp, na.rm = TRUE)
  )

# Monthly means of every non-grouping column (Day included).
# across() replaces the superseded summarise_all().
airquality %>%
  group_by(Month) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

## mean
# Per-month mean of every column except Day, with NAs removed.
# Month stays in the result as the grouping key.
aq_mean <- airquality %>%
  group_by(Month) %>%
  dplyr::select(-Day) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

## sd
# Per-month standard deviation of every column except Day, with NAs removed.
# Month stays in the result as the grouping key.
aq_sd <- airquality %>%
  group_by(Month) %>%
  dplyr::select(-Day) %>%
  summarise(across(everything(), function(x) sd(x, na.rm = TRUE)))

9.3 Exercise 9.2

Exercise 9.1에서 구한 월별 평균과 표준편차를 막대그래프로 표현 (에러바 포함)

## melt and join
# Reshape both summaries to long format, keeping Month as the id column.
# `id.vars` is written in full rather than relying on partial argument
# matching.
aq_mean_mlt <- aq_mean %>% melt(id.vars = "Month")
aq_sd_mlt <- aq_sd %>% melt(id.vars = "Month")

# Join on both keys; the mean ends up in value.x and the sd in value.y.
aq_join <- inner_join(aq_mean_mlt, aq_sd_mlt, by = c("Month", "variable"))
aq_join %>% head()

# Alternative without a join: attach the sd column by position. This relies on
# both melted tables having the same row order.
data.frame(aq_mean_mlt, aq_sd_mlt$value) %>% head()

# Dodged bars of the joined values (value.x) per Month and variable, with
# error bars spanning value.x -/+ value.y.
ggplot(
  data = aq_join,
  mapping = aes(x = Month, y = value.x, fill = variable)
) +
  geom_col(position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = value.x - value.y, ymax = value.x + value.y),
    width = 0.4,
    # 0.9 lines the error bars up with the dodged bars.
    position = position_dodge(0.9)
  )

9.4 Exercise 9.3

7.4 Babies dataset에서 data filtering 후 wt와 gestation의 산포도 그리기

library(UsingR)
# Preview the first rows of the babies data.
head(babies)

# Scatter plot of gestation (y) against wt (x) before any filtering.
ggplot(
  data = dplyr::select(babies, wt, gestation, smoke),
  mapping = aes(x = wt, y = gestation)
) +
  geom_point()

# Keep the three columns of interest, drop rows with gestation == 999 or
# smoke == 9, and add a labelled factor version of smoke.
# NOTE(review): 999 and 9 are presumably "unknown" codes, and the remaining
# smoke codes are assumed to be 0-3 in this label order -- confirm against
# the UsingR::babies documentation.
b1 <- babies %>%
  dplyr::select(wt, gestation, smoke) %>%
  filter(gestation != 999, smoke != 9) %>%
  mutate(
    # Explicit levels tie each label to its code instead of depending on
    # which codes happen to be present after filtering.
    smokef = factor(
      smoke,
      levels = 0:3,
      labels = c(
        "never",
        "smoke now",
        "until current pre",
        "once did, not now"
      )
    )
  )

# Check the structure of the filtered data, including the new smokef column.
str(b1)

# Plot of the numeric smoke column.
# NOTE(review): plot(b1$smokef) would plot the labelled factor instead --
# confirm which was intended.
plot(b1$smoke)

# gestation against wt, coloured and split into panels by smokef (two rows of
# panels), with a black linear-model fit drawn in each panel.
ggplot(
  data = b1,
  mapping = aes(x = wt, y = gestation, color = smokef)
) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~ smokef, nrow = 2) +
  theme_bw()

# Mean gestation within each smokef group.
summarise(group_by(b1, smokef), mean(gestation))

# Correlation between gestation and wt within each smokef group.
summarise(group_by(b1, smokef), cor(gestation, wt))

9.5 Exercise 9.4

  • (Categorical) data generation
  • Save the data to an excel file (생략)
  • Read the data from the excel file (생략)
  • Visualize the data using Boxplot (바이올린)
# Draw two normal samples of size n with the same sd and different means.
n <- 200
x1 <- rnorm(n = n, mean = 0, sd = 2)
x2 <- rnorm(n = n, mean = 5, sd = 2)
# Combine them column-wise; the columns keep the names x1 and x2.
x <- data.frame(x1 = x1, x2 = x2)
# Base-graphics box plot, one box per column.
boxplot(x)
# Long format of x; the plots below use its `variable` and `value` columns.
x_mlt <- x %>% melt()
# ggplot2 box plot of value for each variable.
ggplot(data = x_mlt, mapping = aes(x = variable, y = value)) +
  geom_boxplot()
# Violin plot per variable with jittered points and a narrow white box plot
# drawn on top.
ggplot(x_mlt, aes(x = variable, y = value, fill = variable)) +
  geom_violin(trim = FALSE) +
  geom_jitter(
    shape = 16,
    position = position_jitter(0.2),
    color = "#00000055"  # black with an alpha channel
  ) +
  scale_fill_brewer(palette = "Blues") +
  geom_boxplot(width = 0.1, fill = "white") +
  theme_minimal() +
  # labs() takes a label string or NULL, not a theme element. NULL drops the
  # fill title (the legend is hidden below anyway).
  labs(
    y = "value",
    x = "variable",
    fill = NULL
  ) +
  theme(
    legend.position = "none",
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14)
  )

크리에이티브 커먼즈 라이선스
이 저작물은 크리에이티브 커먼즈 저작자표시-비영리-변경금지 4.0 국제 라이선스에 따라 이용할 수 있습니다.