Chapter 8 Day2 강의 정리
8.1 Class 1 - ggplot 활용 1
8.1.1 목표
- ggplot 개념 이해
- 그리는 방법 (단계) 이해
8.1.2 ggplot 문법
library(tidyverse)

# Inspect the built-in iris data set: first rows, then column structure.
head(iris)
data(iris)
iris %>% str()

# Basic scatter plot; the x/y mapping is supplied to the point layer.
ggplot(data = iris) +
  geom_point(mapping = aes(x = Petal.Length, y = Petal.Width))

# Same variables with the mapping given in ggplot(), coloured by Species,
# using larger points and the black-and-white theme.
ggplot(
  data = iris,
  mapping = aes(
    x = Petal.Length,
    y = Petal.Width,
    color = Species
  )
) +
  geom_point(size = 3) +
  theme_bw()
# One continuous variable: 100 draws from a standard normal distribution.
dat <- data.frame(x1 = rnorm(100))

# geom_bar() with its default stat on the continuous column.
ggplot(dat, aes(x = x1)) +
  geom_bar()

# Same geom, but binning x1 into 30 bins first.
ggplot(dat, aes(x = x1)) +
  geom_bar(stat = "bin", bins = 30)
# Three discrete cases (factor) with one value each.
x1 <- as.factor(c(1:3))
y1 <- c(33, 10, 82)
dat <- data.frame(x1, y1)
str(dat)

# Bars, a connecting line and points on the same data.
# group = 1 puts all rows in a single group so geom_line() connects them
# even though x is a factor.
ggplot(dat, aes(x = x1, y = y1, group = 1)) +
  geom_bar(stat = "identity") +
  geom_line(size = 2) +
  geom_point(size = 4, pch = 21, fill = "white") +
  guides(fill = "none") +
  xlab("Discrete cases") + ylab("Value") +
  ylim(c(0, 100)) +
  ggtitle("Line for x:discrete and y:value")

# Same plot with the bars filled per case. The fill is mapped inside aes()
# so each bar takes its colour from the fill scale; guides(fill = "none")
# hides the resulting legend.
ggplot(dat, aes(x = x1, y = y1, group = 1)) +
  geom_bar(aes(fill = x1), stat = "identity") +
  geom_line(size = 2) +
  geom_point(size = 4, pch = 21, fill = "white") +
  guides(fill = "none") +
  xlab("Discrete cases") + ylab("Value") +
  ylim(c(0, 100)) +
  ggtitle("Line for x:discrete and y:value")
# Two continuous variables from mtcars: scatter plot plus a smoothed trend.
data(mtcars)
mtcars %>% str()

ggplot(mtcars, aes(x = mpg, y = hp)) +
  geom_point() +
  geom_smooth()
# Simulate 200 observations: heights are the weights plus normal noise with
# mean 100, so the two variables are correlated. Each observation is
# randomly assigned to one of four classes.
weights <- rnorm(200, 75, 5)
heights <- weights + rnorm(200, 100, 5)
classes <- sample(c("A", "B", "C", "D"), size = length(heights), replace = TRUE)
mydata <- data.frame(heights, weights, classes)
str(mydata)

# Colour is mapped in ggplot(), so both the points and the smoothers are
# drawn separately for each class.
ggplot(mydata, aes(x = weights, y = heights, color = classes)) +
  geom_point() +
  geom_smooth()
8.1.3 facet 사용법
하나의 변수를 하나의 축에 mapping해서 그릴 경우 facet_wrap을 사용하고, nrow, ncol로 다른 축의 그래프 개수를 조절함. 두 개의 변수를 x, y축에 각각 mapping해서 그래프를 나누어 그릴 때는 facet_grid를 사용함
# One panel per species. The argument is spelled out as `scales` rather
# than relying on partial matching of `scale`; "free" lets each panel
# use its own x and y ranges.
ggplot(
  iris,
  aes(
    x = Petal.Length,
    y = Petal.Width,
    color = Species
  )
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~Species, scales = "free")
# Inspect the Orange data set.
Orange %>% str()
data(Orange)

## 1: all trees in one panel, distinguished by colour.
ggplot(
  data = Orange,
  aes(x = age, y = circumference, color = Tree)
) +
  geom_point() +
  geom_line()

## 2: one panel per tree, with a smoother added to each panel.
ggplot(
  data = Orange,
  aes(x = age, y = circumference)
) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  facet_wrap(~Tree)
# Bar chart of insect counts, one panel per spray type.
# The data frame is piped in, so ggplot() only receives the mapping.
data(InsectSprays)
InsectSprays %>%
  ggplot(aes(x = spray, y = count, fill = spray)) +
  geom_bar(
    stat = "identity",
    position = "dodge"
  ) +
  facet_wrap(~spray)
8.2 Class 2 - ggplot 활용 2
8.2.1 목적
- theme 사용법 알기
- 에러바 그리기
8.2.2 theme 사용
x, y mapping, geometry 요소들 외에 글씨 크기나 화면 구성 등의 설정을 할 경우 theme 함수를 사용함. ggplot2 book 참고
# 1000 log-normally distributed values.
mydf <- data.frame(x = rlnorm(1000, log(10), log(2.5)))
mydf %>% str()

# Store the base histogram so that further layers can be added to it.
p <- ggplot(mydf, aes(x = x)) +
  geom_histogram()

# Add the black-and-white theme and a log10 x axis to the stored plot.
p +
  theme_bw() +
  scale_x_log10()
에러바 있는 막대그래프 그리기
# Inspect the airquality data set.
airquality %>% str()
data(airquality)

# Per-month mean of every measurement column (Day is dropped), computed on
# complete rows only and reshaped to long format: Month, name, mean.
airmean <- airquality %>%
  filter(complete.cases(.)) %>%
  select(-Day) %>%
  group_by(Month) %>%
  summarise(across(everything(), mean)) %>%
  pivot_longer(-Month, values_to = "mean")

# Same pipeline for the per-month standard deviation: Month, name, sd.
airsd <- airquality %>%
  filter(complete.cases(.)) %>%
  select(-Day) %>%
  group_by(Month) %>%
  summarise(across(everything(), sd)) %>%
  pivot_longer(-Month, values_to = "sd")

# One row per (Month, variable) holding both the mean and the sd.
airdata <- left_join(airmean, airsd, by = c("Month", "name"))

# Dodged bars of the means with mean +/- sd error bars. The error bars use
# position_dodge() as well so that they sit on the dodged bars.
ggplot(
  airdata,
  aes(
    x = Month,
    y = mean,
    fill = name
  )
) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    color = "#000000"
  ) +
  geom_errorbar(
    aes(
      ymin = mean - sd,
      ymax = mean + sd
    ),
    position = position_dodge(width = 0.9),
    width = 0.4
  ) +
  theme_bw()
8.3 Class 3 - S3 클래스 학습
8.3.1 목적
- R언어에서 S3 클래스 이해
- Biostrings 패키지 사용법 학습
8.3.2 S3 클래스 이해
# Build a small data frame, print it and look at its class.
df <- data.frame(x = c(1:5), y = LETTERS[1:5])
df
class(df)

# The class of an object can be overwritten by plain assignment.
class(df) <- "myclass"
class(df)

# For a plain integer vector class() still reports a class, while
# attr(x, "class") shows whether a class attribute is actually stored.
x <- 1:10
class(x)
attr(x, "class")
# The same nine values stored as a 3x3 matrix and as a three-column
# data frame; class() and str() show how the two objects differ.
mt <- matrix(1:9, 3, 3)
df <- data.frame(1:3, 4:6, 7:9)

class(mt)
str(mt)
str(df)
# summary() is a generic function: its output depends on the class of the
# column it is given. Compare the result for two different columns.
diamonds <- ggplot2::diamonds
data(diamonds)
summary(diamonds$carat)
summary(diamonds$cut)
# Print the sum of a vector, accepting numbers stored as character strings.
#
# c(10, "20") is coerced to a character vector by c(), so the type of the
# input has to be checked before calling sum().
#
# x: a numeric vector, or a character vector whose elements are numbers.
# Returns the sum, invisibly (it is also printed). Character elements that
# cannot be converted become NA, with the usual as.numeric() warning.
mysum <- function(x) {
  if (is.character(x)) {
    x <- as.numeric(x)
  }
  print(sum(x))
}

mysum(c(10, "20"))
# Look at an object from a Bioconductor annotation package: its class, the
# object itself, and the methods available for the OrganismDb class.
library(Homo.sapiens)
class(Homo.sapiens)
Homo.sapiens
methods(class = "OrganismDb")

# Open the help page for cds(), then call it on the object and print the
# result.
?cds
tmp <- cds(Homo.sapiens)
tmp
8.3.3 Biostrings 패키지
library(Biostrings)

# A single sequence: DNAString. `[` extracts letters by position.
dna1 <- DNAString("ACGT-N")
class(dna1)
dna1[1]
dna1[2:3]

# A set of sequences: DNAStringSet. Compare `[` (class checked below)
# with `[[`, which extracts a single element.
dna2 <- DNAStringSet(c("ACGT", "GTCA", "GCTA"))
dna2[1]
class(dna2[1])
dna2[[1]]

# Built-in constants provided by Biostrings.
DNA_BASES
DNA_ALPHABET
IUPAC_CODE_MAP
GENETIC_CODE
# Draw 10 random bases (with replacement) as a character vector.
x0 <- sample(DNA_BASES, 10, replace = TRUE)
x0

# Join two separate strings into one.
s1 <- "ATG"
s2 <- "CCC"
s3 <- paste0(s1, s2)
s3

# collapse joins the elements of a single vector into one string.
x1 <- paste(x0, collapse = "")
x1
8.4 Class 4 - Biostrings 활용
8.4.1 목적
- Biostrings 패키지 활용한 코돈 분석
# Print the DNAString constructor itself.
DNAString

# A random 10-base sequence as a single character string.
x0 <- paste(sample(DNA_BASES, 10, replace = TRUE), collapse = "")

# NOTE(review): x0 is still a plain character string here; confirm that
# subseq() is meant to run on it rather than on the DNAString x1 below.
subseq(x0, 1, 3)

# Convert to a DNAString and count the G and C letters.
x1 <- DNAString(x0)
letterFrequency(x1, letters = c("G", "C"))
# Build 10 random sequences: 30 random bases each, wrapped in ATG ... TAG.
x0 <- rep("", 10)
for (i in seq_along(x0)) {
  tmp <- paste(sample(DNA_BASES, 30, replace = TRUE), collapse = "")
  x0[i] <- paste0("ATG", tmp, "TAG")
}
x0
length(x0)

# Convert the character vector into a DNAStringSet.
x1 <- DNAStringSet(x0)
x1

# Name the sequences DNA1, DNA2, ... instead of typing every name:
# names(x1) <- c("DNA1", "DNA2", ...)
names(x1) <- paste0("DNA", seq_along(x1))
x1
?letterFrequency

# Per-sequence counts of G and C (one row per sequence, columns G and C).
tmpd <- letterFrequency(x1, letters = c("G", "C"))

# GC ratio per sequence. Each sequence is divided by its own length rather
# than by the length of the first sequence only, so the result is also
# correct for sets whose sequences differ in length.
tmpv <- (tmpd[, 1] + tmpd[, 2]) / nchar(x1)
names(tmpv) <- names(x1)
barplot(tmpv)

# The same GC ratio computed and plotted with dplyr and ggplot2.
tmpd %>%
  data.frame() %>%
  mutate(GC = G + C, name = names(x1), n = nchar(x1)) %>%
  mutate(GCR = GC / n) %>%
  ggplot(aes(x = name, y = GCR)) +
  geom_bar(stat = "identity")
# Load the yeast sequence shipped as example data and check its length.
data(yeastSEQCHR1)
yeastSEQCHR1
nchar(yeastSEQCHR1)

yeast1 <- DNAString(yeastSEQCHR1)

# Count every trinucleotide in the sequence.
tri <- trinucleotideFrequency(yeast1)

# Relabel each trinucleotide count by looking its name up in GENETIC_CODE.
names(tri) <- GENETIC_CODE[names(tri)]
tri

# Bar chart of the counts by the relabelled names. Several trinucleotides
# can share a label, so geom_bar(stat = "identity") stacks their counts.
tmpd <- data.frame(freq = tri, aa = names(tri))
tmpd %>%
  ggplot(aes(x = aa, y = freq, fill = aa)) +
  geom_bar(stat = "identity")