Chapter 8 Day2 강의 정리

8.1 Class 1 - ggplot 활용 1

8.1.1 목표

  • ggplot 개념 이해
  • 그리는 방법 (단계) 이해

8.1.2 ggplot 문법

library(tidyverse)

# Load the built-in iris data, then preview its first rows and structure.
data(iris)
head(iris)
str(iris)

# Minimal scatter plot: the aesthetics are declared on the point layer itself.
ggplot(data = iris) +
  geom_point(mapping = aes(x = Petal.Length, y = Petal.Width))


# Same scatter plot, with the aesthetics declared once in ggplot() so that
# every layer inherits them; points are coloured by Species.
iris %>%
  ggplot(mapping = aes(x = Petal.Length, y = Petal.Width, color = Species)) +
  geom_point(size = 3) +
  theme_bw()

# 100 draws from a standard normal distribution in a single column, x1.
dat <- data.frame(x1 = rnorm(n = 100))

# geom_bar() with its default stat counts rows per distinct x value.
dat %>%
  ggplot(aes(x = x1)) +
  geom_bar()

# Switching the stat to "bin" groups the values into 30 bins instead.
dat %>%
  ggplot(aes(x = x1)) +
  geom_bar(stat = "bin", bins = 30)



# Three discrete cases (stored as a factor) with one value each.
x1 <- factor(1:3)
y1 <- c(33, 10, 82)
dat <- data.frame(x1, y1)
str(dat)

# Bars, a connecting line and open points on a discrete x axis.
# group = 1 places all rows in a single group so geom_line() can connect
# points across the factor levels.
ggplot(dat, aes(x = x1, y = y1, group = 1)) +
  geom_bar(stat = "identity") +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # in recent ggplot2 releases.
  geom_line(linewidth = 2) +
  geom_point(size = 4, pch = 21, fill = "white") +
  # "none" replaces the deprecated logical FALSE for hiding a legend.
  guides(fill = "none") +
  xlab("Discrete cases") +
  ylab("Value") +
  ylim(c(0, 100)) +
  ggtitle("Line for x:discrete and y:value")
  

# Same plot with the bars coloured per case.
# The fill must be mapped inside aes(); passing the factor as a constant
# `fill = x1` argument does not map it to the fill scale.
ggplot(dat, aes(x = x1, y = y1, group = 1)) +
  geom_bar(aes(fill = x1), stat = "identity") +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # in recent ggplot2 releases.
  geom_line(linewidth = 2) +
  geom_point(size = 4, pch = 21, fill = "white") +
  # Hide the fill legend ("none" replaces the deprecated logical FALSE).
  guides(fill = "none") +
  xlab("Discrete cases") +
  ylab("Value") +
  ylim(c(0, 100)) +
  ggtitle("Line for x:discrete and y:value")

# Load mtcars and show its structure.
data(mtcars)
str(mtcars)

# Horsepower against miles-per-gallon with a smoothed trend layer on top.
mtcars %>%
  ggplot(aes(x = mpg, y = hp)) +
  geom_point() +
  geom_smooth()



# Simulated data: 200 weights, heights derived from the weights plus noise,
# and a class label drawn at random from A-D for every observation.
weights <- rnorm(200, 75, 5)
heights <- weights + rnorm(200, 100, 5)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
classes <- sample(c("A", "B", "C", "D"), size = length(heights), replace = TRUE)
mydata <- data.frame(heights, weights, classes)
str(mydata)

# Because color is mapped to classes, points and smoothers are drawn per class.
ggplot(mydata, aes(x = weights, y = heights, color = classes)) +
  geom_point() +
  geom_smooth()

8.1.3 facet 사용법

하나의 변수를 기준으로 그래프를 나누어 그릴 경우 facet_wrap을 사용하며, nrow, ncol 인자로 행과 열 방향의 그래프 개수를 조절한다. 두 개의 변수를 x, y축 방향에 각각 mapping해서 그래프를 나누어 그릴 때는 facet_grid를 사용한다.

# One panel per species, each with its own points and smoother.
# The argument is spelled `scales`; `scale` only worked through partial
# argument matching. "free" gives every panel its own x and y ranges.
ggplot(iris,
       aes(x = Petal.Length,
           y = Petal.Width,
           color = Species)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~Species, scales = "free")
# Load the Orange data set and show its structure.
data(Orange)
str(Orange)


# 1: all trees in one panel, distinguished by colour.
Orange %>%
  ggplot(aes(x = age, y = circumference, color = Tree)) +
  geom_point() +
  geom_line()

# 2: one panel per tree, with a smoother added to each.
Orange %>%
  ggplot(aes(x = age, y = circumference)) +
  geom_point() +
  geom_line() +
  geom_smooth() +
  facet_wrap(~Tree)

# Bar chart of insect counts, filled and split into panels by spray type.
# geom_col() is the stat = "identity" form of geom_bar().
data(InsectSprays)
ggplot(InsectSprays, aes(x = spray, y = count, fill = spray)) +
  geom_col(position = "dodge") +
  facet_wrap(~spray)

8.2 Class 2 - ggplot 활용 2

8.2.1 목적

  • theme 사용법 알기
  • 에러바 그리기

8.2.2 theme 사용

x, y mapping, geometry 요소들 외에 글씨 크기나 화면 구성 등의 설정을 할 경우 theme 함수를 사용함. ggplot2 book 참고

# 1000 log-normal draws with meanlog = log(10) and sdlog = log(2.5).
mydf <- data.frame(x = rlnorm(n = 1000, meanlog = log(10), sdlog = log(2.5)))
str(mydf)

# Store the base histogram so display settings can be layered on afterwards.
p <- mydf %>%
  ggplot(aes(x = x)) +
  geom_histogram()

# Black-and-white theme plus a log10-scaled x axis.
p +
  theme_bw() +
  scale_x_log10()

에러바 있는 막대그래프 그리기

# Load airquality and show its structure.
data(airquality)
str(airquality)

# Per-month mean of every remaining column, computed on complete rows only
# with Day dropped, then reshaped to long form (columns: Month, name, mean).
airmean <- airquality %>%
  filter(complete.cases(.)) %>%
  select(-Day) %>%
  group_by(Month) %>%
  summarise(across(everything(), mean)) %>%
  pivot_longer(cols = -Month, names_to = "name", values_to = "mean")

# Same pipeline with the standard deviation (columns: Month, name, sd).
airsd <- airquality %>%
  filter(complete.cases(.)) %>%
  select(-Day) %>%
  group_by(Month) %>%
  summarise(across(everything(), sd)) %>%
  pivot_longer(cols = -Month, names_to = "name", values_to = "sd")


# One row per Month/variable pair carrying both the mean and the sd.
airdata <- airmean %>%
  left_join(airsd, by = c("Month", "name"))

# Dodged bars of the monthly means with mean +/- sd error bars.
# position_dodge(width = 0.9) on the error bars is what lines them up with
# the dodged bars. geom_col() is the stat = "identity" form of geom_bar().
airdata %>%
  ggplot(aes(x = Month, y = mean, fill = name)) +
  geom_col(position = "dodge", color = "#000000") +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    position = position_dodge(width = 0.9),
    width = 0.4
  ) +
  theme_bw()

8.3 Class 3 - S3 클래스 학습

8.3.1 목적

  • R언어에서 S3 클래스 이해
  • Biostrings 패키지 사용법 학습

8.3.2 S3 클래스 이해

# A small data frame; class() reports its S3 class.
df <- data.frame(x = seq_len(5), y = LETTERS[seq_len(5)])
df
class(df)

# The S3 class is just an attribute, so it can be overwritten with any label.
class(df) <- c("myclass")
class(df)



# class() and the "class" attribute are queried separately on a plain
# integer vector to compare what each one reports.
x <- seq_len(10)
class(x)
attr(x, which = "class")


# The same nine values stored as a 3x3 matrix and as a three-column data frame.
mt <- matrix(1:9, nrow = 3, ncol = 3)
df <- data.frame(1:3, 4:6, 7:9)

# Compare how the two containers are reported.
class(mt)
str(mt)
str(df)


# Take the diamonds data from ggplot2, then load it again through data().
diamonds <- ggplot2::diamonds
data("diamonds")


# summary() is applied to two different columns to compare its output.
summary(diamonds[["carat"]])
summary(diamonds[["cut"]])

# Print and return the sum of `x`, accepting numbers stored as text.
#
# x: a numeric vector, or a character vector whose elements are numbers
#    written as strings (e.g. c("10", "20")).
# Returns the sum, invisibly (the value of print()).
#
# A character vector is converted with as.numeric() first; elements that
# cannot be parsed become NA (with a warning) and make the sum NA.
mysum <- function(x) {
  # Test the type of the whole vector, not the values of its elements.
  if (is.character(x)) {
    x <- as.numeric(x)
  }
  print(sum(x))
}



# c() coerces 10 to "10", so this call passes a character vector.
mysum(c(10, "20"))

library(Homo.sapiens)
# Inspect the Homo.sapiens object: its class, its printed form and the
# methods registered for the OrganismDb class.
class(Homo.sapiens)

Homo.sapiens
methods(class = "OrganismDb")

# Open the help page for cds(), then call it on the object and show the result.
help("cds")
tmp <- cds(Homo.sapiens)
tmp

8.3.3 Biostrings 패키지

library(Biostrings)

# A single DNA sequence and position-based subsetting of its letters.
dna1 <- DNAString("ACGT-N")
class(dna1)
dna1[1]
dna1[seq(2, 3)]

# A set of three sequences, subset with [ and with [[ to compare the results.
dna2 <- DNAStringSet(c("ACGT", "GTCA", "GCTA"))
dna2[1]
class(dna2[1])
dna2[[1]]

# Constants provided by Biostrings.
DNA_BASES
DNA_ALPHABET
IUPAC_CODE_MAP
GENETIC_CODE
# Draw 10 bases with replacement (TRUE spelled out; T can be reassigned).
x0 <- sample(DNA_BASES, 10, replace = TRUE)
x0

# Joining separate strings: paste0() is paste() with sep = "".
s1 <- "ATG"
s2 <- "CCC"
s3 <- paste0(s1, s2)
s3

# Collapsing the elements of one vector into a single string needs `collapse`.
x1 <- paste(x0, collapse = "")
x1

8.4 Class 4 - Biostrings 활용

8.4.1 목적

  • Biostrings 패키지 활용한 코돈 분석
DNAString
# Random 10-base sequence as a plain character string.
x0 <- paste(sample(DNA_BASES, 10, replace = TRUE), collapse = "")

# Convert to a DNAString before taking a subsequence: on a character vector
# the positions refer to vector elements, and x0 has only one element, so
# asking for positions 1-3 of the string itself is out of range.
x1 <- DNAString(x0)
subseq(x1, 1, 3)

# Counts of G and C in the sequence.
letterFrequency(x1, letters = c("G", "C"))
# Build 10 random sequences: 30 random bases placed between "ATG" and "TAG".
x0 <- rep("", 10)
# seq_along() is safe for empty vectors, unlike 1:length(x0).
for (i in seq_along(x0)) {
  tmp <- paste(sample(DNA_BASES, 30, replace = TRUE), collapse = "")
  x0[i] <- paste0("ATG", tmp, "TAG")
}
x0
length(x0)

# Convert the character vector to a DNAStringSet and name the entries
# DNA1, DNA2, ... instead of typing each name by hand.
x1 <- DNAStringSet(x0)
x1
names(x1) <- paste0("DNA", seq_along(x1))
x1


?letterFrequency
# Matrix of per-sequence counts with one column each for G and C.
tmpd <- letterFrequency(x1, letters = c("G", "C"))
# Divide each sequence's G + C count by that sequence's own length;
# using only the length of the first sequence is wrong when lengths differ.
tmpv <- (tmpd[, "G"] + tmpd[, "C"]) / nchar(x1)
names(tmpv) <- names(x1)
barplot(tmpv)



# The same GC ratio drawn with ggplot2: turn the count matrix into a data
# frame, add the G + C total, sequence name and length, then plot GC / length.
# geom_col() is the stat = "identity" form of geom_bar().
tmpd %>%
  data.frame() %>%
  mutate(GC = G + C, name = names(x1), n = nchar(x1)) %>%
  mutate(GCR = GC / n) %>%
  ggplot(aes(x = name, y = GCR)) +
  geom_col()
# Load the yeastSEQCHR1 sequence, display it and its length, and convert
# it to a DNAString.
data(yeastSEQCHR1)
yeastSEQCHR1
nchar(yeastSEQCHR1)
yeast1 <- DNAString(yeastSEQCHR1)


# Count every trinucleotide in the sequence.
tri <- trinucleotideFrequency(yeast1)

# Relabel each trinucleotide with its GENETIC_CODE entry; several
# trinucleotides can therefore end up with the same name.
names(tri) <- unname(GENETIC_CODE[names(tri)])
tri

# One row per trinucleotide: its count (freq) and its label (aa).
tmpd <- data.frame(freq = tri, aa = names(tri))

# Bars are stacked within each label, so a bar's height is the total count
# for that label. geom_col() is the stat = "identity" form of geom_bar().
ggplot(tmpd, aes(x = aa, y = freq, fill = aa)) +
  geom_col()