본문 바로가기
R

(R)Make a new variable/ mutate()

by jangpiano 2020. 7. 27.
반응형

mutate() : make a new variable 


'msleep' is a data set in ggplot2 which consists of the variables 'name', 'genus', 'vore', ..., 'bodywt'.

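I will add a new variable 'ratio_ram' (the ratio of REM sleep to total sleep), which does not exist in the original 'msleep' data set. Rows with a missing 'sleep_rem' value are removed first, and the result is stored back into 'msleep'.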



>library(ggplot2)  -----------for dataset 'msleep' in ggplot2

>library(dplyr) ------------for mutate(), filter()



> str(msleep)

tibble [61 x 11] (S3: tbl_df/tbl/data.frame)

 $ name        : chr [1:61] "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" "Cow" ...

 $ genus       : chr [1:61] "Aotus" "Aplodontia" "Blarina" "Bos" ...

 $ vore        : chr [1:61] "omni" "herbi" "omni" "herbi" ...

 $ order       : chr [1:61] "Primates" "Rodentia" "Soricomorpha" "Artiodactyla" ...

 $ conservation: chr [1:61] NA "nt" "lc" "domesticated" ...

 $ sleep_total : num [1:61] 17 14.4 14.9 4 14.4 8.7 10.1 5.3 9.4 10 ...

 $ sleep_rem   : num [1:61] 1.8 2.4 2.3 0.7 2.2 1.4 2.9 0.6 0.8 0.7 ...

 $ sleep_cycle : num [1:61] NA NA 0.133 0.667 0.767 ...

 $ awake       : num [1:61] 7 9.6 9.1 20 9.6 15.3 13.9 18.7 14.6 14 ...

 $ brainwt     : num [1:61] 0.0155 NA 0.00029 0.423 NA NA 0.07 0.115 0.0055 NA ...

 $ bodywt      : num [1:61] 0.48 1.35 0.019 600 3.85 ...


<make a new variable 'ratio_ram'>------mutate()


> msleep<-msleep%>%filter(!is.na(sleep_rem))%>%mutate(ratio_ram=sleep_rem/sleep_total)


> str(msleep)

tibble [61 x 12] (S3: tbl_df/tbl/data.frame)

 $ name        : chr [1:61] "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" "Cow" ...

 $ genus       : chr [1:61] "Aotus" "Aplodontia" "Blarina" "Bos" ...

 $ vore        : chr [1:61] "omni" "herbi" "omni" "herbi" ...

 $ order       : chr [1:61] "Primates" "Rodentia" "Soricomorpha" "Artiodactyla" ...

 $ conservation: chr [1:61] NA "nt" "lc" "domesticated" ...

 $ sleep_total : num [1:61] 17 14.4 14.9 4 14.4 8.7 10.1 5.3 9.4 10 ...

 $ sleep_rem   : num [1:61] 1.8 2.4 2.3 0.7 2.2 1.4 2.9 0.6 0.8 0.7 ...

 $ sleep_cycle : num [1:61] NA NA 0.133 0.667 0.767 ...

 $ awake       : num [1:61] 7 9.6 9.1 20 9.6 15.3 13.9 18.7 14.6 14 ...

 $ brainwt     : num [1:61] 0.0155 NA 0.00029 0.423 NA NA 0.07 0.115 0.0055 NA ...

 $ bodywt      : num [1:61] 0.48 1.35 0.019 600 3.85 ...

 $ ratio_ram   : num [1:61] 0.106 0.167 0.154 0.175 0.153 ...


<make a new variable 'sleep_rank' > ------mutate(ifelse())

> summary(msleep$sleep_total)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 

   1.90    8.00   10.00   10.39   13.70   19.90


>msleep<-msleep%>%mutate(sleep_rank=ifelse(sleep_total<=8,"little",ifelse(sleep_total<13.70,"moderate","much")))


> str(msleep)


tibble [61 x 13] (S3: tbl_df/tbl/data.frame)

 $ name        : chr [1:61] "Owl monkey" "Mountain beaver" "Greater short-tailed shrew" "Cow" ...

 $ genus       : chr [1:61] "Aotus" "Aplodontia" "Blarina" "Bos" ...

 $ vore        : chr [1:61] "omni" "herbi" "omni" "herbi" ...

 $ order       : chr [1:61] "Primates" "Rodentia" "Soricomorpha" "Artiodactyla" ...

 $ conservation: chr [1:61] NA "nt" "lc" "domesticated" ...

 $ sleep_total : num [1:61] 17 14.4 14.9 4 14.4 8.7 10.1 5.3 9.4 10 ...

 $ sleep_rem   : num [1:61] 1.8 2.4 2.3 0.7 2.2 1.4 2.9 0.6 0.8 0.7 ...

 $ sleep_cycle : num [1:61] NA NA 0.133 0.667 0.767 ...

 $ awake       : num [1:61] 7 9.6 9.1 20 9.6 15.3 13.9 18.7 14.6 14 ...

 $ brainwt     : num [1:61] 0.0155 NA 0.00029 0.423 NA NA 0.07 0.115 0.0055 NA ...

 $ bodywt      : num [1:61] 0.48 1.35 0.019 600 3.85 ...

 $ ratio_ram   : num [1:61] 0.106 0.167 0.154 0.175 0.153 ...

 $ sleep_rank  : chr [1:61] "much" "much" "much" "little" ...



<application>

 


>View(airquality)




> airquality$temperature<-ifelse(airquality$Temp<79,"warm","hot")


> table(airquality$temperature)


 hot warm 

  79   74 


> airquality$OZONE<-ifelse(airquality$Ozone<31.50,"little","much")


> table(airquality$OZONE)


little   much 

    58     58


>View(airquality)


> OZONE_temperature<-airquality%>%filter(!is.na(OZONE))%>%group_by(OZONE,temperature)

%>%summarise(n=n())%>%mutate(total_ozone=sum(n))%>%mutate(ratio=round(n/total_ozone*100,1))


> OZONE_temperature

# A tibble: 4 x 5

# Groups:   OZONE [2]

  OZONE  temperature     n total_ozone ratio

  <chr>  <chr>       <int>       <int> <dbl>

1 little hot            10          58  17.2

2 little warm           48          58  82.8

3 much   hot            50          58  86.2

4 much   warm            8          58  13.8


  total_ozone

 ratio

  

(little,hot)+(little,warm)=58 --for row 1,2


 

(much,hot)+(much,warm)=58 --for row 3,4


 10/58*100 --for row1

              -- ratio of hot days out of low ozone days 

48/58*100 --for row 2 

              -- ratio of warm days out of low ozone days

50/58*100 --for row 3

              -- ratio of hot days out of high ozone days

8/58*100  --for row 4

              --ratio of warm days out of high ozone days 




반응형