본문 바로가기
R

(R) count the number of elements/ count()

by jangpiano 2020. 7. 29.
반응형

count(): count the number of rows for each unique combination of the given variables



>?count



df%>%group_by(a,b)%>%summarise(n=n())

          ∥

df%>%count(a,b)      


> mpg2<-mpg%>%select(manufacturer,class)%>%filter(manufacturer=="audi")

> mpg2

# A tibble: 18 x 2

   manufacturer class  

   <chr>        <chr>  

 1 audi         compact

 2 audi         compact

 3 audi         compact

 4 audi         compact

 5 audi         compact

 6 audi         compact

 7 audi         compact

 8 audi         compact

 9 audi         compact

10 audi         compact

11 audi         compact

12 audi         compact

13 audi         compact

14 audi         compact

15 audi         compact

16 audi         midsize

17 audi         midsize

18 audi         midsize --------------------------------------you can see that audi has only two classes: compact and midsize



> manufacturer_class<-mpg%>%count(manufacturer,class)%>%group_by(manufacturer)%>%mutate(ratio=n/sum(n)*100)


> manufacturer_class

# A tibble: 32 x 4

# Groups:   manufacturer [15]

   manufacturer class          n ratio

   <chr>        <chr>      <int> <dbl>

 1 audi         compact       15  83.3  ------15/(15+3)

 2 audi         midsize        3  16.7 -----3/(15+3)

 3 chevrolet    2seater        5  26.3 -----5/(5+5+9)

 4 chevrolet    midsize        5  26.3 -----5/(5+5+9)

 5 chevrolet    suv            9  47.4 -----9/(5+5+9)

 6 dodge        minivan       11  29.7

 7 dodge        pickup        19  51.4

 8 dodge        suv            7  18.9

 9 ford         pickup         7  28. 

10 ford         subcompact     9  36  

# ... with 22 more rows


<application>


> airquality$temperature<-ifelse(airquality$Temp<79,"warm","hot")

> table(airquality$temperature)


 hot warm 

  79   74 


> airquality$OZONE<-ifelse(airquality$Ozone<31.50,"little","much")

> table(airquality$OZONE)


  little   much 

    58     58 


 df%>%group_by(a,b)%>%summarise(n=n())

 df%>%count(a,b)  

 

> OZONE_temperature<-

airquality%>%filter(!is.na(OZONE))%>%

group_by(OZONE,temperature)%>%

summarise(n=n())%>%

mutate(total_OZONE=sum(n))%>%

mutate(ratio=round(n/total_OZONE*100,1))



> OZONE_temperature

# A tibble: 4 x 5

# Groups:   OZONE [2]

  OZONE  temperature     n total_OZONE ratio

  <chr>  <chr>       <int>       <int> <dbl>

1 little hot        10           58     17.2

2 little warm       48           58     82.8

3 much   hot        50           58     86.2

4 much   warm       8            58     13.8

> OZONE_temperature2<-airquality%>%filter(!is.na(OZONE))%>%

count(OZONE,temperature)%>%group_by(OZONE)%>%

mutate(ratio=n/sum(n)*100)




> OZONE_temperature2

# A tibble: 4 x 4

# Groups:   OZONE [2]

  OZONE  temperature     n    ratio


  <chr>  <chr>       <int>    <dbl>


1 little hot            10     17.2


2 little warm           48     82.8


3 much   hot            50     86.2


4 much   warm            8     13.8

*you can see that using count() is more concise in this case: one count() call replaces group_by() + summarise(n=n()) and gives the same result*




반응형