본문 바로가기
R

(R) functions for tables/ table(), ftable(),addmargins(), prop.table(), margin.table()

by jangpiano 2020. 11. 8.
반응형

<table()>

The table() function is generally used to count the frequency of each distinct value in one or more variables.


*table of a variable (a variable: gender)

> x = c(rep("M", 3), rep("F", 2), rep(NA, 3))

> x

[1] "M" "M" "M" "F" "F" NA  NA  NA 

> table(x)

x

F M 

2 3 


By default, table() drops missing values (NA) from the counts. If you want NA to appear in the table, add 'exclude=NULL'.


Include NA in table

 Not include NA in table

 >table(x, exclude = NULL)  #To include the missing values. 

   x

       F    M <NA> 

       2    3    3 

 > table(x)

x

F M 

2 3 

> as.data.frame(table(x,exclude=NULL))

     x Freq

1    F    2

2    M    3

3 <NA>    3 

 > as.data.frame(table(x))

  x Freq

1 F    2

2 M    3


When you only need the frequency of males ("M") in the data, index the table by name:


> table(x)["M"] 


*table of multiple variables


> school=c(rep("A", 2), rep("B", 4), rep("C", 4))

> school

 [1] "A" "A" "B" "B" "B" "B" "C" "C" "C" "C"

> gender= c(rep("M", 2), rep("F", 6), "M","F")

> gender

 [1] "M" "M" "F" "F" "F" "F" "F" "F" "M" "F"

> major=c(rep(c("AMS","CS","BUS"),2),"TSM","AMS","AMS","CS")

> major

 [1] "AMS" "CS"  "BUS" "AMS" "CS"  "BUS" "TSM" "AMS" "AMS" "CS" 


If the variables have different lengths, you cannot combine them in a table. The example below uses three variables that all have the same length, 10.


> table(school,gender,major)

, , major = AMS


      gender

school F M

     A 0 1

     B 1 0

     C 1 1


, , major = BUS


      gender

school F M

     A 0 0

     B 2 0

     C 0 0


, , major = CS


      gender

school F M

     A 0 1

     B 1 0

     C 1 0


, , major = TSM


      gender

school F M

     A 0 0

     B 0 0

     C 1 0


data.frame(school,gender,major) 

as.data.frame(table(school,gender,major))

 > data.frame(school,gender,major)

   school gender major

1       A      M   AMS

2       A      M    CS

3       B      F   BUS

4       B      F   AMS

5       B      F    CS

6       B      F   BUS

7       C      F   TSM

8       C      F   AMS

9       C      M   AMS

10      C      F    CS

You can make a data frame which includes frequency. 


> as.data.frame(table(school,gender,major))

   school gender major Freq

1       A      F   AMS    0

2       B      F   AMS    1

3       C      F   AMS    1

4       A      M   AMS    1

5       B      M   AMS    0

6       C      M   AMS    1

7       A      F   BUS    0

8       B      F   BUS    2

9       C      F   BUS    0

10      A      M   BUS    0

11      B      M   BUS    0

12      C      M   BUS    0

13      A      F    CS    0

14      B      F    CS    1

15      C      F    CS    1

16      A      M    CS    1

17      B      M    CS    0

18      C      M    CS    0

19      A      F   TSM    0

20      B      F   TSM    0

21      C      F   TSM    1

22      A      M   TSM    0

23      B      M   TSM    0

24      C      M   TSM    0 



<ftable()>

The ftable() function displays a table of multiple variables in a flat (two-dimensional) form.


 table(school,gender,major)

 ftable(school, gender,major)

 > table(school,gender,major)

, , major = AMS


      gender

school F M

     A 0 1

     B 1 0

     C 1 1


, , major = BUS


      gender

school F M

     A 0 0

     B 2 0

     C 0 0


, , major = CS


      gender

school F M

     A 0 1

     B 1 0

     C 1 0


, , major = TSM


      gender

school F M

     A 0 0

     B 0 0

     C 1 0

 > ftable(school,gender,major)

              major AMS BUS CS TSM

school gender                     

A      F              0   0  0   0

       M              1   0  1   0

B      F              1   2  1   0

       M              0   0  0   0

C      F              1   0  1   1

       M              1   0  0   0


<addmargins() - add marginal summaries (e.g. sums or means) to a table>


> gender= c(rep("M", 2), rep("F", 6), "M","F")

> gender

 [1] "M" "M" "F" "F" "F" "F" "F" "F" "M" "F"

> major=c(rep(c("AMS","CS","BUS"),2),"TSM","AMS","AMS","CS")

> major

 [1] "AMS" "CS"  "BUS" "AMS" "CS"  "BUS" "TSM" "AMS" "AMS" "CS" 


>a=table(gender, major)


> addmargins(a)  -----------add sum of frequencies for each row and column. 

      major

gender AMS BUS CS TSM Sum

   F          2     2     2    1   7

   M         2     0     1   0   3

   Sum     4     2     3   1   10


table(gender, major)

 addmargins(table(gender, major))

 > table(gender,major)

      major

gender AMS BUS CS TSM

     F   2   2  2   1

     M   2   0  1   0

> addmargins(table(gender,major))

      major

gender AMS BUS CS TSM Sum

   F     2   2  2   1   7

   M     2   0  1   0   3

   Sum   4   2  3   1  10



 addmargins(table(gender, major))

 addmargins(table(gender, major),margin=1)

 addmargins(table(gender, major),margin=2)

 > addmargins(table(gender,major))

      major

gender AMS BUS CS TSM Sum

   F          2       2     2    1     7

   M         2       0     1    0    3

   Sum     4       2      3   1     10

#table + a 'Sum' row (margin=1): the column totals, i.e. sums taken over gender


> addmargins(table(gender, major),margin=1) 

      major

gender AMS BUS CS TSM

   F        2        2     2   1

   M       2        0     1   0

   Sum   4        2     3    1 

#table + a 'Sum' column (margin=2): the row totals, i.e. sums taken over major


> addmargins(table(gender, major),margin=2)

      major

gender AMS BUS CS TSM Sum

     F        2      2      2   1   7

     M        2     0      1   0   3


addmargins(a,FUN=mean)  -----------add mean of frequencies for each row and column. 

Margins computed over dimensions

in the following order:

1: gender

2: major

      major

gender  AMS  BUS   CS  TSM mean

  F         2.00 2.00 2.00 1.00 1.75

  M        2.00 0.00 1.00 0.00 0.75

  mean  2.00 1.00 1.50 0.50 1.25

addmargins(table(gender, major),margin=1,FUN=mean) 

 addmargins(table(gender, major),margin=2,FUN=mean) 

 > addmargins(table(gender, major),margin=1,FUN=mean) 

      major

gender AMS BUS  CS TSM

  F         2.0 2.0 2.0 1.0

  M        2.0 0.0 1.0 0.0

  mean  2.0 1.0 1.5 0.5

> addmargins(table(gender, major),margin=2,FUN=mean) 

      major

gender  AMS  BUS   CS  TSM mean

     F      2.00 2.00 2.00 1.00 1.75

     M     2.00 0.00 1.00 0.00 0.75 


 addmargins(table(gender, major),margin=c(1,2),FUN=c(mean,sum)) 


#margin=c(1,2) with FUN=c(mean,sum): adds a 'mean' row (averaging over gender) and a 'sum' column (summing over major)

 addmargins(table(gender, major),margin=c(2,1),FUN=c(mean,sum))


#margin=c(2,1) with FUN=c(mean,sum): adds a 'mean' column (averaging over major) and a 'sum' row (summing over gender)

 > addmargins(table(gender, major),margin=c(1,2),FUN=c(mean,sum)) 

Margins computed over dimensions

in the following order:

1: gender

2: major

      major

gender AMS BUS  CS TSM sum

  F    2.0 2.0 2.0 1.0 7.0

  M    2.0 0.0 1.0 0.0 3.0

  mean 2.0 1.0 1.5 0.5 5.0

> addmargins(table(gender, major),margin=c(2,1),FUN=c(mean,sum))

Margins computed over dimensions

in the following order:

1: major

2: gender

      major

gender  AMS  BUS   CS  TSM mean

   F   2.00 2.00 2.00 1.00 1.75

   M   2.00 0.00 1.00 0.00 0.75

   sum 4.00 2.00 3.00 1.00 2.50


<tables for conditional proportions for margin> 


Proportion of each cell (each gender/major combination) out of the grand total: all the proportions in the table add up to 1.

> prop.table(table(gender, major)) 

      major

gender AMS BUS  CS TSM

     F     0.2   0.2   0.2  0.1

     M    0.2   0.0   0.1 0.0



Conditional proportions of 'major' given 'gender' (margin=1): the proportions in each row add up to 1.

#sum of each rows of the table=1

> prop.table(table(gender, major),margin=1)

      major

gender       AMS       BUS        CS       TSM

     F 0.2857143 0.2857143 0.2857143 0.1428571

     M 0.6666667 0.0000000 0.3333333 0.0000000



Conditional proportions of 'gender' given 'major' (margin=2): the proportions in each column add up to 1.

sum of each column of the table =1

> prop.table(table(gender, major),margin=2) 

      major

gender       AMS       BUS        CS       TSM

     F 0.5000000 1.0000000 0.6666667 1.0000000

     M 0.5000000 0.0000000 0.3333333 0.0000000


<table for marginal distribution>

 > table(gender, major)

      major

gender AMS BUS CS TSM

     F       2      2     2   1

     M      2      0     1   0


> margin.table(table(gender, major),margin=1) -----marginal distribution of variable in the row ' gender'

gender

F M 

7 3 

> margin.table(table(gender, major),margin=2) -----marginal distribution of variable in the column ' major'

major

AMS BUS  CS TSM 

  4   2   3   1 

반응형