# Preview the first six rows of the built-in mtcars data frame.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Median of wt for every observed combination of cyl, gear and carb.
# The formula interface returns one row per combination that occurs in
# the data, with the grouping columns first and the aggregated wt last.
aggregate(
  wt ~ cyl + gear + carb,
  data = mtcars,
  FUN = median
)
## cyl gear carb wt
## 1 4 3 1 2.4650
## 2 6 3 1 3.3375
## 3 4 4 1 2.0675
## 4 8 3 2 3.4800
## 5 4 4 2 2.9650
## 6 4 5 2 1.8265
## 7 8 3 3 3.7800
## 8 8 3 4 5.2500
## 9 6 4 4 3.1575
## 10 8 5 4 3.1700
## 11 6 5 6 2.7700
## 12 8 5 8 3.5700
library(dplyr)
# For each (cyl, carb) group compute:
#   disp_mean - mean of disp
#   am_unique - number of distinct values of am
#   wt_25     - 25% quantile of wt
#   wt_75     - 75% quantile of wt
mtcars %>%
  group_by(cyl, carb) %>%
  summarise(
    disp_mean = mean(disp),
    am_unique = n_distinct(am),
    wt_25 = quantile(wt, probs = 0.25),
    wt_75 = quantile(wt, probs = 0.75)
  )
## # A tibble: 9 x 6
## # Groups: cyl [3]
## cyl carb disp_mean am_unique wt_25 wt_75
## <dbl> <dbl> <dbl> <int> <dbl> <dbl>
## 1 4 1 91.4 2 1.94 2.32
## 2 4 2 117. 2 1.75 3.06
## 3 6 1 242. 1 3.28 3.40
## 4 6 4 164. 2 2.81 3.44
## 5 6 6 145 1 2.77 2.77
## 6 8 2 346. 1 3.44 3.60
## 7 8 3 276. 1 3.76 3.92
## 8 8 4 406. 2 3.64 5.32
## 9 8 8 301 1 3.57 3.57
# Mean and standard deviation of wt and qsec for each (cyl, gear) group.
#
# across() replaces the superseded summarise_at() call, and the summary
# functions are passed as function objects rather than as strings.
# One across() call per statistic keeps the column order of the original
# call: wt_mean, qsec_mean, wt_sd, qsec_sd.
# Groups containing a single row yield NA for the standard deviation.
mtcars %>%
  group_by(cyl, gear) %>%
  summarise(
    across(c(wt, qsec), mean, .names = "{.col}_mean"),
    across(c(wt, qsec), sd, .names = "{.col}_sd")
  )
## # A tibble: 8 x 6
## # Groups: cyl [3]
## cyl gear wt_mean qsec_mean wt_sd qsec_sd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 3 2.46 20.0 NA NA
## 2 4 4 2.38 19.6 0.601 1.45
## 3 4 5 1.83 16.8 0.443 0.141
## 4 6 3 3.34 19.8 0.173 0.552
## 5 6 4 3.09 17.7 0.413 1.12
## 6 6 5 2.77 15.5 NA NA
## 7 8 3 4.10 17.1 0.768 0.802
## 8 8 5 3.37 14.6 0.283 0.0707
# For each (cyl, vs) group, report the mean, standard deviation and
# number of observations of carb and gear. The names of the function
# list become column suffixes: <column>_mean, <column>_sd, <column>_n.
mtcars %>%
  group_by(cyl, vs) %>%
  summarise(
    across(
      .cols = c(carb, gear),
      .fns = list(mean = mean, sd = sd, n = length)
    )
  )
## # A tibble: 5 x 8
## # Groups: cyl [3]
## cyl vs carb_mean carb_sd carb_n gear_mean gear_sd gear_n
## <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <int>
## 1 4 0 2 NA 1 5 NA 1
## 2 4 1 1.5 0.527 10 4 0.471 10
## 3 6 0 4.67 1.15 3 4.33 0.577 3
## 4 6 1 2.5 1.73 4 3.5 0.577 4
## 5 8 0 3.5 1.56 14 3.29 0.726 14
# For each cyl group, stack five statistics of carb and gear into rows:
# mean, standard deviation, then the 25%, 50% and 75% quantiles (in that
# order). The result therefore has five rows per group and no column
# that labels which statistic a row holds.
# NOTE(review): newer dplyr releases deprecate multi-row results from
# summarise() in favour of reframe() - confirm against the dplyr version
# in use before upgrading.
mtcars %>%
  group_by(cyl) %>%
  summarise(
    across(
      c(carb, gear),
      function(v) {
        c(mean(v), sd(v), quantile(v, c(0.25, 0.5, 0.75)))
      }
    )
  )
## # A tibble: 15 x 3
## # Groups: cyl [3]
## cyl carb gear
## <dbl> <dbl> <dbl>
## 1 4 1.55 4.09
## 2 4 0.522 0.539
## 3 4 1 4
## 4 4 2 4
## 5 4 2 4
## 6 6 3.43 3.86
## 7 6 1.81 0.690
## 8 6 2.5 3.5
## 9 6 4 4
## 10 6 4 4
## 11 8 3.5 3.29
## 12 8 1.56 0.726
## 13 8 2.25 3
## 14 8 3.5 3
## 15 8 4 3
# Mean and standard deviation of every column whose name starts with
# "Petal", per Species. The function list is unnamed, so {.fn} expands to
# the position in the list: fn1 is mean, fn2 is sd.
iris %>%
  group_by(Species) %>%
  summarise(
    across(
      .cols = starts_with("Petal"),
      .fns = list(mean, sd),
      .names = "{.col}.fn{.fn}"
    )
  )
## # A tibble: 3 x 5
## Species Petal.Length.fn1 Petal.Length.fn2 Petal.Width.fn1 Petal.Width.fn2
## * <fct> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 1.46 0.174 0.246 0.105
## 2 versicolor 4.26 0.470 1.33 0.198
## 3 virginica 5.55 0.552 2.03 0.275
# Minimum and maximum of every mtcars column, returned as a single row.
# Columns are named <column>_min and <column>_max, with all *_min columns
# first and all *_max columns after them.
# NOTE(review): summarise_all() is superseded by across(); a direct
# across() translation orders the columns per variable instead, so it is
# not a drop-in replacement for this output.
mtcars %>%
  summarise_all(.funs = list(min = min, max = max))
## mpg_min cyl_min disp_min hp_min drat_min wt_min qsec_min vs_min am_min
## 1 10.4 4 71.1 52 2.76 1.513 14.5 0 0
## gear_min carb_min mpg_max cyl_max disp_max hp_max drat_max wt_max qsec_max
## 1 3 1 33.9 8 472 335 4.93 5.424 22.9
## vs_max am_max gear_max carb_max
## 1 1 1 5 8
# Mean of drat and mpg for each (cyl, gear) combination, using plyr.
#
# plyr is called through its namespace instead of being attached with
# library(plyr): attaching plyr after dplyr masks dplyr verbs such as
# summarise(), mutate() and arrange() for the rest of the session, which
# makes later grouped dplyr pipelines silently ignore their grouping.
plyr::ddply(
  mtcars,
  plyr::.(cyl, gear),
  plyr::summarise,
  mean_drat = mean(drat),
  mean_mpg = mean(mpg)
)
## cyl gear mean_drat mean_mpg
## 1 4 3 3.700000 21.500
## 2 4 4 4.110000 26.925
## 3 4 5 4.100000 28.200
## 4 6 3 2.920000 19.750
## 5 6 4 3.910000 19.750
## 6 6 5 3.620000 19.700
## 7 8 3 3.120833 15.050
## 8 8 5 3.880000 15.400
# Per-column overview of mtcars: minimum, first quartile, median, mean,
# third quartile and maximum.
summary(object = mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Extended descriptive statistics for every mtcars column (n, mean, sd,
# median, trimmed mean, mad, min, max, range, skew, kurtosis, se).
# Called through the psych namespace, so the package is not attached.
psych::describe(x = mtcars)
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 2 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 3 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 4 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 5 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 6 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 7 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 8 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 9 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 10 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 11 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
library(summarytools)
# Data-frame summary of mtcars rendered as a markdown grid table
# (plain.ascii = FALSE, style = "grid"), without the "Valid" column and
# with graphs scaled to 75%.
# NOTE(review): tmp.img.dir is a hard-coded Unix path; presumably it holds
# the temporary graph images - confirm it exists on the rendering machine.
dfSummary(
  x = mtcars,
  style = "grid",
  plain.ascii = FALSE,
  valid.col = FALSE,
  graph.magnif = 0.75,
  tmp.img.dir = "/tmp"
)
Dimensions: 32 x 11
Duplicates: 0
| No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing |
|---|---|---|---|---|---|
| 1 | mpg [numeric] | Mean (sd) : 20.1 (6)<br>min < med < max: 10.4 < 19.2 < 33.9<br>IQR (CV) : 7.4 (0.3) | 25 distinct values | | 0 (0.0%) |
| 2 | cyl [numeric] | Mean (sd) : 6.2 (1.8)<br>min < med < max: 4 < 6 < 8<br>IQR (CV) : 4 (0.3) | 4 : 11 (34.4%)<br>6 : 7 (21.9%)<br>8 : 14 (43.8%) | | 0 (0.0%) |
| 3 | disp [numeric] | Mean (sd) : 230.7 (123.9)<br>min < med < max: 71.1 < 196.3 < 472<br>IQR (CV) : 205.2 (0.5) | 27 distinct values | | 0 (0.0%) |
| 4 | hp [numeric] | Mean (sd) : 146.7 (68.6)<br>min < med < max: 52 < 123 < 335<br>IQR (CV) : 83.5 (0.5) | 22 distinct values | | 0 (0.0%) |
| 5 | drat [numeric] | Mean (sd) : 3.6 (0.5)<br>min < med < max: 2.8 < 3.7 < 4.9<br>IQR (CV) : 0.8 (0.1) | 22 distinct values | | 0 (0.0%) |
| 6 | wt [numeric] | Mean (sd) : 3.2 (1)<br>min < med < max: 1.5 < 3.3 < 5.4<br>IQR (CV) : 1 (0.3) | 29 distinct values | | 0 (0.0%) |
| 7 | qsec [numeric] | Mean (sd) : 17.8 (1.8)<br>min < med < max: 14.5 < 17.7 < 22.9<br>IQR (CV) : 2 (0.1) | 30 distinct values | | 0 (0.0%) |
| 8 | vs [numeric] | Min : 0<br>Mean : 0.4<br>Max : 1 | 0 : 18 (56.2%)<br>1 : 14 (43.8%) | | 0 (0.0%) |
| 9 | am [numeric] | Min : 0<br>Mean : 0.4<br>Max : 1 | 0 : 19 (59.4%)<br>1 : 13 (40.6%) | | 0 (0.0%) |
| 10 | gear [numeric] | Mean (sd) : 3.7 (0.7)<br>min < med < max: 3 < 4 < 5<br>IQR (CV) : 1 (0.2) | 3 : 15 (46.9%)<br>4 : 12 (37.5%)<br>5 : 5 (15.6%) | | 0 (0.0%) |
| 11 | carb [numeric] | Mean (sd) : 2.8 (1.6)<br>min < med < max: 1 < 2 < 8<br>IQR (CV) : 2 (0.6) | 1 : 7 (21.9%)<br>2 : 10 (31.2%)<br>3 : 3 ( 9.4%)<br>4 : 10 (31.2%)<br>6 : 1 ( 3.1%)<br>8 : 1 ( 3.1%) | | 0 (0.0%) |
library(GGally)
# Pairs plot of columns 1, 2, 6 and 7 of mtcars (mpg, cyl, wt, qsec),
# coloured by vs as a factor.
# `a` is a copy of mtcars with cyl converted to a factor - presumably so
# that ggpairs treats it as a discrete variable.
a <- transform(mtcars, cyl = as.factor(cyl))

# Lower triangle: smoothed scatter for continuous pairs (faint, small
# points); discrete and mixed (combo) panels are left blank.
ggpairs(
  a,
  columns = c(1, 2, 6, 7),
  mapping = ggplot2::aes(colour = as.factor(vs)),
  lower = list(
    continuous = wrap("smooth", alpha = 0.3, size = 0.1),
    discrete = "blank",
    combo = "blank"
  )
)