All the code below is in R
# Widen the console so wide printed tables are not wrapped.
options(width = 110)

# Load the episode-level data from CSV (first row holds the column names)
# and show the structure of the resulting data frame.
simp <- read.csv(
  file = "~/Dropbox/simpsons_eps-imdb-rating.csv",
  header = TRUE
)
str(simp)
'data.frame': 600 obs. of 6 variables:
$ season : int 1 1 2 2 2 2 2 2 2 2 ...
$ number_in_season : int 10 12 1 4 6 8 10 13 15 17 ...
$ us_viewers_in_millions: num 30.3 30.4 33.6 26.1 25.4 26.2 24.8 26.2 26.8 21.2 ...
$ views : int 50816 62561 59575 64959 50691 57605 56486 58277 47426 44331 ...
$ imdb_rating : num 7.4 8.3 8.2 8.1 8 8.4 7.8 8 8.2 7.6 ...
$ imdb_votes : int 1511 1716 1638 1457 1366 1522 1340 1329 1413 1243 ...
# Per-column summary statistics (range, quartiles, mean, NA counts) for simp.
summary(simp)
season number_in_season us_viewers_in_millions views imdb_rating imdb_votes
Min. : 1.0 Min. : 1.00 Min. : 2.320 Min. : 144 Min. :4.500 Min. : 104.0
1st Qu.: 7.0 1st Qu.: 6.00 1st Qu.: 7.055 1st Qu.: 41302 1st Qu.:6.900 1st Qu.: 560.0
Median :14.0 Median :11.00 Median :10.300 Median : 46036 Median :7.300 Median : 697.0
Mean :14.1 Mean :11.59 Mean :11.843 Mean : 48759 Mean :7.386 Mean : 832.4
3rd Qu.:21.0 3rd Qu.:17.00 3rd Qu.:15.250 3rd Qu.: 57594 3rd Qu.:8.000 3rd Qu.:1095.0
Max. :28.0 Max. :25.00 Max. :33.600 Max. :171408 Max. :9.200 Max. :3734.0
NA's :6 NA's :4 NA's :3 NA's :3
# Build an episode-by-season matrix of IMDb ratings for the heatmap.
#
# simp2: the episode table sorted by season and episode number, with
#        season 28 excluded.
# simp3: numeric matrix with one row per episode number and one column per
#        season; cells with no episode (or no rating) stay NA.
simp2 <- simp[with(simp, order(season, number_in_season)), ]
simp2 <- simp2[simp2$season != 28, ]

simp3 <- matrix(
  NA_real_,
  nrow = max(simp2$number_in_season),
  ncol = max(simp2$season)
)

# Place every rating at [episode number, season] using matrix indexing.
# Filling each column from the top instead would shift ratings into the wrong
# row whenever an episode is missing from a season, so the row label would no
# longer match the episode number.
simp3[cbind(simp2$number_in_season, simp2$season)] <- simp2$imdb_rating

# Derive the labels from the matrix itself rather than hard-coding 25 x 27,
# so the code keeps working if the data gains or loses episodes/seasons.
rownames(simp3) <- seq_len(nrow(simp3))
colnames(simp3) <- seq_len(ncol(simp3))
# Draw the rating matrix as a heatmap. Clustering is switched off for both
# rows and columns, cell borders are disabled, and numbers are displayed in
# the cells.
pheatmap::pheatmap(
  simp3,
  main = "The Simpsons: IMDb Rating\nRows: Episodes, Columns: Seasons",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  display_numbers = TRUE,
  border_color = NA
)
# Print the current date-time, marking when this analysis was run.
Sys.time()
[1] "2017-12-17 17:02:32 +03"