Some explorations of music: Taste of music critics vs the public. Most acclaimed years of music. When did rock music die?
The critics' music preferences come from Acclaimed Music, and the lists of best-selling music come from Tsort.
imports
library(pacman)
p_load(tidyverse, magrittr, pander, naniar, glue, readxl, rap, janitor, scales, zoo)
source('../../src/extra.R', echo = F, encoding="utf-8")
read data
# Convert the two Acclaimed Music spreadsheets into TSV files.
#
# Reads the song and album workbooks under files/, keeps the rank column
# (renamed to Critic_rank) together with Artist, the title column and Year,
# and writes each selection to a tab-separated file. Takes no arguments; the
# value of the final write_tsv() call is what the function returns.
excel_to_tsv <- function() {
  song_sheet <- read_excel("files/top_10000_songs_180715.xlsx")
  song_cols <- select(
    song_sheet,
    Critic_rank = "PLACE\r\n2018-JUL-15", Artist, Song, Year
  )
  write_tsv(song_cols, "files/acclaimed_songs.tsv")

  album_sheet <- read_excel("files/top_3000_albums_180715.xlsx")
  album_cols <- select(
    album_sheet,
    Critic_rank = "PLACE\r\n2018-JUL-15", Artist, Album, Year
  )
  write_tsv(album_cols, "files/acclaimed_albums.tsv")
}
# Correction tables, one per name type. The correct_*() helpers read the
# `old` and `new` columns of these tables.
artist_c <- read_tsv("files/artist_corrections.tsv")
album_c <- read_tsv("files/album_corrections.tsv")
song_c <- read_tsv("files/song_corrections.tsv")

# Album titles that correct_albums() filters out.
exclude_albums <- c(
  "1",
  "Saturday Night Fever",
  "The Bodyguard",
  "Grease: The Original Soundtrack"
)
# Apply the song-name corrections to `df$song`, then title-case the result.
#
# df: a data frame with a `song` column.
# Returns `df` with `song` rewritten.
correct_songs <- function(df) {
  # str_replace_all() treats the *names* of this vector (taken from `new`) as
  # regex patterns and the *values* (taken from `old`) as replacements.
  # NOTE(review): confirm that direction matches the columns' meaning.
  song_map <- setNames(song_c$old, song_c$new)
  df %>%
    mutate(
      song = tools::toTitleCase(
        as.character(stringr::str_replace_all(song, song_map))
      )
    )
}
# Apply the album-name corrections to `df$album`, title-case the result and
# drop every album listed in `exclude_albums`.
#
# df: a data frame with an `album` column.
# Returns the corrected and filtered data frame.
correct_albums <- function(df) {
  # str_replace_all() treats the *names* of this vector (taken from `new`) as
  # regex patterns and the *values* (taken from `old`) as replacements.
  # NOTE(review): confirm that direction matches the columns' meaning.
  album_map <- setNames(album_c$old, album_c$new)
  df %>%
    mutate(
      album = tools::toTitleCase(
        as.character(stringr::str_replace_all(album, album_map))
      )
    ) %>%
    # The exclusion list is compared against the title-cased names.
    filter(!(album %in% exclude_albums))
}
# Apply the artist-name corrections to `df$artist`, then title-case the result.
#
# df: a data frame with an `artist` column.
# Returns `df` with `artist` rewritten.
correct_artists <- function(df) {
  # str_replace_all() treats the *names* of this vector (taken from `new`) as
  # regex patterns and the *values* (taken from `old`) as replacements.
  # NOTE(review): confirm that direction matches the columns' meaning.
  artist_map <- setNames(artist_c$old, artist_c$new)
  df %>%
    mutate(
      artist = tools::toTitleCase(
        as.character(stringr::str_replace_all(artist, artist_map))
      )
    )
}
# Best-selling songs: keep artist, title and chart position.
selling_songs <- read_csv("files/selling_songs.csv") %>%
  clean_names() %>%
  select(artist, song = name, public_rank = position) %>%
  correct_artists() %>%
  correct_songs()

# Critically acclaimed songs (the TSV written by excel_to_tsv()).
acclaimed_songs <- read_tsv("files/acclaimed_songs.tsv") %>%
  clean_names() %>%
  correct_artists() %>%
  correct_songs()

# Best-selling albums.
# NOTE(review): this is the only table read with base read.csv() rather than
# a readr reader - confirm whether that is intentional.
selling_albums <- read.csv("files/selling_albums.csv") %>%
  clean_names() %>%
  select(artist, album = name, year, public_rank = position) %>%
  correct_albums() %>%
  correct_artists()

# Critically acclaimed albums (the TSV written by excel_to_tsv()).
acclaimed_albums <- read_tsv("files/acclaimed_albums.tsv") %>%
  clean_names() %>%
  correct_albums() %>%
  correct_artists()

# Genre table: semicolon-delimited, every column read as character.
genres <- read_delim(
  "files/genres.csv",
  delim = ";",
  col_types = cols(.default = "c")
) %>%
  clean_names()
create score
# Reference rank: rankify() substitutes it for a missing rank, and it is the
# position at which get_rank() returns 0.
high_n <- 7000

# Convert chart positions into scores on a double-log scale.
#
# col: numeric vector of positions (1 = best).
# n:   reference position that maps to a score of 0; defaults to `high_n`.
#
# Returns a numeric vector the same length as `col`. Positions better
# (smaller) than `n` score above 0; positions beyond `n` score below 0.
get_rank <- function(col, n = high_n) {
  log(log(10 + n)) - log(log(10 + col))
}
# Add score columns derived from the public and critic ranks.
#
# df: a data frame with `public_rank` and `critic_rank` columns; either may
#     contain NA for entries missing from that chart.
#
# Returns `df` with four added columns:
#   public_value, critic_value - get_rank() scores, with a missing rank
#                                scored as if it were `high_n` (i.e. 0);
#   dif                        - critic_value - public_value;
#   sum                        - critic_value + public_value.
#
# The rank columns themselves are left untouched. Filling them with `high_n`
# and afterwards turning every `high_n` back into NA would also erase a
# genuine rank equal to `high_n`.
rankify <- function(df) {
  df %>%
    mutate(
      public_value = get_rank(
        ifelse(is.na(public_rank), high_n, public_rank)
      ),
      critic_value = get_rank(
        ifelse(is.na(critic_rank), high_n, critic_rank)
      ),
      dif = critic_value - public_value,
      sum = critic_value + public_value
    )
}
# Combine the critic and sales lists, keeping entries found in either one,
# then add the score columns.
songs <- acclaimed_songs %>%
  full_join(selling_songs, by = c("artist", "song")) %>%
  rankify()

albums <- acclaimed_albums %>%
  full_join(selling_albums, by = c("artist", "album")) %>%
  rankify()
code
# Total critic score per year for albums; `year.x` is used as the year.
albums_year <- albums %>%
  group_by(year = year.x) %>%
  summarise(acclaim_albums = sum(critic_value))

# Total critic score per year for songs.
songs_year <- songs %>%
  group_by(year) %>%
  summarise(acclaim_songs = sum(critic_value))

# Years present in both tables, from 1950 onwards, with the combined score.
acclaim <- albums_year %>%
  inner_join(songs_year, by = "year") %>%
  filter(year > 1949) %>%
  mutate(acclaim = acclaim_songs + acclaim_albums)
# Bar chart of yearly acclaim, with bars coloured by decade.
#
# df: a data frame with numeric `year` and `acclaim` columns.
#
# Returns a ggplot object. Bars are filled from `jcol`, which is not defined
# in this section (presumably it comes from the sourced extra.R - confirm).
plot_acclaim <- function(df) {
  df %>%
    # Left-closed 10-year bins.
    mutate(decade = cut_interval(year, length = 10, right = FALSE)) %>%
    ggplot(aes(x = year, y = acclaim, fill = decade)) +
    geom_col() +
    theme(
      axis.text.x = element_text(face = "bold", color = "#993333", size = 11),
      text = element_text(size = 15)
    ) +
    # "none" replaces the deprecated `fill = FALSE` form.
    guides(fill = "none") +
    # Label each decade at its midpoint.
    scale_x_continuous(
      breaks = c(1955, 1965, 1975, 1985, 1995, 2005, 2015),
      labels = c("1950s", "1960s", "1970s", "1980s", "1990s", "2000s", "2010s")
    ) +
    scale_fill_manual(values = jcol) +
    labs(y = "Critical acclaim", x = "Year")
}
Plot based on albums and singles
# Yearly acclaim from albums and songs combined.
plot_acclaim(acclaim)
Plot based on albums only
# Yearly acclaim from albums only, from 1950 onwards. plot_acclaim() reads
# the `acclaim` column, so the album total is copied into it.
albums_year %>%
  filter(year > 1949) %>%
  mutate(acclaim = acclaim_albums) %>%
  plot_acclaim()
code
# Text scatter plot comparing two score columns.
#
# df:    data frame holding the columns named below.
# a, b:  column names (strings) mapped to the x and y axes.
# label: column name (string) whose values are drawn as the text labels.
# col:   column name (string) mapped to the text colour.
#
# Returns a ggplot object with a dashed y = x reference line, no legends and
# no axis ticks or tick labels.
plotg <- function(df, a, b, label, col) {
  df %>%
    # `!!sym()` turns each column-name string into a symbol, replacing the
    # deprecated aes_string().
    ggplot(aes(!!sym(a), !!sym(b), label = !!sym(label), color = !!sym(col))) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    # Skip labels that would overlap one already drawn.
    geom_text(check_overlap = TRUE) +
    scale_colour_gradient2(low = muted("red"), high = muted("blue")) +
    # "none" replaces the deprecated `FALSE` form.
    guides(color = "none", size = "none") +
    theme(
      axis.title.y = element_text(margin = margin(t = 0, r = 30, b = 0, l = 0)),
      axis.title.x = element_text(margin = margin(t = 30, r = 0, b = 0, l = 0)),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.title = element_text(size = 20)
    ) +
    labs(x = "More liked by critics", y = "More liked by the public")
}
songs
# Critic score vs public score for songs, coloured by their difference.
songs %>%
  mutate(dif = critic_value - public_value) %>%
  plotg(a = "critic_value", b = "public_value", label = "song", col = "dif")
albums
# Critic score vs public score for albums, coloured by their difference.
albums %>%
  mutate(album_dif = critic_value - public_value) %>%
  plotg(
    a = "critic_value",
    b = "public_value",
    label = "album",
    col = "album_dif"
  )
code
# TRUE for each row where `genre` equals the value in any of the supplied
# columns. `%in%` never returns NA, so a missing parent genre counts as
# "no match".
has_genre <- function(genre, ...) {
  Reduce(`|`, lapply(list(...), function(parent) parent %in% genre))
}

# Keep only the genre rows that match an album in `albums`, then add one
# logical flag per genre. A flag is TRUE when the genre appears in parent1,
# parent2 or parent3. The test is computed on whole columns at once rather
# than row by row.
genres <- genres %>%
  inner_join(albums, by = c("artist", "album")) %>%
  mutate(
    isRock = has_genre("Rock", parent1, parent2, parent3),
    isPop = has_genre("Pop", parent1, parent2, parent3),
    isElectronic = has_genre("Electronic", parent1, parent2, parent3),
    isRB = has_genre("R&B", parent1, parent2, parent3),
    isFolk = has_genre("Folk", parent1, parent2, parent3),
    isDance = has_genre("Dance", parent1, parent2, parent3),
    isHiphop = has_genre("Hip Hop", parent1, parent2, parent3),
    isJazz = has_genre("Jazz", parent1, parent2, parent3)
  )
# Per-year share of the total critic score carried by each genre.
# `All` is the year's total; every other column is that genre's sum divided
# by `All`. An album flagged for several genres counts towards each of them.
genres_critics <- genres %>%
  group_by(year) %>%
  summarise(
    All = sum(critic_value),
    Rock = sum(critic_value[isRock]) / All,
    Pop = sum(critic_value[isPop]) / All,
    Electronic = sum(critic_value[isElectronic]) / All,
    `R&B` = sum(critic_value[isRB]) / All,
    # Folk and Dance are deliberately left out of the summary:
    # Folk = sum(critic_value[isFolk]) / All,
    # Dance = sum(critic_value[isDance]) / All,
    Hiphop = sum(critic_value[isHiphop]) / All,
    Jazz = sum(critic_value[isJazz]) / All
  )
# Per-year share of the total public score carried by each genre.
# `All` is the year's total; every other column is that genre's sum divided
# by `All`. An album flagged for several genres counts towards each of them.
genres_public <- genres %>%
  group_by(year) %>%
  summarise(
    All = sum(public_value),
    Rock = sum(public_value[isRock]) / All,
    Pop = sum(public_value[isPop]) / All,
    Electronic = sum(public_value[isElectronic]) / All,
    `R&B` = sum(public_value[isRB]) / All,
    # Folk and Dance are deliberately left out of the summary:
    # Folk = sum(public_value[isFolk]) / All,
    # Dance = sum(public_value[isDance]) / All,
    Hiphop = sum(public_value[isHiphop]) / All,
    Jazz = sum(public_value[isJazz]) / All
  )
# Line chart of smoothed genre shares over time.
#
# df: a data frame with a `year` column, an `All` column (dropped here) and
#     one column per genre.
#
# Returns a ggplot object with one line per genre. Each line is a centred
# 5-year rolling mean, so years without a full window are dropped. Colours
# are taken from `jcol`, which is not defined in this section (presumably it
# comes from the sourced extra.R - confirm).
plot_genres <- function(df) {
  df %>%
    select(-All) %>%
    pivot_longer(cols = -year, names_to = "genre", values_to = "acclaim") %>%
    filter(year > 1958) %>%
    # The rolling mean is only meaningful on rows in year order.
    arrange(year) %>%
    group_by(genre) %>%
    # `fill = NA` pads the ends; it replaces the deprecated `na.pad = TRUE`.
    mutate(acclaim = rollmean(acclaim, 5, fill = NA)) %>%
    ungroup() %>%
    drop_na() %>%
    ggplot(aes(x = year, y = acclaim, group = genre, color = genre)) +
    geom_line(size = 1.3) +
    scale_color_manual(values = jcol) +
    # A discrete x scale is used, so `year` is presumably non-numeric here
    # (the genre file is read with all-character columns) - confirm.
    scale_x_discrete(breaks = seq(1960, 2010, by = 10)) +
    theme(
      axis.text.y = element_blank(),
      axis.text.x = element_text(face = "bold", color = "#993333", size = 12),
      text = element_text(size = 18),
      legend.text = element_text(size = 15)
    ) +
    labs(x = "Year")
}
critics
# Genre shares of critical acclaim over time.
plot_genres(genres_critics) +
  labs(y = "Critical acclaim")
public
# Genre shares of public popularity over time.
plot_genres(genres_public) +
  labs(y = "Public popularity")