Female Nobel Laureates
R
Visuals
2022
Get Data
The Nobel Prize website provides a public API that returns the raw laureate data used here.
# Get laureate data from the Nobel Prize API.
# `limit=1000` asks for all laureates in a single page.
res1 <- GET("http://api.nobelprize.org/2.1/laureates?limit=1000")

# Fail fast with the HTTP error instead of a confusing JSON parse failure.
httr::stop_for_status(res1)

json_laureate <- fromJSON(rawToChar(res1$content))
laureate <- json_laureate$laureates

# Guard against silent truncation once the number of laureates exceeds `limit`.
# NOTE(review): assumes the response carries the total in `meta$count`
# (API v2.1) - TODO confirm; the check is skipped when the field is absent.
reported_total <- json_laureate$meta$count
if (isTRUE(reported_total > nrow(laureate))) {
  warning(
    "Nobel Prize API reports ", reported_total, " laureates but only ",
    nrow(laureate), " were returned; raise `limit` in the request URL.",
    call. = FALSE
  )
}
laureate %>% glimpse()
Rows: 981
Columns: 23
$ id <chr> "745", "102", "779", "259", "1004", "114", "982", "9…
$ knownName <df[,3]> <data.frame[26 x 3]>
$ givenName <df[,3]> <data.frame[26 x 3]>
$ familyName <df[,3]> <data.frame[26 x 3]>
$ fullName <df[,3]> <data.frame[26 x 3]>
$ fileName <chr> "spence", "bohr", "ciechanover", "klug", "gurnah"…
$ gender <chr> "male", "male", "male", "male", "male", "male", "…
$ birth <df[,2]> <data.frame[26 x 2]>
$ wikipedia <df[,2]> <data.frame[26 x 2]>
$ wikidata <df[,2]> <data.frame[26 x 2]>
$ sameAs <list> <"https://www.wikidata.org/wiki/Q157245", "https:…
$ links <list> [<data.frame[2 x 6]>], [<data.frame[2 x 6]>], [<d…
$ nobelPrizes <list> [<data.frame[1 x 12]>], [<data.frame[1 x 12]>], [<da…
$ death <df[,2]> <data.frame[26 x 2]>
$ orgName <df[,3]> <data.frame[26 x 3]>
$ acronym <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ founded <df[,2]> <data.frame[26 x 2]>
$ nativeName <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penName <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penNameOf <df[,1]> <data.frame[26 x 1]>
$ foundedCountry <df[,3]> <data.frame[26 x 3]>
$ foundedCountryNow <df[,3]> <data.frame[26 x 3]>
$ foundedContinent <df[,1]> <data.frame[26 x 1]>
Data Munging
Award Winner - Names
# Create the data set of Nobel laureates: one row per laureate with English
# names, gender, birth date and birthplace.
# The name and birth columns arrive as nested data frames. Unpacking several
# of them at once yields repeated `en` columns, which `tidyr_legacy`
# de-duplicates with a numeric suffix (en, en1, en2).
# NOTE(review): which source column ends up as en / en1 / en2 depends on the
# order in which tidyr emits the unpacked columns - re-check against
# glimpse() if the API schema changes.
df_laureate <- laureate %>%
  unnest(c(fullName, givenName, familyName, birth), names_repair = tidyr_legacy) %>%
  select(id, en, en1, en2, gender, date, place) %>%
  rename(
    first_name = "en",
    last_name = "en1",
    full_name = "en2",
    birth_date = "date"
  ) %>%
  unnest(place) %>%
  # unnest() takes its columns as one c(...) selection; passing them as
  # separate arguments is the deprecated pre-1.0 interface and warns.
  unnest(c(cityNow, countryNow), names_repair = tidyr_legacy) %>%
  select(id, full_name, first_name, last_name, birth_date, gender, en, en1) %>%
  rename(
    birth_city = "en",
    birth_country = "en1"
  )
df_laureate %>% glimpse()
Rows: 981
Columns: 8
$ id <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ full_name <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…
Award Categories
# Create the data set of awards (Nobel Prizes): one row per laureate and
# prize, keeping the laureate id, the award year and the English category name.
df_prize <- laureate %>%
  select(id, nobelPrizes) %>%
  # `names_repair` is the actual argument name; the former `repair = ...`
  # was swallowed by `...` and treated as an extra column to unnest.
  unnest(nobelPrizes, names_repair = "universal") %>%
  select(id, awardYear, category) %>%
  # `category` is itself a nested data frame; `en` holds the English label
  unnest(category) %>%
  select(id, awardYear, en) %>%
  rename(
    laureate_id = "id",
    award_year = "awardYear",
    category = "en"
  )
df_prize %>% glimpse()
Rows: 989
Columns: 3
$ laureate_id <chr> "745", "102", "779", "259", "1004", "114", "982", "981", "…
$ award_year <chr> "2001", "1975", "2004", "1982", "2021", "1979", "2019", "2…
$ category <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry", …
Join the Data
# Attach the laureate details to every prize row, then store the award year
# as an integer and add a constant counter column used for later sums.
df_prize_laureate <- df_prize %>%
  left_join(df_laureate, by = c("laureate_id" = "id")) %>%
  mutate(
    award_year = as.integer(award_year),
    count = 1
  )
df_prize_laureate %>% glimpse()
Rows: 989
Columns: 11
$ laureate_id <chr> "745", "102", "779", "259", "1004", "114", "982", "981",…
$ award_year <int> 2001, 1975, 2004, 1982, 2021, 1979, 2019, 2019, 2009, 20…
$ category <chr> "Economic Sciences", "Physics", "Chemistry", "Chemistry"…
$ full_name <chr> "A. Michael Spence", "Aage Niels Bohr", "Aaron Ciechanov…
$ first_name <chr> "A. Michael", "Aage N.", "Aaron", "Aaron", "Abdulrazak",…
$ last_name <chr> "Spence", "Bohr", "Ciechanover", "Klug", "Gurnah", "Sala…
$ birth_date <chr> "1943-00-00", "1922-06-19", "1947-10-01", "1926-08-11", …
$ gender <chr> "male", "male", "male", "male", "male", "male", "male", …
$ birth_city <chr> "Montclair, NJ", "Copenhagen", "Haifa", "Zelvas", NA, "J…
$ birth_country <chr> "USA", "Denmark", "Israel", "Lithuania", NA, "Pakistan",…
$ count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
Reshape the Data
# Reshape to one row per category and award year, counting recipients by
# gender and labelling each cell as Female, Male or Mixed Team.
#
# complete() adds a row for every category/year combination without an award.
# Those rows have NA `count` and `gender`, so their sums and `grouping` stay
# NA.
# NOTE(review): a laureate whose gender is NA (presumably an organisation)
# also turns male_count / female_count into NA for that year, leaving
# `grouping` NA - confirm this is the intended rendering.

# Latest award year present in the data; replaces the hard-coded 2021 so the
# year grid and the last period label follow the data.
last_award_year <- max(df_prize_laureate$award_year, na.rm = TRUE)

df_grouping <- df_prize_laureate %>%
  complete(
    category = unique(df_prize_laureate$category),
    award_year = 1901:last_award_year
  ) %>%
  group_by(category, award_year) %>%
  summarize(
    total_count = sum(count),
    male_count = sum(count[gender == "male"]),
    female_count = sum(count[gender == "female"]),
    # keep the result grouped by category, which is summarize()'s default here
    .groups = "drop_last"
  ) %>%
  mutate(
    grouping = case_when(
      female_count == total_count ~ "Female",
      male_count == total_count ~ "Male",
      female_count > 0 ~ "Mixed Team"
    ),
    # floor, not round: 1906-1909 belong to the 1900s, and round() also
    # rounds halves to even (1905 -> 1900 but 1915 -> 1920)
    award_decade = floor(award_year / 10) * 10,
    year_split = case_when(
      award_year >= 1981 ~ paste0("1981-", last_award_year),
      award_year >= 1941 ~ "1941-1980",
      award_year >= 1901 ~ "1901-1940"
    )
  )

# Categories in display order
category_list <- c(
  "Physiology or Medicine", "Physics", "Chemistry",
  "Literature", "Peace", "Economic Sciences"
)

# Store category as a factor with the reversed list as its levels
# (Economic Sciences was not introduced until later)
df_grouping$category <- factor(df_grouping$category, levels = rev(category_list))

# Fill palette; the values are unnamed.
# NOTE(review): presumably matched to the grouping values in sorted order
# (Female, Male, Mixed Team) - confirm against the rendered legend.
pal <- c("#D90368", "#2274A5", "#F1C40F")
df_grouping %>% glimpse()
Rows: 732
Columns: 8
Groups: category [6]
$ category <fct> Chemistry, Chemistry, Chemistry, Chemistry, Chemistry, Ch…
$ award_year <int> 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 191…
$ total_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, NA, NA, 1, N…
$ male_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 1, NA, NA, 1, N…
$ female_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, NA, NA, 0, N…
$ grouping <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "…
$ award_decade <dbl> 1900, 1900, 1900, 1900, 1900, 1910, 1910, 1910, 1910, 191…
$ year_split <chr> "1901-1940", "1901-1940", "1901-1940", "1901-1940", "1901…
Visualize
# Plot: one tile per category (y) and award year (x), filled by recipient
# gender, with one stacked facet per period. Cells whose grouping is NA are
# drawn in grey.
g1 <- ggplot(df_grouping, aes(x = award_year, y = category, fill = grouping)) +
  geom_tile(color = "white", width = .9, height = .9) +
  scale_fill_manual(
    values = pal,
    na.value = "grey85",
    guide = guide_legend(title.position = "top", title.hjust = 0.5)
  ) +
  # each period gets its own x range
  facet_wrap(~ year_split, ncol = 1, scales = "free_x") +
  labs(
    title = "Nobel Prize Laureates",
    caption = "Data from Nobel Prize API",
    subtitle = 'Note: Some categories and years have more than one recipient, \n "Mixed" denotes a team with male and female laureates',
    x = "Year",
    y = "Category",
    fill = "Recipient Gender"
  ) +
  theme_void() +
  theme(
    text = element_text(),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 20, vjust = 5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, vjust = 6, size = 12),
    #axis.title.x = element_text(family = "nunito"),
    #axis.text = element_text(family = "nunito"),
    # theme_void() drops axis text, so the category labels are set explicitly
    axis.text.y = element_text(hjust = 1, size = 10),
    strip.text.x = element_text(size = 12),
    plot.caption = element_text(size = 10, hjust = 0.95),
    plot.margin = unit(c(1.1, 0.8, 0.8, 0.8), "cm"),
    legend.spacing.x = unit(0.8, "cm"),
    legend.box.margin = margin(0, 0, 0.25, 0)
  )
g1
Awarded Women List
# List every prize awarded to a female laureate, oldest award first, with
# display-ready column headers; the pipeline continues into gt() below.
df_prize_laureate %>%
  filter(gender == "female") %>%
  select(full_name, birth_country, award_year) %>%
  arrange(award_year) %>%
  rename(
    "Full Name" = full_name,
    "Country" = birth_country,
    "Year Awarded" = award_year
  ) %>%
gt() %>% tab_header(title = md("**Nobel Laureate Women**"))
| Nobel Laureate Women | ||
| Fulll Name | Country | Year Awarded |
|---|---|---|
| Marie Curie, née Sklodowska | Poland | 1903 |
| Baroness Bertha Sophie Felicita von Suttner, née Countess Kinsky von Chinic und Tettau | Czech Republic | 1905 |
| Selma Ottilia Lovisa Lagerlöf | Sweden | 1909 |
| Marie Curie, née Sklodowska | Poland | 1911 |
| Grazia Deledda | Italy | 1926 |
| Sigrid Undset | Denmark | 1928 |
| Jane Addams | USA | 1931 |
| Irène Joliot-Curie | France | 1935 |
| Pearl Buck | USA | 1938 |
| Gabriela Mistral | Chile | 1945 |
| Emily Greene Balch | USA | 1946 |
| Gerty Theresa Cori, née Radnitz | Czech Republic | 1947 |
| Maria Goeppert Mayer | Poland | 1963 |
| Dorothy Crowfoot Hodgkin | Egypt | 1964 |
| Nelly Sachs | Germany | 1966 |
| Elizabeth Williams | Northern Ireland | 1976 |
| Mairead Corrigan | Northern Ireland | 1976 |
| Rosalyn Yalow | USA | 1977 |
| Mother Teresa | North Macedonia | 1979 |
| Alva Myrdal | Sweden | 1982 |
| Barbara McClintock | USA | 1983 |
| Rita Levi-Montalcini | Italy | 1986 |
| Gertrude B. Elion | USA | 1988 |
| Aung San Suu Kyi | Myanmar | 1991 |
| Nadine Gordimer | South Africa | 1991 |
| Rigoberta Menchú Tum | Guatemala | 1992 |
| Toni Morrison | USA | 1993 |
| Christiane Nüsslein-Volhard | Germany | 1995 |
| Wislawa Szymborska | Poland | 1996 |
| Jody Williams | USA | 1997 |
| Shirin Ebadi | Iran | 2003 |
| Elfriede Jelinek | Austria | 2004 |
| Linda B. Buck | USA | 2004 |
| Wangari Muta Maathai | Kenya | 2004 |
| Doris Lessing | Iran | 2007 |
| Françoise Barré-Sinoussi | France | 2008 |
| Ada E. Yonath | Israel | 2009 |
| Carol W. Greider | USA | 2009 |
| Elinor Ostrom | USA | 2009 |
| Elizabeth H. Blackburn | Australia | 2009 |
| Herta Müller | Romania | 2009 |
| Ellen Johnson Sirleaf | Liberia | 2011 |
| Leymah Gbowee | Liberia | 2011 |
| Tawakkol Karman | Yemen | 2011 |
| Alice Munro | Canada | 2013 |
| Malala Yousafzai | Pakistan | 2014 |
| May-Britt Moser | Norway | 2014 |
| Svetlana Alexievich | Ukraine | 2015 |
| Tu Youyou | China | 2015 |
| Donna Strickland | Canada | 2018 |
| Frances H. Arnold | USA | 2018 |
| Nadia Murad Basee Taha | Iraq | 2018 |
| Olga Tokarczuk | Poland | 2018 |
| Esther Duflo | France | 2019 |
| Andrea Ghez | USA | 2020 |
| Emmanuelle Charpentier | France | 2020 |
| Jennifer A. Doudna | USA | 2020 |
| Louise Glück | USA | 2020 |
| Maria Ressa | Philippines | 2021 |
| Annie Ernaux | France | 2022 |
| Carolyn R. Bertozzi | USA | 2022 |
Female Laureates Count by Country
# Count prizes awarded to female laureates per birth country, largest count
# first; the pipeline continues into gt() below.
df_prize_laureate %>%
  filter(gender == "female") %>%
  rename("Country" = birth_country) %>%
  count(Country, name = "Country Count", sort = TRUE) %>%
gt() %>% tab_header(title = md("**Nobel Laureate Women by Country**"))
| Nobel Laureate Women by Country | |
| Country | Country Count |
|---|---|
| USA | 16 |
| France | 5 |
| Poland | 5 |
| Canada | 2 |
| Czech Republic | 2 |
| Germany | 2 |
| Iran | 2 |
| Italy | 2 |
| Liberia | 2 |
| Northern Ireland | 2 |
| Sweden | 2 |
| Australia | 1 |
| Austria | 1 |
| Chile | 1 |
| China | 1 |
| Denmark | 1 |
| Egypt | 1 |
| Guatemala | 1 |
| Iraq | 1 |
| Israel | 1 |
| Kenya | 1 |
| Myanmar | 1 |
| North Macedonia | 1 |
| Norway | 1 |
| Pakistan | 1 |
| Philippines | 1 |
| Romania | 1 |
| South Africa | 1 |
| Ukraine | 1 |
| Yemen | 1 |