TidyTuesday2025-Week37

Code

library(tidyr)
library(dplyr)
library(ggplot2)

# Load the two TidyTuesday 2025-09-16 recipe datasets directly from the
# rfordatascience/tidytuesday GitHub repository. Both reads need network access.
all_recipes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-16/all_recipes.csv')
cuisines <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-16/cuisines.csv')

Code

# EDA: count the missing values in each column of all_recipes
all_recipes |>
  is.na() |>
  colSums()

          name            url         author date_published    ingredients 
             0              0              0              0              9 
      calories            fat          carbs        protein     avg_rating 
           200            356            214            248            972 
 total_ratings        reviews      prep_time      cook_time     total_time 
           972           1073              0              0              0 
      servings 
            21

Code

# EDA: count the missing values in each column of cuisines
cuisines |>
  is.na() |>
  colSums()

          name        country            url         author date_published 
             0              0              0              0              0 
   ingredients       calories            fat          carbs        protein 
             1             32             55             35             39 
    avg_rating  total_ratings        reviews      prep_time      cook_time 
            97             97            108              0              0 
    total_time       servings 
             0              2

Code

# Keep every recipe that has all four nutrient values, so the per-country
# means computed later are never NA. A bare drop_na() would also discard
# recipes that are only missing ratings or reviews (columns this analysis
# never uses), shrinking the sample behind each country average for no reason.
cuisines_clean <- cuisines |>
  drop_na(calories, fat, carbs, protein)

Our research question is: which countries consistently have the highest carbs, calories, fat, and protein in their cuisine?

We developed this research question because we have a dataset of thousands of different recipes, all labelled with their nutrient amounts, as well as the country of origin of the dish. We decided this question would be a good one to pursue so we can gain insight into the cooking styles of different countries.

Code

# Collapse to one row per country holding the mean of each nutrient column
cuisines_clean <- cuisines_clean |>
  group_by(country) |>
  summarise(across(c(calories, fat, carbs, protein), mean))

Code

# Build the plotting table: for each nutrient, the minimum, mean, and maximum
# of the per-country averages, plus the country that attains each extreme.
# Result has one row per nutrient/statistic pair with a text `label` for the bar.
summary_extremes <- cuisines_clean |>
  # Long format: one row per country/nutrient pair
  pivot_longer(
    cols = c(fat, protein, carbs, calories),
    names_to = "nutrient",
    values_to = "value"
  ) |>
  group_by(nutrient) |>
  summarise(
    avg_value = mean(value, na.rm = TRUE),
    min_value = min(value, na.rm = TRUE),
    # which.min()/which.max() return the first position of the extreme value
    min_country = country[which.min(value)],
    max_value = max(value, na.rm = TRUE),
    max_country = country[which.max(value)],
    .groups = "drop"
  ) |>
  # Long format again: one row per nutrient/statistic pair
  pivot_longer(
    cols = c(min_value, avg_value, max_value),
    names_to = "stat",
    values_to = "value"
  ) |>
  mutate(
    # Explicit factor levels keep the bars and legend in Minimum, Average,
    # Maximum order; a character column would be sorted alphabetically
    # (Average, Maximum, Minimum) when plotted.
    stat = factor(
      stat,
      levels = c("min_value", "avg_value", "max_value"),
      labels = c("Minimum", "Average", "Maximum")
    ),
    # Extremes are labelled with their country; the mean bar just says "Average"
    label = case_when(
      stat == "Minimum" ~ min_country,
      stat == "Maximum" ~ max_country,
      TRUE ~ "Average"
    ),
    # Shorten long cuisine names so the bar labels do not overlap
    label = case_when(
      label == "Cajun and Creole" ~ "Cajun",
      label == "Australian and New Zealander" ~ "Australian",
      TRUE ~ label
    ),
    # Capitalised nutrient names for the x-axis
    nutrient = case_when(
      nutrient == "calories" ~ "Calories",
      nutrient == "carbs" ~ "Carbs",
      nutrient == "fat" ~ "Fat",
      nutrient == "protein" ~ "Protein",
      TRUE ~ nutrient
    )
  )

Code

# Final visualization: dodged bars of the minimum, average, and maximum
# per-country mean for each nutrient, with each bar labelled above its top.
# One dodge specification is shared so the text sits over the matching bar.
bar_dodge <- position_dodge(width = 0.8)

ggplot(
  data = summary_extremes,
  mapping = aes(x = nutrient, y = value, fill = stat)
) +
  geom_col(width = 0.7, position = bar_dodge) +
  geom_text(aes(label = label), size = 3, vjust = -0.3, position = bar_dodge) +
  labs(
    x = "",
    y = "Value (cal or g)",
    fill = "",
    title = "Countries with the Highest and Lowest Nutrient Densities in Cuisine",
    subtitle = "",
    caption = "Source: https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-09-16/readme.md \nAll NA values dropped"
  ) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 10),
    plot.caption = element_text(color = "gray40"),
    axis.title = element_text(size = 11, face = "bold"),
    legend.position = "top"
  )

---
title: "TidyTuesday2025-Week37"
---

```{r}
library(tidyr)
library(dplyr)
library(ggplot2)

# Load the two TidyTuesday 2025-09-16 recipe datasets from GitHub
all_recipes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-16/all_recipes.csv')
cuisines <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-16/cuisines.csv')
```

```{r}
# EDA to see if data has missingness or not
colSums(is.na(all_recipes))
colSums(is.na(cuisines))
```

```{r}
# Drop only recipes missing a nutrient value so the per-country means are
# never NA. A bare drop_na() would also discard recipes that are merely
# missing ratings or reviews, which this analysis never uses.
cuisines_clean <- cuisines |>
  drop_na(calories, fat, carbs, protein)
```

Our research question is: which countries consistently have the highest carbs, calories, fat, and protein in their cuisine?

We developed this research question because we have a dataset of thousands of different recipes, all labelled with their nutrient amounts, as well as the country of origin of the dish. We decided this question would be a good one to pursue so we can gain insight into the cooking styles of different countries.

```{r}
# Summarize nutrients: one row per country with the mean of each nutrient
cuisines_clean <- cuisines_clean |>
  group_by(country) |>
  summarize(
    calories = mean(calories),
    fat = mean(fat),
    carbs = mean(carbs),
    protein = mean(protein)
  )
```

```{r}
# Pivot longer to create our final graph: for each nutrient, the minimum,
# mean, and maximum of the per-country averages and the countries at the extremes
summary_extremes <- cuisines_clean |>
  pivot_longer(
    cols = c(fat, protein, carbs, calories),
    names_to = "nutrient",
    values_to = "value"
  ) |>
  group_by(nutrient) |>
  summarise(
    avg_value = mean(value, na.rm = TRUE),
    min_value = min(value, na.rm = TRUE),
    min_country = country[which.min(value)],
    max_value = max(value, na.rm = TRUE),
    max_country = country[which.max(value)],
    .groups = "drop"
  ) |>
  pivot_longer(
    cols = c(min_value, avg_value, max_value),
    names_to = "stat",
    values_to = "value"
  ) |>
  mutate(
    # Explicit factor levels keep bars and legend in Minimum, Average, Maximum
    # order instead of the alphabetical order a character column would get
    stat = factor(
      stat,
      levels = c("min_value", "avg_value", "max_value"),
      labels = c("Minimum", "Average", "Maximum")
    ),
    label = case_when(
      stat == "Minimum" ~ min_country,
      stat == "Maximum" ~ max_country,
      TRUE ~ "Average"
    ),
    # Shorten long cuisine names so the bar labels do not overlap
    label = case_when(
      label == "Cajun and Creole" ~ "Cajun",
      label == "Australian and New Zealander" ~ "Australian",
      TRUE ~ label
    ),
    nutrient = case_when(
      nutrient == "calories" ~ "Calories",
      nutrient == "carbs" ~ "Carbs",
      nutrient == "fat" ~ "Fat",
      nutrient == "protein" ~ "Protein",
      TRUE ~ nutrient
    )
  )
```

```{r, fig.width=8, fig.height=6}
# Final visualization
ggplot(summary_extremes, aes(x = nutrient, y = value, fill = stat)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = label),
    position = position_dodge(width = 0.8),
    vjust = -0.3,
    size = 3
  ) +
  labs(
    title = "Countries with the Highest and Lowest Nutrient Densities in Cuisine",
    subtitle = "",
    caption = "Source: https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-09-16/readme.md \nAll NA values dropped",
    x = "",
    y = "Value (cal or g)",
    fill = ""
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 10),
    axis.title = element_text(face = "bold", size = 11),
    plot.caption = element_text(color = "gray40")
  )
```