# ken_kwapis, greg_daniels, b_j_novak , # pam, phyllis, ryan, toby, erin, jan , # … with 126 more rows, and 22 more variables: michael, oscar , # season episode episode_name andy angela darryl dwight jim kelly kevin Office % distinct(season, episode, episode_name) %>% inner_join(characters) %>% inner_join(creators) %>% inner_join(office_ratings %>% select(episode_name, imdb_rating)) %>% Next, let’s find the season and episode number for each episode, and then finally let’s put it all together into one dataset for modeling. # Celotta`, `Randall Einhorn`, `Brent Forrester`, `Jeffrey
# Feig`, `Gene Stupnitsky`, `Lee Eisenberg`, `Jennifer # ... with 125 more rows, and 9 more variables: `Mindy Kaling`, `Paul # episode_name `Ken Kwapis` `Greg Daniels` `B.J. I’m choosing here to combine this into one category in modeling, for a simpler model, since these are often the same individuals.Ĭreators % distinct(episode_name, director, writer) %>% pivot_longer(director :writer, names_to = "role", values_to = "person") %>% separate_rows(person, sep = " ") %>% add_count(person) %>% filter(n > 10) %>% distinct(episode_name, person) %>% mutate(person_value = 1) %>% pivot_wider( Next, let’s find which directors and writers are involved in each episode. # ... with 175 more rows, and 5 more variables: Phyllis, Ryan , # episode_name Andy Angela Darryl Dwight Jim Kelly Kevin Michael Oscar Pam First, let’s find out how many times characters speak per episode.Ĭharacters % count(episode_name, character) %>% add_count(character, wt = n, name = "character_count") %>% filter(character_count > 800) %>% select( -character_count) %>% pivot_wider( We are going to use several different kinds of features for modeling. # 10 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Pam # 9 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # 8 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Pam # 7 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # 6 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # 5 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # 4 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Jim # 3 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # 2 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Jim # 1 1 1 pilot Ken Kwapis Ricky Gervais Stephen Merch… Michael # season episode episode_name director writer character ) %>% select(season, episode, episode_name, director, writer, character) Episode_name = str_remove_all(episode_name, remove_regex),Įpisode_name = str_to_lower(episode_name),