bones-clarke/himalayan_expedition_outcome_prediction

license: cc-by-nc-4.0 tags: - keras - himalayan - expedition - climate - weather - mountains

This model has been trained on a combination of the following data:
And my own extracted data, which I downloaded and then added to the expedition data above:
Historic Weather Data for Himalayan Peaks
An expedition score to measure expedition success or failure was determined, and the data was then used to predict that score, which, in essence, predicts the outcome of the expedition. Details on how the expedition score was derived will be provided soon, along with example usage.
Results:
Feature Importances:
How the target variable 'expedition_score' was calculated:
# Create successful summit metric per member and per expedition
def create_target_variables(df):
    """Derive the ``expedition_score`` target and its helper columns.

    Works on a copy of ``df``; the caller's frame is not modified.

    Columns added to the returned frame:
        peak_mean_smtdays, smtdays_to_mean_peak, highpoint_summit,
        success_on_route, winter_ascent, min_one_summit_by_member,
        member_summit_ratio, summited_death_ratio,
        expedition_success_score, expedition_failure_score,
        expedition_score.

    Scoring:
        * Rows where any of ``success1``..``success4`` is truthy get a
          positive-leaning ``expedition_success_score``; all other rows
          keep the default 0.0 there.
        * The remaining rows get ``expedition_failure_score``; rows that
          succeeded are left NaN in that column.
        * ``expedition_score`` is the sum of the two after ``fillna(0)``.

    Only the ratio/death terms are NaN-filled individually. A NaN in any
    other term makes that row's partial score NaN, which the final
    ``fillna(0)`` turns into 0.

    Args:
        df: DataFrame containing every column referenced in the body
            (expedition, member and weather features).

    Returns:
        A new DataFrame with the columns listed above appended.
    """
    df = df.copy()

    # Mean summit days per peak, merged back so every row carries the
    # average for its own peak.
    peak_smtdays_mean = df.groupby('peakid')['smtdays'].mean().reset_index()
    peak_smtdays_mean.columns = ['peakid', 'peak_mean_smtdays']
    df = df.merge(peak_smtdays_mean, on='peakid', how='left')

    # Positive when this row's smtdays is below its peak's mean.
    df['smtdays_to_mean_peak'] = df['peak_mean_smtdays'] - df['smtdays']

    # Difference between peak height and the expedition's high point.
    df['highpoint_summit'] = df['heightm'] - df['highpoint']

    # True if any of the four route-success flags is set.
    df['success_on_route'] = df[['success1', 'success2', 'success3', 'success4']].any(axis=1)

    # Winter ascent
    df['winter_ascent'] = df['season'] == 'Winter'

    # Expedition-level summit metrics. The mask is computed once and reused.
    any_summit = df['members_summited'] > 0
    df['min_one_summit_by_member'] = any_summit
    df['member_summit_ratio'] = df['members_summited'] / df['total_members']

    # Deaths per summiter; only defined where someone summited (NaN elsewhere).
    df.loc[any_summit, 'summited_death_ratio'] = (
        df.loc[any_summit, 'members_deaths'] /
        df.loc[any_summit, 'members_summited']
    )

    # Success score column; 0.0 is the default for rows that did not succeed.
    df['expedition_success_score'] = 0.0

    # --- Successful expeditions -------------------------------------------
    # Each line below carries its own sign; term order matches the original
    # formula so floating-point results are unchanged.
    successful_mask = df['success_on_route']
    won = df.loc[successful_mask]
    df.loc[successful_mask, 'expedition_success_score'] = (
        1  # Base score for completing route
        + np.minimum(100, won['member_summit_ratio'].fillna(0) * 10)  # Summit ratio bonus (capped at 100)
        - np.minimum(40, won['members_deaths'].fillna(0) * 100)  # Member death penalty (capped at 40)
        - np.minimum(60, won['hired_deaths'].fillna(0) * 200)  # Hired member death penalty (capped at 60)
        + won['msolo'] * 400  # Solo achievement bonus
        + won['mspeed'] * 100  # Speed achievement bonus
        + won['msuccess'] * 20  # Individual success achievement bonus
        + won['mtraverse'] * 10  # Traverse achievement bonus
        - won['disputed'] * 100  # Disputed penalty
        + won['disabled'] * 200  # Disabled bonus
        + (won['oxygen_required'] * won['mo2none'] * 50)  # Member no-O2 bonus, only where oxygen_required is set
        + (won['oxygen_required'] * won['o2none'] * 50)  # Expedition no-O2 bonus, only where oxygen_required is set
        + won['winter_ascent'] * 100  # Winter ascent bonus
        + won['smtdays_to_mean_peak'] * 10  # Timeframe bonus
        # Weather during the summit approach is added to the score.
        + won['summit_approach_severe_weather_hours'] * 1.4
        + won['summit_approach_extreme_cold_hours'] * 1.4
        + won['summit_approach_high_wind_hours']
        + won['summit_approach_precip_total'] * 1.4
        + won['summit_approach_temp_min'] * -1.4  # Sign flipped, so a negative minimum adds to the score
        # Weather over the full expedition window.
        + won['full_wind_max']
        + won['full_weather_code_mode']
        + won['full_precip_total']
        + won['full_extreme_cold_hours'] * 1.4
        + won['full_high_wind_hours']
        + won['full_low_visibility_hours'] * 1.4
        # Death-day weather terms are subtracted.
        - won['death_day_severe_weather_hours'] * 100
        - won['death_day_extreme_cold_hours'] * 100
        - won['death_day_high_wind_hours'] * 100
        - won['death_day_low_visibility_hours'] * 100
        + won['decision_window_precip_total'] * 1.4
        + won['decision_window_temp_min'] * -1.4
        + won['early_phase_low_visibility_hours'] * 1.4
        + won['smttime'] / 2
    )

    # --- Unsuccessful expeditions -----------------------------------------
    unsuccessful_mask = ~df['success_on_route']
    lost = df.loc[unsuccessful_mask]
    df.loc[unsuccessful_mask, 'expedition_failure_score'] = (
        -10  # Base negative score for failed route
        # NOTE(review): np.maximum is a floor, so these two penalties are at
        # least 100 and 200 even with zero deaths. np.minimum (a cap, as in
        # the success branch) may have been intended - confirm before
        # changing, since this defines the published target.
        - np.maximum(100, lost['members_deaths'].fillna(0) * 100)  # Member death penalty
        - np.maximum(200, lost['hired_deaths'].fillna(0) * 200)  # Hired member death penalty
        + np.minimum(10, lost['member_summit_ratio'].fillna(0) * 2)  # Summit ratio bonus for partial success (capped at 10)
        - lost['death'] * 80  # Penalty for dying
        + (lost['oxygen_required'] * lost['mo2none'] * 100)  # Member no-O2 bonus for risk-taking
        + (lost['oxygen_required'] * lost['o2none'] * 100)  # Expedition no-O2 bonus for risk-taking
        + lost['disabled'] * 20  # Disabled bonus (smaller than in the success case)
        + lost['winter_ascent'] * 120  # Winter ascent risk bonus
        + lost['smtdays_to_mean_peak'] * 10  # Timeframe bonus
        - lost['disputed'] * 50  # Disputed penalty (smaller than in the success case)
        - lost['rope'] / 4  # Penalty for resources used
        - lost['camps'] * 16  # Penalty for resources used
        - lost['death_day_severe_weather_hours'] * 50  # Penalty for death on severe weather day
        - lost['death_day_extreme_cold_hours'] * 5
        - lost['death_day_high_wind_hours'] * 50
        + lost['mperhighpt'] / 100  # Bonus for member high point
        - lost['decision_window_precip_hours'] * 10  # Bad decision making
        - lost['decision_window_severe_weather_hours'] * 10
        + lost['smttime'] / 10
        - lost['total_hired'] * 4  # Penalty for all those hired
        - lost['deathhgtm'] / 10  # Death height penalty
    )

    # Combined score: each row has a value in at most one of the two
    # columns, so the sum selects whichever applies.
    df['expedition_score'] = df['expedition_success_score'].fillna(0) + df['expedition_failure_score'].fillna(0)

    return df

# Build the target columns. `final_df_with_weather` is defined outside this
# snippet (presumably the expedition data merged with the weather features -
# TODO confirm).
pre_fe_df = create_target_variables(final_df_with_weather)