diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 222527c465..1cc35874fe 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -141,6 +141,7 @@ jobs: matrix: region: - prototype_mtc + - prototype_arc - placeholder_psrc - prototype_marin - prototype_mtc_extended diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 5fe3375b31..89c41521a3 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -16,12 +16,13 @@ from activitysim.abm.tables import shadow_pricing from activitysim.core import estimation, los, tracing, util, workflow from activitysim.core.configuration.base import ( + ComputeSettings, PreprocessorSettings, PydanticReadable, - ComputeSettings, ) from activitysim.core.configuration.logit import TourLocationComponentSettings from activitysim.core.expressions import assign_columns +from activitysim.core.interaction_sample import _resolve_sample_method logger = logging.getLogger(__name__) @@ -760,6 +761,21 @@ def get_disaggregate_logsums( state, "disaggregate_accessibility.yaml" ) + if ( + _resolve_sample_method( + state, getattr(disagg_model_settings, "compute_settings", None) + ) + == "poisson" + ): + logger.warning( + "Using Poisson sampling method for disaggregate accessibility calculations. Currently the results will" + + " differ from those obtained with monte-carlo or eet sampling by a constant shift of" + + f" log({disagg_model_settings.DESTINATION_SAMPLE_SIZE}) if you are using the common correction factor" + + " `log(pick_count / prob)` in location choice specs. The results of the Poisson method are unbiased," + + " i.e., they agree with the results obtained with a full destination sample, unlike those for" + + " monte-carlo or eet sampling." 
+ ) + for model_name in [ "workplace_location", "school_location", diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 68a8a3b9b4..b77fe71cfb 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -20,8 +20,8 @@ ) from activitysim.core.configuration.base import ComputeSettings, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings -from activitysim.core.util import assign_in_place, reindex from activitysim.core.exceptions import InvalidTravelError +from activitysim.core.util import assign_in_place, reindex logger = logging.getLogger(__name__) @@ -131,6 +131,7 @@ def participants_chooser( choosers: pd.DataFrame, spec: pd.DataFrame, trace_label: str, + nest_spec: Optional[dict, LogitNestSpec] = None, ) -> tuple[pd.Series, pd.Series]: """ custom alternative to logit.make_choices for simulate.simple_simulate @@ -250,14 +251,21 @@ def participants_chooser( f"{num_tours_remaining} tours could not be satisfied after {iter} iterations" ) - choice_function = ( - logit.make_choices_utility_based - if state.settings.use_explicit_error_terms - else logit.make_choices - ) - choices, rands = choice_function( - state, probs_or_utils, trace_label=trace_label, trace_choosers=choosers - ) + if state.settings.use_explicit_error_terms: + choices, rands = logit.make_choices_utility_based( + state, + probs_or_utils, + trace_label=trace_label, + trace_choosers=choosers, + nest_spec=nest_spec, + ) + else: + choices, rands = logit.make_choices( + state, + probs_or_utils, + trace_label=trace_label, + trace_choosers=choosers, + ) participate = choices == PARTICIPATE_CHOICE # satisfaction indexed by tour_id @@ -429,14 +437,6 @@ def joint_tour_participation( if i not in model_settings.compute_settings.protect_columns: model_settings.compute_settings.protect_columns.append(i) - # TODO EET: this is related to the 
difference in nested logit and logit choice as per comment in - # make_choices_utility_based. As soon as alt_order_array is removed from arguments to - # make_choices_explicit_error_term_nl this guard can be removed - if state.settings.use_explicit_error_terms: - assert ( - nest_spec is None - ), "Nested logit model custom chooser for EET requires name_mapping, currently not implemented in jtp" - custom_chooser = participants_chooser choices = simulate.simple_simulate_by_chunk_id( diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 7f032a8ae6..46e1448540 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -15,10 +15,14 @@ TourLocationComponentSettings, TourModeComponentSettings, ) -from activitysim.core.interaction_sample import interaction_sample +from activitysim.core.exceptions import DuplicateWorkflowTableError +from activitysim.core.interaction_sample import ( + _resolve_sample_method, + interaction_sample, +) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.logit import AltsContext from activitysim.core.util import reindex -from activitysim.core.exceptions import DuplicateWorkflowTableError """ The school/workplace location model predicts the zones in which various people will @@ -116,6 +120,8 @@ def _location_sample( chunk_tag, trace_label, zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, ): """ select a sample of alternative locations. 
@@ -211,6 +217,8 @@ def _location_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" @@ -226,6 +234,7 @@ def location_sample( persons_merged, network_los, dest_size_terms, + full_dest_size_terms, estimator, model_settings: TourLocationComponentSettings, chunk_size, @@ -245,6 +254,16 @@ def location_sample( alt_dest_col_name = model_settings.ALT_DEST_COL_NAME + if state.settings.use_explicit_error_terms: + stable_alt_positions = full_dest_size_terms.index.get_indexer( + dest_size_terms.index + ) + assert (stable_alt_positions >= 0).all() + n_total_alts = len(full_dest_size_terms) + else: + stable_alt_positions = None + n_total_alts = None + choices = _location_sample( state, segment_name, @@ -257,6 +276,8 @@ def location_sample( chunk_size, chunk_tag, trace_label, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, ) return choices @@ -366,6 +387,7 @@ def location_presample( chunk_size, chunk_tag, trace_label, + full_dest_size_terms=None, ): trace_label = tracing.extend_trace_label(trace_label, "presample") @@ -378,6 +400,39 @@ def location_presample( state, dest_size_terms, network_los, model_settings ) + if full_dest_size_terms is None: + full_dest_size_terms = dest_size_terms + + if state.settings.use_explicit_error_terms: + full_taz_index = pd.Index( + network_los.map_maz_to_taz(full_dest_size_terms.index), name=DEST_TAZ + ) + full_taz_index = full_taz_index[~full_taz_index.duplicated()] + stable_alt_positions = full_taz_index.get_indexer(TAZ_size_terms.index) + assert (stable_alt_positions >= 0).all() + n_total_alts = len(full_taz_index) + + # The TAZ presample call below passes stable_alt_positions for both EET and Poisson sampling, so each TAZ is + # keyed to its position in the full TAZ universe. 
The MAZ-for-TAZ second stage only receives full_taz_index for + # Poisson: that stage uses one per-(chooser, TAZ) uniform to pick a MAZ within each sampled TAZ. Under Poisson + # each sampled TAZ appears at most once per chooser, so the per-TAZ uniform produces an independent MAZ choice. + # Under EET sampling (importance sampling with replacement) the same TAZ can appear multiple times in a + # chooser's sample and would all share one uniform, forcing every duplicate to pick the same MAZ. An EET-stable + # MAZ-for-TAZ would need a (TAZ, occurrence-rank)-keyed draw and many more random numbers per chooser; that's + # too expensive with the current RNG, revisit if a counter-based RNG is adapted. + sample_compute_settings = getattr(model_settings, "compute_settings", None) + if sample_compute_settings is not None: + sample_compute_settings = sample_compute_settings.subcomponent_settings( + "sample" + ) + taz_sample_method = _resolve_sample_method(state, sample_compute_settings) + use_stable_taz_index = taz_sample_method == "poisson" + else: + full_taz_index = None + stable_alt_positions = None + n_total_alts = None + use_stable_taz_index = False + # convert MAZ zone_id to 'TAZ' in choosers (persons_merged) # persons_merged[HOME_TAZ] = persons_merged[HOME_MAZ].map(maz_to_taz) assert HOME_MAZ in persons_merged @@ -411,6 +466,8 @@ def location_presample( chunk_tag, trace_label, zone_layer="taz", + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, ) # print(f"taz_sample\n{taz_sample}") @@ -423,7 +480,12 @@ def location_presample( # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total maz_choices = tour_destination.choose_MAZ_for_TAZ( - state, taz_sample, MAZ_size_terms, trace_label, model_settings + state, + taz_sample, + MAZ_size_terms, + trace_label, + model_settings, + full_taz_index=full_taz_index if use_stable_taz_index else None, ) assert DEST_MAZ in maz_choices @@ -462,6 +524,8 @@ def 
run_location_sample( 23751, 14, 0.972732479292, 2 """ + full_dest_size_terms = dest_size_terms + logger.debug( f"dropping {(~(dest_size_terms.size_term > 0)).sum()} " f"of {len(dest_size_terms)} rows where size_term is zero" @@ -496,6 +560,7 @@ def run_location_sample( chunk_size, chunk_tag=f"{chunk_tag}.presample", trace_label=trace_label, + full_dest_size_terms=full_dest_size_terms, ) else: @@ -505,6 +570,7 @@ def run_location_sample( persons_merged, network_los, dest_size_terms, + full_dest_size_terms, estimator, model_settings, chunk_size, @@ -603,6 +669,7 @@ def run_location_simulate( chunk_tag, trace_label, skip_choice=False, + alts_context: AltsContext | None = None, ): """ run location model on location_sample annotated with mode_choice logsum @@ -712,6 +779,7 @@ def run_location_simulate( compute_settings=model_settings.compute_settings.subcomponent_settings( "simulate" ), + alts_context=alts_context, ) if not want_logsums: @@ -756,6 +824,7 @@ def run_location_choice( model_settings : dict chunk_size : int trace_label : str + skip_choice : bool Returns ------- @@ -789,6 +858,13 @@ def run_location_choice( logger.info(f"{trace_label} skipping segment {segment_name}: no choosers") continue + if state.settings.use_explicit_error_terms: + # dest_size_terms contains 0-attraction zones so using this directly here, important for stable error terms + # when a zone goes from 0 base -> nonzero project + alts_context = AltsContext.from_series(dest_size_terms.index) + else: + alts_context = None + # - location_sample location_sample_df = run_location_sample( state, @@ -841,6 +917,7 @@ def run_location_choice( trace_label, "simulate.%s" % segment_name ), skip_choice=skip_choice, + alts_context=alts_context, ) if estimator: @@ -1031,6 +1108,18 @@ def iterate_location_choice( ] persons_merged_df_ = persons_merged_df_.sort_index() + # reset rng offsets to identical state on each iteration. 
This ensures that the same set of random numbers is + # used on each iteration for the persons being re-simulated, so sampling and final choice draws are + # reproducible across shadow-pricing iterations. + # Scoped to the persons channel for these specific rows via reset_offsets_for_df so the dedicated + # shadow_pricing_persons channel (registered under EET) keeps its offset across iterations and advances + # naturally on each iteration's update_shadow_prices call. + if state.settings.use_explicit_error_terms and iteration > 1: + logger.debug( + f"{trace_label} resetting random number generator offsets for iteration {iteration}" + ) + state.get_rn_generator().reset_offsets_for_df(persons_merged_df_) + choices_df_, save_sample_df = run_location_choice( state, persons_merged_df_, @@ -1092,6 +1181,11 @@ def iterate_location_choice( ) break + # Drop the dedicated shadow_pricing RNG channel (registered lazily under EET by spc.update_shadow_prices) so it + # doesn't survive into the next location_choice model (e.g., school after work) — both models share the same + # channel name and would otherwise collide on the no-overlap assert in SimpleChannel.extend_domain. No-op for MC. 
+ spc.cleanup_rng_channel(state) + # - shadow price table if locutor: if spc.use_shadow_pricing and model_settings.SHADOW_PRICE_TABLE: diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index 32f3aabee2..b07ec5b873 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -21,6 +21,7 @@ from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.logit import AltsContext from activitysim.core.tracing import print_elapsed_time from activitysim.core.util import assign_in_place, drop_unused_columns from activitysim.core.exceptions import DuplicateWorkflowTableError @@ -112,6 +113,7 @@ def parking_destination_simulate( chunk_size, trace_hh_id, trace_label, + alts_context: AltsContext | None = None, ): """ Chose destination from destination_sample (with od_logsum and dp_logsum columns added) @@ -150,6 +152,7 @@ def parking_destination_simulate( trace_label=trace_label, trace_choice_name="parking_loc", explicit_chunk_size=model_settings.explicit_chunk, + alts_context=alts_context, ) # drop any failed zero_prob destinations @@ -211,6 +214,9 @@ def choose_parking_location( ) destination_sample.index = np.repeat(trips.index.values, len(alternatives)) destination_sample.index.name = trips.index.name + # use full land_use index to ensure AltsContext spans full range of potential zones + land_use = state.get_dataframe("land_use") + alts_context = AltsContext.from_series(land_use.index) destinations = parking_destination_simulate( state, @@ -223,6 +229,7 @@ def choose_parking_location( chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, + alts_context=alts_context, ) if want_sample_table: diff --git a/activitysim/abm/models/trip_destination.py 
b/activitysim/abm/models/trip_destination.py index 853cfc35e9..08f7f760aa 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -30,12 +30,16 @@ ) from activitysim.core.configuration.base import PreprocessorSettings from activitysim.core.configuration.logit import LocationComponentSettings -from activitysim.core.interaction_sample import interaction_sample +from activitysim.core.exceptions import DuplicateWorkflowTableError, InvalidTravelError +from activitysim.core.interaction_sample import ( + _resolve_sample_method, + interaction_sample, +) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.logit import AltsContext from activitysim.core.skim_dictionary import DataFrameMatrix from activitysim.core.tracing import print_elapsed_time from activitysim.core.util import assign_in_place, reindex -from activitysim.core.exceptions import InvalidTravelError, DuplicateWorkflowTableError logger = logging.getLogger(__name__) @@ -214,6 +218,7 @@ def _destination_sample( preprocessor_setting_name="alts_preprocessor_sample", ) + # Trip destination keeps the alternative universe here so stable_alt_positions is not needed. 
choices = interaction_sample( state, choosers=trips, @@ -294,6 +299,7 @@ def choose_MAZ_for_TAZ( alt_dest_col_name, trace_label, model_settings, + full_taz_index=None, ): """ Convert taz_sample table with TAZ zone sample choices to a table with a MAZ zone chosen for each TAZ @@ -366,17 +372,24 @@ def choose_MAZ_for_TAZ( # for random_for_df, we need df with de-duplicated chooser canonical index chooser_df = pd.DataFrame(index=taz_sample.index[~taz_sample.index.duplicated()]) - num_choosers = len(chooser_df) assert chooser_df.index.name == chooser_id_col - # to make choices, rands for each chooser (one rand for each sampled TAZ) - # taz_sample_size will be model_settings['SAMPLE_SIZE'] samples, except if we are estimating - taz_sample_size = taz_choices.groupby(chooser_id_col)[DEST_TAZ].count().max() + # to make choices, draw enough rands for the chooser with the largest TAZ sample, + # then keep only the draws corresponding to actual TAZ rows for each chooser. + taz_choice_counts = ( + taz_choices.groupby(chooser_id_col)[DEST_TAZ] + .count() + .reindex(chooser_df.index) + .astype(np.int64) + ) + taz_sample_size = taz_choice_counts.max() + uniform_taz_choice_counts = (taz_choice_counts == taz_sample_size).all() - # taz_choices index values should be contiguous - assert ( - taz_choices[chooser_id_col] == np.repeat(chooser_df.index, taz_sample_size) - ).all() + # taz_choices rows should remain grouped by chooser in chooser_df order + expected_chooser_ids = np.repeat( + chooser_df.index.to_numpy(), taz_choice_counts.to_numpy() + ) + assert (taz_choices[chooser_id_col].to_numpy() == expected_chooser_ids).all() # we need to choose a MAZ for each DEST_TAZ choice # probability of choosing MAZ based on MAZ size_term fraction of TAZ total @@ -444,14 +457,36 @@ def choose_MAZ_for_TAZ( # prob array with one row TAZ_choice, one column per alternative row_sums = padded_maz_sizes.sum(axis=1) maz_probs = np.divide(padded_maz_sizes, row_sums.reshape(-1, 1)) - assert maz_probs.shape 
== (num_choosers * taz_sample_size, max_maz_count) - - rands = ( - state.get_rn_generator() - .random_for_df(chooser_df, n=taz_sample_size) - .reshape(-1, 1) - ) - assert len(rands) == num_choosers * taz_sample_size + if full_taz_index is not None: + full_taz_index = pd.Index(full_taz_index, name=DEST_TAZ) + taz_positions = full_taz_index.get_indexer(taz_choices[DEST_TAZ]) + assert (taz_positions >= 0).all() + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=len(full_taz_index)) + ) + chooser_row_positions = np.repeat( + np.arange(len(chooser_df)), taz_choice_counts.to_numpy() + ) + rands = chooser_rands[chooser_row_positions, taz_positions].reshape(-1, 1) + assert len(rands) == len(taz_choices) + elif uniform_taz_choice_counts: + assert maz_probs.shape == (len(chooser_df) * taz_sample_size, max_maz_count) + rands = ( + state.get_rn_generator() + .random_for_df(chooser_df, n=taz_sample_size) + .reshape(-1, 1) + ) + assert len(rands) == len(chooser_df) * taz_sample_size + else: + assert maz_probs.shape == (len(taz_choices), max_maz_count) + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) + ) + chooser_rand_mask = ( + np.arange(taz_sample_size) < taz_choice_counts.to_numpy()[:, np.newaxis] + ) + rands = chooser_rands[chooser_rand_mask].reshape(-1, 1) + assert len(rands) == len(taz_choices) assert len(rands) == maz_probs.shape[0] # make choices @@ -627,6 +662,29 @@ def destination_presample( network_los.map_maz_to_taz(alternatives.index) ).sum() + # Trip destination keeps the alternative universe in `alternatives`, so the active TAZ set after aggregation always + # equals the full TAZ universe and stable_alt_positions is not needed at the TAZ presample call itself (unlike + # tour_destination / location_choice, which filter zero-attraction zones before presampling). 
full_taz_index is + # still computed here for the MAZ-for-TAZ second stage, but only for Poisson sampling: that stage uses one + # per-(chooser, TAZ) uniform to pick a MAZ within each sampled TAZ. Under Poisson each sampled TAZ appears at most + # once per chooser, so the per-TAZ uniform produces an independent MAZ choice. Under EET sampling (importance + # sampling with replacement) the same TAZ can appear multiple times in a chooser's sample and would all share one + # uniform, forcing every duplicate to pick the same MAZ. An EET-stable MAZ-for-TAZ would need a + # (TAZ, occurrence-rank)-keyed draw and many more random numbers per chooser; that's too expensive with the + # current RNG, revisit if a counter-based RNG is adapted. + full_taz_index = None + if state.settings.use_explicit_error_terms: + sample_compute_settings = getattr(model_settings, "compute_settings", None) + if sample_compute_settings is not None: + sample_compute_settings = sample_compute_settings.subcomponent_settings( + "sample" + ) + taz_sample_method = _resolve_sample_method(state, sample_compute_settings) + if taz_sample_method == "poisson": + full_taz_index = pd.Index( + alternatives.index, name=f"{alt_dest_col_name}_TAZ" + ) + # # i did this but after changing alt_dest_col_name to 'trip_dest' it # # shouldn't be needed anymore # alternatives.index.name = ALT_DEST_TAZ @@ -658,6 +716,7 @@ def destination_presample( alt_dest_col_name, trace_label, model_settings, + full_taz_index=full_taz_index, ) assert alt_dest_col_name in maz_sample @@ -950,6 +1009,7 @@ def trip_destination_simulate( skim_hotel, estimator, trace_label, + alts_context: AltsContext | None = None, ): """ Chose destination from destination_sample (with od_logsum and dp_logsum columns added) @@ -1036,6 +1096,7 @@ def trip_destination_simulate( trace_choice_name="trip_dest", estimator=estimator, explicit_chunk_size=model_settings.explicit_chunk, + alts_context=alts_context, ) if not want_logsums: @@ -1080,6 +1141,10 @@ def
choose_trip_destination( t0 = print_elapsed_time() + # use full index (including zero-size zones) to ensure stable random results + # fetch alts_context early so we don't worry about mutating alternatives first + alts_context = AltsContext.from_series(alternatives.index) + # - trip_destination_sample destination_sample = trip_destination_sample( state, @@ -1126,7 +1191,6 @@ def choose_trip_destination( destination_sample["dp_logsum"] = 0.0 t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0, debug=True) - destinations = trip_destination_simulate( state, primary_purpose=primary_purpose, @@ -1138,6 +1202,7 @@ def choose_trip_destination( skim_hotel=skim_hotel, estimator=estimator, trace_label=trace_label, + alts_context=alts_context, ) dropped_trips = ~trips.index.isin(destinations.index) @@ -1520,13 +1585,13 @@ def run_trip_destination( """ When using the trip destination model with sharrow, it is necessary - to set a value for `purpose_index_num` in the trip destination - annotate trips preprocessor. This allows for an optimized compiled + to set a value for `purpose_index_num` in the trip destination + annotate trips preprocessor. This allows for an optimized compiled lookup of the size term from the array of size terms. The value of - `purpose_index_num` should be the integer column position in the size - matrix, with usual zero-based numpy indexing semantics (i.e. the first + `purpose_index_num` should be the integer column position in the size + matrix, with usual zero-based numpy indexing semantics (i.e. the first column is zero). The preprocessor expression most likely needs to be - "size_terms.get_cols(df.purpose)" unless some unusual transform of + "size_terms.get_cols(df.purpose)" unless some unusual transform of size terms has been employed.
""" diff --git a/activitysim/abm/models/trip_scheduling_choice.py b/activitysim/abm/models/trip_scheduling_choice.py index 81d908ef1b..a5e17eb414 100644 --- a/activitysim/abm/models/trip_scheduling_choice.py +++ b/activitysim/abm/models/trip_scheduling_choice.py @@ -279,6 +279,15 @@ def run_trip_scheduling_choice( ) in chunk.adaptive_chunked_choosers(state, indirect_tours, trace_label): # Sort the choosers and get the schedule alternatives choosers = choosers.sort_index() + # FIXME-EET: For explicit error term choices, we need a stable alternative ID. Currently, we use + # SCHEDULE_ID, which just enumerates all schedule alternatives, of which there are choosers times + # alternatives, in the order they are processed, which depends on whether the outward/return legs have stops. + # We might want to change SCHEDULE_ID to a fixed pattern of all possible combinations of + # (outbound, main, inbound) duration for the maximum possible tour duration (max time window). For + # 30min intervals, this leads to 1225 alternatives, which is reasonable memory-wise for random numbers. + # It looks like all that would need to change for this is the generation of the schedule alternatives and + # the lookup of choices as elements in schedule after simulation because choosers are indexed by tour_id.
+ schedules = generate_schedule_alternatives(choosers).sort_index() # preprocessing alternatives diff --git a/activitysim/abm/models/util/test/test_cdap.py b/activitysim/abm/models/util/test/test_cdap.py index 20dc6b2410..20d68f2dd9 100644 --- a/activitysim/abm/models/util/test/test_cdap.py +++ b/activitysim/abm/models/util/test/test_cdap.py @@ -5,6 +5,7 @@ import os.path +import numpy as np import pandas as pd import pandas.testing as pdt import pytest @@ -176,3 +177,84 @@ def test_build_cdap_spec_hhsize2(people, model_settings): ).astype("float") pdt.assert_frame_equal(utils, expected, check_names=False) + + +def test_cdap_explicit_error_terms_parity(people, model_settings): + person_type_map = model_settings.get("PERSON_TYPE_MAP", {}) + + # Increase population to get more stable distribution for parity check + # We'll just duplicate the existing people a few times + large_people = pd.concat([people] * 500).reset_index(drop=True) + large_people.index.name = "person_id" + + assert people.household_id.is_monotonic_increasing + large_people["hhid_diff"] = large_people.household_id.diff().fillna(0).astype(int) + large_people.loc[large_people["hhid_diff"] < 0, "hhid_diff"] = 1 + large_people["household_id"] = large_people.hhid_diff.cumsum() + + assert large_people["household_id"].is_monotonic_increasing + + # Run without explicit error terms + state_no_eet = workflow.State.make_default(__file__) + cdap_indiv_spec = state_no_eet.filesystem.read_model_spec( + file_name="cdap_indiv_and_hhsize1.csv" + ) + interaction_coefficients = pd.read_csv( + state_no_eet.filesystem.get_config_file_path( + "cdap_interaction_coefficients.csv" + ), + comment="#", + ) + interaction_coefficients = cdap.preprocess_interaction_coefficients( + interaction_coefficients + ) + cdap_fixed_relative_proportions = pd.DataFrame( + {"activity": ["M", "N", "H"], "coefficient": [0.33, 0.33, 0.34]} + ) + + state_no_eet.settings.use_explicit_error_terms = False + state_no_eet.rng().set_base_seed(42) + 
state_no_eet.rng().begin_step("test_no_eet") + state_no_eet.rng().add_channel("person_id", large_people) + state_no_eet.rng().add_channel( + "household_id", + large_people.drop_duplicates("household_id").set_index("household_id"), + ) + + choices_no_eet = cdap.run_cdap( + state_no_eet, + large_people, + person_type_map, + cdap_indiv_spec, + interaction_coefficients, + cdap_fixed_relative_proportions, + locals_d=None, + ) + + # Run with explicit error terms + state_eet = workflow.State.make_default(__file__) + state_eet.settings.use_explicit_error_terms = True + state_eet.rng().set_base_seed(42) + state_eet.rng().begin_step("test_eet") + state_eet.rng().add_channel("person_id", large_people) + state_eet.rng().add_channel( + "household_id", + large_people.drop_duplicates("household_id").set_index("household_id"), + ) + + choices_eet = cdap.run_cdap( + state_eet, + large_people, + person_type_map, + cdap_indiv_spec, + interaction_coefficients, + cdap_fixed_relative_proportions, + locals_d=None, + ) + + # Compare distributions + dist_no_eet = choices_no_eet.value_counts(normalize=True).sort_index() + dist_eet = choices_eet.value_counts(normalize=True).sort_index() + + # Check that they are reasonably close + pdt.assert_series_equal(dist_no_eet, dist_eet, atol=0.05, check_names=False) diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index d99803bd7d..d60fca05b4 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -12,15 +12,19 @@ from activitysim.core import ( config, estimation, + expressions, los, simulate, tracing, workflow, - expressions, ) from activitysim.core.configuration.logit import TourLocationComponentSettings -from activitysim.core.interaction_sample import interaction_sample +from activitysim.core.interaction_sample import ( + _resolve_sample_method, + interaction_sample, +) from activitysim.core.interaction_sample_simulate import 
interaction_sample_simulate +from activitysim.core.logit import AltsContext from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -81,6 +85,8 @@ def _destination_sample( chunk_tag, trace_label: str, zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, ): model_spec = simulate.spec_for_segment( state, @@ -154,6 +160,8 @@ def _destination_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, explicit_chunk_size=model_settings.explicit_chunk, compute_settings=model_settings.compute_settings.subcomponent_settings( "sample" @@ -177,6 +185,7 @@ def destination_sample( model_settings: TourLocationComponentSettings, network_los, destination_size_terms, + full_destination_size_terms, estimator, chunk_size, trace_label, @@ -196,6 +205,15 @@ def destination_sample( # the name of the dest column to be returned in choices alt_dest_col_name = model_settings.ALT_DEST_COL_NAME + if state.settings.use_explicit_error_terms: + stable_alt_positions = full_destination_size_terms.index.get_indexer( + destination_size_terms.index + ) + assert (stable_alt_positions >= 0).all() + n_total_alts = len(full_destination_size_terms) + else: + stable_alt_positions = None + n_total_alts = None choices = _destination_sample( state, @@ -208,6 +226,8 @@ def destination_sample( alt_dest_col_name, chunk_tag=chunk_tag, trace_label=trace_label, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, ) return choices @@ -264,7 +284,12 @@ def aggregate_size_terms(dest_size_terms, network_los): def choose_MAZ_for_TAZ( - state: workflow.State, taz_sample, MAZ_size_terms, trace_label, model_settings + state: workflow.State, + taz_sample, + MAZ_size_terms, + trace_label, + model_settings, + full_taz_index=None, ): """ Convert taz_sample table with TAZ zone sample choices to a table with a MAZ zone chosen for each TAZ @@ -332,17 +357,24 @@ def 
choose_MAZ_for_TAZ( # for random_for_df, we need df with de-duplicated chooser canonical index chooser_df = pd.DataFrame(index=taz_sample.index[~taz_sample.index.duplicated()]) - num_choosers = len(chooser_df) assert chooser_df.index.name == chooser_id_col - # to make choices, rands for each chooser (one rand for each sampled TAZ) - # taz_sample_size will be model_settings['SAMPLE_SIZE'] samples, except if we are estimating - taz_sample_size = taz_choices.groupby(chooser_id_col)[DEST_TAZ].count().max() + # to make choices, draw enough rands for the chooser with the largest TAZ sample, + # then keep only the draws corresponding to actual TAZ rows for each chooser. + taz_choice_counts = ( + taz_choices.groupby(chooser_id_col)[DEST_TAZ] + .count() + .reindex(chooser_df.index) + .astype(np.int64) + ) + taz_sample_size = taz_choice_counts.max() + uniform_taz_choice_counts = (taz_choice_counts == taz_sample_size).all() - # taz_choices index values should be contiguous - assert ( - (taz_choices[chooser_id_col] == np.repeat(chooser_df.index, taz_sample_size)) - ).all() + # taz_choices rows should remain grouped by chooser in chooser_df order + expected_chooser_ids = np.repeat( + chooser_df.index.to_numpy(), taz_choice_counts.to_numpy() + ) + assert (taz_choices[chooser_id_col].to_numpy() == expected_chooser_ids).all() # we need to choose a MAZ for each DEST_TAZ choice # probability of choosing MAZ based on MAZ size_term fraction of TAZ total @@ -400,11 +432,34 @@ def choose_MAZ_for_TAZ( # prob array with one row TAZ_choice, one column per alternative row_sums = padded_maz_sizes.sum(axis=1) maz_probs = np.divide(padded_maz_sizes, row_sums.reshape(-1, 1)) - assert maz_probs.shape == (num_choosers * taz_sample_size, max_maz_count) - rands = state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) - rands = rands.reshape(-1, 1) - assert len(rands) == num_choosers * taz_sample_size + if full_taz_index is not None: + full_taz_index = pd.Index(full_taz_index, 
name=DEST_TAZ) + taz_positions = full_taz_index.get_indexer(taz_choices[DEST_TAZ]) + assert (taz_positions >= 0).all() + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=len(full_taz_index)) + ) + chooser_row_positions = np.repeat( + np.arange(len(chooser_df)), taz_choice_counts.to_numpy() + ) + rands = chooser_rands[chooser_row_positions, taz_positions].reshape(-1, 1) + assert len(rands) == len(taz_choices) + elif uniform_taz_choice_counts: + assert maz_probs.shape == (len(chooser_df) * taz_sample_size, max_maz_count) + rands = state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) + rands = rands.reshape(-1, 1) + assert len(rands) == len(chooser_df) * taz_sample_size + else: + assert maz_probs.shape == (len(taz_choices), max_maz_count) + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) + ) + chooser_rand_mask = ( + np.arange(taz_sample_size) < taz_choice_counts.to_numpy()[:, np.newaxis] + ) + rands = chooser_rands[chooser_rand_mask].reshape(-1, 1) + assert len(rands) == len(taz_choices) assert len(rands) == maz_probs.shape[0] # make choices @@ -556,6 +611,7 @@ def destination_presample( model_settings: TourLocationComponentSettings, network_los, destination_size_terms, + full_destination_size_terms, estimator, trace_label, ): @@ -570,6 +626,33 @@ def destination_presample( MAZ_size_terms, TAZ_size_terms = aggregate_size_terms( destination_size_terms, network_los ) + if state.settings.use_explicit_error_terms: + full_taz_index = pd.Index( + network_los.map_maz_to_taz(full_destination_size_terms.index), name=DEST_TAZ + ) + full_taz_index = full_taz_index[~full_taz_index.duplicated()] + stable_alt_positions = full_taz_index.get_indexer(TAZ_size_terms.index) + assert (stable_alt_positions >= 0).all() + + # The TAZ presample call below passes stable_alt_positions for both EET and Poisson sampling, so each TAZ is + # keyed to its position in the full TAZ universe. 
The MAZ-for-TAZ second stage only receives full_taz_index for + # Poisson: that stage uses one per-(chooser, TAZ) uniform to pick a MAZ within each sampled TAZ. Under Poisson + # each sampled TAZ appears at most once per chooser, so the per-TAZ uniform produces an independent MAZ choice. + # Under EET sampling (importance sampling with replacement) the same TAZ can appear multiple times in a + # chooser's sample and would all share one uniform, forcing every duplicate to pick the same MAZ. An + # EET-stable MAZ-for-TAZ would need a (TAZ, occurrence-rank)-keyed draw and many more random numbers per + # chooser; that's too expensive with the current RNG, revisit if a counter-based RNG is adapted. + sample_compute_settings = getattr(model_settings, "compute_settings", None) + if sample_compute_settings is not None: + sample_compute_settings = sample_compute_settings.subcomponent_settings( + "sample" + ) + taz_sample_method = _resolve_sample_method(state, sample_compute_settings) + use_stable_taz_index = taz_sample_method == "poisson" + else: + full_taz_index = None + stable_alt_positions = None + use_stable_taz_index = False orig_maz = model_settings.CHOOSER_ORIG_COL_NAME assert orig_maz in choosers @@ -594,11 +677,18 @@ def destination_presample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer="taz", + stable_alt_positions=stable_alt_positions, + n_total_alts=len(full_taz_index) if full_taz_index is not None else 0, ) # choose a MAZ for each DEST_TAZ choice, choice probability based on MAZ size_term fraction of TAZ total maz_choices = choose_MAZ_for_TAZ( - state, taz_sample, MAZ_size_terms, trace_label, model_settings + state, + taz_sample, + MAZ_size_terms, + trace_label, + model_settings, + full_taz_index=full_taz_index if use_stable_taz_index else None, ) assert DEST_MAZ in maz_choices @@ -615,6 +705,7 @@ def run_destination_sample( model_settings: TourLocationComponentSettings, network_los, destination_size_terms, + full_destination_size_terms, 
estimator, chunk_size, trace_label, @@ -663,6 +754,7 @@ def run_destination_sample( model_settings, network_los, destination_size_terms, + full_destination_size_terms, estimator, trace_label, ) @@ -675,6 +767,7 @@ def run_destination_sample( model_settings, network_los, destination_size_terms, + full_destination_size_terms, estimator, chunk_size, trace_label, @@ -874,6 +967,14 @@ def run_destination_simulate( log_alt_losers = state.settings.log_alt_losers + if state.settings.use_explicit_error_terms: + # use full land_use index to ensure AltsContext spans full range of potential destinations + # (maintains stable random number generation even if zones flip zero/non-zero size) + land_use = state.get_dataframe("land_use") + alts_context = AltsContext.from_series(land_use.index) + else: + alts_context = None + choices = interaction_sample_simulate( state, choosers, @@ -891,6 +992,7 @@ def run_destination_simulate( estimator=estimator, skip_choice=skip_choice, compute_settings=model_settings.compute_settings, + alts_context=alts_context, ) if not want_logsums: @@ -938,6 +1040,10 @@ def run_tour_destination( segment_destination_size_terms = size_term_calculator.dest_size_terms_df( segment_name, segment_trace_label ) + full_segment_destination_size_terms = ( + size_term_calculator.destination_size_terms[[segment_name]].copy() + ) + full_segment_destination_size_terms.columns = ["size_term"] if choosers.shape[0] == 0: logger.info( @@ -955,6 +1061,7 @@ def run_tour_destination( model_settings, network_los, segment_destination_size_terms, + full_segment_destination_size_terms, estimator, chunk_size=state.settings.chunk_size, trace_label=tracing.extend_trace_label(segment_trace_label, "sample"), diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index 5ec9dd493f..0e490207d7 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -22,7 +22,10 @@ ) from activitysim.core.configuration.base 
import PreprocessorSettings from activitysim.core.configuration.logit import TourLocationComponentSettings -from activitysim.core.interaction_sample import interaction_sample +from activitysim.core.interaction_sample import ( + _resolve_sample_method, + interaction_sample, +) from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex @@ -213,6 +216,10 @@ def _od_sample( preprocessor_setting_name="alts_preprocessor_sample", ) + # Not passing stable_alt_positions here: the cross product of origins and destinations + # would make the per-chooser draw count (n_total_alts, or n_total_alts * sample_size for + # EET sampling) prohibitive under the current sequential RNG. Revisit with a counter-based + # RNG. choices = interaction_sample( state, choosers, @@ -345,6 +352,7 @@ def choose_MAZ_for_TAZ( trace_label, addtl_col_for_unique_key=None, dest_maz_id_col=DEST_MAZ, + full_taz_index=None, ): """ Convert taz_sample table with TAZ zone sample choices to a table with a MAZ zone chosen for each TAZ @@ -423,17 +431,24 @@ def choose_MAZ_for_TAZ( # for random_for_df, we need df with de-duplicated chooser canonical index chooser_df = pd.DataFrame(index=taz_sample.index[~taz_sample.index.duplicated()]) - num_choosers = len(chooser_df) assert chooser_df.index.name == chooser_id_col - # to make choices, rands for each chooser (one rand for each sampled TAZ) - # taz_sample_size will be model_settings['SAMPLE_SIZE'] samples, except if we are estimating - taz_sample_size = taz_choices.groupby(chooser_id_col)[DEST_TAZ].count().max() + # to make choices, draw enough rands for the chooser with the largest TAZ sample, + # then keep only the draws corresponding to actual TAZ rows for each chooser. 
+ taz_choice_counts = ( + taz_choices.groupby(chooser_id_col)[DEST_TAZ] + .count() + .reindex(chooser_df.index) + .astype(np.int64) + ) + taz_sample_size = taz_choice_counts.max() + uniform_taz_choice_counts = (taz_choice_counts == taz_sample_size).all() - # taz_choices index values should be contiguous - assert ( - taz_choices[chooser_id_col] == np.repeat(chooser_df.index, taz_sample_size) - ).all() + # taz_choices rows should remain grouped by chooser in chooser_df order + expected_chooser_ids = np.repeat( + chooser_df.index.to_numpy(), taz_choice_counts.to_numpy() + ) + assert (taz_choices[chooser_id_col].to_numpy() == expected_chooser_ids).all() # we need to choose a MAZ for each DEST_TAZ choice # probability of choosing MAZ based on MAZ size_term fraction of TAZ total @@ -493,11 +508,33 @@ def choose_MAZ_for_TAZ( # prob array with one row TAZ_choice, one column per alternative row_sums = padded_maz_sizes.sum(axis=1) maz_probs = np.divide(padded_maz_sizes, row_sums.reshape(-1, 1)) - assert maz_probs.shape == (num_choosers * taz_sample_size, max_maz_count) - - rands = state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) - rands = rands.reshape(-1, 1) - assert len(rands) == num_choosers * taz_sample_size + if full_taz_index is not None: + full_taz_index = pd.Index(full_taz_index, name=DEST_TAZ) + taz_positions = full_taz_index.get_indexer(taz_choices[DEST_TAZ]) + assert (taz_positions >= 0).all() + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=len(full_taz_index)) + ) + chooser_row_positions = np.repeat( + np.arange(len(chooser_df)), taz_choice_counts.to_numpy() + ) + rands = chooser_rands[chooser_row_positions, taz_positions].reshape(-1, 1) + assert len(rands) == len(taz_choices) + elif uniform_taz_choice_counts: + assert maz_probs.shape == (len(chooser_df) * taz_sample_size, max_maz_count) + rands = state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) + rands = rands.reshape(-1, 1) + assert 
len(rands) == len(chooser_df) * taz_sample_size + else: + assert maz_probs.shape == (len(taz_choices), max_maz_count) + chooser_rands = np.asarray( + state.get_rn_generator().random_for_df(chooser_df, n=taz_sample_size) + ) + chooser_rand_mask = ( + np.arange(taz_sample_size) < taz_choice_counts.to_numpy()[:, np.newaxis] + ) + rands = chooser_rands[chooser_rand_mask].reshape(-1, 1) + assert len(rands) == len(taz_choices) assert len(rands) == maz_probs.shape[0] # make choices @@ -592,6 +629,15 @@ def choose_MAZ_for_TAZ( return taz_choices_w_maz +def resolve_sample_method(state, model_settings): + sample_compute_settings = getattr(model_settings, "compute_settings", None) + if sample_compute_settings is not None: + sample_compute_settings = sample_compute_settings.subcomponent_settings( + "sample" + ) + return _resolve_sample_method(state, sample_compute_settings) + + @workflow.func def od_presample( state: workflow.State, @@ -600,6 +646,7 @@ def od_presample( model_settings: TourODSettings, network_los, destination_size_terms, + full_destination_size_terms, estimator, chunk_size, trace_label, @@ -615,6 +662,27 @@ def od_presample( destination_size_terms, network_los ) + # The OD presample call below does not pass stable_alt_positions: the cross product of + # origins and destinations is too large for the current sequential-RNG cost (see comment + # at the OD sample call in _od_sample). full_taz_index is still computed here for the + # MAZ-for-TAZ second stage, but only for Poisson sampling: that stage uses one + # per-(chooser, TAZ) uniform to pick a MAZ within each sampled TAZ. Under Poisson each + # sampled TAZ appears at most once per chooser, so the per-TAZ uniform produces an + # independent MAZ choice. Under EET sampling (importance sampling with replacement) the + # same TAZ can appear multiple times in a chooser's sample and would all share one + # uniform, forcing every duplicate to pick the same MAZ. 
An EET-stable MAZ-for-TAZ would + # need a (TAZ, occurrence-rank)-keyed draw and many more random numbers per chooser; that's + # too expensive with the current RNG, revisit if a counter-based RNG is adapted. + taz_sample_method = resolve_sample_method(state, model_settings) + if taz_sample_method == "poisson": + full_taz_index = pd.Index( + network_los.map_maz_to_taz(full_destination_size_terms.index), + name=DEST_TAZ, + ) + full_taz_index = full_taz_index[~full_taz_index.duplicated()] + else: + full_taz_index = None + # create wrapper with keys for this lookup - in this case there is a ORIG_TAZ # in the choosers and a DEST_TAZ in the alternatives which get merged during # interaction the skims will be available under the name "skims" for any @ expressions @@ -654,6 +722,7 @@ def od_presample( MAZ_size_terms, trace_label, addtl_col_for_unique_key=ORIG_MAZ, + full_taz_index=full_taz_index, ) # outputs @@ -675,6 +744,7 @@ def run_od_sample( model_settings: TourODSettings, network_los, destination_size_terms, + full_destination_size_terms, estimator, chunk_size, trace_label, @@ -722,6 +792,7 @@ def run_od_sample( model_settings, network_los, destination_size_terms, + full_destination_size_terms, estimator, chunk_size, trace_label, @@ -1093,6 +1164,10 @@ def run_tour_od( segment_destination_size_terms = size_term_calculator.dest_size_terms_df( segment_name, trace_label ) + full_segment_destination_size_terms = ( + size_term_calculator.destination_size_terms[[segment_name]].copy() + ) + full_segment_destination_size_terms.columns = ["size_term"] if choosers.shape[0] == 0: logger.info( @@ -1110,6 +1185,7 @@ def run_tour_od( model_settings, network_los, segment_destination_size_terms, + full_segment_destination_size_terms, estimator, chunk_size=chunk_size, trace_label=tracing.extend_trace_label( diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index c199ef40da..2d5ec5c455 100644 --- 
a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -17,6 +17,7 @@ from activitysim.core.configuration.base import ComputeSettings, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate +from activitysim.core.logit import AltsContext from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -850,6 +851,13 @@ def _schedule_tours( log_alt_losers = state.settings.log_alt_losers + if state.settings.use_explicit_error_terms: + # use full TDD alternatives index to ensure AltsContext spans full range of potential slots + tdd_alts = state.get_injectable("tdd_alts") + alts_context = AltsContext.from_series(tdd_alts.index) + else: + alts_context = None + choices = interaction_sample_simulate( state, tours, @@ -862,6 +870,7 @@ def _schedule_tours( trace_label=tour_trace_label, estimator=estimator, compute_settings=compute_settings, + alts_context=alts_context, ) chunk_sizer.log_df(tour_trace_label, "choices", choices) @@ -968,7 +977,7 @@ def schedule_tours( if len(result_list) > 1: choices = pd.concat(result_list) - assert len(choices.index == len(tours.index)) + assert len(choices.index) == len(tours.index) return choices diff --git a/activitysim/abm/tables/shadow_pricing.py b/activitysim/abm/tables/shadow_pricing.py index 04c5eafc21..b3c20108e8 100644 --- a/activitysim/abm/tables/shadow_pricing.py +++ b/activitysim/abm/tables/shadow_pricing.py @@ -17,8 +17,8 @@ from activitysim.core import logit, tracing, util, workflow from activitysim.core.configuration import PydanticReadable from activitysim.core.configuration.logit import TourLocationComponentSettings +from activitysim.core.exceptions import MissingNameError, SystemConfigurationError from activitysim.core.input import read_input_table -from activitysim.core.exceptions import SystemConfigurationError, 
MissingNameError logger = logging.getLogger(__name__) @@ -290,6 +290,9 @@ def __init__( self.choices_by_iteration = pd.DataFrame() self.global_pending_persons = 1 self.sampled_persons = pd.DataFrame() + # Under EET, simulation-method shadow pricing uses a dedicated RNG channel to be independent of the location + # choice randoms. Registered lazily on first call to update_shadow_prices. + self._sp_rng_channel_registered = False if ( self.use_shadow_pricing @@ -699,6 +702,44 @@ def check_fit(self, state: workflow.State, iteration): return converged + _SP_RNG_CHANNEL = "shadow_pricing_persons" + + def cleanup_rng_channel(self, state): + """ + Drop the dedicated shadow_pricing_persons RNG channel if it was registered. Called at the end of + iterate_location_choice so the channel doesn't survive into the next model (e.g., school after work) — which + would otherwise fail the no-overlap assert in SimpleChannel.extend_domain when the next SPC tries to register + the same persons. No-op under MC (channel was never registered). + """ + if not self._sp_rng_channel_registered: + return + state.get_rn_generator().drop_channel(self._SP_RNG_CHANNEL) + self._sp_rng_channel_registered = False + + def _ensure_sp_rng_channel(self, state): + """ + Lazily register a dedicated RNG channel for shadow-pricing re-simulation draws under EET. The channel covers + the same persons as the main persons channel but has its own per-person offsets, so its draws don't consume + the main persons channel and aren't reset by the per-iteration location-choice reset. + """ + if self._sp_rng_channel_registered: + return + if not ( + self.use_shadow_pricing + and self.shadow_settings.SHADOW_PRICE_METHOD == "simulation" + and state.settings.use_explicit_error_terms + ): + return + persons = state.get_dataframe("persons_merged") + # add_channel only consumes the index; the renamed axis is what maps this channel to probs DataFrames with + # index name SP_RNG_CHANNEL. 
We rename the axis on a thin view (one dummy column) so the domain DF isn't + # pandas-"empty" (which would log a spurious warning). + domain = pd.DataFrame( + {"_": 0}, index=persons.index.rename(self._SP_RNG_CHANNEL) + ) + state.get_rn_generator().add_channel(self._SP_RNG_CHANNEL, domain) + self._sp_rng_channel_registered = True + def update_shadow_prices(self, state): """ Adjust shadow_prices based on relative values of modeled_size and desired_size. @@ -738,6 +779,8 @@ def update_shadow_prices(self, state): assert self.desired_size is not None assert self.shadow_prices is not None + self._ensure_sp_rng_channel(state) + if shadow_price_method == "ctramp": # - CTRAMP """ @@ -899,12 +942,25 @@ def update_shadow_prices(self, state): if (len(choices) > 0) & (~converged): # person's probability of being selected for re-simulation is from the zonal sample rate sample_rates = choices.map(zonal_sample_rate.to_dict()) + # Under EET we route through a dedicated RNG channel so shadow-pricing draws are isolated from the + # persons-channel reset that location_choice does between iterations. Under MC we leave the index + # name alone so draws continue to consume the main persons channel exactly as before. + probs_index = choices.index + if state.settings.use_explicit_error_terms: + logger.debug( + f"Renaming probs index from {probs_index.name} to {self._SP_RNG_CHANNEL} for EET RNG channel matching." 
+ ) + probs_index = probs_index.rename(self._SP_RNG_CHANNEL) probs = pd.DataFrame( data={"0": 1 - sample_rates, "1": sample_rates}, - index=choices.index, + index=probs_index, ) # using ActivitySim's RNG to make choices for repeatability current_sample, rands = logit.make_choices(state, probs) + if state.settings.use_explicit_error_terms: + current_sample.index = current_sample.index.rename( + choices.index.name + ) current_sample = current_sample[current_sample == 1] if len(sampled_persons) == 0: diff --git a/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.csv b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.csv new file mode 100644 index 0000000000..d81df1ab13 --- /dev/null +++ b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.csv @@ -0,0 +1,2 @@ +Description,Expression,participate,not_participate +Adult participation,adult,0.5,-0.5 diff --git a/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.yaml b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.yaml new file mode 100644 index 0000000000..8db2410c08 --- /dev/null +++ b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation.yaml @@ -0,0 +1,5 @@ +SPEC: joint_tour_participation.csv +COEFFICIENTS: joint_tour_participation_coefficients.csv +participation_choice: participate +max_participation_choice_iterations: 100 +FORCE_PARTICIPATION: True diff --git a/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation_coefficients.csv b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation_coefficients.csv new file mode 100644 index 0000000000..237d519172 --- /dev/null +++ b/activitysim/abm/test/test_misc/configs_test_misc/joint_tour_participation_coefficients.csv @@ -0,0 +1,2 @@ +expression,coefficient +adult,1.0 diff --git a/activitysim/abm/test/test_misc/test_joint_tour_participation.py 
b/activitysim/abm/test/test_misc/test_joint_tour_participation.py new file mode 100644 index 0000000000..5aa15c6e8e --- /dev/null +++ b/activitysim/abm/test/test_misc/test_joint_tour_participation.py @@ -0,0 +1,158 @@ +import numpy as np +import pandas as pd +import pandas.testing as pdt +import pytest + +from activitysim.abm.models import joint_tour_participation +from activitysim.core import logit, workflow + +from .test_trip_departure_choice import add_canonical_dirs + + +@pytest.fixture +def candidates(): + # Create synthetic candidates for Joint Tour Participation + # JTP chooses whether each candidate participates in a joint tour. + # We include varied compositions and preschoolers to exercise the + # get_tour_satisfaction logic properly. + num_tours_per_comp = 500 + compositions = ["MIXED", "ADULTS", "CHILDREN"] + num_candidates_per_tour = 4 + + total_tours = num_tours_per_comp * len(compositions) + num_candidates = total_tours * num_candidates_per_tour + + # Ensure reproducibility + rng = np.random.default_rng(42) + + tour_ids = np.repeat(np.arange(total_tours), num_candidates_per_tour) + comp_values = np.repeat(compositions, num_tours_per_comp * num_candidates_per_tour) + + df = pd.DataFrame( + { + "tour_id": tour_ids, + "household_id": tour_ids, # simplified for mock + "person_id": np.arange(num_candidates), + "composition": comp_values, + }, + index=pd.Index(np.arange(num_candidates), name="participant_id"), + ) + + # Assign adult and preschooler status based on composition + # MIXED: at least one adult and one child + # ADULTS: all adults + # CHILDREN: all children + df["adult"] = False + df["person_is_preschool"] = False + + for i, comp in enumerate(compositions): + mask = df.composition == comp + indices = df[mask].index + + if comp == "ADULTS": + df.loc[indices, "adult"] = True + elif comp == "CHILDREN": + df.loc[indices, "adult"] = False + # Some children are preschoolers + df.loc[ + rng.choice(indices, len(indices) // 4, replace=False), + 
"person_is_preschool", + ] = True + elif comp == "MIXED": + # For each tour, make the first person an adult, rest children + tour_start_indices = indices[::num_candidates_per_tour] + df.loc[tour_start_indices, "adult"] = True + # Other members are children, some might be preschoolers + other_indices = indices[~indices.isin(tour_start_indices)] + df.loc[ + rng.choice(other_indices, len(other_indices) // 3, replace=False), + "person_is_preschool", + ] = True + + return df + + +@pytest.fixture +def model_spec(): + # Simple spec with two alternatives: 'participate' and 'not_participate' + return pd.DataFrame( + {"participate": [0.8, -0.2], "not_participate": [0.0, 0.0]}, + index=pd.Index(["adult", "person_is_preschool"], name="Expression"), + ) + + +def test_jtp_explicit_error_terms_parity(candidates, model_spec): + """ + Test that joint tour participation results are statistically similar + between MNL and Explicit Error Terms (EET) using realistic candidate scenarios. + """ + # Create random utilities for the candidates that vary by attribute + rng = np.random.default_rng(42) + + # Base utility + some noise + base_util = (candidates.adult * 0.5) - (candidates.person_is_preschool * 1.0) + utils = pd.DataFrame( + { + "participate": base_util + rng.standard_normal(len(candidates)), + "not_participate": 0, + }, + index=candidates.index, + ) + + # Run without EET (MNL) + state_no_eet = add_canonical_dirs("configs_test_misc").default_settings() + state_no_eet.settings.use_explicit_error_terms = False + state_no_eet.rng().set_base_seed(42) + state_no_eet.rng().begin_step("test_no_eet") + state_no_eet.rng().add_channel("participant_id", candidates) + + # MNL path expects probabilities + probs_no_eet = logit.utils_to_probs(state_no_eet, utils, trace_label="test_no_eet") + choices_no_eet, _ = joint_tour_participation.participants_chooser( + state_no_eet, + probs_no_eet, + candidates, + model_spec, + trace_label="test_no_eet", + ) + + # Run with EET + state_eet = 
add_canonical_dirs("configs_test_misc").default_settings() + state_eet.settings.use_explicit_error_terms = True + state_eet.rng().set_base_seed(42) + state_eet.rng().begin_step("test_eet") + state_eet.rng().add_channel("participant_id", candidates) + + # EET path expects raw utilities + choices_eet, _ = joint_tour_participation.participants_chooser( + state_eet, + utils.copy(), + candidates, + model_spec, + trace_label="test_eet", + ) + + # Compare distributions of number of participants per tour + # Choice 0 is 'participate' + no_eet_participation_counts = ( + (choices_no_eet == 0).groupby(candidates.tour_id).sum() + ) + eet_participation_counts = (choices_eet == 0).groupby(candidates.tour_id).sum() + + dist_no_eet = no_eet_participation_counts.value_counts(normalize=True).sort_index() + dist_eet = eet_participation_counts.value_counts(normalize=True).sort_index() + + # Check that the distribution of participation counts is close + pdt.assert_series_equal(dist_no_eet, dist_eet, atol=0.05, check_names=False) + + # Also check average participation by composition for deeper parity check + comp_parity_no_eet = no_eet_participation_counts.groupby( + candidates.groupby("tour_id")["composition"].first() + ).mean() + comp_parity_eet = eet_participation_counts.groupby( + candidates.groupby("tour_id")["composition"].first() + ).mean() + + pdt.assert_series_equal( + comp_parity_no_eet, comp_parity_eet, atol=0.1, check_names=False + ) diff --git a/activitysim/abm/test/test_misc/test_location_choice_sampling.py b/activitysim/abm/test/test_misc/test_location_choice_sampling.py new file mode 100644 index 0000000000..34bd41b675 --- /dev/null +++ b/activitysim/abm/test/test_misc/test_location_choice_sampling.py @@ -0,0 +1,389 @@ +from __future__ import annotations + +import pandas as pd + +from activitysim.abm.models import location_choice +from activitysim.core import workflow + + +class _DummySkimDict: + def wrap(self, orig_key, dest_key): + return type("WrappedSkims", (), 
{"orig_key": orig_key, "dest_key": dest_key})() + + +class _DummyNetworkLos: + def __init__(self, maz_to_taz): + self._maz_to_taz = maz_to_taz + + def map_maz_to_taz(self, maz_index): + return pd.Index([self._maz_to_taz[maz] for maz in maz_index], name="TAZ") + + def get_skim_dict(self, layer): + assert layer == "taz" + return _DummySkimDict() + + +def test_location_presample_uses_taz_stable_mapping(monkeypatch): + captured = {} + + def fake_load_shadow_price_calculator(_state, _model_settings): + return type( + "ShadowPriceCalculator", + (), + { + "use_shadow_pricing": False, + }, + )() + + def fake_location_sample( + _state, + _segment_name, + _choosers, + alternatives, + _skims, + _estimator, + _model_settings, + alt_dest_col_name, + _chunk_size, + _chunk_tag, + _trace_label, + zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, + ): + captured["alt_dest_col_name"] = alt_dest_col_name + captured["zone_layer"] = zone_layer + captured["active_taz_index"] = alternatives.index.copy() + captured["stable_alt_positions"] = stable_alt_positions + captured["n_total_alts"] = n_total_alts + return pd.DataFrame( + {"dest_TAZ": [1]}, + index=pd.Index([1001], name="person_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trace_label, + _model_settings, + full_taz_index=None, + ): + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + {"dest_MAZ": [101]}, + index=pd.Index([1001], name="person_id"), + ) + + monkeypatch.setattr( + location_choice.shadow_pricing, + "load_shadow_price_calculator", + fake_load_shadow_price_calculator, + ) + monkeypatch.setattr(location_choice, "_location_sample", fake_location_sample) + monkeypatch.setattr( + location_choice.tour_destination, + "choose_MAZ_for_TAZ", + fake_choose_maz_for_taz, + ) + + state = workflow.State().default_settings() + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "zone_id", + "SIMULATE_CHOOSER_COLUMNS": 
[location_choice.HOME_MAZ], + }, + )() + persons_merged = pd.DataFrame( + { + location_choice.HOME_MAZ: [9001], + location_choice.HOME_TAZ: [90], + }, + index=pd.Index([1001], name="person_id"), + ) + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0], + }, + index=pd.Index([101, 103], name="zone_id"), + ) + full_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 0.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0, 0.0], + }, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = location_choice.location_presample( + state, + "segment", + persons_merged, + network_los, + active_dest_size_terms, + estimator=None, + model_settings=model_settings, + chunk_size=0, + chunk_tag="test_chunk", + trace_label="test_trace", + full_dest_size_terms=full_dest_size_terms, + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"zone_id": [101]}, index=pd.Index([1001], name="person_id")), + ) + pd.testing.assert_index_equal( + captured["active_taz_index"], + pd.Index([1, 3], name=location_choice.DEST_TAZ), + ) + assert captured["alt_dest_col_name"] == location_choice.DEST_TAZ + assert captured["zone_layer"] == "taz" + assert captured["n_total_alts"] is None + assert captured["stable_alt_positions"] is None + assert captured["full_taz_index"] is None + + +def test_location_presample_passes_full_taz_index_for_eet_poisson(monkeypatch): + captured = {} + + def fake_load_shadow_price_calculator(_state, _model_settings): + return type( + "ShadowPriceCalculator", + (), + { + "use_shadow_pricing": False, + }, + )() + + def fake_location_sample( + _state, + _segment_name, + _choosers, + alternatives, + _skims, + _estimator, + _model_settings, + alt_dest_col_name, + _chunk_size, + _chunk_tag, + _trace_label, + zone_layer=None, + 
stable_alt_positions=None, + n_total_alts=None, + ): + captured["alt_dest_col_name"] = alt_dest_col_name + captured["zone_layer"] = zone_layer + captured["active_taz_index"] = alternatives.index.copy() + captured["stable_alt_positions"] = stable_alt_positions.copy() + captured["n_total_alts"] = n_total_alts + return pd.DataFrame( + {"dest_TAZ": [1]}, + index=pd.Index([1001], name="person_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trace_label, + _model_settings, + full_taz_index=None, + ): + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + {"dest_MAZ": [101]}, + index=pd.Index([1001], name="person_id"), + ) + + monkeypatch.setattr( + location_choice.shadow_pricing, + "load_shadow_price_calculator", + fake_load_shadow_price_calculator, + ) + monkeypatch.setattr(location_choice, "_location_sample", fake_location_sample) + monkeypatch.setattr( + location_choice.tour_destination, + "choose_MAZ_for_TAZ", + fake_choose_maz_for_taz, + ) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "zone_id", + "SIMULATE_CHOOSER_COLUMNS": [location_choice.HOME_MAZ], + }, + )() + persons_merged = pd.DataFrame( + { + location_choice.HOME_MAZ: [9001], + location_choice.HOME_TAZ: [90], + }, + index=pd.Index([1001], name="person_id"), + ) + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0], + }, + index=pd.Index([101, 103], name="zone_id"), + ) + full_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 0.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0, 0.0], + }, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = 
location_choice.location_presample( + state, + "segment", + persons_merged, + network_los, + active_dest_size_terms, + estimator=None, + model_settings=model_settings, + chunk_size=0, + chunk_tag="test_chunk", + trace_label="test_trace", + full_dest_size_terms=full_dest_size_terms, + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"zone_id": [101]}, index=pd.Index([1001], name="person_id")), + ) + pd.testing.assert_index_equal( + captured["full_taz_index"], + pd.Index([1, 2, 3], name=location_choice.DEST_TAZ), + ) + + +def test_location_sample_uses_maz_stable_mapping(monkeypatch): + captured = {} + + def fake_load_shadow_price_calculator(_state, _model_settings): + return type( + "ShadowPriceCalculator", + (), + { + "use_shadow_pricing": False, + }, + )() + + def fake_location_sample( + _state, + _segment_name, + _choosers, + alternatives, + _skims, + _estimator, + _model_settings, + alt_dest_col_name, + _chunk_size, + _chunk_tag, + _trace_label, + zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, + ): + captured["alt_dest_col_name"] = alt_dest_col_name + captured["zone_layer"] = zone_layer + captured["active_maz_index"] = alternatives.index.copy() + captured["stable_alt_positions"] = stable_alt_positions.copy() + captured["n_total_alts"] = n_total_alts + return pd.DataFrame( + {"zone_id": [101]}, + index=pd.Index([1001], name="person_id"), + ) + + monkeypatch.setattr( + location_choice.shadow_pricing, + "load_shadow_price_calculator", + fake_load_shadow_price_calculator, + ) + monkeypatch.setattr(location_choice, "_location_sample", fake_location_sample) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "zone_id", + "SIMULATE_CHOOSER_COLUMNS": [location_choice.HOME_MAZ], + }, + )() + persons_merged = pd.DataFrame( + { + location_choice.HOME_MAZ: [9001], + }, + index=pd.Index([1001], name="person_id"), + ) + 
network_los = type( + "DummyNetworkLos", + (), + { + "get_default_skim_dict": lambda self: _DummySkimDict(), + }, + )() + + active_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0], + }, + index=pd.Index([101, 103], name="zone_id"), + ) + full_dest_size_terms = pd.DataFrame( + { + "size_term": [1.0, 0.0, 2.0], + "shadow_price_size_term_adjustment": [1.0, 1.0, 1.0], + "shadow_price_utility_adjustment": [0.0, 0.0, 0.0], + }, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = location_choice.location_sample( + state, + "segment", + persons_merged, + network_los, + active_dest_size_terms, + full_dest_size_terms, + estimator=None, + model_settings=model_settings, + chunk_size=0, + chunk_tag="test_chunk", + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"zone_id": [101]}, index=pd.Index([1001], name="person_id")), + ) + pd.testing.assert_index_equal( + captured["active_maz_index"], + pd.Index([101, 103], name="zone_id"), + ) + assert captured["alt_dest_col_name"] == "zone_id" + assert captured["zone_layer"] is None + assert captured["n_total_alts"] == 3 + assert list(captured["stable_alt_positions"]) == [0, 2] diff --git a/activitysim/abm/test/test_misc/test_shadow_pricing_simulate.py b/activitysim/abm/test/test_misc/test_shadow_pricing_simulate.py index 22de62a6d8..032e2d1aa9 100644 --- a/activitysim/abm/test/test_misc/test_shadow_pricing_simulate.py +++ b/activitysim/abm/test/test_misc/test_shadow_pricing_simulate.py @@ -1,17 +1,17 @@ +from __future__ import annotations + import os from pathlib import Path + import numpy as np +import openmatrix as omx import pandas as pd - import pytest -import openmatrix as omx - +from activitysim.abm.models.location_choice import run_location_choice from activitysim.abm.tables import shadow_pricing -from activitysim.core import workflow, los +from activitysim.core 
import los, workflow from activitysim.core.configuration.logit import TourLocationComponentSettings -from activitysim.abm.models.location_choice import run_location_choice - LAND_USE_FIELDS = [ "e01_nrm", @@ -578,3 +578,97 @@ def test_shadow_pricing_simulate(state, model_settings, network_los): choices_df.index ), ) + + +def test_shadow_pricing_dedicated_rng_channel_eet_only( + state, model_settings, network_los +): + """ + Under EET, ShadowPriceCalculator should register a dedicated + shadow_pricing_persons RNG channel on first call to update_shadow_prices, + and route its re-simulation draws through it so they are isolated from + the main persons channel. Under MC, no channel is registered and the + pre-existing coupling with the main RNG is preserved. + """ + from activitysim.core import logit + + model_settings.LOGSUM_SETTINGS = None + rng = state.get_rn_generator() + + # --- MC case: helper is a no-op --- + state.settings.use_explicit_error_terms = False + spc_mc = shadow_pricing.load_shadow_price_calculator(state, model_settings) + spc_mc._ensure_sp_rng_channel(state) + + assert not spc_mc._sp_rng_channel_registered + assert "shadow_pricing_persons" not in rng.channels + + # --- EET case: channel registered, idempotent, draws advance offsets --- + state.settings.use_explicit_error_terms = True + spc_eet = shadow_pricing.load_shadow_price_calculator(state, model_settings) + spc_eet._ensure_sp_rng_channel(state) + + assert spc_eet._sp_rng_channel_registered + assert "shadow_pricing_persons" in rng.channels + + # Idempotent re-registration + rng_channels_before = set(rng.channels.keys()) + spc_eet._ensure_sp_rng_channel(state) + assert set(rng.channels.keys()) == rng_channels_before + + # Channel covers the same person ids as persons_merged + persons = state.get_dataframe("persons_merged") + sp_channel = rng.channels["shadow_pricing_persons"] + pd.testing.assert_index_equal( + sp_channel.row_states.index, persons.index, check_names=False + ) + + # Draws via 
make_choices on a probs DF indexed by shadow_pricing_persons + # advance the dedicated channel's offsets each call, demonstrating the + # channel keeps its offset across iterations (no reset between calls). + rng.begin_step("test_shadow_pricing_sp_channel_draws") + + probs = pd.DataFrame( + {"0": [0.5] * len(persons), "1": [0.5] * len(persons)}, + index=persons.index.rename("shadow_pricing_persons"), + ) + + offsets_before = sp_channel.row_states["offset"].copy() + logit.make_choices(state, probs) + offsets_after_first = sp_channel.row_states["offset"].copy() + assert ( + offsets_after_first > offsets_before + ).all(), "shadow_pricing_persons channel offsets should advance after first draw" + + logit.make_choices(state, probs) + offsets_after_second = sp_channel.row_states["offset"] + assert (offsets_after_second > offsets_after_first).all(), ( + "shadow_pricing_persons channel offsets should advance further on second draw " + "(channel is not reset between shadow-pricing iterations)" + ) + + rng.end_step("test_shadow_pricing_sp_channel_draws") + + # cleanup_rng_channel drops the channel and resets the flag so the SPC can + # be re-used (or a fresh SPC for the next location_choice model can + # re-register the channel without colliding on extend_domain's no-overlap + # assert in SimpleChannel). + spc_eet.cleanup_rng_channel(state) + assert "shadow_pricing_persons" not in rng.channels + assert not spc_eet._sp_rng_channel_registered + + # Idempotent: calling cleanup again is a no-op + spc_eet.cleanup_rng_channel(state) + + # A fresh SPC can register the channel cleanly after cleanup (simulates the + # work-then-school sequential model pattern). 
+ spc_eet_2 = shadow_pricing.load_shadow_price_calculator(state, model_settings) + spc_eet_2._ensure_sp_rng_channel(state) + assert "shadow_pricing_persons" in rng.channels + spc_eet_2.cleanup_rng_channel(state) + + # cleanup_rng_channel on an MC-only SPC is also a no-op + spc_mc.cleanup_rng_channel(state) + + # Reset for hygiene (other tests in this module assume MC default) + state.settings.use_explicit_error_terms = False diff --git a/activitysim/abm/test/test_misc/test_tour_destination_sampling.py b/activitysim/abm/test/test_misc/test_tour_destination_sampling.py new file mode 100644 index 0000000000..2b74da1676 --- /dev/null +++ b/activitysim/abm/test/test_misc/test_tour_destination_sampling.py @@ -0,0 +1,409 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import numpy as np +import pandas as pd + +from activitysim.abm.models.util import tour_destination +from activitysim.core import workflow + + +class _DummySkimDict: + def wrap(self, orig_key, dest_key): + return type("WrappedSkims", (), {"orig_key": orig_key, "dest_key": dest_key})() + + +class _DummyNetworkLos: + zone_system = 2 + + def __init__(self, maz_to_taz): + self._maz_to_taz = maz_to_taz + + def map_maz_to_taz(self, maz_index): + return pd.Index([self._maz_to_taz[maz] for maz in maz_index], name="TAZ") + + def get_default_skim_dict(self): + return _DummySkimDict() + + def get_skim_dict(self, layer): + assert layer == "taz" + return _DummySkimDict() + + +class _DummyRng: + def __init__(self, draws): + self._draws = np.asarray(draws) + self.calls = [] + + def random_for_df(self, df, n): + self.calls.append(n) + assert self._draws.shape == (len(df), n) + return self._draws.copy() + + +class _DummyState: + def __init__(self, draws, use_explicit_error_terms=False): + self.settings = SimpleNamespace( + trace_hh_id=None, + use_explicit_error_terms=use_explicit_error_terms, + ) + self._rng = _DummyRng(draws) + + def get_rn_generator(self): + return self._rng + + +def 
test_destination_presample_uses_taz_stable_mapping(monkeypatch): + captured = {} + + def fake_destination_sample( + _state, + _spec_segment_name, + _choosers, + destination_size_terms, + _skims, + _estimator, + _model_settings, + alt_dest_col_name, + chunk_tag, + trace_label, + zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, + ): + captured["alt_dest_col_name"] = alt_dest_col_name + captured["zone_layer"] = zone_layer + captured["active_taz_index"] = destination_size_terms.index.copy() + captured["stable_alt_positions"] = stable_alt_positions.copy() + captured["n_total_alts"] = n_total_alts + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + return pd.DataFrame( + {tour_destination.DEST_TAZ: [1]}, + index=pd.Index([7001], name="tour_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trace_label, + _model_settings, + full_taz_index=None, + ): + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + {tour_destination.DEST_MAZ: [101]}, + index=pd.Index([7001], name="tour_id"), + ) + + monkeypatch.setattr( + tour_destination, "_destination_sample", fake_destination_sample + ) + monkeypatch.setattr(tour_destination, "choose_MAZ_for_TAZ", fake_choose_maz_for_taz) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + choosers = pd.DataFrame( + {"origin": [101]}, + index=pd.Index([7001], name="tour_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "zone_id", + "CHOOSER_ORIG_COL_NAME": "origin", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 2.0]}, + index=pd.Index([101, 103], name="zone_id"), + ) + full_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = tour_destination.destination_presample( + state, + "segment", + 
choosers, + model_settings, + network_los, + active_destination_size_terms, + full_destination_size_terms, + estimator=None, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"zone_id": [101]}, index=pd.Index([7001], name="tour_id")), + ) + pd.testing.assert_index_equal( + captured["active_taz_index"], + pd.Index([1, 3], name=tour_destination.DEST_TAZ), + ) + assert captured["alt_dest_col_name"] == tour_destination.DEST_TAZ + assert captured["zone_layer"] == "taz" + assert captured["n_total_alts"] == 3 + assert list(captured["stable_alt_positions"]) == [0, 2] + pd.testing.assert_index_equal( + captured["full_taz_index"], + pd.Index([1, 2, 3], name=tour_destination.DEST_TAZ), + ) + + +def test_choose_maz_for_taz_supports_variable_taz_counts(): + state = _DummyState([[0.2, 0.81], [0.1, 0.9]]) + + taz_sample = pd.DataFrame( + { + tour_destination.DEST_TAZ: [1, 2, 2], + "prob": [0.4, 0.6, 1.0], + "pick_count": [1, 1, 1], + }, + index=pd.Index([7001, 7001, 7002], name="tour_id"), + ) + maz_size_terms = pd.DataFrame( + { + "zone_id": [101, 102, 201, 202], + tour_destination.DEST_TAZ: [1, 1, 2, 2], + "size_term": [1.0, 3.0, 4.0, 1.0], + } + ) + + out = tour_destination.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + "test_trace", + SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + tour_destination.DEST_MAZ: [101, 202, 201], + "prob": [0.10, 0.12, 0.80], + "pick_count": [1, 1, 1], + }, + index=pd.Index([7001, 7001, 7002], name="tour_id"), + ), + ) + + +def test_choose_maz_for_taz_preserves_fixed_width_path(): + state = _DummyState([[0.2, 0.81], [0.1, 0.9]]) + + taz_sample = pd.DataFrame( + { + tour_destination.DEST_TAZ: [1, 2, 1, 2], + "prob": [0.4, 0.6, 0.25, 0.75], + "pick_count": [1, 1, 1, 1], + }, + index=pd.Index([7001, 7001, 7002, 7002], name="tour_id"), + ) + maz_size_terms = pd.DataFrame( + { + "zone_id": [101, 102, 201, 202], + 
tour_destination.DEST_TAZ: [1, 1, 2, 2], + "size_term": [1.0, 3.0, 4.0, 1.0], + } + ) + + out = tour_destination.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + "test_trace", + SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + tour_destination.DEST_MAZ: [101, 202, 101, 202], + "prob": [0.10, 0.12, 0.0625, 0.15], + "pick_count": [1, 1, 1, 1], + }, + index=pd.Index([7001, 7001, 7002, 7002], name="tour_id"), + ), + ) + + +def test_choose_maz_for_taz_eet_poisson_uses_full_taz_positions(): + state = _DummyState([[0.99, 0.2, 0.99, 0.99, 0.8]]) + + taz_sample = pd.DataFrame( + { + tour_destination.DEST_TAZ: [2, 5], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ) + maz_size_terms = pd.DataFrame( + { + "zone_id": [201, 202, 501, 502], + tour_destination.DEST_TAZ: [2, 2, 5, 5], + "size_term": [3.0, 1.0, 3.0, 1.0], + } + ) + + out = tour_destination.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + "test_trace", + SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + full_taz_index=pd.Index([1, 2, 3, 4, 5], name=tour_destination.DEST_TAZ), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + tour_destination.DEST_MAZ: [201, 502], + "prob": [0.375, 0.0625], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ), + ) + assert state.get_rn_generator().calls == [5] + + +def test_choose_maz_for_taz_uses_sample_width_when_full_taz_index_omitted(): + state = _DummyState([[0.2, 0.81]]) + + taz_sample = pd.DataFrame( + { + tour_destination.DEST_TAZ: [2, 5], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ) + maz_size_terms = pd.DataFrame( + { + "zone_id": [201, 202, 501, 502], + tour_destination.DEST_TAZ: [2, 2, 5, 5], + "size_term": [3.0, 1.0, 3.0, 1.0], + } + ) + + out = tour_destination.choose_MAZ_for_TAZ( + state, + 
taz_sample, + maz_size_terms, + "test_trace", + SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + tour_destination.DEST_MAZ: [201, 502], + "prob": [0.375, 0.0625], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ), + ) + assert state.get_rn_generator().calls == [2] + + +def test_destination_sample_uses_maz_stable_mapping(monkeypatch): + captured = {} + + def fake_destination_sample( + _state, + _spec_segment_name, + _choosers, + destination_size_terms, + _skims, + _estimator, + _model_settings, + alt_dest_col_name, + chunk_tag, + trace_label, + zone_layer=None, + stable_alt_positions=None, + n_total_alts=None, + ): + captured["active_maz_index"] = destination_size_terms.index.copy() + captured["stable_alt_positions"] = stable_alt_positions.copy() + captured["n_total_alts"] = n_total_alts + captured["alt_dest_col_name"] = alt_dest_col_name + captured["zone_layer"] = zone_layer + return pd.DataFrame( + {"zone_id": [101], "person_id": [55]}, + index=pd.Index([7001], name="tour_id"), + ) + + monkeypatch.setattr( + tour_destination, "_destination_sample", fake_destination_sample + ) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + choosers = pd.DataFrame( + {"origin": [101], "person_id": [55]}, + index=pd.Index([7001], name="tour_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "zone_id", + "CHOOSER_ORIG_COL_NAME": "origin", + "CHOOSER_ID_COLUMN": "person_id", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 2.0]}, + index=pd.Index([101, 103], name="zone_id"), + ) + full_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = tour_destination.destination_sample( + state, + "segment", + choosers, + 
model_settings, + network_los, + active_destination_size_terms, + full_destination_size_terms, + estimator=None, + chunk_size=0, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + {"zone_id": [101], "person_id": [55]}, + index=pd.Index([7001], name="tour_id"), + ), + ) + pd.testing.assert_index_equal( + captured["active_maz_index"], + pd.Index([101, 103], name="zone_id"), + ) + assert list(captured["stable_alt_positions"]) == [0, 2] + assert captured["n_total_alts"] == 3 + assert captured["alt_dest_col_name"] == "zone_id" + assert captured["zone_layer"] is None diff --git a/activitysim/abm/test/test_misc/test_tour_od_sampling.py b/activitysim/abm/test/test_misc/test_tour_od_sampling.py new file mode 100644 index 0000000000..5383e524e5 --- /dev/null +++ b/activitysim/abm/test/test_misc/test_tour_od_sampling.py @@ -0,0 +1,318 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import numpy as np +import pandas as pd + +from activitysim.abm.models.util import tour_od +from activitysim.core import workflow + + +class _DummySkimDict: + def wrap(self, orig_key, dest_key): + return type("WrappedSkims", (), {"orig_key": orig_key, "dest_key": dest_key})() + + +class _DummyNetworkLos: + zone_system = 2 + + def __init__(self, maz_to_taz): + self._maz_to_taz = maz_to_taz + + def map_maz_to_taz(self, maz_index): + return pd.Index([self._maz_to_taz[maz] for maz in maz_index], name="TAZ") + + def get_skim_dict(self, layer): + assert layer == "taz" + return _DummySkimDict() + + +class _DummyRng: + def __init__(self, draws): + self._draws = np.asarray(draws) + self.calls = [] + + def random_for_df(self, df, n): + self.calls.append(n) + assert self._draws.shape == (len(df), n) + return self._draws.copy() + + +class _DummyState: + @staticmethod + def make(draws, use_explicit_error_terms=False): + state = workflow.State().default_settings() + state.settings.trace_hh_id = None + state.settings.use_explicit_error_terms 
= use_explicit_error_terms + rng = _DummyRng(draws) + state._dummy_rng = rng + state.get_rn_generator = lambda: rng + return state + + +def test_od_presample_passes_full_taz_index_for_eet_poisson(monkeypatch): + captured = {} + + def fake_od_sample( + _state, + _spec_segment_name, + _choosers, + _network_los, + destination_size_terms, + _origin_id_col, + _dest_id_col, + _skims, + _estimator, + _model_settings, + alt_od_col_name, + _chunk_size, + chunk_tag, + trace_label, + ): + captured["active_taz_index"] = destination_size_terms.index.copy() + captured["alt_od_col_name"] = alt_od_col_name + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + return pd.DataFrame( + { + alt_od_col_name: ["101_1", "101_3"], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trace_label, + addtl_col_for_unique_key=None, + dest_maz_id_col=tour_od.DEST_MAZ, + full_taz_index=None, + ): + captured["addtl_col_for_unique_key"] = addtl_col_for_unique_key + captured["dest_maz_id_col"] = dest_maz_id_col + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + { + dest_maz_id_col: [101], + tour_od.ORIG_MAZ: [101], + "prob": [0.5], + "pick_count": [1], + }, + index=pd.Index([7001], name="tour_id"), + ) + + monkeypatch.setattr(tour_od, "_od_sample", fake_od_sample) + monkeypatch.setattr(tour_od, "choose_MAZ_for_TAZ", fake_choose_maz_for_taz) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + choosers = pd.DataFrame( + {tour_od.ORIG_TAZ: [1]}, + index=pd.Index([7001], name="tour_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "alt_dest", + "CHOOSER_ORIG_COL_NAME": "origin", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 2.0]}, + index=pd.Index([101, 
103], name="zone_id"), + ) + full_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = tour_od.od_presample( + state, + "segment", + choosers, + model_settings, + network_los, + active_destination_size_terms, + full_destination_size_terms, + estimator=None, + chunk_size=0, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + {"alt_dest": [101], "origin": [101], "prob": [0.5], "pick_count": [1]}, + index=pd.Index([7001], name="tour_id"), + ), + ) + pd.testing.assert_index_equal( + captured["active_taz_index"], + pd.Index([1, 3], name=tour_od.DEST_TAZ), + ) + assert captured["alt_od_col_name"] == tour_od.get_od_id_col( + tour_od.ORIG_MAZ, tour_od.DEST_TAZ + ) + assert captured["chunk_tag"] == "tour_od.presample" + assert captured["addtl_col_for_unique_key"] == tour_od.ORIG_MAZ + pd.testing.assert_index_equal( + captured["full_taz_index"], + pd.Index([1, 2, 3], name=tour_od.DEST_TAZ), + ) + + +def test_od_presample_omits_full_taz_index_for_eet_non_poisson(monkeypatch): + captured = {} + + def fake_od_sample( + _state, + _spec_segment_name, + _choosers, + _network_los, + destination_size_terms, + _origin_id_col, + _dest_id_col, + _skims, + _estimator, + _model_settings, + alt_od_col_name, + _chunk_size, + chunk_tag, + trace_label, + ): + captured["active_taz_index"] = destination_size_terms.index.copy() + captured["alt_od_col_name"] = alt_od_col_name + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + return pd.DataFrame( + { + alt_od_col_name: ["101_1", "101_3"], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trace_label, + addtl_col_for_unique_key=None, + dest_maz_id_col=tour_od.DEST_MAZ, + full_taz_index=None, + ): + captured["addtl_col_for_unique_key"] = addtl_col_for_unique_key + 
captured["dest_maz_id_col"] = dest_maz_id_col + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + { + dest_maz_id_col: [101], + tour_od.ORIG_MAZ: [101], + "prob": [0.5], + "pick_count": [1], + }, + index=pd.Index([7001], name="tour_id"), + ) + + monkeypatch.setattr(tour_od, "_od_sample", fake_od_sample) + monkeypatch.setattr(tour_od, "choose_MAZ_for_TAZ", fake_choose_maz_for_taz) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + state.settings.sample_method = "eet" + choosers = pd.DataFrame( + {tour_od.ORIG_TAZ: [1]}, + index=pd.Index([7001], name="tour_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "alt_dest", + "CHOOSER_ORIG_COL_NAME": "origin", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + active_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 2.0]}, + index=pd.Index([101, 103], name="zone_id"), + ) + full_destination_size_terms = pd.DataFrame( + {"size_term": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="zone_id"), + ) + + out = tour_od.od_presample( + state, + "segment", + choosers, + model_settings, + network_los, + active_destination_size_terms, + full_destination_size_terms, + estimator=None, + chunk_size=0, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + {"alt_dest": [101], "origin": [101], "prob": [0.5], "pick_count": [1]}, + index=pd.Index([7001], name="tour_id"), + ), + ) + assert captured["full_taz_index"] is None + + +def test_choose_maz_for_taz_eet_uses_full_taz_positions_with_origin_key(): + state = _DummyState.make([[0.99, 0.2, 0.99, 0.99, 0.8]]) + + taz_sample = pd.DataFrame( + { + tour_od.DEST_TAZ: [2, 5], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + tour_od.ORIG_MAZ: [9001, 9001], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ) + maz_size_terms = pd.DataFrame( + { + "zone_id": [201, 202, 501, 502], + tour_od.DEST_TAZ: [2, 2, 5, 5], 
+ "size_term": [3.0, 1.0, 3.0, 1.0], + } + ) + + out = tour_od.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + "test_trace", + addtl_col_for_unique_key=tour_od.ORIG_MAZ, + full_taz_index=pd.Index([1, 2, 3, 4, 5], name=tour_od.DEST_TAZ), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + tour_od.DEST_MAZ: [201, 502], + tour_od.ORIG_MAZ: [9001, 9001], + "prob": [0.375, 0.0625], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="tour_id"), + ), + ) + assert state.get_rn_generator().calls == [5] diff --git a/activitysim/abm/test/test_misc/test_trip_departure_choice.py b/activitysim/abm/test/test_misc/test_trip_departure_choice.py index 94d47f57ac..d6645ce94f 100644 --- a/activitysim/abm/test/test_misc/test_trip_departure_choice.py +++ b/activitysim/abm/test/test_misc/test_trip_departure_choice.py @@ -187,3 +187,60 @@ def test_apply_stage_two_model(model_spec, trips): pd.testing.assert_index_equal(departures.index, trips.index) departures = pd.concat([trips, departures], axis=1) + + +def test_tdc_explicit_error_terms_parity(model_spec, trips): + setup_dirs() + model_settings = tdc.TripDepartureChoiceSettings() + + # Increase population for statistical convergence + large_trips = pd.concat([trips] * 500).reset_index(drop=True) + large_trips.index.name = "trip_id" + # Ensure tour_ids are distinct for the expanded set + large_trips["tour_id"] = ( + large_trips.groupby("tour_id").cumcount() * 1000 + large_trips["tour_id"] + ) + + # Trip departure choice uses tour_leg_id as the random channel index + tour_legs = tdc.get_tour_legs(large_trips) + + # Run without explicit error terms + state_no_eet = add_canonical_dirs("configs_test_misc").default_settings() + state_no_eet.settings.use_explicit_error_terms = False + state_no_eet.rng().set_base_seed(42) + state_no_eet.rng().begin_step("test_no_eet") + state_no_eet.rng().add_channel("trip_id", large_trips) + state_no_eet.rng().add_channel("tour_leg_id", tour_legs) + + 
departures_no_eet = tdc.apply_stage_two_model( + state_no_eet, + model_spec, + large_trips, + 0, + "TEST Trip Departure No EET", + model_settings=model_settings, + ) + + # Run with explicit error terms + state_eet = add_canonical_dirs("configs_test_misc").default_settings() + state_eet.settings.use_explicit_error_terms = True + state_eet.rng().set_base_seed(42) + state_eet.rng().begin_step("test_eet") + state_eet.rng().add_channel("trip_id", large_trips) + state_eet.rng().add_channel("tour_leg_id", tour_legs) + + departures_eet = tdc.apply_stage_two_model( + state_eet, + model_spec, + large_trips, + 0, + "TEST Trip Departure EET", + model_settings=model_settings, + ) + + # Compare distributions + dist_no_eet = departures_no_eet.value_counts(normalize=True).sort_index() + dist_eet = departures_eet.value_counts(normalize=True).sort_index() + + # Check that they are reasonably close (within 5% for this sample size) + pd.testing.assert_series_equal(dist_no_eet, dist_eet, atol=0.05, check_names=False) diff --git a/activitysim/abm/test/test_misc/test_trip_destination_sampling.py b/activitysim/abm/test/test_misc/test_trip_destination_sampling.py new file mode 100644 index 0000000000..e8aae5164d --- /dev/null +++ b/activitysim/abm/test/test_misc/test_trip_destination_sampling.py @@ -0,0 +1,419 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import numpy as np +import pandas as pd + +from activitysim.abm.models import trip_destination +from activitysim.core import workflow +from activitysim.core.skim_dictionary import DataFrameMatrix + + +class _DummySkimHotel: + def sample_skims(self, presample): + return {"presample": presample} + + +class _DummyNetworkLos: + zone_system = 2 + + def __init__(self, maz_to_taz): + self._maz_to_taz = maz_to_taz + + def map_maz_to_taz(self, maz_index): + return pd.Index([self._maz_to_taz[maz] for maz in maz_index], name="zone_id") + + def get_maz_to_taz_series(self, _state): + return pd.Series(self._maz_to_taz) 
+ + +class _DummyRng: + def __init__(self, draws): + self._draws = np.asarray(draws) + self.calls = [] + + def random_for_df(self, df, n): + self.calls.append(n) + assert self._draws.shape == (len(df), n) + return self._draws.copy() + + +class _DummyState: + def __init__(self, draws, use_explicit_error_terms=False): + self.settings = SimpleNamespace( + trace_hh_id=None, + use_explicit_error_terms=use_explicit_error_terms, + ) + self._rng = _DummyRng(draws) + + def get_rn_generator(self): + return self._rng + + +def test_destination_sample_retains_full_maz_universe(monkeypatch): + captured = {} + + def fake_destination_sample( + _state, + _primary_purpose, + _trips, + alternatives, + _model_settings, + _size_term_matrix, + skims, + alt_dest_col_name, + _estimator, + chunk_tag, + trace_label, + zone_layer=None, + ): + captured["alternatives_index"] = alternatives.index.copy() + captured["alt_dest_col_name"] = alt_dest_col_name + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + captured["zone_layer"] = zone_layer + captured["presample"] = skims["presample"] + return pd.DataFrame( + {"dest_taz": [101]}, + index=pd.Index([7001], name="trip_id"), + ) + + monkeypatch.setattr( + trip_destination, "_destination_sample", fake_destination_sample + ) + + state = workflow.State().default_settings() + trips = pd.DataFrame(index=pd.Index([7001], name="trip_id")) + model_settings = type("ModelSettings", (), {"ALT_DEST_COL_NAME": "dest_taz"})() + + alternatives = pd.DataFrame( + {"eatout": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="dest_taz"), + ) + size_term_matrix = DataFrameMatrix(alternatives) + + out = trip_destination.destination_sample( + state, + "eatout", + trips, + alternatives, + model_settings, + size_term_matrix, + _DummySkimHotel(), + estimator=None, + chunk_size=0, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"dest_taz": [101]}, index=pd.Index([7001], name="trip_id")), + ) + 
pd.testing.assert_index_equal( + captured["alternatives_index"], + pd.Index([101, 102, 103], name="dest_taz"), + ) + assert captured["alt_dest_col_name"] == "dest_taz" + assert captured["chunk_tag"] == "trip_destination.sample" + assert captured["zone_layer"] is None + assert captured["presample"] is False + + +def test_destination_presample_retains_full_taz_universe(monkeypatch): + captured = {} + + def fake_destination_sample( + _state, + _primary_purpose, + _trips, + alternatives, + _model_settings, + size_term_matrix, + skims, + alt_dest_col_name, + _estimator, + chunk_tag, + trace_label, + zone_layer=None, + ): + captured["alternatives_index"] = alternatives.index.copy() + captured["size_term_index"] = size_term_matrix.df.index.copy() + captured["alt_dest_col_name"] = alt_dest_col_name + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + captured["zone_layer"] = zone_layer + captured["presample"] = skims["presample"] + return pd.DataFrame( + {"dest_taz": [1]}, + index=pd.Index([7001], name="trip_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trips, + _network_los, + _alt_dest_col_name, + _trace_label, + _model_settings, + full_taz_index=None, + ): + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + {"dest_taz": [101]}, + index=pd.Index([7001], name="trip_id"), + ) + + monkeypatch.setattr( + trip_destination, "_destination_sample", fake_destination_sample + ) + monkeypatch.setattr(trip_destination, "choose_MAZ_for_TAZ", fake_choose_maz_for_taz) + + state = workflow.State().default_settings() + trips = pd.DataFrame( + {"origin": [101], "tour_leg_dest": [103]}, + index=pd.Index([7001], name="trip_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "dest_taz", + "TRIP_ORIGIN": "origin", + "PRIMARY_DEST": "tour_leg_dest", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + alternatives = pd.DataFrame( + {"eatout": [1.0, 
0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="dest_taz"), + ) + size_term_matrix = DataFrameMatrix(alternatives) + + out = trip_destination.destination_presample( + state, + "eatout", + trips, + alternatives, + model_settings, + size_term_matrix, + _DummySkimHotel(), + network_los, + estimator=None, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"dest_taz": [101]}, index=pd.Index([7001], name="trip_id")), + ) + pd.testing.assert_index_equal( + captured["alternatives_index"], + pd.Index([1, 2, 3], name="zone_id"), + ) + pd.testing.assert_index_equal( + captured["size_term_index"], + pd.Index([1, 2, 3], name="zone_id"), + ) + assert captured["alt_dest_col_name"] == "dest_taz" + assert captured["chunk_tag"] == "trip_destination.presample" + assert captured["zone_layer"] == "taz" + assert captured["presample"] is True + assert captured["full_taz_index"] is None + + +def test_destination_presample_passes_full_taz_index_for_eet_poisson(monkeypatch): + captured = {} + + def fake_destination_sample( + _state, + _primary_purpose, + _trips, + alternatives, + _model_settings, + size_term_matrix, + skims, + alt_dest_col_name, + _estimator, + chunk_tag, + trace_label, + zone_layer=None, + ): + captured["alternatives_index"] = alternatives.index.copy() + captured["size_term_index"] = size_term_matrix.df.index.copy() + captured["alt_dest_col_name"] = alt_dest_col_name + captured["chunk_tag"] = chunk_tag + captured["trace_label"] = trace_label + captured["zone_layer"] = zone_layer + captured["presample"] = skims["presample"] + return pd.DataFrame( + {"dest_taz": [1]}, + index=pd.Index([7001], name="trip_id"), + ) + + def fake_choose_maz_for_taz( + _state, + _taz_sample, + _maz_size_terms, + _trips, + _network_los, + _alt_dest_col_name, + _trace_label, + _model_settings, + full_taz_index=None, + ): + captured["full_taz_index"] = full_taz_index + return pd.DataFrame( + {"dest_taz": [101]}, + index=pd.Index([7001], name="trip_id"), + ) 
+ + monkeypatch.setattr( + trip_destination, "_destination_sample", fake_destination_sample + ) + monkeypatch.setattr(trip_destination, "choose_MAZ_for_TAZ", fake_choose_maz_for_taz) + + state = workflow.State().default_settings() + state.settings.use_explicit_error_terms = True + trips = pd.DataFrame( + {"origin": [101], "tour_leg_dest": [103]}, + index=pd.Index([7001], name="trip_id"), + ) + model_settings = type( + "ModelSettings", + (), + { + "ALT_DEST_COL_NAME": "dest_taz", + "TRIP_ORIGIN": "origin", + "PRIMARY_DEST": "tour_leg_dest", + }, + )() + network_los = _DummyNetworkLos({101: 1, 102: 2, 103: 3}) + + alternatives = pd.DataFrame( + {"eatout": [1.0, 0.0, 2.0]}, + index=pd.Index([101, 102, 103], name="dest_taz"), + ) + size_term_matrix = DataFrameMatrix(alternatives) + + out = trip_destination.destination_presample( + state, + "eatout", + trips, + alternatives, + model_settings, + size_term_matrix, + _DummySkimHotel(), + network_los, + estimator=None, + trace_label="test_trace", + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame({"dest_taz": [101]}, index=pd.Index([7001], name="trip_id")), + ) + pd.testing.assert_index_equal( + captured["full_taz_index"], + pd.Index([1, 2, 3], name="dest_taz_TAZ"), + ) + + +def test_choose_maz_for_taz_eet_poisson_uses_full_taz_positions(): + state = _DummyState([[0.99, 0.2, 0.99, 0.99, 0.8]]) + network_los = _DummyNetworkLos({201: 2, 202: 2, 501: 5, 502: 5}) + + taz_sample = pd.DataFrame( + { + "dest_taz": [2, 5], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="trip_id"), + ) + maz_size_terms = DataFrameMatrix( + pd.DataFrame( + {"eatout": [3.0, 1.0, 3.0, 1.0]}, + index=pd.Index([201, 202, 501, 502], name="dest_taz"), + ) + ) + trips = pd.DataFrame( + {"purpose": ["eatout"]}, + index=pd.Index([7001], name="trip_id"), + ) + + out = trip_destination.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + trips, + network_los, + "dest_taz", + "test_trace", + 
SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + full_taz_index=pd.Index([1, 2, 3, 4, 5], name="dest_taz_TAZ"), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + "dest_taz": [201, 502], + "prob": [0.375, 0.0625], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="trip_id"), + ), + ) + assert state.get_rn_generator().calls == [5] + + +def test_choose_maz_for_taz_uses_sample_width_when_full_taz_index_omitted(): + state = _DummyState([[0.2, 0.81]]) + network_los = _DummyNetworkLos({201: 2, 202: 2, 501: 5, 502: 5}) + + taz_sample = pd.DataFrame( + { + "dest_taz": [2, 5], + "prob": [0.5, 0.25], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="trip_id"), + ) + maz_size_terms = DataFrameMatrix( + pd.DataFrame( + {"eatout": [3.0, 1.0, 3.0, 1.0]}, + index=pd.Index([201, 202, 501, 502], name="dest_taz"), + ) + ) + trips = pd.DataFrame( + {"purpose": ["eatout"]}, + index=pd.Index([7001], name="trip_id"), + ) + + out = trip_destination.choose_MAZ_for_TAZ( + state, + taz_sample, + maz_size_terms, + trips, + network_los, + "dest_taz", + "test_trace", + SimpleNamespace(ESTIMATION_SAMPLE_SIZE=0, SAMPLE_SIZE=0), + ) + + pd.testing.assert_frame_equal( + out, + pd.DataFrame( + { + "dest_taz": [201, 502], + "prob": [0.375, 0.0625], + "pick_count": [1, 1], + }, + index=pd.Index([7001, 7001], name="trip_id"), + ), + ) + assert state.get_rn_generator().calls == [2] diff --git a/activitysim/abm/test/test_misc/test_trip_scheduling_choice.py b/activitysim/abm/test/test_misc/test_trip_scheduling_choice.py index 6823a5b123..8401c785c1 100644 --- a/activitysim/abm/test/test_misc/test_trip_scheduling_choice.py +++ b/activitysim/abm/test/test_misc/test_trip_scheduling_choice.py @@ -1,9 +1,11 @@ -import numpy as np -import pandas as pd -import pytest +from __future__ import annotations + import os from pathlib import Path +import numpy as np +import pandas as pd +import pytest from activitysim.abm.models import trip_scheduling_choice as 
tsc from activitysim.abm.tables.skims import skim_dict diff --git a/activitysim/core/configuration/base.py b/activitysim/core/configuration/base.py index aad266ff12..d8e59d0827 100644 --- a/activitysim/core/configuration/base.py +++ b/activitysim/core/configuration/base.py @@ -135,9 +135,14 @@ class ComputeSettings(PydanticBase): Sharrow settings for a component. """ - # Make this more general compute settings and use for explicit error term overrides - # Default None work for sub-components defined in getter below (eet_subcomponent) - use_explicit_error_terms: None | bool | dict[str, bool] = None + sample_method: None | Literal["monte_carlo", "eet", "poisson"] = None + """ + Override the alternative sampling method used by `interaction_sample`. + + When unset, `interaction_sample` preserves legacy behavior: it uses + `monte_carlo` when explicit error terms are off and `poisson` when they + are on. + """ sharrow_skip: bool | dict[str, bool] = False """Skip sharrow when evaluating this component. 
@@ -222,13 +227,6 @@ def should_skip(self, subcomponent: str) -> bool: else: return bool(self.sharrow_skip) - def eet_subcomponent(self, subcomponent: str) -> bool: - """Check for EET overrides for a particular subcomponent.""" - if isinstance(self.use_explicit_error_terms, dict): - return self.use_explicit_error_terms.get(subcomponent, None) - else: - return self.use_explicit_error_terms - @contextmanager def pandas_option_context(self): """Context manager to set pandas options for compute settings.""" @@ -277,7 +275,7 @@ def subcomponent_settings(self, subcomponent: str) -> ComputeSettings: use_numba=self.use_numba, drop_unused_columns=self.drop_unused_columns, protect_columns=self.protect_columns, - use_explicit_error_terms=self.eet_subcomponent(subcomponent), + sample_method=self.sample_method, ) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 6feb16e468..cf5bfb297b 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -1,9 +1,9 @@ from __future__ import annotations -from pathlib import Path -from typing import Any, Literal import struct import time +from pathlib import Path +from typing import Any, Literal from pydantic import model_validator, validator @@ -704,6 +704,7 @@ def _check_store_skims_in_shm(self): "instrument", "sharrow", "use_explicit_error_terms", + "sample_method", ) """ Setting to log on startup. @@ -781,18 +782,25 @@ def _check_store_skims_in_shm(self): """ Make choice from random utility model by drawing from distribution of unobserved part of utility and taking the maximum of total utility. - + Defaults to standard Monte Carlo method, i.e., calculating probabilities and then drawing a single uniform random number to draw from cumulative probabily. .. versionadded:: 1.6 """ + sample_method: None | Literal["monte_carlo", "eet", "poisson"] = None + """ + Sampling method to use in `activitysim.core.interaction_sample`. 
+ + When unset, `monte_carlo` is used when `use_explicit_error_terms` is false and `poisson` is used when it is true. + """ + check_model_settings: bool = True """ run checks to validate that YAML settings files are loadable and spec and coefficent csv can be resolved. - should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. + should catch many common errors early, including missing required configurations or specified coefficient labels without defined values. """ other_settings: dict[str, Any] = None diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 93834c690a..b438745763 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import typing import numpy as np import pandas as pd @@ -17,91 +18,174 @@ util, workflow, ) +from activitysim.core.chunk import ChunkSizer from activitysim.core.configuration.base import ComputeSettings from activitysim.core.exceptions import SegmentedSpecificationError from activitysim.core.skim_dataset import DatasetWrapper from activitysim.core.skim_dictionary import SkimWrapper +if typing.TYPE_CHECKING: + from activitysim.core.random import Random + logger = logging.getLogger(__name__) DUMP = False +InteractionSampleMethod = typing.Literal["monte_carlo", "eet", "poisson"] + -def make_sample_choices_utility_based( +def _resolve_sample_method( state: workflow.State, - choosers, - utilities, - alternatives, - sample_size, - alternative_count, - alt_col_name, - allow_zero_probs, - trace_label, - chunk_sizer, -): - assert isinstance(utilities, pd.DataFrame) - assert utilities.shape == (len(choosers), alternative_count) + compute_settings: ComputeSettings | None, +) -> InteractionSampleMethod: + sampling_method = None + if compute_settings is not None: + sampling_method = compute_settings.sample_method + if 
sampling_method is None: + sampling_method = state.settings.sample_method + if sampling_method is None: + return "poisson" if state.settings.use_explicit_error_terms else "monte_carlo" + if sampling_method not in typing.get_args(InteractionSampleMethod): + raise ValueError( + f"Unsupported sample_method {sampling_method!r}; expected one of {typing.get_args(InteractionSampleMethod)}" + ) + logger.debug(f"Using sample_method={sampling_method}") + return sampling_method + + +def _poisson_sample_alternatives_inner( + probs: pd.DataFrame, + poisson_inclusion_probs_values: np.ndarray, + rng: Random, + trace_label: str | None, + chunk_sizer: ChunkSizer, + stable_alt_positions: np.ndarray | None = None, + n_total_alts: int | None = None, +) -> np.ndarray: + """ + Draw one Bernoulli inclusion decision per chooser-alternative pair. - assert isinstance(alternatives, pd.DataFrame) - assert len(alternatives) == alternative_count + Returns a dense 2-D array aligned to `probs` where sampled alternatives + contain their Poisson inclusion probability and unsampled alternatives are + `np.nan`. 
+ """ + if stable_alt_positions is None and n_total_alts is None: + rands = rng.random_for_df(probs, n=probs.shape[1]) + elif stable_alt_positions is not None and n_total_alts is not None: + rands = rng.random_for_df_stable_alt_positions( + probs, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + else: + raise ValueError( + "stable_alt_positions and n_total_alts must both be provided or omitted together" + ) + chunk_sizer.log_df(trace_label, "rands", rands) + return np.where( + rands < poisson_inclusion_probs_values, poisson_inclusion_probs_values, np.nan + ) - if allow_zero_probs: - zero_probs = ( - utilities.sum(axis=1) <= utilities.shape[1] * logit.UTIL_UNAVAILABLE + +def _poisson_fallback_sample_alternatives( + probs: pd.DataFrame, + sample_size: int, + rng: Random, + trace_label: str | None, + chunk_sizer: ChunkSizer, + stable_alt_positions: np.ndarray | None = None, + n_total_alts: int | None = None, +) -> np.ndarray: + """ + Fallback sampler used when Poisson retries still leave empty chooser rows. + + This path samples exactly `sample_size` distinct alternatives per chooser + without replacement by ranking one random score per alternative. The + returned array uses the same sparse chooser-by-alternative representation as + the Poisson path: chosen alternatives are `1.0`, unchosen alternatives are + `np.nan`. 
+ """ + if sample_size > probs.shape[1]: + logger.info( + f"Poisson fallback sampling without replacement with sample_size={sample_size} > number of alternatives=" + + f"{probs.shape[1]}; returning all alternatives for {len(probs)} choosers" ) - if zero_probs.all(): - return pd.DataFrame( - columns=[alt_col_name, "rand", "prob", choosers.index.name] - ) - if zero_probs.any(): - # remove from sample - utilities = utilities[~zero_probs] - choosers = choosers[~zero_probs] + return np.full(probs.shape, 1.0) - utils_array = utilities.to_numpy() - chunk_sizer.log_df(trace_label, "utils_array", utils_array) - chosen_destinations = [] + if stable_alt_positions is None and n_total_alts is None: + fallback_rands = rng.random_for_df(probs, n=probs.shape[1]) + elif stable_alt_positions is not None and n_total_alts is not None: + fallback_rands = rng.random_for_df_stable_alt_positions( + probs, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + else: + raise ValueError( + "stable_alt_positions and n_total_alts must both be provided or omitted together" + ) + chunk_sizer.log_df(trace_label, "fallback_rands", fallback_rands) - rands = state.get_rn_generator().gumbel_for_df(utilities, n=alternative_count) - chunk_sizer.log_df(trace_label, "rands", rands) + chosen_positions = np.argpartition( + fallback_rands, + kth=sample_size - 1, + axis=1, + )[:, :sample_size] - # TODO-EET [janzill Jun2022]: using for-loop to keep memory usage low, an array of dimension - # (len(choosers), alternative_count, sample_size) can get very large. Probably better to - # use chunking for this. 
- for i in range(sample_size): - # created this once for memory logging - if i > 0: - rands = state.get_rn_generator().gumbel_for_df( - utilities, n=alternative_count - ) - chosen_destinations.append(np.argmax(utils_array + rands, axis=1)) - chosen_destinations = np.concatenate(chosen_destinations, axis=0) + fallback_sampled_values = np.full(probs.shape, np.nan) + chooser_positions = np.repeat(np.arange(len(probs)), sample_size) + fallback_sampled_values[ + chooser_positions, + chosen_positions.reshape(-1), + ] = 1.0 - chunk_sizer.log_df(trace_label, "chosen_destinations", chosen_destinations) + return fallback_sampled_values - del utils_array - chunk_sizer.log_df(trace_label, "utils_array", None) - del rands - chunk_sizer.log_df(trace_label, "rands", None) - chooser_idx = np.tile(np.arange(utilities.shape[0]), sample_size) - chunk_sizer.log_df(trace_label, "chooser_idx", chooser_idx) +def make_sample_choices_eet( + state: workflow.State, + choosers: pd.DataFrame, + utilities: pd.DataFrame, + probs: pd.DataFrame, + alternatives: pd.DataFrame, + sample_size: int, + alt_col_name: str, + trace_label: str, + chunk_sizer: ChunkSizer, + stable_alt_positions: np.ndarray | None = None, + n_total_alts: int | None = None, +) -> pd.DataFrame: + """ + Sample alternatives by repeated EET (Gumbel argmax) draws with replacement. - probs = logit.utils_to_probs( - state, - utilities, - allow_zero_probs=allow_zero_probs, - trace_label=trace_label, - overflow_protection=not allow_zero_probs, - trace_choosers=choosers, + Each chooser receives `sample_size` EV1 draw sets and the argmax-over-utility + winner is recorded per draw, so duplicates are possible (same with-replacement + semantics as the Monte Carlo sampling path). + + `utilities` drives the Gumbel argmax. `probs` (the MNL choice probabilities + computed from the same utilities by the caller) supplies the `prob` column + written back into the output for sampling-of-alternative correction factors. 
+ """ + chosen_destinations = ( + state.get_rn_generator() + .gumbel_max_positions_for_df( + utilities, + sample_size, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + .reshape(-1) ) - chunk_sizer.log_df(trace_label, "probs", probs) + chunk_sizer.log_df(trace_label, "chosen_destinations", chosen_destinations) + + chooser_idx = np.repeat(np.arange(utilities.shape[0]), sample_size) + chunk_sizer.log_df(trace_label, "chooser_idx", chooser_idx) choices_df = pd.DataFrame( { - alt_col_name: alternatives.index.values[chosen_destinations], - "prob": probs.to_numpy()[chooser_idx, chosen_destinations], choosers.index.name: choosers.index.values[chooser_idx], + "prob": probs.to_numpy()[chooser_idx, chosen_destinations], + alt_col_name: alternatives.index.values[chosen_destinations], } ) chunk_sizer.log_df(trace_label, "choices_df", choices_df) @@ -110,11 +194,121 @@ def make_sample_choices_utility_based( chunk_sizer.log_df(trace_label, "chooser_idx", None) del chosen_destinations chunk_sizer.log_df(trace_label, "chosen_destinations", None) - del probs - chunk_sizer.log_df(trace_label, "probs", None) - # handing this off to caller - chunk_sizer.log_df(trace_label, "choices_df", None) + return choices_df + + +def _poisson_sample_alternatives( + chunk_sizer: ChunkSizer, + probs: pd.DataFrame, + alternatives: pd.DataFrame, + sample_size, + alt_col_name: str, + state: workflow.State, + trace_label: str, + stable_alt_positions: np.ndarray | None = None, + n_total_alts: int | None = None, +) -> pd.DataFrame: + """ + Build a Poisson-sampled choice set for each chooser. + + The primary path performs independent Poisson inclusion draws for every chooser-alternative pair and retries any + chooser row that sampled no alternatives. Internally the sampler maintains a sparse chooser-by-alternative array + where sampled cells hold the probability to carry forward as `prob` and unsampled cells are np.nan. 
+ + If a chooser still has no sampled alternatives after 10 retries, we fall back to sampling exactly sample_size + distinct alternatives without replacement and force those chosen probabilities to `1.0` so the sampling correction + factor cancels out. In practice we expect this to be very rare with reasonable sample sizes and not too small + choice sets, but it is a known issue with Poisson sampling that we want to guard against. Note that if this + fallback is triggered it can lead to inconsistent random numbers between two scenarios if the number of retries it + takes in each scenario differs, but again we expect this to be very rare and the alternative is potentially + infinite retries or raising an error. + + returns: DataFrame with one row per sampled chooser-alternative pair and columns for chooser index, alt_col_name, + and prob (the Poisson inclusion probability for that pair). + + In the case of Poisson sampling, the inclusion probability for each chooser-alternative pair is the probability + that the alternative was included in the sample at least once across the sample_size draws, which is the + complement of the probability of it never being drawn in sample_size draws, so 1-(1-p)^sample_size where p is the + original choice probability. To make Poisson sampling interchangeable with other sampling methods, we return the + inclusion probabilities, i.e., the true probability of being sampled. Pick_count will be 1 by definition + (poisson sampling returns a yes/no for each alternative, so if an alternative is included in the sample it is + included once) and the standard sampling correction factor can be recovered as np.log(df.pick_count/df.prob) + = np.log(1/inclusion_prob). 
+ """ + + inclusion_probs_values = 1.0 - np.power( + 1.0 - probs.to_numpy(copy=False), sample_size + ) + + sampled_values = np.full(inclusion_probs_values.shape, np.nan) + + n = 0 + active_row_positions = np.arange(len(probs), dtype=np.int64) + + while active_row_positions.size > 0: + probs_subset = probs.iloc[active_row_positions] + sampled_results_subset = _poisson_sample_alternatives_inner( + probs_subset, + inclusion_probs_values[active_row_positions], + state.get_rn_generator(), + trace_label, + chunk_sizer, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + no_alts_sampled_mask = np.isnan(sampled_results_subset).all(axis=1) + sampled_values[ + active_row_positions[~no_alts_sampled_mask] + ] = sampled_results_subset[~no_alts_sampled_mask] + + if no_alts_sampled_mask.any(): + logger.info(f"Poisson sampling of alternatives failed with {n=}, retrying") + failed_row_positions = active_row_positions[no_alts_sampled_mask] + logger.debug( + f"Sampled size was {sample_size}, poisson method mean expected sample size was" + + f" {inclusion_probs_values[failed_row_positions].sum(axis=1).mean():.1f}, actual sampled mean was" + + f" {np.isfinite(sampled_values[failed_row_positions]).sum(axis=1).mean():.1f} and highest zero" + + f" selection prob was {(1.0 - inclusion_probs_values[failed_row_positions]).prod(axis=1).max():.2g}" + ) + active_row_positions = failed_row_positions + + else: # All choosers have at least one alternative in sample set + break + + n += 1 + if n == 10: + logger.info( + "Poisson choice set sampling exceeded 10 retries; falling back to random sampling for %s choosers", + len(active_row_positions), + ) + fallback_sampled_values = _poisson_fallback_sample_alternatives( + probs.iloc[active_row_positions], + sample_size, + state.get_rn_generator(), + trace_label, + chunk_sizer, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + sampled_values[active_row_positions] = fallback_sampled_values + break 
+ + chooser_positions, alt_positions = np.nonzero(~np.isnan(sampled_values)) + chooser_col_name = probs.index.name or "index" + + if len(chooser_positions) == 0: + choices_df = pd.DataFrame(columns=[chooser_col_name, "prob", alt_col_name]) + else: + choices_df = pd.DataFrame( + { + chooser_col_name: probs.index.to_numpy()[chooser_positions], + "prob": sampled_values[chooser_positions, alt_positions], + alt_col_name: alternatives.index.to_numpy()[alt_positions], + } + ) + + chunk_sizer.log_df(trace_label, "choices_df", choices_df) return choices_df @@ -148,7 +342,6 @@ def make_sample_choices( Returns ------- - """ assert isinstance(probs, pd.DataFrame) @@ -227,8 +420,10 @@ def _interaction_sample( locals_d=None, trace_label=None, zone_layer=None, - chunk_sizer=None, + chunk_sizer: ChunkSizer | None = None, compute_settings: ComputeSettings | None = None, + stable_alt_positions=None, + n_total_alts=None, ): """ Run a MNL simulation in the situation in which alternatives must @@ -292,6 +487,11 @@ def _interaction_sample( pick_count : int number of duplicate picks for chooser, alt """ + assert ( + chunk_sizer is not None + ), "chunk_sizer cannot be None but old nullable signature is preserved" + # TODO it's probably safe to reorder these arguments to make chunk_sizer mandatory since + # _interaction_sample is private? 
have_trace_targets = state.tracing.has_trace_targets(choosers) trace_ids = None @@ -535,13 +735,12 @@ def _interaction_sample( state.tracing.dump_df(DUMP, utilities, trace_label, "utilities") - if compute_settings.use_explicit_error_terms is not None: - use_eet = compute_settings.use_explicit_error_terms - logger.info( - f"Interaction sample model-specific EET overrides for {trace_label}: eet = {use_eet}" + sampling_method = _resolve_sample_method(state, compute_settings) + + if state.settings.use_explicit_error_terms and estimation.manager.enabled: + raise ValueError( + "use_explicit_error_terms is not supported with estimation mode" ) - else: - use_eet = state.settings.use_explicit_error_terms if sample_size == 0: # Return full alternative set rather than sample @@ -571,58 +770,29 @@ def _interaction_sample( return choices_df - if use_eet: - - if estimation.manager.enabled: - raise ValueError( - "cannot use explicit error terms with estimation mode at this time" - ) - - utilities = logit.validate_utils( - state, - utilities, - allow_zero_probs=allow_zero_probs, - trace_label=trace_label, - trace_choosers=choosers, - ) + # All three sampling methods consume MNL choice probabilities, so compute + # them once up front. 
+ probs = logit.utils_to_probs( + state, + utilities, + allow_zero_probs=allow_zero_probs, + trace_label=trace_label, + trace_choosers=choosers, + overflow_protection=not allow_zero_probs, + ) + chunk_sizer.log_df(trace_label, "probs", probs) - choices_df = make_sample_choices_utility_based( - state, - choosers, - utilities, - alternatives, - sample_size, - alternative_count, - alt_col_name, - allow_zero_probs=allow_zero_probs, - trace_label=trace_label, - chunk_sizer=chunk_sizer, - ) - del utilities - chunk_sizer.log_df(trace_label, "utilities", None) - else: - # convert to probabilities (utilities exponentiated and normalized to probs) - # probs is same shape as utilities, one row per chooser and one column for alternative - probs = logit.utils_to_probs( - state, - utilities, - allow_zero_probs=allow_zero_probs, - trace_label=trace_label, - trace_choosers=choosers, - overflow_protection=not allow_zero_probs, + if have_trace_targets: + state.tracing.trace_df( + probs, + tracing.extend_trace_label(trace_label, "probs"), + column_labels=["alternative", "probability"], ) - chunk_sizer.log_df(trace_label, "probs", probs) + if sampling_method == "monte_carlo": del utilities chunk_sizer.log_df(trace_label, "utilities", None) - if have_trace_targets: - state.tracing.trace_df( - probs, - tracing.extend_trace_label(trace_label, "probs"), - column_labels=["alternative", "probability"], - ) - choices_df = make_sample_choices( state, choosers, @@ -673,24 +843,85 @@ def _interaction_sample( del probs chunk_sizer.log_df(trace_label, "probs", None) + else: + # eet and poisson: optionally trim choosers with all-zero probs. 
The MC + # path handles this inside make_sample_choices + if allow_zero_probs: + non_zero = probs.sum(axis=1) != 0 + if not non_zero.any(): + return pd.DataFrame( + columns=[alt_col_name, "prob", "pick_count"], + index=pd.Index([], name=choosers.index.name), + ) + if not non_zero.all(): + probs = probs[non_zero] + utilities = utilities[non_zero] + choosers = choosers[non_zero] + + if sampling_method == "eet": + # validate_utils clamps unavailable alternatives (utility <= UTIL_MIN) + # to UTIL_UNAVAILABLE so that the Gumbel argmax can't accidentally pick + # them when the Gumbel noise dominates. Probabilities are unaffected + # (both bounds exp() to ~0) so we do not recompute probs. + utilities = logit.validate_utils( + state, + utilities, + allow_zero_probs=allow_zero_probs, + trace_label=trace_label, + trace_choosers=choosers, + ) + choices_df = make_sample_choices_eet( + state, + choosers, + utilities, + probs, + alternatives, + sample_size, + alt_col_name, + trace_label, + chunk_sizer, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + else: # sampling_method == "poisson" + choices_df = _poisson_sample_alternatives( + chunk_sizer, + probs, + alternatives, + sample_size, + alt_col_name, + state, + trace_label, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + + del utilities + chunk_sizer.log_df(trace_label, "utilities", None) + del probs + chunk_sizer.log_df(trace_label, "probs", None) chunk_sizer.log_df(trace_label, "choices_df", choices_df) - # pick_count and pick_dup - # pick_count is number of duplicate picks - # pick_dup flag is True for all but first of duplicates - pick_group = choices_df.groupby([choosers.index.name, alt_col_name]) - - # number each item in each group from 0 to the length of that group - 1. 
- choices_df["pick_count"] = pick_group.cumcount(ascending=True) - # flag duplicate rows after first - choices_df["pick_dup"] = choices_df["pick_count"] > 0 - # add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge) - choices_df["pick_count"] += pick_group.cumcount(ascending=False) + 1 - - # drop the duplicates - choices_df = choices_df[~choices_df["pick_dup"]] - del choices_df["pick_dup"] + if sampling_method == "poisson": + choices_df["pick_count"] = 1 + else: + # pick_count and pick_dup + # pick_count is number of duplicate picks + # pick_dup flag is True for all but first of duplicates + pick_group = choices_df.groupby([choosers.index.name, alt_col_name]) + + # number each item in each group from 0 to the length of that group - 1. + choices_df["pick_count"] = pick_group.cumcount(ascending=True) + # flag duplicate rows after first + choices_df["pick_dup"] = choices_df["pick_count"] > 0 + # add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge) + choices_df["pick_count"] += pick_group.cumcount(ascending=False) + 1 + + # drop the duplicates + choices_df = choices_df[~choices_df["pick_dup"]] + del choices_df["pick_dup"] + chunk_sizer.log_df(trace_label, "choices_df", choices_df) # set index after groupby so we can trace on it @@ -706,7 +937,7 @@ def _interaction_sample( column_labels=["sample_alt", "alternative"], ) - if not state.settings.use_explicit_error_terms: + if "rand" in choices_df.columns: # don't need this after tracing del choices_df["rand"] @@ -737,6 +968,8 @@ def interaction_sample( zone_layer: str | None = None, explicit_chunk_size: float = 0, compute_settings: ComputeSettings | None = None, + stable_alt_positions=None, + n_total_alts=None, ): """ Run a simulation in the situation in which alternatives must @@ -811,9 +1044,25 @@ def interaction_sample( if not choosers.index.is_monotonic_increasing: assert choosers.index.is_monotonic_increasing + sampling_method = 
_resolve_sample_method(state, compute_settings) + logger.debug(f" interaction_sample sample method = {sampling_method}") + + if sampling_method == "monte_carlo": + # The MC sampling path (make_sample_choices) does not consume stable_alt_positions + # or n_total_alts. Null them out so callers that conservatively pass values along + # don't accidentally rely on them under MC sampling. + stable_alt_positions = None + n_total_alts = None + # FIXME - legacy logic - not sure this is needed or even correct? - sample_size = min(sample_size, len(alternatives.index)) - logger.info(f" --- interaction_sample sample size = {sample_size}") + if sampling_method != "poisson": + sample_size = min(sample_size, len(alternatives.index)) + # with poisson sampling, definitely don't want to reduce sample size - it's not a sample size but a number + # of theoretical draws. Another options would be to disable sampling if # alts < sample size to ensure + # all are included (but this wouldn't behave well if there were land use changes in the project case which + # switched regimes) + + logger.debug(f" interaction_sample sample size = {sample_size}") result_list = [] for ( @@ -839,6 +1088,8 @@ def interaction_sample( zone_layer=zone_layer, chunk_sizer=chunk_sizer, compute_settings=compute_settings, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, ) if choices.shape[0] > 0: diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index e73f64f4fb..0202fc986f 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -9,8 +9,9 @@ from activitysim.core import chunk, interaction_simulate, logit, tracing, util, workflow from activitysim.core.configuration.base import ComputeSettings -from activitysim.core.simulate import set_skim_wrapper_targets from activitysim.core.exceptions import SegmentedSpecificationError +from activitysim.core.logit import AltsContext 
+from activitysim.core.simulate import set_skim_wrapper_targets logger = logging.getLogger(__name__) @@ -34,6 +35,7 @@ def _interaction_sample_simulate( *, chunk_sizer: chunk.ChunkSizer, compute_settings: ComputeSettings | None = None, + alts_context: AltsContext | None = None, ): """ Run a MNL simulation in the situation in which alternatives must @@ -220,9 +222,6 @@ def _interaction_sample_simulate( ) chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities) - del interaction_df - chunk_sizer.log_df(trace_label, "interaction_df", None) - if have_trace_targets: state.tracing.trace_interaction_eval_results( trace_eval_results, @@ -265,14 +264,29 @@ def _interaction_sample_simulate( # insert the zero-prob utilities to pad each alternative set to same size padded_utilities = np.insert(interaction_utilities.utility.values, inserts, -999) chunk_sizer.log_df(trace_label, "padded_utilities", padded_utilities) - del inserts - - del interaction_utilities - chunk_sizer.log_df(trace_label, "interaction_utilities", None) # reshape to array with one row per chooser, one column per alternative padded_utilities = padded_utilities.reshape(-1, max_sample_count) + if alts_context is not None: + padded_alt_nrs = np.insert(interaction_df[choice_column], inserts, -999) + chunk_sizer.log_df(trace_label, "padded_alt_nrs", padded_alt_nrs) + padded_alt_nrs = padded_alt_nrs.reshape(-1, max_sample_count) + # alt_nrs_df has columns for each alt in the choice set, with values indicating which alt_id + # they correspond to (as opposed to the 0-n index implied by the column number). 
+ alt_nrs_df = pd.DataFrame(padded_alt_nrs, index=choosers.index) + chunk_sizer.log_df(trace_label, "alt_nrs_df", alt_nrs_df) + + del padded_alt_nrs + chunk_sizer.log_df(trace_label, "padded_alt_nrs", None) + else: + alt_nrs_df = None # if we don't provide the number of dense alternatives, assume that we'll use the old approach + + del interaction_df + chunk_sizer.log_df(trace_label, "interaction_df", None) + + del inserts + # convert to a dataframe with one row per chooser and one column per alternative utilities_df = pd.DataFrame(padded_utilities, index=choosers.index) chunk_sizer.log_df(trace_label, "utilities_df", utilities_df) @@ -320,11 +334,20 @@ def _interaction_sample_simulate( # positions is series with the chosen alternative represented as a column index in utilities_df # which is an integer between zero and num alternatives in the alternative sample positions, rands = logit.make_choices_utility_based( - state, utilities_df, trace_label=trace_label, trace_choosers=choosers + state, + utilities_df, + trace_label=trace_label, + trace_choosers=choosers, + alts_context=alts_context, + alt_nrs_df=alt_nrs_df, ) del utilities_df chunk_sizer.log_df(trace_label, "utilities_df", None) + + if alt_nrs_df is not None: + del alt_nrs_df + chunk_sizer.log_df(trace_label, "alt_nrs_df", None) else: # convert to probabilities (utilities exponentiated and normalized to probs) # probs is same shape as utilities, one row per chooser and one column for alternative @@ -451,6 +474,7 @@ def interaction_sample_simulate( skip_choice=False, explicit_chunk_size=0, *, + alts_context: AltsContext | None = None, compute_settings: ComputeSettings | None = None, ): """ @@ -496,6 +520,12 @@ def interaction_sample_simulate( explicit_chunk_size : float, optional If > 0, specifies the chunk size to use when chunking the interaction simulation. If < 1, specifies the fraction of the total number of choosers. 
+ alts_context: AltsContext, optional + Representation of the full alternatives domain (min and max alternative id) + in the absence of sampling. + This is used with EET simulation to ensure consistent random numbers across the whole alternative set + (as the sampled set may change between base and project). When not provided, + EET with integer-coded choice ids logs a warning rather than raising an error. Returns ------- @@ -517,6 +547,20 @@ def interaction_sample_simulate( trace_label = tracing.extend_trace_label(trace_label, "interaction_sample_simulate") chunk_tag = chunk_tag or trace_label + # TODO EET: Do we just want to warn here? Or better throw and be explicit? + if state.settings.use_explicit_error_terms: + choice_ids_are_int = pd.api.types.is_integer_dtype(alternatives[choice_column]) + if alts_context is None and choice_ids_are_int: + logger.warning( + "Using integer-coded choice_column values without alts_context when use_explicit_error_terms is true." + + " Ensure this is desired, when running on a sample it should be provided to ensure consistent random" + + " numbers across the whole alternative set." 
+ ) + if alts_context is not None and not choice_ids_are_int: + raise ValueError( + "alts_context can only be used with integer-coded choice_column values" + ) + result_list = [] for ( i, @@ -551,6 +595,7 @@ def interaction_sample_simulate( skip_choice, chunk_sizer=chunk_sizer, compute_settings=compute_settings, + alts_context=alts_context, ) result_list.append(choices) diff --git a/activitysim/core/logit.py b/activitysim/core/logit.py index 0030168bb2..f8fb02707f 100644 --- a/activitysim/core/logit.py +++ b/activitysim/core/logit.py @@ -4,6 +4,8 @@ import logging import warnings +from dataclasses import dataclass +from typing import Union import numpy as np import pandas as pd @@ -19,18 +21,50 @@ logger = logging.getLogger(__name__) +EXACT_NESTED_LOGIT_DTYPE = np.float64 + EXP_UTIL_MIN = 1e-300 EXP_UTIL_MAX = np.inf -# TODO-EET: Figure out what type we want UTIL_MIN to be, currently np.float64 UTIL_MIN = np.log(EXP_UTIL_MIN, dtype=np.float64) UTIL_UNAVAILABLE = 1000.0 * (UTIL_MIN - 1.0) - PROB_MIN = 0.0 PROB_MAX = 1.0 +@dataclass +class AltsContext: + """Representation of the alternatives without carrying around that full array.""" + + min_alt_id: int + max_alt_id: int + + def __post_init__(self): + # e.g. 
for zero based zones max_alt_id = n_alts - 1 + # but for 1 based zones, we don't need to add extra padding + self.n_rands_to_sample = max(self.max_alt_id, self.n_alts_to_cover_max_id) + + @classmethod + def from_series(cls, ser: Union[pd.Series, pd.Index]) -> "AltsContext": + min_alt_id = ser.min() + max_alt_id = ser.max() + return cls(min_alt_id, max_alt_id) + + @classmethod + def from_num_alts(cls, num_alts: int, zero_based: bool = True) -> "AltsContext": + if zero_based: + offset = -1 + else: + offset = 0 + return cls(min_alt_id=1 + offset, max_alt_id=num_alts + offset) + + @property + def n_alts_to_cover_max_id(self) -> int: + """If zones were non-consecutive, this could be a big over-estimate.""" + return self.max_alt_id + 1 + + def report_bad_choices( state: workflow.State, bad_row_map, @@ -344,103 +378,275 @@ def utils_to_probs( return probs -# TODO-EET: add doc string, tracing -def add_ev1_random(state: workflow.State, df: pd.DataFrame): - nest_utils_for_choice = df.copy() - nest_utils_for_choice += state.get_rn_generator().gumbel_for_df( - nest_utils_for_choice, n=nest_utils_for_choice.shape[1] +def _log_positive_stable_for_df( + state: workflow.State, df: pd.DataFrame, alpha: float +) -> np.ndarray: + alpha = EXACT_NESTED_LOGIT_DTYPE(alpha) + if np.isclose(alpha, 1.0): + return np.zeros(len(df), dtype=EXACT_NESTED_LOGIT_DTYPE) + + eps = np.finfo(EXACT_NESTED_LOGIT_DTYPE).eps + uniforms = np.asarray( + state.get_rn_generator().random_for_df(df, n=2), + dtype=EXACT_NESTED_LOGIT_DTYPE, ) - return nest_utils_for_choice + angle_uniform = np.clip(uniforms[:, 0], eps, 1.0 - eps) + exp_uniform = np.clip(uniforms[:, 1], eps, 1.0 - eps) + u = eps + (np.pi - 2.0 * eps) * angle_uniform + w = -np.log(exp_uniform) -def choose_from_tree( - nest_utils, all_alternatives, logit_nest_groups, nest_alternatives_by_name -): - for level, nest_names in logit_nest_groups.items(): - if level == 1: - next_level_alts = nest_alternatives_by_name[nest_names[0]] + return ( + 
np.log(np.sin(alpha * u)) + - np.log(np.sin(u)) / alpha + + ((1.0 - alpha) / alpha) * (np.log(np.sin((1.0 - alpha) * u)) - np.log(w)) + ) + + +def _leaf_path_coefficients( + nest_spec: dict | LogitNestSpec, alt_order_array: np.ndarray +) -> pd.Series: + coefficients = pd.Series( + { + nest.name: nest.product_of_coefficients + for nest in each_nest(nest_spec, type="leaf") + }, + dtype=EXACT_NESTED_LOGIT_DTYPE, + ).reindex(alt_order_array) + + if coefficients.isna().any(): + missing = coefficients[coefficients.isna()].index.tolist() + raise ValueError(f"leaf alternatives missing from nest spec: {missing}") + + return coefficients + + +def sample_nested_logit_exact_leaf_error_terms( + state: workflow.State, + alt_utilities: pd.DataFrame, + nest_spec: dict | LogitNestSpec, +) -> pd.DataFrame: + # Galichon writes the error term for alternative (leaf) j as + # $\sum_{t=1}^{n} path_coeff_up_to_t * log_positive_stable_draw(nest_coeff_t) + path_coeff_j leaf_gumbel_j$ + # with nest_coeff_0 = 1.0 + + error_terms = pd.DataFrame( + 0.0, + index=alt_utilities.index, + columns=alt_utilities.columns.to_numpy(), + dtype=EXACT_NESTED_LOGIT_DTYPE, + ) + + leaf_children_for_each_node = get_leaf_children_for_nodes(nest_spec) + + for i, nest in enumerate(each_nest(nest_spec, post_order=False)): + # skip root. 
+ if i == 0: + assert np.isclose( + nest.coefficient, 1.0 + ), "EET for nested logit requires root coefficient of 1.0" continue - choice_this_level = nest_utils[nest_utils.index.isin(next_level_alts)].idxmax() - if choice_this_level in all_alternatives: - return choice_this_level - next_level_alts = nest_alternatives_by_name[choice_this_level] - raise ValueError("This should never happen - no alternative found") + + if nest.type == "node": + all_leaf_children = leaf_children_for_each_node.get(nest.name, []) + if not all_leaf_children: + logger.warning(f"Node nest {nest.name} has no leaf children, skipping.") + continue + + # draw stable term with nest coeff as scale and multiply by path coeff, add to each child alternative + log_stable_for_node = ( + nest.product_of_coefficients + * _log_positive_stable_for_df(state, alt_utilities, nest.coefficient) + ) + # all alternatives for a chooser (row) get the same term, so we repeat the values across columns + error_terms.loc[:, all_leaf_children] += log_stable_for_node.reshape( + -1, 1 + ).repeat(len(all_leaf_children), axis=1) + + leaf_path_coefficients = _leaf_path_coefficients( + nest_spec, alt_utilities.columns.to_numpy() + ) + leaf_gumbels = pd.DataFrame( + state.get_rn_generator().gumbel_for_df(alt_utilities, n=alt_utilities.shape[1]), + index=alt_utilities.index, + columns=alt_utilities.columns.to_numpy(), + ).mul(leaf_path_coefficients, axis=1) + + error_terms += leaf_gumbels + + return error_terms -# TODO-EET: add doc string, tracing def make_choices_explicit_error_term_nl( - state, nested_utilities, alt_order_array, nest_spec, trace_label + state, + alt_utilities, + nest_spec, + trace_label, + trace_choosers=None, + alts_context: AltsContext | None = None, + alt_nrs_df: pd.DataFrame | None = None, ): - """walk down the nesting tree and make choice at each level, which is the root of the next level choice.""" - nest_utils_for_choice = add_ev1_random(state, nested_utilities) - - all_alternatives = set(nest.name 
for nest in each_nest(nest_spec, type="leaf")) - logit_nest_groups = group_nest_names_by_level(nest_spec) - nest_alternatives_by_name = {n.name: n.alternatives for n in each_nest(nest_spec)} - - # Apply is slow. It could *maybe* be sped up by using the fact that the nesting structure is the same for all rows: - # Add ev1(0,1) to all entries (as is currently being done). Then, at each level, pick the maximum of the available - # composite alternatives and set the corresponding entry to 1 for each row, set all other alternatives at this level - # to zero. Once the tree is walked (all alternatives have been processed), take the product of the alternatives in - # each leaf's alternative list. Then pick the only alternative with entry 1, all others must be 0. - choices = nest_utils_for_choice.apply( - lambda x: choose_from_tree( - x, all_alternatives, logit_nest_groups, nest_alternatives_by_name - ), - axis=1, - ) - # TODO-EET: reporting like for zero probs - assert not choices.isnull().any(), f"No choice for {trace_label}" - choices = pd.Series(choices, index=nest_utils_for_choice.index) + """ + Make EET choices for a nested logit model by adding nested-logit errors. Note these are correlated + among nests. + + Parameters + ---------- + state : workflow.State + alt_utilities : pandas.DataFrame + Utilities for fundamental alternatives (leaf nodes). + nest_spec : dict or LogitNestSpec + Nest specification for the choice model. + trace_label : str + Trace label for logging and tracing. - # In order for choice indexing to be consistent with MNL and cumsum MC choices, we need to index in the order - # alternatives were originally created before adding nest nodes that are not elemental alternatives - choices = choices.map({v: k for k, v in enumerate(alt_order_array)}) + Returns + ------- + pandas.Series + Choice indices aligned to `alt_utilities` columns. + """ + # TODO assert alts_context and alt_nrs_df are both None - no sampling from nested models for now. 
- return choices + utilities_incl_unobs = sample_nested_logit_exact_leaf_error_terms( + state, + alt_utilities, + nest_spec, + ) + utilities_incl_unobs += alt_utilities + if trace_label: + state.tracing.trace_df( + utilities_incl_unobs, + tracing.extend_trace_label(trace_label, "leaf_utilities_eet_exact"), + ) -# TODO-EET: add doc string, tracing -def make_choices_explicit_error_term_mnl(state, utilities, trace_label): - utilities_incl_unobs = add_ev1_random(state, utilities) choices = np.argmax(utilities_incl_unobs.to_numpy(), axis=1) - # TODO-EET: reporting like for zero probs - assert not np.isnan(choices).any(), f"No choice for {trace_label}" - choices = pd.Series(choices, index=utilities_incl_unobs.index) - return choices + return pd.Series(choices, index=utilities_incl_unobs.index) -def make_choices_explicit_error_term( - state, utilities, alt_order_array, nest_spec=None, trace_label=None -): - trace_label = tracing.extend_trace_label(trace_label, "make_choices_eet") - if nest_spec is None: - choices = make_choices_explicit_error_term_mnl(state, utilities, trace_label) +def make_choices_explicit_error_term_mnl( + state, + utilities, + trace_label, + trace_choosers=None, + alts_context: AltsContext | None = None, + alt_nrs_df: pd.DataFrame | None = None, +) -> pd.Series: + """ + Make EET choices for a multinomial logit model by adding EV1 errors. + + Parameters + ---------- + state : workflow.State + utilities : pandas.DataFrame + Utilities with choosers as rows and alternatives as columns. + trace_label : str + Trace label for logging and tracing. + + Returns + ------- + pandas.Series + Choice indices aligned to the utilities columns order. 
+ """ + if alts_context is None: + choices = state.get_rn_generator().gumbel_choice_positions_for_df(utilities) else: - choices = make_choices_explicit_error_term_nl( - state, utilities, alt_order_array, nest_spec, trace_label + choices = state.get_rn_generator().gumbel_choice_positions_for_df( + utilities, + alt_nrs_df=alt_nrs_df, + n_rands=alts_context.n_alts_to_cover_max_id, ) - return choices + + return pd.Series(choices, index=utilities.index) def make_choices_utility_based( state: workflow.State, utilities: pd.DataFrame, - name_mapping=None, - nest_spec=None, trace_label: str = None, trace_choosers=None, - allow_bad_probs=False, + allow_bad_utils=False, + nest_spec=None, # Make consistent with make_choices for generalizability of custom chooser. + alts_context: AltsContext | None = None, + alt_nrs_df: pd.DataFrame | None = None, ) -> tuple[pd.Series, pd.Series]: + """ + Make choices for each chooser from among a set of alternatives based on utilities by adding + random error terms and choosing the maximum utility alternative. + + Parameters + ---------- + utilities : pandas.DataFrame + Utilities with choosers as rows and alternatives as columns. Note for nested logit models, + this should include only leaf nodes. + trace_label : str + Trace label for logging and tracing. + trace_choosers : pandas.dataframe + the choosers df (for interaction_simulate) to facilitate the reporting of hh_id + by report_bad_choices because it can't deduce hh_id from the interaction_dataset + which is indexed on index values from alternatives df. + allow_bad_utils : bool + If True, allows utilities with missing or invalid values without raising an error. + nest_spec : dict or LogitNestSpec, optional + Nest specification for the choice model. If None, will be treated as a multinomial logit model. + alts_context : AltsContext, optional + If provided, will be used to determine how many random numbers to sample and how to index them for the EET + sampling. 
This is only relevant for multinomial logit models, and should be provided along with alt_nrs_df. + alt_nrs_df : pandas.DataFrame, optional + DataFrame with the same index and shape as `utilities`, containing + the alt_nrs for each alternative for each chooser. This is used to index into the random numbers when sampling + EET terms for multinomial logit models, and should contain -999 for any alternatives that are not available + for a given chooser. Should be provided along with `alts_context`. + + Returns + ------- + choices : pandas.Series + Maps chooser IDs (from `utilities` index) to a choice, where the choice + is an index into the columns of `utilities`. + rands : pandas.Series + A series of 0s for compatibility with make_choices. For EET, we do not have per-row random numbers. + """ trace_label = tracing.extend_trace_label(trace_label, "make_choices_utility_based") - # TODO-EET: index of choices for nested utilities is different than unnested - this needs to be consistent for - # turning indexes into alternative names to keep code changes to minimum for now - choices = make_choices_explicit_error_term( - state, utilities, name_mapping, nest_spec, trace_label - ) - # TODO-EET: rands - log all zeros for now + if nest_spec is None: + choices = make_choices_explicit_error_term_mnl( + state, + utilities, + trace_label, + trace_choosers, + alts_context, + alt_nrs_df, + ) + else: + # Nested-logit EET expects leaf utilities and returns indices aligned to + # the leaf alternative column order. + choices = make_choices_explicit_error_term_nl( + state, + utilities, + nest_spec, + trace_label, + trace_choosers, + alts_context, + alt_nrs_df, + ) + + missing_choices = np.isnan(choices) # TODO: should we check for infs here too? 
+ if missing_choices.any() and not allow_bad_utils: + report_bad_choices( + state, + missing_choices, + utilities, + trace_label=tracing.extend_trace_label(trace_label, "bad_utils"), + msg="no alternative selected", + # raise_error=False, + trace_choosers=trace_choosers, + ) + + # EET does not expose per-row random draws; return zeros for compatibility. + # Maybe exposing the seed of the chooser could be an alternative to re-create the random number for + # debugging/tracing purposes? rands = pd.Series(np.zeros_like(utilities.index.values), index=utilities.index) + return choices, rands @@ -769,10 +975,17 @@ def count_each_nest(spec, count): return count_each_nest(nest_spec, 0) if nest_spec is not None else 0 -def group_nest_names_by_level(nest_spec): - # group nests by level, returns {level: [nest.name at that level]} - depth = np.max([x.level for x in each_nest(nest_spec)]) - nest_levels = {x: [] for x in range(1, depth + 1)} - for n in each_nest(nest_spec): - nest_levels[n.level].append(n.name) - return nest_levels +def get_leaf_children_for_nodes(nest_spec, include_self=False): + leaf_ancestors = { + nest.name: [ancestor for ancestor in nest.ancestors] + for nest in each_nest(nest_spec, type="leaf") + } + + leaf_children_for_each_node = {} + for alt, ancestor_nodes in leaf_ancestors.items(): + for ancestor in ancestor_nodes: + # skip the leaf itself unless include_self is True + if (ancestor != alt) or include_self: + leaf_children_for_each_node.setdefault(ancestor, list()).append(alt) + + return leaf_children_for_each_node diff --git a/activitysim/core/random.py b/activitysim/core/random.py index 5541fcd41d..45cd4dcfb2 100644 --- a/activitysim/core/random.py +++ b/activitysim/core/random.py @@ -9,8 +9,8 @@ import numpy as np import pandas as pd -from activitysim.core.util import reindex from activitysim.core.exceptions import DuplicateLoadableObjectError, TableIndexError +from activitysim.core.util import reindex from .tracing import print_elapsed_time @@ 
-247,6 +247,55 @@ def random_for_df(self, df, step_name, n=1): self.row_states.loc[df.index, "offset"] += n return rands + def random_for_df_stable_alt_positions( + self, + df, + step_name, + stable_alt_positions, + n_total_alts, + ): + """ + Return one uniform draw per stable-universe alternative and chooser row, + then project to the active alternative positions. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame with one row per chooser and one column per active alternative. + stable_alt_positions : 1-D ndarray + Mapping from active columns in `df` to positions in the larger stable + alternative universe. + n_total_alts : int + Number of alternatives in the larger stable universe. + + Returns + ------- + rands : 2-D ndarray + Array with shape `(len(df), df.shape[1])` containing uniforms aligned to + the active alternatives. + """ + + assert self.step_name + assert self.step_name == step_name + + n_alts = df.shape[1] + stable_alt_positions = np.asarray(stable_alt_positions) + if stable_alt_positions.shape != (n_alts,): + raise ValueError( + "stable_alt_positions must be a 1-D array aligned to df columns" + ) + if stable_alt_positions.min() < 0 or stable_alt_positions.max() >= n_total_alts: + raise ValueError( + "stable_alt_positions values must be within [0, n_total_alts)" + ) + + generators = self._generators_for_df(df) + rands = np.asanyarray( + [prng.rand(n_total_alts)[stable_alt_positions] for prng in generators] + ) + self.row_states.loc[df.index, "offset"] += n_total_alts + return rands + def gumbel_for_df(self, df, step_name, n=1): """ Return n floating point gumbel-distributed numbers for each row in df @@ -291,6 +340,153 @@ def gumbel_for_df(self, df, step_name, n=1): self.row_states.loc[df.index, "offset"] += n return rands + def gumbel_max_positions_for_df( + self, + utilities, + step_name, + sample_size, + stable_alt_positions=None, + n_total_alts=None, + ): + """ + Return the winning alternative position for each chooser/sample pair + 
without materializing the full chooser-by-alternative-by-sample Gumbel array. + + Parameters + ---------- + utilities : pandas.DataFrame + DataFrame with one row per chooser and one column per alternative. + sample_size : int + Number of repeated sampled choices to make per chooser. + stable_alt_positions : 1-D ndarray, optional + Mapping from active utility columns to positions in a larger stable + alternative universe. + n_total_alts : int, optional + Number of alternatives in the larger stable universe. + + Returns + ------- + positions : 2-D ndarray of int32 + Array with shape (len(utilities), sample_size) containing the column + position of the winning alternative for each chooser/sample pair. + """ + + assert self.step_name + assert self.step_name == step_name + + utility_values = utilities.to_numpy() + n_rows, n_alts = utility_values.shape + positions = np.empty((n_rows, sample_size), dtype=np.int32) + + if stable_alt_positions is not None or n_total_alts is not None: + if stable_alt_positions is None or n_total_alts is None: + raise ValueError( + "stable_alt_positions and n_total_alts must both be provided or omitted together" + ) + stable_alt_positions = np.asarray(stable_alt_positions) + if stable_alt_positions.shape != (n_alts,): + raise ValueError( + "stable_alt_positions must be a 1-D array aligned to utilities columns" + ) + if ( + stable_alt_positions.min() < 0 + or stable_alt_positions.max() >= n_total_alts + ): + raise ValueError( + "stable_alt_positions values must be within [0, n_total_alts)" + ) + n_gumbels = n_total_alts + else: + n_gumbels = n_alts + + generators = self._generators_for_df(utilities) + + # for each chooser, generate the error terms for all samples at once. reshaping this + # in (default) C order means that the first n_gumbels values are the gumbels for the + # first sample, the next n_gumbels values are the gumbels for the second sample, etc. 
+ for row_num, prng in enumerate(generators): + utility_row = utility_values[row_num] + row_gumbels = -np.log(-np.log(prng.rand(n_gumbels * sample_size))).reshape( + (sample_size, n_gumbels) + ) + if stable_alt_positions is not None: + row_gumbels = row_gumbels[:, stable_alt_positions] + positions[row_num, :] = np.argmax( + row_gumbels + utility_row[np.newaxis, :], + axis=1, + ) + + self.row_states.loc[utilities.index, "offset"] += n_gumbels * sample_size + return positions + + def gumbel_choice_positions_for_df( + self, + utilities, + step_name, + alt_nrs_df=None, + n_rands=None, + ): + """ + Return the winning alternative position for each chooser row without + materializing the utility-plus-error table. + + Parameters + ---------- + utilities : pandas.DataFrame + DataFrame with one row per chooser and one column per available alternative. + alt_nrs_df : pandas.DataFrame, optional + DataFrame aligned to `utilities` whose values identify which dense alternative + each utility column corresponds to. Use -999 for masked or unavailable positions. + n_rands : int, optional + Number of EV1 draws to generate per chooser row. Required when `alt_nrs_df` + is provided and may exceed the visible number of utility columns. + + Returns + ------- + positions : 1-D ndarray of int32 + Array with shape (len(utilities),) containing the winning column position + for each chooser row. 
+ """ + + assert self.step_name + assert self.step_name == step_name + + utility_values = utilities.to_numpy() + n_rows, n_alts = utility_values.shape + positions = np.empty(n_rows, dtype=np.int32) + + if alt_nrs_df is not None: + assert alt_nrs_df.shape == utilities.shape + if n_rands is None: + raise ValueError("n_rands is required when alt_nrs_df is provided") + alt_nr_values = alt_nrs_df.to_numpy() + masked = alt_nr_values == -999 + safe_alt_nrs = np.where(masked, 0, alt_nr_values) + else: + if n_rands is None: + n_rands = n_alts + elif n_rands != n_alts: + raise ValueError( + "n_rands must equal utilities.shape[1] when alt_nrs_df is omitted" + ) + alt_nr_values = masked = safe_alt_nrs = None + + generators = self._generators_for_df(utilities) + + for row_num, prng in enumerate(generators): + utility_row = utility_values[row_num] + row_gumbels = -np.log(-np.log(prng.rand(n_rands))) + + if alt_nrs_df is None: + positions[row_num] = np.argmax(row_gumbels + utility_row) + else: + candidate_values = utility_row + row_gumbels[safe_alt_nrs[row_num]] + candidate_values[masked[row_num]] = utility_row[masked[row_num]] + positions[row_num] = np.argmax(candidate_values) + + self.row_states.loc[utilities.index, "offset"] += n_rands + return positions + def normal_for_df(self, df, step_name, mu, sigma, lognormal=False, size=None): """ Return a floating point random number in normal (or lognormal) distribution @@ -445,7 +641,38 @@ def get_channel_for_df(self, df): raise TableIndexError("No channel with index name '%s'" % df.index.name) return self.channels[channel_name] - # step handling + def reset_offsets_for_step(self, step_name): + """ + Reset offsets for all channels for a step + + Parameters + ---------- + step_name : str + pipeline step name for this step + """ + + assert self.step_name == step_name + + for c in self.channels: + self.channels[c].row_states["offset"] = 0 + + def reset_offsets_for_df(self, df): + """ + Reset offsets for all choosers in df if the channel 
for a step + + Parameters + ---------- + step_name : str + pipeline step name for this step + df : pandas.DataFrame + df with index name and values corresponding to a registered channel + """ + channel = self.get_channel_for_df(df) + channel.row_states.loc[df.index, "offset"] = 0 + logger.info( + f"RNG: resetting random number generator offsets for channel '{channel.channel_name}' for {len(df)} rows" + + f" with index name '{df.index.name}'. Total lenght df: {len(channel.row_states)}" + ) def begin_step(self, step_name): """ @@ -663,6 +890,58 @@ def random_for_df(self, df, n=1): rands = channel.random_for_df(df, self.step_name, n) return rands + def random_for_df_stable_alt_positions( + self, + df, + stable_alt_positions, + n_total_alts, + ): + """ + Return per-row uniform draws aligned to active alternatives using a stable + larger alternative universe. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame with one row per chooser and one column per active alternative. + stable_alt_positions : 1-D ndarray + Mapping from active columns to positions in the larger stable alternative + universe. + n_total_alts : int + Number of alternatives in the larger stable universe. + + Returns + ------- + rands : 2-D ndarray + Array with shape `(len(df), df.shape[1])` containing uniforms aligned to + the active alternatives. 
def gumbel_max_positions_for_df(
    self,
    utilities,
    sample_size,
    stable_alt_positions=None,
    n_total_alts=None,
):
    """
    Find the column that maximizes utility plus a Gumbel draw, `sample_size`
    times per chooser.

    With registered channels the request is forwarded to the channel returned
    by `get_channel_for_df(utilities)`. Without channels the draws come from a
    fixed-seed ``numpy.random.RandomState(0)``, one chooser row at a time.

    Parameters
    ----------
    utilities : pandas.DataFrame
        One row per chooser and one column per active alternative.
    sample_size : int
        Number of repeated choices to make for each chooser.
    stable_alt_positions : 1-D array-like of int, optional
        For every utility column, its position inside a larger stable
        alternative universe. Must be supplied together with `n_total_alts`.
    n_total_alts : int, optional
        Size of the stable universe; this many error terms are drawn per
        sample and then sub-selected by `stable_alt_positions`.

    Returns
    -------
    positions : 2-D ndarray of int32
        Shape (len(utilities), sample_size); each entry is the column position
        of the winning alternative.

    Raises
    ------
    ValueError
        In the channel-less path, when only one of `stable_alt_positions` /
        `n_total_alts` is given, when the mapping is not aligned with the
        utility columns, or when it points outside ``[0, n_total_alts)``.
    """
    if self.channels:
        channel = self.get_channel_for_df(utilities)
        return channel.gumbel_max_positions_for_df(
            utilities,
            self.step_name,
            sample_size,
            stable_alt_positions=stable_alt_positions,
            n_total_alts=n_total_alts,
        )

    # channel-less fallback: deterministic stream shared by all rows
    utils = utilities.to_numpy()
    n_choosers, n_active = utils.shape
    winners = np.empty((n_choosers, sample_size), dtype=np.int32)
    fallback_rng = np.random.RandomState(0)

    remap = not (stable_alt_positions is None and n_total_alts is None)
    if remap:
        if stable_alt_positions is None or n_total_alts is None:
            raise ValueError(
                "stable_alt_positions and n_total_alts must both be provided or omitted together"
            )
        stable_alt_positions = np.asarray(stable_alt_positions)
        if stable_alt_positions.shape != (n_active,):
            raise ValueError(
                "stable_alt_positions must be a 1-D array aligned to utilities columns"
            )
        lowest = stable_alt_positions.min()
        if lowest < 0 or stable_alt_positions.max() >= n_total_alts:
            raise ValueError(
                "stable_alt_positions values must be within [0, n_total_alts)"
            )
        draws_per_sample = n_total_alts
    else:
        draws_per_sample = n_active

    # iterating a 2-D array yields row views, so writing into `winners_row`
    # fills `winners` in place
    for winners_row, util_row in zip(winners, utils):
        uniform = fallback_rng.rand(draws_per_sample * sample_size)
        gumbel = -np.log(-np.log(uniform))
        gumbel = gumbel.reshape((sample_size, draws_per_sample))
        if remap:
            # keep only the error terms that belong to the active columns
            gumbel = gumbel[:, stable_alt_positions]
        winners_row[:] = np.argmax(gumbel + util_row[np.newaxis, :], axis=1)

    return winners


def gumbel_choice_positions_for_df(self, utilities, alt_nrs_df=None, n_rands=None):
    """
    Find the single column that maximizes utility plus a Gumbel draw for each
    chooser row.

    With registered channels the request is forwarded to the channel returned
    by `get_channel_for_df(utilities)`. Without channels the draws come from a
    fixed-seed ``numpy.random.RandomState(0)``, one chooser row at a time.

    Parameters
    ----------
    utilities : pandas.DataFrame
        One row per chooser and one column per available alternative.
    alt_nrs_df : pandas.DataFrame, optional
        Same layout as `utilities`; each cell is the index of the error term
        to use for that cell. Cells equal to -999 are treated as masked and
        compete with their bare utility (no error term added).
    n_rands : int, optional
        Number of error terms drawn per chooser row. Required with
        `alt_nrs_df`; otherwise defaults to the number of utility columns.

    Returns
    -------
    positions : 1-D ndarray of int32
        Column position of the winning alternative for every chooser row.

    Raises
    ------
    ValueError
        In the channel-less path, when `alt_nrs_df` is given without
        `n_rands`.
    """
    if self.channels:
        channel = self.get_channel_for_df(utilities)
        return channel.gumbel_choice_positions_for_df(
            utilities,
            self.step_name,
            alt_nrs_df=alt_nrs_df,
            n_rands=n_rands,
        )

    fallback_rng = np.random.RandomState(0)
    utils = utilities.to_numpy()
    winners = np.empty(len(utilities), dtype=np.int32)

    if alt_nrs_df is None:
        # one error term per utility column unless the caller says otherwise
        draws_per_row = utils.shape[1] if n_rands is None else n_rands
        for row_pos, util_row in enumerate(utils):
            gumbel = -np.log(-np.log(fallback_rng.rand(draws_per_row)))
            winners[row_pos] = np.argmax(gumbel + util_row)
        return winners

    if n_rands is None:
        raise ValueError("n_rands is required when alt_nrs_df is provided")

    alt_nrs = alt_nrs_df.to_numpy()
    is_masked = alt_nrs == -999
    # masked cells get a harmless lookup index; their value is overwritten below
    lookup = np.where(is_masked, 0, alt_nrs)
    for row_pos, util_row in enumerate(utils):
        gumbel = -np.log(-np.log(fallback_rng.rand(n_rands)))
        scores = util_row + gumbel[lookup[row_pos]]
        row_mask = is_masked[row_pos]
        scores[row_mask] = util_row[row_mask]
        winners[row_pos] = np.argmax(scores)

    return winners
activitysim.core.simulate_consts import ( ALT_LOSER_UTIL, @@ -40,12 +40,21 @@ SPEC_EXPRESSION_NAME, SPEC_LABEL_NAME, ) -from activitysim.core.exceptions import ModelConfigurationError + +if TYPE_CHECKING: + from activitysim.core.estimation import Estimator logger = logging.getLogger(__name__) CustomChooser_T = Callable[ - [workflow.State, pd.DataFrame, pd.DataFrame, pd.DataFrame, str], + [ + workflow.State, + pd.DataFrame, + pd.DataFrame, + pd.DataFrame, + str, + dict | LogitNestSpec | None, + ], tuple[pd.Series, pd.Series], ] @@ -1503,56 +1512,38 @@ def eval_nl( ) if state.settings.use_explicit_error_terms: - # TODO-EET: Nested utility zero choice probability raw_utilities = logit.validate_utils( state, raw_utilities, allow_zero_probs=True, trace_label=trace_label ) - # utilities of leaves and nests - nested_utilities = compute_nested_utilities(raw_utilities, nest_spec) - chunk_sizer.log_df(trace_label, "nested_utilities", nested_utilities) - - # TODO-EET: use nested_utiltites directly to compute logsums? 
- if want_logsums: - # logsum of nest root - # exponentiated utilities of leaves and nests - nested_exp_utilities = compute_nested_exp_utilities( - raw_utilities, nest_spec - ) - chunk_sizer.log_df( - trace_label, "nested_exp_utilities", nested_exp_utilities - ) - logsums = pd.Series(np.log(nested_exp_utilities.root), index=choosers.index) - chunk_sizer.log_df(trace_label, "logsums", logsums) - - # TODO-EET: index of choices for nested utilities is different than unnested - this needs to be consistent for - # turning indexes into alternative names to keep code changes to minimum for now - name_mapping = raw_utilities.columns.values - - del raw_utilities - chunk_sizer.log_df(trace_label, "raw_utilities", None) - if custom_chooser: choices, rands = custom_chooser( state, - utilities=nested_utilities, - name_mapping=name_mapping, - choosers=choosers, - spec=spec, + raw_utilities, + choosers, + spec, + trace_label, nest_spec=nest_spec, - trace_label=trace_label, ) else: choices, rands = logit.make_choices_utility_based( state, - nested_utilities, - name_mapping=name_mapping, - nest_spec=nest_spec, + raw_utilities, trace_label=trace_label, + nest_spec=nest_spec, ) - del nested_utilities - chunk_sizer.log_df(trace_label, "nested_utilities", None) + if want_logsums: + # utilities of leaves and nests + nested_utilities = compute_nested_utilities(raw_utilities, nest_spec) + chunk_sizer.log_df(trace_label, "nested_utilities", nested_utilities) + logsums = pd.Series(nested_utilities.root, index=choosers.index) + chunk_sizer.log_df(trace_label, "logsums", logsums) + del nested_utilities + chunk_sizer.log_df(trace_label, "nested_utilities", None) + + del raw_utilities + chunk_sizer.log_df(trace_label, "raw_utilities", None) else: # exponentiated utilities of leaves and nests diff --git a/activitysim/core/test/test_interaction_sample.py b/activitysim/core/test/test_interaction_sample.py new file mode 100644 index 0000000000..9fd6b267ae --- /dev/null +++ 
@pytest.fixture
def state() -> workflow.State:
    """Provide a default-settings State with variability checks switched off."""
    fresh_state = workflow.State().default_settings()
    fresh_state.settings.check_for_variability = False
    return fresh_state


def test_interaction_sample_ignores_stable_positions_without_global_eet(
    state, monkeypatch
):
    """
    With explicit error terms disabled, `interaction_sample` must hand
    `stable_alt_positions=None` and `n_total_alts=None` to
    `_interaction_sample`, whatever the caller passed.
    """
    # Do not support stable alt positions or tracking total alts when running with MC sampling
    # to not introduce any additional changes while adding eet simulation support to ensure no
    # regressions. We can add these features later if desired.
    forwarded = {}

    def recording_stub(_state, _choosers, _alternatives, **kwargs):
        # remember what the public wrapper forwarded, then return a canned sample
        for key in ("stable_alt_positions", "n_total_alts"):
            forwarded[key] = kwargs[key]
        person_index = pd.Index([1, 2], name="person_id")
        canned = {"alt_id": [10, 11], "prob": [1.0, 1.0], "pick_count": [1, 1]}
        return pd.DataFrame(canned, index=person_index)

    monkeypatch.setattr(interaction_sample, "_interaction_sample", recording_stub)

    state.settings.use_explicit_error_terms = False

    choosers = pd.DataFrame(index=pd.Index([1, 2], name="person_id"))
    alternatives = pd.DataFrame(index=pd.Index([10, 11, 12], name="alt_id"))
    expression_index = pd.Index(["1"], name="Expression")
    spec = pd.DataFrame({"coefficient": [1.0]}, index=expression_index)
    requested_positions = np.array([0, 2], dtype=np.int64)

    interaction_sample.interaction_sample(
        state,
        choosers,
        alternatives,
        spec,
        sample_size=1,
        alt_col_name="alt_id",
        stable_alt_positions=requested_positions,
        n_total_alts=3,
    )

    assert forwarded["stable_alt_positions"] is None
    assert forwarded["n_total_alts"] is None
def _weighted_shares(df: pd.DataFrame) -> pd.Series:
    """
    Return each alternative's share of the total pick count.

    `df` needs `alt_id` and `pick_count`; the result is indexed by `alt_id`
    in ascending order and sums to one.
    """
    picks_by_alt = df.groupby("alt_id")["pick_count"].sum()
    total_picks = picks_by_alt.sum()
    shares = picks_by_alt / total_picks
    return shares.sort_index()
+ + num_choosers = 100_000 + num_alts = 100 + sample_size = 10 + + # Create random choosers and alternatives + rng = np.random.default_rng(42) + choosers = pd.DataFrame( + {"chooser_attr": rng.random(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + alternatives = pd.DataFrame( + {"alt_attr": rng.random(num_alts)}, + index=pd.Index(range(num_alts), name="alt_id"), + ) + + # Simple spec: utility = chooser_attr * alt_attr + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["chooser_attr * alt_attr"], name="Expression"), + ) + + # Run Monte Carlo with replacement. + state.settings.use_explicit_error_terms = False + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_mnl") + + choices_mnl = interaction_sample.interaction_sample( + state, + choosers, + alternatives, + spec, + sample_size=sample_size, + alt_col_name="alt_id", + ) + + # Run Poisson inclusion sampling, which is the default when global EET is enabled. + state.init_state() # reset the state to rerun with same seed + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_poisson") + + choices_poisson = interaction_sample.interaction_sample( + state, + choosers, + alternatives, + spec, + sample_size=sample_size, + alt_col_name="alt_id", + ) + + # Run EET-with-replacement with the same global EET setting. 
@pytest.mark.parametrize("sample_method", ["eet", "poisson"])
def test_interaction_sample_eet_unavailable_alternatives(state, sample_method):
    """
    Effectively unavailable alternatives must never be sampled under global EET.

    The sampling method is requested explicitly. Leaving `compute_settings`
    unset under global EET selects the Poisson sampler, so without the
    explicit "eet" case the EET-with-replacement sampler would not be
    exercised by this test at all.
    """
    num_choosers = 100
    num_alts = 10
    num_available = 5
    sample_size = 2

    choosers = pd.DataFrame(
        {"chooser_attr": np.ones(num_choosers)},
        index=pd.Index(range(num_choosers), name="person_id"),
    )

    # Alt 0-4 are attractive, Alt 5-9 are "unavailable"
    alternatives = pd.DataFrame(
        {
            "alt_attr": [10.0] * num_available
            + [-1000.0] * (num_alts - num_available)
        },
        index=pd.Index(range(num_alts), name="alt_id"),
    )

    spec = pd.DataFrame(
        {"coefficient": [1.0]},
        index=pd.Index(["alt_attr"], name="Expression"),
    )

    state.settings.use_explicit_error_terms = True
    state.rng().set_base_seed(42)
    state.rng().add_channel("person_id", choosers)
    state.rng().begin_step(f"test_unavailable_{sample_method}")

    choices = interaction_sample.interaction_sample(
        state,
        choosers,
        alternatives,
        spec,
        sample_size=sample_size,
        alt_col_name="alt_id",
        compute_settings=ComputeSettings(sample_method=sample_method),
    )

    # guard against a vacuous pass: `.all()` on an empty sample is True
    assert not choices.empty

    # Sampled alternatives should only be from Alt 0-4
    available_alts = list(range(num_available))
    assert choices["alt_id"].isin(available_alts).all()
+ state.init_state() + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_peaked_explicit") + choices_explicit = interaction_sample.interaction_sample( + state, + choosers, + alternatives, + spec, + sample_size=sample_size, + alt_col_name="alt_id", + compute_settings=ComputeSettings(sample_method="eet"), + ) + + mnl_shares = _weighted_shares(choices_mnl) + explicit_shares = _weighted_shares(choices_explicit) + + all_alts = set(mnl_shares.index) | set(explicit_shares.index) + for alt in all_alts: + diff = abs(mnl_shares.get(alt, 0.0) - explicit_shares.get(alt, 0.0)) + assert diff < 0.005, ( + f"Peaked utility parity mismatch at alt {alt}: " + f"mnl={mnl_shares.get(alt, 0.0):.6f}, " + f"explicit={explicit_shares.get(alt, 0.0):.6f}, diff={diff:.6f}" + ) + + # The dominant alternative should absorb almost all mass in both paths. + assert mnl_shares.get(0, 0.0) > 0.99 + assert explicit_shares.get(0, 0.0) > 0.99 + + +def test_interaction_sample_peaked_utilities_poisson_matches_inclusion_shares(state): + # Poisson sampling does not reproduce repeated-draw MNL shares in peaked cases. + # It samples each alternative independently with inclusion probability + # 1 - (1 - p)^sample_size, so the dominant alternative's share is flattened + # relative to MNL once the included set is normalized. This is also the + # default interaction_sample behavior when global EET is enabled. 
def _shares_for_sample(
    state,
    choosers,
    alternatives,
    spec,
    sample_size,
    *,
    use_eet,
    sample_method,
    seed,
    step_name,
):
    """
    Run `interaction_sample` once from a re-initialised state.

    Calls `state.init_state()`, applies `use_eet` to
    `use_explicit_error_terms`, seeds the RNG with `seed`, registers
    `choosers` on the "person_id" channel and begins `step_name`. A falsy
    `sample_method` leaves `compute_settings` as None.

    Returns
    -------
    (choices, shares)
        The sampled choices and their `_weighted_shares`.
    """
    state.init_state()
    state.settings.use_explicit_error_terms = use_eet

    state.rng().set_base_seed(seed)
    state.rng().add_channel("person_id", choosers)
    state.rng().begin_step(step_name)

    sample_kwargs = {
        "sample_size": sample_size,
        "alt_col_name": "alt_id",
        "compute_settings": None,
    }
    if sample_method:
        sample_kwargs["compute_settings"] = ComputeSettings(
            sample_method=sample_method
        )

    sampled = interaction_sample.interaction_sample(
        state, choosers, alternatives, spec, **sample_kwargs
    )
    return sampled, _weighted_shares(sampled)
test_interaction_sample_poisson_sampling_under_mc_simulation(state): + # use_eet=False + sample_method="poisson" used to silently fall through to MC + # sampling and then have pick_count forced to 1, corrupting results. After + # decoupling, the combo must run the Poisson path and match use_eet=True + poisson. + num_choosers = 100_000 + num_alts = 100 + sample_size = 10 + + rng = np.random.default_rng(42) + choosers = pd.DataFrame( + {"chooser_attr": rng.random(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + alternatives = pd.DataFrame( + {"alt_attr": rng.random(num_alts)}, + index=pd.Index(range(num_alts), name="alt_id"), + ) + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["chooser_attr * alt_attr"], name="Expression"), + ) + + choices_mc_sim, shares_mc_sim = _shares_for_sample( + state, + choosers, + alternatives, + spec, + sample_size, + use_eet=False, + sample_method="poisson", + seed=42, + step_name="test_poisson_under_mc_sim", + ) + _, shares_eet_sim = _shares_for_sample( + state, + choosers, + alternatives, + spec, + sample_size, + use_eet=True, + sample_method="poisson", + seed=42, + step_name="test_poisson_under_eet_sim", + ) + + # Poisson contract: pick_count must be uniformly 1 + assert (choices_mc_sim["pick_count"] == 1).all(), ( + "Poisson sampling under MC simulation must produce pick_count=1; got " + f"{choices_mc_sim['pick_count'].value_counts().to_dict()}" + ) + + all_alts = set(shares_mc_sim.index) | set(shares_eet_sim.index) + for alt in all_alts: + diff = abs(shares_mc_sim.get(alt, 0.0) - shares_eet_sim.get(alt, 0.0)) + assert diff < 0.01, ( + f"Poisson sampling shares should not depend on simulation mode at alt {alt}: " + f"mc_sim={shares_mc_sim.get(alt, 0.0):.4f}, " + f"eet_sim={shares_eet_sim.get(alt, 0.0):.4f}, diff={diff:.4f}" + ) + + +def test_interaction_sample_mc_sampling_under_eet_simulation(state): + num_choosers = 100_000 + num_alts = 100 + sample_size = 10 + + rng = 
class _DummyChunkSizer:
    """Chunk-sizer stand-in: accepts any `log_df` call and does nothing."""

    def log_df(self, *_args, **_kwargs):
        return None


class _DummyState:
    """State stand-in that only exposes a caller-supplied random generator."""

    def __init__(self, rng):
        self._rng = rng

    def get_rn_generator(self):
        return self._rng


class _SequentialDummyRng:
    """
    Random generator stand-in that replays pre-baked draw matrices in order.

    Every call consumes the next matrix and asserts that its shape matches
    what was requested, so a test fails if draws are requested in an
    unexpected order or size. Running out of matrices raises IndexError.
    """

    def __init__(self, draws):
        # copy so the caller's sequence is not consumed
        self._draws = list(draws)

    def random_for_df(self, df, n=1):
        draw = self._draws.pop(0)
        assert draw.shape == (len(df), n)
        return draw

    def random_for_df_stable_alt_positions(
        self, df, stable_alt_positions, n_total_alts
    ):
        # the baked matrix spans the full stable universe; return only the
        # columns that belong to the active alternatives
        draw = self._draws.pop(0)
        assert draw.shape == (len(df), n_total_alts)
        return draw[:, stable_alt_positions]


class _DummyRngUtilityBased:
    """
    Random generator stand-in for utility-based sampling.

    `rands_3d` holds fixed error terms with shape
    (n_choosers, n_alternatives, sample_size); with `stable_alt_positions`
    the second axis spans the full stable universe instead.
    """

    def __init__(self, rands_3d):
        self.rands_3d = rands_3d

    def gumbel_max_positions_for_df(
        self,
        utilities,
        sample_size,
        stable_alt_positions=None,
        n_total_alts=None,
    ):
        assert sample_size == self.rands_3d.shape[2]
        if stable_alt_positions is None:
            active_rands = self.rands_3d
        else:
            assert n_total_alts == self.rands_3d.shape[1]
            active_rands = self.rands_3d[:, stable_alt_positions, :]
        # broadcast utilities over the sample axis, then pick the best
        # alternative (axis 1) for every chooser/sample pair
        return np.argmax(
            active_rands + utilities.to_numpy()[:, :, np.newaxis],
            axis=1,
        )


def _expected_choices_df(sampled_alternatives, alternatives, alt_col_name):
    """
    Flatten a chooser-by-alternative matrix into the expected long frame.

    Parameters
    ----------
    sampled_alternatives : pandas.DataFrame
        One row per chooser, one column per alternative *position*; a cell
        holds the sampled probability, or NaN when that alternative was not
        sampled for that chooser.
    alternatives : pandas.DataFrame
        Its index supplies the alternative id for each column position.
    alt_col_name : str
        Name of the alternative-id column in the result.

    Returns
    -------
    pandas.DataFrame
        One row per non-NaN cell, in row-major order, with the chooser index
        as a column, then "prob", then `alt_col_name`.
    """
    alt_ids = alternatives.index.values
    return (
        sampled_alternatives.rename_axis("alt_idx", axis=1)
        .stack()
        # Legacy `stack` drops NaN cells implicitly; the new implementation
        # (future_stack=True) keeps them. Dropping explicitly gives the same
        # frame either way.
        .dropna()
        .reset_index(name="prob")
        .assign(**{alt_col_name: lambda df: alt_ids[df["alt_idx"].to_numpy()]})
        .drop(columns=["alt_idx"])
    )
def test_poisson_sample_alternatives_retries_and_returns_expected_frames():
    # Scripted scenario: two of three choosers get a sample from the first
    # draw matrix, while the middle chooser only gets one from a second,
    # single-row draw. The expected frame below encodes that outcome.
    # NOTE(review): the row sums of `probs` are not 1; presumably irrelevant
    # because only per-cell inclusion probabilities are compared - confirm.
    probs = pd.DataFrame(
        [
            [0.20, 0.60, 0.10, 0.05],
            [0.40, 0.10, 0.30, 0.20],
            [0.30, 0.20, 0.70, 0.10],
        ],
        index=pd.Index([11, 17, 42], name="person_id"),
        columns=np.arange(4),
    )
    sample_size = 2
    alternatives = pd.DataFrame(index=pd.Index([100, 300, 700, 900], name="alt_id"))
    # probability that an alternative is picked at least once in
    # `sample_size` independent draws
    expected_inclusion_probs = 1 - (1 - probs) ** sample_size
    # NaN marks "not sampled"; assuming an alternative is kept when its draw
    # is below its inclusion probability (TODO confirm against the sampler):
    #   chooser 11 keeps position 0 only (0.10 < 0.36),
    #   chooser 17 keeps nothing from the all-0.90 first draw and then keeps
    #     positions 0 and 1 from the second draw (0.10 < 0.64, 0.05 < 0.19),
    #   chooser 42 keeps position 2 only (0.20 < 0.91).
    expected_sampled_alternatives = pd.DataFrame(
        [
            [expected_inclusion_probs.iloc[0, 0], np.nan, np.nan, np.nan],
            [
                expected_inclusion_probs.iloc[1, 0],
                expected_inclusion_probs.iloc[1, 1],
                np.nan,
                np.nan,
            ],
            [np.nan, np.nan, expected_inclusion_probs.iloc[2, 2], np.nan],
        ],
        index=probs.index,
        columns=probs.columns,
    )
    # The stub replays these matrices in order: first a 3x4 draw for all
    # choosers, then a 1x4 draw sized for a single chooser.
    state = _DummyState(
        _SequentialDummyRng(
            [
                np.array(
                    [
                        [0.10, 0.90, 0.50, 0.90],
                        [0.90, 0.90, 0.90, 0.90],
                        [0.80, 0.90, 0.20, 0.80],
                    ],
                    dtype=np.float64,
                ),
                np.array([[0.10, 0.05, 0.70, 0.80]], dtype=np.float64),
            ]
        )
    )

    choices_df = interaction_sample._poisson_sample_alternatives(
        chunk_sizer=_DummyChunkSizer(),
        probs=probs,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name="alt_id",
        state=state,
        trace_label="test_poisson_sample_alternatives_retries_and_returns_expected_frames",
    )

    # flatten the expected matrix into the long chooser/prob/alt_id layout
    expected_choices_df = _expected_choices_df(
        expected_sampled_alternatives,
        alternatives,
        "alt_id",
    )

    pd.testing.assert_frame_equal(choices_df, expected_choices_df)
def test_poisson_sample_alternatives_repeat_alignment_chooser_dominant_heterogeneity():
    """
    Chooser/probability alignment must survive flattening of the Poisson sample.

    Edge case: utilities barely differ between alternatives but differ
    strongly between choosers, so a misaligned row shows up as a wrong `prob`.
    """
    label = "test_repeat_alignment_chooser_heterogeneity"
    person_ids = pd.Index([101, 102, 103, 104, 105, 106], name="person_id")
    alt_ids = pd.Index([0, 1, 2, 3], name="alt_id")
    choosers = pd.DataFrame(index=person_ids)
    alternatives = pd.DataFrame(index=alt_ids)
    sample_size = 3

    # tiny spread across alternatives, huge spread across choosers
    alt_signal = np.array([0.00, 0.01, 0.02, 0.03], dtype=np.float64)
    chooser_scale = np.array([-500.0, -200.0, -50.0, 50.0, 200.0, 500.0])
    utilities = pd.DataFrame(np.outer(chooser_scale, alt_signal), index=person_ids)

    uniform_draws = np.array(
        [
            [0.01, 0.90, 0.90, 0.90],
            [0.80, 0.05, 0.90, 0.90],
            [0.90, 0.10, 0.40, 0.90],
            [0.90, 0.90, 0.10, 0.20],
            [0.90, 0.90, 0.02, 0.10],
            [0.90, 0.90, 0.90, 0.001],
        ],
        dtype=np.float64,
    )
    state = _DummyState(_SequentialDummyRng([uniform_draws]))

    probs = interaction_sample.logit.utils_to_probs(
        state,
        utilities,
        allow_zero_probs=False,
        trace_label=label,
        overflow_protection=True,
        trace_choosers=choosers,
    )

    out = interaction_sample._poisson_sample_alternatives(
        chunk_sizer=_DummyChunkSizer(),
        probs=probs,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name="alt_id",
        state=state,
        trace_label=label,
    )

    # materialized reference: keep a cell when its draw is below its
    # inclusion probability, then list the kept cells in row-major order
    inclusion = 1 - np.power(1 - probs.to_numpy(), sample_size)
    kept = uniform_draws < inclusion
    row_pos, col_pos = np.nonzero(kept)

    expected = pd.DataFrame(
        {
            "person_id": person_ids.to_numpy()[row_pos],
            "prob": inclusion[row_pos, col_pos],
            "alt_id": alt_ids.to_numpy()[col_pos],
        }
    )

    pd.testing.assert_frame_equal(out.reset_index(drop=True), expected)
def test_make_sample_choices_eet_matches_materialized_path():
    """
    `make_sample_choices_eet` must agree with a direct argmax over
    utilities plus the injected error terms.
    """
    label = "test_make_sample_choices_eet_matches_materialized_path"
    person_ids = pd.Index([201, 202, 203], name="person_id")
    alt_ids = pd.Index([10, 11, 12, 13], name="alt_id")
    choosers = pd.DataFrame(index=person_ids)
    alternatives = pd.DataFrame(index=alt_ids)
    utilities = pd.DataFrame(
        [[0.0, 0.3, -0.2, 0.1], [1.0, 0.2, 0.4, -0.5], [-0.1, 0.0, 0.8, 0.7]],
        index=person_ids,
    )
    sample_size = 2
    # fixed error terms, shape (chooser, alternative, sample)
    error_terms = np.array(
        [
            [[0.1, -0.3], [0.2, 0.4], [0.5, -0.1], [0.0, 0.2]],
            [[-0.2, 0.3], [0.6, -0.5], [0.1, 0.7], [0.4, 0.2]],
            [[0.0, 0.1], [0.3, -0.4], [0.2, 0.5], [-0.3, 0.2]],
        ],
        dtype=np.float64,
    )
    state = _DummyState(_DummyRngUtilityBased(error_terms))

    probs = interaction_sample.logit.utils_to_probs(
        state,
        utilities,
        allow_zero_probs=False,
        trace_label=label,
        overflow_protection=True,
        trace_choosers=choosers,
    )

    out = interaction_sample.make_sample_choices_eet(
        state=state,
        choosers=choosers,
        utilities=utilities,
        probs=probs,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name="alt_id",
        trace_label=label,
        chunk_sizer=_DummyChunkSizer(),
    )

    # materialized reference: best alternative per chooser/sample pair,
    # flattened chooser-major so each chooser contributes `sample_size` rows
    perturbed = error_terms + utilities.to_numpy()[:, :, np.newaxis]
    winner_positions = np.argmax(perturbed, axis=1).reshape(-1)
    chooser_positions = np.repeat(np.arange(len(choosers)), sample_size)

    expected = pd.DataFrame(
        {
            "person_id": person_ids.values[chooser_positions],
            "prob": probs.to_numpy()[chooser_positions, winner_positions],
            "alt_id": alt_ids.values[winner_positions],
        }
    )

    pd.testing.assert_frame_equal(out.reset_index(drop=True), expected)
utilities, + allow_zero_probs=False, + trace_label="test_make_sample_choices_eet_stable_alt_mapping_matches_materialized_path", + overflow_protection=True, + trace_choosers=choosers, + ) + + out = interaction_sample.make_sample_choices_eet( + state=state, + choosers=choosers, + utilities=utilities, + probs=probs, + alternatives=alternatives, + sample_size=sample_size, + alt_col_name="alt_id", + trace_label="test_make_sample_choices_eet_stable_alt_mapping_matches_materialized_path", + chunk_sizer=_DummyChunkSizer(), + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + + active_rands = dense_rands_3d[:, stable_alt_positions, :] + chosen_positions = np.argmax( + active_rands + utilities.to_numpy()[:, :, np.newaxis], + axis=1, + ) + chosen_flat = chosen_positions.reshape(-1) + chooser_idx = np.repeat(np.arange(len(choosers)), sample_size) + + expected = pd.DataFrame( + { + "person_id": choosers.index.values[chooser_idx], + "prob": probs.to_numpy()[chooser_idx, chosen_flat], + "alt_id": alternatives.index.values[chosen_flat], + } + ) + + pd.testing.assert_frame_equal(out.reset_index(drop=True), expected) + + +def test_poisson_sample_alternatives_stable_alt_mapping_matches_materialized_path(): + chooser_index = pd.Index([311, 312], name="person_id") + choosers = pd.DataFrame(index=chooser_index) + alternatives = pd.DataFrame(index=pd.Index([10, 12, 14], name="alt_id")) + utilities = pd.DataFrame( + [[0.0, 0.3, -0.2], [1.0, 0.2, 0.4]], + index=chooser_index, + ) + sample_size = 2 + stable_alt_positions = np.array([0, 2, 4], dtype=np.int64) + n_total_alts = 5 + dense_uniforms = np.array( + [ + [0.05, 0.90, 0.10, 0.80, 0.20], + [0.90, 0.70, 0.05, 0.60, 0.10], + ], + dtype=np.float64, + ) + state = _DummyState(_SequentialDummyRng([dense_uniforms])) + + probs = interaction_sample.logit.utils_to_probs( + state, + utilities, + allow_zero_probs=False, + trace_label="test_poisson_sample_alternatives_stable_alt_mapping_matches_materialized_path", + 
overflow_protection=True, + trace_choosers=choosers, + ) + + out = interaction_sample._poisson_sample_alternatives( + chunk_sizer=_DummyChunkSizer(), + probs=probs, + alternatives=alternatives, + sample_size=sample_size, + alt_col_name="alt_id", + state=state, + trace_label="test_poisson_sample_alternatives_stable_alt_mapping_matches_materialized_path", + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + + probs_np = probs.to_numpy() + inclusion_probs = 1 - np.power(1 - probs_np, sample_size) + active_uniforms = dense_uniforms[:, stable_alt_positions] + sampled_values = np.where( + active_uniforms < inclusion_probs, inclusion_probs, np.nan + ) + chooser_idx, alt_idx = np.nonzero(~np.isnan(sampled_values)) + + expected = pd.DataFrame( + { + "person_id": choosers.index.values[chooser_idx], + "prob": sampled_values[chooser_idx, alt_idx], + "alt_id": alternatives.index.values[alt_idx], + } + ) + + pd.testing.assert_frame_equal(out.reset_index(drop=True), expected) + + +def test_poisson_sample_alternatives_falls_back_after_retries(): + chooser_index = pd.Index([301, 302], name="person_id") + choosers = pd.DataFrame(index=chooser_index) + alternatives = pd.DataFrame(index=pd.Index([10, 12, 14], name="alt_id")) + utilities = pd.DataFrame( + [[0.0, 0.3, -0.2], [1.0, 0.2, 0.4]], + index=chooser_index, + ) + sample_size = 2 + fail_draw = np.full((2, 3), 0.99, dtype=np.float64) + fallback_draw = np.array( + [ + [0.40, 0.10, 0.20], + [0.30, 0.20, 0.90], + ], + dtype=np.float64, + ) + state = _DummyState(_SequentialDummyRng([fail_draw] * 10 + [fallback_draw])) + + probs = interaction_sample.logit.utils_to_probs( + state, + utilities, + allow_zero_probs=False, + trace_label="test_falls_back_after_retries", + overflow_protection=True, + trace_choosers=choosers, + ) + + out = interaction_sample._poisson_sample_alternatives( + chunk_sizer=_DummyChunkSizer(), + probs=probs, + alternatives=alternatives, + sample_size=sample_size, + alt_col_name="alt_id", + 
state=state, + trace_label="test_falls_back_after_retries", + ) + + expected = pd.DataFrame( + { + "person_id": [301, 301, 302, 302], + "prob": [1.0, 1.0, 1.0, 1.0], + "alt_id": [12, 14, 10, 12], + } + ) + + pd.testing.assert_frame_equal(out.reset_index(drop=True), expected) diff --git a/activitysim/core/test/test_interaction_sample_simulate.py b/activitysim/core/test/test_interaction_sample_simulate.py new file mode 100644 index 0000000000..090bf2d3d5 --- /dev/null +++ b/activitysim/core/test/test_interaction_sample_simulate.py @@ -0,0 +1,225 @@ +# ActivitySim +# See full license in LICENSE.txt. + +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest + +from activitysim.core import interaction_sample_simulate, workflow +from activitysim.core.logit import AltsContext + + +@pytest.fixture +def state() -> workflow.State: + state = workflow.State().default_settings() + state.settings.check_for_variability = False + return state + + +def test_interaction_sample_simulate_parity(state): + # Run interaction_sample_simulate with and without explicit error terms and check that results are similar. + + num_choosers = 100_000 + num_alts_per_chooser = 5 # small sample size to keep things simple + + # Create random choosers + rng = np.random.default_rng(42) + choosers = pd.DataFrame( + {"chooser_attr": rng.random(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + # Create random alternatives for each chooser + # In interaction_sample_simulate, alternatives is typically a DataFrame with the same index as choosers + # but repeated for each alternative in the sample. 
+ alt_ids = np.tile(np.arange(num_alts_per_chooser), num_choosers) + alternatives = pd.DataFrame( + { + "alt_attr": rng.random(num_choosers * num_alts_per_chooser), + "alt_id": alt_ids, + "tdd": alt_ids, + }, + index=np.repeat(choosers.index, num_alts_per_chooser), + ) + alternatives.index.name = "person_id" + + # Simple spec: utility = chooser_attr * alt_attr + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["chooser_attr * alt_attr"], name="Expression"), + ) + + # Run _without_ explicit error terms + state.settings.use_explicit_error_terms = False + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_mnl") + + choices_mnl = interaction_sample_simulate.interaction_sample_simulate( + state, + choosers, + alternatives, + spec, + choice_column="tdd", + ) + + # Run _with_ explicit error terms + state.init_state() + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_explicit") + + choices_explicit = interaction_sample_simulate.interaction_sample_simulate( + state, + choosers, + alternatives, + spec, + choice_column="tdd", + alts_context=AltsContext.from_num_alts(num_alts_per_chooser, zero_based=True), + ) + + assert len(choices_mnl) == num_choosers + assert len(choices_explicit) == num_choosers + assert choices_mnl.index.equals(choosers.index) + assert choices_explicit.index.equals(choosers.index) + assert not choices_mnl.isna().any() + assert not choices_explicit.isna().any() + + # choices are series with the same index as choosers and containing the choice (from choice_column) + mnl_counts = choices_mnl.value_counts(normalize=True).sort_index() + explicit_counts = choices_explicit.value_counts(normalize=True).sort_index() + + for alt in range(num_alts_per_chooser): + share_mnl = mnl_counts.get(alt, 0) + share_explicit = explicit_counts.get(alt, 0) + diff = abs(share_mnl - 
share_explicit) + assert diff < 0.01, ( + f"Large discrepancy at alt {alt}: " + f"mnl={share_mnl:.4f}, explicit={share_explicit:.4f}, diff={diff:.4f}" + ) + + +def test_interaction_sample_simulate_eet_unavailable_alternatives(state): + # Test that EET handles unavailable alternatives in sample simulation + + num_choosers = 10 + num_alts_per_chooser = 5 + + choosers = pd.DataFrame( + {"chooser_attr": np.ones(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + # For each chooser, 2 attractive alts, 3 unavailable + alt_attrs = [10.0, 10.0, -1000.0, -1000.0, -1000.0] * num_choosers + alt_ids = [0, 1, 2, 3, 4] * num_choosers + + alternatives = pd.DataFrame( + { + "alt_attr": alt_attrs, + "alt_id": alt_ids, + "tdd": alt_ids, + }, + index=np.repeat(choosers.index, num_alts_per_chooser), + ) + alternatives.index.name = "person_id" + + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["alt_attr"], name="Expression"), + ) + + # Run with EET + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_unavailable_eet") + + choices_eet = interaction_sample_simulate.interaction_sample_simulate( + state, + choosers, + alternatives, + spec, + choice_column="tdd", + alts_context=AltsContext.from_num_alts(num_alts_per_chooser, zero_based=True), + ) + + assert len(choices_eet) == num_choosers + assert choices_eet.index.equals(choosers.index) + assert not choices_eet.isna().any() + + # Choices should only be 0 or 1 + assert choices_eet.isin([0, 1]).all() + assert not choices_eet.isin([2, 3, 4]).any() + + +def test_interaction_sample_simulate_passes_alts_context_and_alt_nrs_df( + state, monkeypatch +): + state.settings.use_explicit_error_terms = True + + choosers = pd.DataFrame( + {"chooser_attr": [1.0, 1.0]}, + index=pd.Index([100, 101], name="person_id"), + ) + alternatives = pd.DataFrame( + { + "alt_attr": [1.0, 0.5, 0.8, 1.2], + "tdd": [0, 
2, 0, 2], + }, + index=pd.Index([100, 100, 101, 101], name="person_id"), + ) + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["alt_attr"], name="Expression"), + ) + + captured = {} + + def fake_make_choices_utility_based( + _state, + utilities, + nest_spec=None, + trace_label=None, + trace_choosers=None, + allow_bad_utils=False, + alts_context=None, + alt_nrs_df=None, + ): + captured["alts_context"] = alts_context + captured["alt_nrs_df"] = alt_nrs_df.copy() if alt_nrs_df is not None else None + return pd.Series([0, 0], index=utilities.index), pd.Series( + np.zeros(len(utilities.index)), index=utilities.index + ) + + monkeypatch.setattr( + interaction_sample_simulate.logit, + "make_choices_utility_based", + fake_make_choices_utility_based, + ) + + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_alts_context_forwarding") + + ctx = AltsContext.from_num_alts(3, zero_based=True) + choices = interaction_sample_simulate.interaction_sample_simulate( + state, + choosers, + alternatives, + spec, + choice_column="tdd", + alts_context=ctx, + ) + + assert len(choices) == len(choosers) + assert captured["alts_context"] == ctx + assert captured["alt_nrs_df"] is not None + expected_alt_nrs = pd.DataFrame( + [[0, 2], [0, 2]], + index=choosers.index, + ) + pd.testing.assert_frame_equal(captured["alt_nrs_df"], expected_alt_nrs) diff --git a/activitysim/core/test/test_interaction_simulate.py b/activitysim/core/test/test_interaction_simulate.py new file mode 100644 index 0000000000..af9442e228 --- /dev/null +++ b/activitysim/core/test/test_interaction_simulate.py @@ -0,0 +1,174 @@ +# ActivitySim +# See full license in LICENSE.txt. 
+ +import numpy as np +import pandas as pd +import pytest + +from activitysim.core import interaction_simulate, workflow + + +@pytest.fixture +def state() -> workflow.State: + state = workflow.State().default_settings() + state.settings.check_for_variability = False + return state + + +def test_interaction_simulate_explicit_error_terms_parity(state): + # Run interaction_simulate with and without explicit error terms and check that results are similar. + + # Keep this large enough for stable parity checks without overloading CI. + num_choosers = 100_000 + num_alts = 5 + sample_size = num_alts + + # Create random choosers and alternatives + rng = np.random.default_rng(42) + choosers = pd.DataFrame( + {"chooser_attr": rng.random(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + alternatives = pd.DataFrame( + {"alt_attr": rng.random(num_alts)}, + index=pd.Index(range(num_alts), name="alt_id"), + ) + + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["chooser_attr * alt_attr"], name="Expression"), + ) + + # Run _without_ explicit error terms + state.settings.use_explicit_error_terms = False + state.rng().set_base_seed(42) # Set seed BEFORE adding channels or steps + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_mnl") + + choices_mnl = interaction_simulate.interaction_simulate( + state, + choosers, + alternatives, + spec, + sample_size=sample_size, + ) + + # Run _with_ explicit error terms + state.init_state() # reset the state to rerun with same seed + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_step_explicit") + + choices_explicit = interaction_simulate.interaction_simulate( + state, + choosers, + alternatives, + spec, + sample_size=sample_size, + ) + + assert len(choices_mnl) == num_choosers + assert len(choices_explicit) == num_choosers + assert 
choices_mnl.index.equals(choosers.index) + assert choices_explicit.index.equals(choosers.index) + assert not choices_mnl.isna().any() + assert not choices_explicit.isna().any() + + mnl_counts = choices_mnl.value_counts(normalize=True).sort_index() + explicit_counts = choices_explicit.value_counts(normalize=True).sort_index() + + # Check that they are close, relative to the number of draws + assert np.allclose( + mnl_counts.to_numpy(), explicit_counts.to_numpy(), atol=0.01, rtol=0.001 + ) + + +def test_interaction_simulate_eet_unavailable_alternatives(state): + # Test that EET handles unavailable alternatives (very low utilities) + # similarly to MNL (zero probabilities). + + num_choosers = 100 + num_alts = 5 + + choosers = pd.DataFrame( + {"chooser_attr": np.ones(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + # Alt 0 and 1 are attractive, Alt 2, 3, 4 are "unavailable" (very low utility) + alternatives = pd.DataFrame( + {"alt_attr": [10.0, 10.0, -1000.0, -1000.0, -1000.0]}, + index=pd.Index(range(num_alts), name="alt_id"), + ) + + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["alt_attr"], name="Expression"), + ) + + # Run with EET + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_unavailable_eet") + + choices_eet = interaction_simulate.interaction_simulate( + state, + choosers, + alternatives, + spec, + sample_size=num_alts, + ) + + assert len(choices_eet) == num_choosers + assert choices_eet.index.equals(choosers.index) + assert not choices_eet.isna().any() + + # Choices should only be from Alt 0 or 1 + assert choices_eet.isin( + [0, 1] + ).all(), f"EET picked an 'unavailable' alternative: {choices_eet[~choices_eet.isin([0, 1])]}" + + +def test_interaction_simulate_eet_large_utilities(state): + # Test that EET handles very large utilities without overflow issues + # that might occur in exp(util) 
calculations in standard MNL. + + num_choosers = 10 + num_alts = 2 + + choosers = pd.DataFrame( + {"chooser_attr": np.ones(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + # Standard MNL might struggle with exp(700) or exp(800) depending on float precision + alternatives = pd.DataFrame( + {"alt_attr": [700.0, 800.0]}, + index=pd.Index(range(num_alts), name="alt_id"), + ) + + spec = pd.DataFrame( + {"coefficient": [1.0]}, + index=pd.Index(["alt_attr"], name="Expression"), + ) + + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", choosers) + state.rng().begin_step("test_large_utils_eet") + + # This should run without crashing or returning NaNs + choices_eet = interaction_simulate.interaction_simulate( + state, + choosers, + alternatives, + spec, + sample_size=num_alts, + ) + + assert not choices_eet.isna().any() + # With such a large difference, Alt 1 should be the dominant choice + assert (choices_eet == 1).all() diff --git a/activitysim/core/test/test_logit.py b/activitysim/core/test/test_logit.py index c82606981f..143920fda6 100644 --- a/activitysim/core/test/test_logit.py +++ b/activitysim/core/test/test_logit.py @@ -9,7 +9,9 @@ import pandas.testing as pdt import pytest -from activitysim.core import logit, workflow +from activitysim.core import logit, random, simulate, workflow +from activitysim.core.exceptions import InvalidTravelError +from activitysim.core.logit import AltsContext from activitysim.core.simulate import eval_variables @@ -70,7 +72,122 @@ def utilities(choosers, spec, test_data): ) -# TODO-EET: Add tests here! 
+@pytest.fixture(scope="module") +def interaction_choosers(): + return pd.DataFrame({"attr": ["a", "b", "c", "b"]}, index=["w", "x", "y", "z"]) + + +@pytest.fixture(scope="module") +def interaction_alts(): + return pd.DataFrame({"prop": [10, 20, 30, 40]}, index=[1, 2, 3, 4]) + + +# +# Utility Validation Tests +# +def test_validate_utils_replaces_unavailable_values(): + state = workflow.State().default_settings() + utils = pd.DataFrame([[0.0, logit.UTIL_MIN - 1.0], [1.0, 2.0]]) + + validated = logit.validate_utils(state, utils, allow_zero_probs=False) + + assert validated.iloc[0, 0] == pytest.approx(0.0) + assert validated.iloc[0, 1] == pytest.approx(logit.UTIL_UNAVAILABLE) + assert validated.iloc[1, 0] == pytest.approx(1.0) + assert validated.iloc[1, 1] == pytest.approx(2.0) + + +def test_validate_utils_raises_when_all_unavailable(): + state = workflow.State().default_settings() + utils = pd.DataFrame([[logit.UTIL_MIN - 1.0, logit.UTIL_MIN - 2.0]]) + + with pytest.raises(InvalidTravelError) as excinfo: + logit.validate_utils(state, utils, allow_zero_probs=False) + + assert "all probabilities are zero" in str(excinfo.value) + + +def test_validate_utils_allows_zero_probs(): + state = workflow.State().default_settings() + utils = pd.DataFrame([[0.5, logit.UTIL_MIN - 1.0]]) + + validated = logit.validate_utils(state, utils, allow_zero_probs=True) + + assert validated.iloc[0, 0] == 0.5 + assert validated.iloc[0, 1] == logit.UTIL_UNAVAILABLE + + +# +# `utils_to_probs` Tests +# +def test_utils_to_probs_logsums_with_overflow_protection(): + state = workflow.State().default_settings() + utils = pd.DataFrame( + [[1000.0, 1001.0, 999.0], [-1000.0, -1001.0, -999.0]], + columns=["a", "b", "c"], + ) + original_utils = utils.copy() + + probs, logsums = logit.utils_to_probs( + state, + utils, + trace_label=None, + overflow_protection=True, + return_logsums=True, + ) + + utils_np = original_utils.to_numpy() + row_max = utils_np.max(axis=1, keepdims=True) + exp_shifted = 
np.exp(utils_np - row_max) + expected_probs = exp_shifted / exp_shifted.sum(axis=1, keepdims=True) + expected_logsums = pd.Series( + np.log(exp_shifted.sum(axis=1)) + row_max.squeeze(), + index=utils.index, + ) + + pdt.assert_frame_equal( + probs, + pd.DataFrame(expected_probs, index=utils.index, columns=utils.columns), + rtol=1.0e-7, + atol=0.0, + ) + pdt.assert_series_equal(logsums, expected_logsums, rtol=1.0e-7, atol=0.0) + + +def test_utils_to_probs_warns_on_zero_probs_overflow(): + state = workflow.State().default_settings() + utils = pd.DataFrame( + [[logit.UTIL_MIN - 1.0, logit.UTIL_MIN - 2.0], [0.0, 0.0]], + columns=["a", "b"], + ) + + with pytest.warns(UserWarning, match="cannot set overflow_protection"): + probs = logit.utils_to_probs( + state, + utils, + trace_label=None, + allow_zero_probs=True, + overflow_protection=True, + ) + + assert (probs.iloc[0] == 0.0).all() + assert probs.iloc[1].sum() == pytest.approx(1.0) + assert probs.iloc[1].iloc[0] == pytest.approx(0.5) + assert probs.iloc[1].iloc[1] == pytest.approx(0.5) + + +def test_utils_to_probs_raises_on_float32_zero_probs_overflow(): + state = workflow.State().default_settings() + utils = pd.DataFrame(np.array([[90.0, 0.0]], dtype=np.float32)) + + with pytest.raises(ValueError, match="cannot prevent expected overflow"): + logit.utils_to_probs( + state, + utils, + trace_label=None, + allow_zero_probs=True, + overflow_protection=True, + ) def test_utils_to_probs(utilities, test_data): @@ -119,6 +236,9 @@ def test_utils_to_probs_raises(): assert np.asarray(z).ravel() == pytest.approx(np.asarray([0.0, 0.0, 1.0, 0.0])) +# +# `make_choices` Tests +# def test_make_choices_only_one(): state = workflow.State().default_settings() probs = pd.DataFrame( @@ -143,16 +263,1382 @@ def test_make_choices_real_probs(utilities): ) -@pytest.fixture(scope="module") -def interaction_choosers(): - return pd.DataFrame({"attr": ["a", "b", "c", "b"]}, index=["w", "x", "y", "z"]) +def test_different_order_make_choices(): + # 
check if, when we shuffle utilities, make_choices chooses the same alternatives + state = workflow.State().default_settings() + # increase number of choosers and alternatives for realism + n_choosers = 100 + n_alts = 50 + data = np.random.rand(n_choosers, n_alts) + chooser_ids = np.arange(n_choosers) + alt_ids = [f"alt_{i}" for i in range(n_alts)] -@pytest.fixture(scope="module") -def interaction_alts(): - return pd.DataFrame({"prop": [10, 20, 30, 40]}, index=[1, 2, 3, 4]) + utilities = pd.DataFrame( + data, + index=pd.Index(chooser_ids, name="chooser_id"), + columns=alt_ids, + ) + + # We need a stable RNG that gives the same random numbers for the same chooser_id + # regardless of row order. ActivitySim's random.Random does this. + state.get_rn_generator().add_channel("chooser_id", utilities) + state.get_rn_generator().begin_step("test_step") + + probs = logit.utils_to_probs(state, utilities, trace_label=None) + choices, rands = logit.make_choices(state, probs) + + # shuffle utilities (rows) and make_choices again + # We must reset the step offset so the RNG produces the same sequence for the same IDs + state.get_rn_generator().end_step("test_step") + state.get_rn_generator().begin_step("test_step") + utilities_shuffled = utilities.sample(frac=1, random_state=42) + probs_shuffled = logit.utils_to_probs(state, utilities_shuffled, trace_label=None) + choices_shuffled, rands_shuffled = logit.make_choices(state, probs_shuffled) + + # sorting both to ensure comparison is on the same index order + pdt.assert_series_equal( + choices.sort_index(), choices_shuffled.sort_index(), check_dtype=False + ) + + +def test_make_choices_matches_random_draws(): + class DummyRNG: + def random_for_df(self, df, n=1): + assert n == 1 + return np.array([[0.05], [0.6], [0.95]]) + + class DummyState: + @staticmethod + def get_rn_generator(): + return DummyRNG() + + state = DummyState() + probs = pd.DataFrame( + [[0.1, 0.2, 0.7], [0.4, 0.4, 0.2], [0.05, 0.9, 0.05]], + index=["a", "b", "c"], 
+ columns=["x", "y", "z"], + ) + choices, rands = logit.make_choices(state, probs) + + expected_rands = np.array([0.05, 0.6, 0.95]) + expected_choices = np.array([0, 1, 1]) + + pdt.assert_series_equal( + rands, + pd.Series(expected_rands, index=probs.index), + check_names=False, + ) + pdt.assert_series_equal( + choices, + pd.Series(expected_choices, index=probs.index), + check_dtype=False, + ) + + +# EET Choice Behavior Tests +# +def test_make_choices_eet_mnl(monkeypatch): + class DummyRNG: + def gumbel_choice_positions_for_df( + self, utilities, alt_nrs_df=None, n_rands=None + ): + assert alt_nrs_df is None + assert n_rands is None + assert list(utilities.columns) == ["a", "b"] + return np.array([1, 0], dtype=np.int32) + + class DummyState: + @staticmethod + def get_rn_generator(): + return DummyRNG() + + choices = logit.make_choices_explicit_error_term_mnl( + DummyState(), + pd.DataFrame([[0.0, 0.0], [0.0, 0.0]], index=[100, 101], columns=["a", "b"]), + trace_label=None, + ) + + pdt.assert_series_equal( + choices, + pd.Series([1, 0], index=[100, 101], dtype=np.int32), + ) + + +def test_make_choices_eet_nl(monkeypatch): + def fake_sample_nested_logit_exact_leaf_error_terms(_state, df, nest_spec): + assert nest_spec["name"] == "root" + assert list(df.columns) == ["walk", "car", "bus"] + + error_terms = pd.DataFrame(0.0, index=df.index, columns=df.columns) + error_terms.loc[10, ["walk", "car", "bus"]] = [1.0, 5.0, 3.0] + error_terms.loc[11, ["walk", "car", "bus"]] = [4.0, 2.0, 3.0] + return error_terms + + monkeypatch.setattr( + logit, + "sample_nested_logit_exact_leaf_error_terms", + fake_sample_nested_logit_exact_leaf_error_terms, + ) + + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + {"name": "motorized", "coefficient": 0.7, "alternatives": ["car", "bus"]}, + "walk", + ], + } + + state = workflow.State().default_settings() + monkeypatch.setattr(state.tracing, "trace_df", lambda *args, **kwargs: None) + + choices = 
logit.make_choices_explicit_error_term_nl( + state, + pd.DataFrame( + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + index=[10, 11], + columns=["walk", "car", "bus"], + ), + nest_spec, + trace_label="test", + ) + + pdt.assert_series_equal(choices, pd.Series([1, 0], index=[10, 11])) + + +def test_sample_nested_logit_exact_leaf_error_terms_accumulates_node_and_leaf_terms( + monkeypatch, +): + stable_draws = np.array([0.4, -0.2], dtype=np.float64) + + def fake_log_positive_stable_for_df(_state, df, alpha): + assert alpha == pytest.approx(0.5) + assert list(df.columns) == ["car", "bus", "walk"] + return stable_draws + + monkeypatch.setattr( + logit, "_log_positive_stable_for_df", fake_log_positive_stable_for_df + ) + + class DummyRNG: + @staticmethod + def gumbel_for_df(df, n): + assert n == df.shape[1] + return np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64) + + class DummyState: + @staticmethod + def get_rn_generator(): + return DummyRNG() + + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + {"name": "motorized", "coefficient": 0.5, "alternatives": ["car", "bus"]}, + "walk", + ], + } + alt_utilities = pd.DataFrame( + 0.0, + index=pd.Index([10, 11], name="chooser_id"), + columns=["car", "bus", "walk"], + dtype=np.float64, + ) + + error_terms = logit.sample_nested_logit_exact_leaf_error_terms( + DummyState(), alt_utilities, nest_spec + ) + + expected = pd.DataFrame( + { + "car": [0.7, 1.9], + "bus": [1.2, 2.4], + "walk": [3.0, 6.0], + }, + index=alt_utilities.index, + dtype=np.float64, + ) + + pdt.assert_frame_equal(error_terms, expected) + + +def test_make_choices_utility_based_sets_zero_rands(monkeypatch): + def fake_make_choices_explicit_error_term_mnl( + _state, + utilities, + trace_label, + trace_choosers=None, + alts_context=None, + alt_nrs_df=None, + ): + assert trace_choosers is None + assert alts_context is None + assert alt_nrs_df is None + return pd.Series([0, 1], index=utilities.index) + + monkeypatch.setattr( + logit, + 
"make_choices_explicit_error_term_mnl", + fake_make_choices_explicit_error_term_mnl, + ) + + utilities = pd.DataFrame([[3.0, 2.0], [1.0, 4.0]], index=[11, 12]) + choices, rands = logit.make_choices_utility_based( + workflow.State().default_settings(), + utilities, + nest_spec=None, + trace_label=None, + ) + + expected_choices = pd.Series([0, 1], index=[11, 12]) + pdt.assert_series_equal(choices, expected_choices) + pdt.assert_series_equal(rands, pd.Series([0, 0], index=[11, 12])) + + +# +# EET vs non-EET Choice Behavior Tests +# +def test_make_choices_vs_eet_same_distribution(): + """With many draws, make_choices (probability-based) and + make_choices_explicit_error_term_mnl (EET) should produce roughly the + same empirical choice-frequency distribution for the same utilities.""" + n_draws = 1_000_000 + a_tol = 0.001 + r_tol = 0.01 + utils_values = [5.0, 6.0, 7.0, 8.0, 9.0] + n_alts = len(utils_values) + columns = ["a", "b", "c", "d", "e"] + + utils = pd.DataFrame([utils_values] * n_draws, columns=columns) + + # Probability-based (Monte Carlo) path — independent RNG + mc_rng = np.random.default_rng(42) + + class MCDummyRNG: + def random_for_df(self, df, n=1): + return mc_rng.random((len(df), n)) + + class MCDummyState: + @staticmethod + def get_rn_generator(): + return MCDummyRNG() + + probs = logit.utils_to_probs( + MCDummyState(), utils, trace_label=None, overflow_protection=True + ) + choices_mc, _ = logit.make_choices(MCDummyState(), probs, trace_label=None) + + # Explicit-error-term (EET) path — independent RNG + eet_rng = np.random.default_rng(123) + + class EETDummyRNG: + def random_for_df(self, df, n=1): + return eet_rng.random((len(df), n)) + + def gumbel_for_df(self, df, n): + return eet_rng.gumbel(size=(len(df), n)) + + def gumbel_choice_positions_for_df( + self, utilities, alt_nrs_df=None, n_rands=None + ): + assert alt_nrs_df is None + assert n_rands is None + return np.argmax( + eet_rng.gumbel(size=(len(utilities), utilities.shape[1])) + + 
utilities.to_numpy(), + axis=1, + ) + + class EETDummyState: + @staticmethod + def get_rn_generator(): + return EETDummyRNG() + + choices_eet = logit.make_choices_explicit_error_term_mnl( + EETDummyState(), utils, trace_label=None + ) + + mc_fracs = np.bincount(choices_mc.values.astype(int), minlength=n_alts) / n_draws + eet_fracs = np.bincount(choices_eet.values.astype(int), minlength=n_alts) / n_draws + + np.testing.assert_allclose(mc_fracs, eet_fracs, atol=a_tol, rtol=r_tol) + np.testing.assert_allclose( + mc_fracs, probs.iloc[0].to_numpy(), atol=a_tol, rtol=r_tol + ) + np.testing.assert_allclose( + eet_fracs, probs.iloc[0].to_numpy(), atol=a_tol, rtol=r_tol + ) + + +def test_make_choices_vs_eet_nl_same_distribution(): + """With many draws, nested logit choices via probabilities and + nested logit choices via EET should produce the same empirical distribution.""" + n_draws = 100_000 + a_tol = 0.01 + + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + {"name": "motorized", "coefficient": 0.5, "alternatives": ["car", "bus"]}, + "walk", + ], + } + # Utilities for car, bus, walk + leaf_utilities = pd.DataFrame( + [[0.5, 0.2, 0.4]], + columns=["car", "bus", "walk"], + ) + utils_df = pd.concat([leaf_utilities] * n_draws, ignore_index=True) + # 1. 
Probability-based Nested Logit choices + mc_rng = np.random.default_rng(42) + class MCDummyRNG: + def random_for_df(self, df, n=1): + return mc_rng.random((len(df), n)) + + class MCDummyState: + @staticmethod + def get_rn_generator(): + return MCDummyRNG() + + def default_settings(self): + return self + + # Compute probabilities for NL using simulation logic + nested_exp_utilities = simulate.compute_nested_exp_utilities(utils_df, nest_spec) + nested_probabilities = simulate.compute_nested_probabilities( + MCDummyState(), nested_exp_utilities, nest_spec, trace_label=None + ) + probs = simulate.compute_base_probabilities( + nested_probabilities, nest_spec, utils_df + ) + choices_mc, _ = logit.make_choices(MCDummyState(), probs, trace_label=None) + + # 2. EET-based Nested Logit choices + eet_rng = np.random.default_rng(123) + + class EETDummyRNG: + def random_for_df(self, df, n=1): + return eet_rng.random((len(df), n)) + + def gumbel_for_df(self, df, n): + return eet_rng.gumbel(size=(len(df), n)) + + class EETDummyState: + @staticmethod + def get_rn_generator(): + return EETDummyRNG() + + def default_settings(self): + return self + + @property + def tracing(self): + import activitysim.core.tracing as tracing + + return tracing + + choices_eet = logit.make_choices_explicit_error_term_nl( + EETDummyState(), + utils_df, + nest_spec, + trace_label=None, + ) + + mc_fracs = np.bincount(choices_mc.values.astype(int), minlength=3) / n_draws + eet_fracs = np.bincount(choices_eet.values.astype(int), minlength=3) / n_draws + + np.testing.assert_allclose(mc_fracs, eet_fracs, atol=a_tol) + + +def _repeated_utility_df(raw_utilities: pd.Series, n_draws: int) -> pd.DataFrame: + raw_utilities = pd.Series(raw_utilities, dtype=float) + return pd.DataFrame( + np.repeat(raw_utilities.to_numpy()[None, :], n_draws, axis=0), + columns=raw_utilities.index, + index=pd.RangeIndex(n_draws, name="chooser_id"), + ) + + +def _make_rng_state( + df: pd.DataFrame, + seed: int, + step_name: str, +) -> 
workflow.State: + state = workflow.State().default_settings() + rng = state.get_rn_generator() + rng.set_base_seed(seed) + rng.add_channel(df.index.name, df) + rng.begin_step(step_name) + return state + + +def _finish_rng_state(state: workflow.State, step_name: str) -> None: + state.get_rn_generator().end_step(step_name) + + +def _choice_shares(choices: pd.Series, alt_names) -> pd.Series: + alt_names = pd.Index(alt_names) + counts = np.bincount(choices.to_numpy(dtype=int), minlength=len(alt_names)) + return pd.Series(counts / counts.sum(), index=alt_names) + + +def _expected_nested_logit_shares( + raw_utilities: pd.Series, + nest_spec: dict, + seed: int = 42, +) -> pd.Series: + raw_df = _repeated_utility_df(raw_utilities, n_draws=1) + step_name = f"expected_nested_logit_{len(raw_utilities)}_seed_{seed}" + state = _make_rng_state(raw_df, seed=seed, step_name=step_name) + try: + nested_exp_utilities = simulate.compute_nested_exp_utilities(raw_df, nest_spec) + nested_probabilities = simulate.compute_nested_probabilities( + state, nested_exp_utilities, nest_spec, trace_label=None + ) + base_probabilities = simulate.compute_base_probabilities( + nested_probabilities, nest_spec, raw_df + ) + finally: + _finish_rng_state(state, step_name) + + return base_probabilities.iloc[0] + + +def _nested_logit_eet_shares( + raw_utilities: pd.Series, + nest_spec: dict, + n_draws: int, + seed: int = 42, +) -> pd.Series: + raw_df = _repeated_utility_df(raw_utilities, n_draws=n_draws) + step_name = f"nested_eet_exact_leaf_{n_draws}_{len(raw_utilities)}" + state = _make_rng_state(raw_df, seed=seed, step_name=step_name) + try: + choices = logit.make_choices_explicit_error_term_nl( + state, + raw_df, + nest_spec, + trace_label=None, + ) + finally: + _finish_rng_state(state, step_name) + + return _choice_shares(choices, raw_df.columns) + + +def _nested_logit_mc_shares( + raw_utilities: pd.Series, + nest_spec: dict, + n_draws: int, + seed: int = 42, +) -> pd.Series: + raw_df = 
_repeated_utility_df(raw_utilities, n_draws=n_draws) + step_name = f"nested_mc_{n_draws}_{len(raw_utilities)}" + state = _make_rng_state(raw_df, seed=seed, step_name=step_name) + try: + nested_exp_utilities = simulate.compute_nested_exp_utilities(raw_df, nest_spec) + nested_probabilities = simulate.compute_nested_probabilities( + state, nested_exp_utilities, nest_spec, trace_label=None + ) + base_probabilities = simulate.compute_base_probabilities( + nested_probabilities, nest_spec, raw_df + ) + choices, _ = logit.make_choices(state, base_probabilities, trace_label=None) + finally: + _finish_rng_state(state, step_name) + + return _choice_shares(choices, raw_df.columns) + + +def _assert_empirical_shares_close( + observed: pd.Series, + expected: pd.Series, + n_draws: int, + sigma_multiplier: float = 6.0, + variance_floor: float = 0.02, +) -> None: + expected = expected.reindex(observed.index) + tolerances = sigma_multiplier * np.sqrt( + np.maximum(expected * (1.0 - expected), variance_floor) / n_draws + ) + differences = (observed - expected).abs() + assert (differences <= tolerances).all(), pd.DataFrame( + { + "observed": observed, + "expected": expected, + "abs_diff": differences, + "tolerance": tolerances, + } + ).to_string() + + +def _nested_logit_method_share_matrix( + raw_utilities: pd.Series, + nest_spec: dict, + method: str, + n_draws: int, + seeds: list[int], +) -> np.ndarray: + share_samples = [] + for seed in seeds: + if method == "mc": + shares = _nested_logit_mc_shares( + raw_utilities, + nest_spec, + n_draws=n_draws, + seed=seed, + ) + elif method == "exact_leaf": + shares = _nested_logit_eet_shares( + raw_utilities, + nest_spec, + n_draws=n_draws, + seed=seed, + ) + else: + raise ValueError(f"unknown nested-logit share method: {method}") + share_samples.append(shares.to_numpy()) + + return np.vstack(share_samples) + + +def _assert_average_empirical_shares_close( + observed_matrix: np.ndarray, + expected: pd.Series, + n_draws: int, + sigma_multiplier: 
float = 6.0, + variance_floor: float = 0.02, +) -> None: + expected = expected.astype(float) + mean_observed = pd.Series(observed_matrix.mean(axis=0), index=expected.index) + effective_draws = n_draws * observed_matrix.shape[0] + tolerances = sigma_multiplier * np.sqrt( + np.maximum(expected * (1.0 - expected), variance_floor) / effective_draws + ) + differences = (mean_observed - expected).abs() + assert (differences <= tolerances).all(), pd.DataFrame( + { + "mean_observed": mean_observed, + "expected": expected, + "abs_diff": differences, + "tolerance": tolerances, + } + ).to_string() + + +def _assert_average_share_deltas_close( + baseline_matrix: np.ndarray, + perturbed_matrix: np.ndarray, + baseline_expected: pd.Series, + perturbed_expected: pd.Series, + n_draws: int, + sigma_multiplier: float = 6.0, + variance_floor: float = 0.02, +) -> None: + observed_delta = pd.Series( + perturbed_matrix.mean(axis=0) - baseline_matrix.mean(axis=0), + index=baseline_expected.index, + ) + expected_delta = perturbed_expected - baseline_expected + effective_draws = n_draws * baseline_matrix.shape[0] + variances = ( + np.maximum(baseline_expected * (1.0 - baseline_expected), variance_floor) + + np.maximum(perturbed_expected * (1.0 - perturbed_expected), variance_floor) + ) / effective_draws + tolerances = sigma_multiplier * np.sqrt(variances) + differences = (observed_delta - expected_delta).abs() + assert (differences <= tolerances).all(), pd.DataFrame( + { + "observed_delta": observed_delta, + "expected_delta": expected_delta, + "abs_diff": differences, + "tolerance": tolerances, + } + ).to_string() + + +def _assert_nested_logit_methods_match_expected_across_seeds( + raw_utilities: pd.Series, + nest_spec: dict, + n_draws: int, + seeds: list[int], + methods: tuple[str, ...] 
= ("mc", "exact_leaf"), +) -> dict[str, np.ndarray]: + expected = _expected_nested_logit_shares(raw_utilities, nest_spec) + share_matrices: dict[str, np.ndarray] = {} + for method in methods: + share_matrix = _nested_logit_method_share_matrix( + raw_utilities, + nest_spec, + method=method, + n_draws=n_draws, + seeds=seeds, + ) + _assert_average_empirical_shares_close(share_matrix, expected, n_draws=n_draws) + share_matrices[method] = share_matrix + + for i, left_method in enumerate(methods): + for right_method in methods[i + 1 :]: + left_mean = pd.Series( + share_matrices[left_method].mean(axis=0), + index=raw_utilities.index.to_numpy(), + ) + right_mean = pd.Series( + share_matrices[right_method].mean(axis=0), + index=raw_utilities.index.to_numpy(), + ) + tolerances = 8.0 * np.sqrt( + 2.0 + * np.maximum(expected * (1.0 - expected), 0.02) + / (n_draws * len(seeds)) + ) + differences = (left_mean - right_mean).abs() + assert (differences <= tolerances).all(), pd.DataFrame( + { + "left_method": left_method, + "right_method": right_method, + "left_mean": left_mean, + "right_mean": right_mean, + "abs_diff": differences, + "tolerance": tolerances, + } + ).to_string() + + return share_matrices + + +def _rmse(values: np.ndarray) -> float: + return float(np.sqrt(np.mean(np.square(values)))) + + +def _estimate_power_law_slope(draw_counts: np.ndarray, errors: np.ndarray) -> float: + clipped_errors = np.clip(errors.astype(float), np.finfo(float).eps, None) + slope, _intercept = np.polyfit( + np.log(draw_counts.astype(float)), np.log(clipped_errors), deg=1 + ) + return float(slope) + + +def _assert_three_level_nested_logit_methods_follow_power_law( + draw_counts: np.ndarray, + seeds: list[int], + slope_lower: float = -0.8, + slope_upper: float = -0.2, + pair_slope_lower: float | None = None, + pair_slope_upper: float | None = None, + max_final_method_error: float | None = None, + max_final_pair_error: float | None = None, +) -> None: + if pair_slope_lower is None: + 
pair_slope_lower = slope_lower + if pair_slope_upper is None: + pair_slope_upper = slope_upper + + method_names = ["mc", "exact_leaf"] + pair_names = [("mc", "exact_leaf")] + + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.72, + "alternatives": [ + { + "name": "DRIVEALONE", + "coefficient": 0.35, + "alternatives": ["DA_FREE", "DA_PAY"], + } + ], + }, + { + "name": "TRANSIT", + "coefficient": 0.72, + "alternatives": [ + { + "name": "WALKACCESS", + "coefficient": 0.50, + "alternatives": ["WALK_LOC", "WALK_EXP"], + } + ], + }, + { + "name": "NONMOTORIZED", + "coefficient": 0.72, + "alternatives": ["WALK"], + }, + ], + } + raw_utilities = pd.Series( + { + "DA_FREE": 1.4, + "DA_PAY": 0.9, + "WALK_LOC": 0.5, + "WALK_EXP": 0.2, + "WALK": 0.0, + } + ) + + expected = _expected_nested_logit_shares(raw_utilities, nest_spec) + method_errors = {name: [] for name in method_names} + pair_errors = {pair: [] for pair in pair_names} + + for n_draws in draw_counts: + shares_by_method = {name: [] for name in method_names} + + for seed in seeds: + shares_by_method["mc"].append( + _nested_logit_mc_shares( + raw_utilities, + nest_spec, + n_draws=int(n_draws), + seed=seed, + ) + ) + shares_by_method["exact_leaf"].append( + _nested_logit_eet_shares( + raw_utilities, + nest_spec, + n_draws=int(n_draws), + seed=seed, + ) + ) + + for method_name, share_samples in shares_by_method.items(): + share_matrix = np.vstack([share.to_numpy() for share in share_samples]) + centered = share_matrix - expected.to_numpy() + method_errors[method_name].append(_rmse(centered)) + + for left_name, right_name in pair_names: + left_matrix = np.vstack( + [share.to_numpy() for share in shares_by_method[left_name]] + ) + right_matrix = np.vstack( + [share.to_numpy() for share in shares_by_method[right_name]] + ) + pair_errors[(left_name, right_name)].append( + _rmse(left_matrix - right_matrix) + ) + + for method_name, errors in 
method_errors.items(): + errors = np.asarray(errors, dtype=float) + slope = _estimate_power_law_slope(draw_counts, errors) + assert ( + slope_lower <= slope <= slope_upper + ), f"{method_name} slope {slope:.3f} outside [{slope_lower}, {slope_upper}]" + assert ( + errors[-1] < errors[0] + ), f"{method_name} errors did not decrease: {errors}" + if max_final_method_error is not None: + assert ( + errors[-1] <= max_final_method_error + ), f"{method_name} final error {errors[-1]:.6f} exceeds {max_final_method_error:.6f}" + + for left_name, right_name in pair_names: + errors = np.asarray(pair_errors[(left_name, right_name)], dtype=float) + slope = _estimate_power_law_slope(draw_counts, errors) + assert ( + pair_slope_lower <= slope <= pair_slope_upper + ), f"{left_name} vs {right_name} slope {slope:.3f} outside [{pair_slope_lower}, {pair_slope_upper}]" + assert ( + errors[-1] < errors[0] + ), f"{left_name} vs {right_name} errors did not decrease: {errors}" + if max_final_pair_error is not None: + assert ( + errors[-1] <= max_final_pair_error + ), f"{left_name} vs {right_name} final error {errors[-1]:.6f} exceeds {max_final_pair_error:.6f}" + + +NESTED_LOGIT_EXACT_PARITY_CASES = [ + pytest.param( + { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.72, + "alternatives": ["DA_FREE", "DA_PAY"], + }, + {"name": "NONMOTORIZED", "coefficient": 0.80, "alternatives": ["WALK"]}, + ], + }, + pd.Series({"DA_FREE": 1.2, "DA_PAY": 0.7, "WALK": 0.1}), + np.array(["DA_FREE", "DA_PAY", "WALK"]), + id="two_level_single_leaf_nest", + ), + pytest.param( + { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.72, + "alternatives": [ + { + "name": "DRIVEALONE", + "coefficient": 0.35, + "alternatives": ["DA_FREE", "DA_PAY"], + } + ], + }, + { + "name": "TRANSIT", + "coefficient": 0.72, + "alternatives": [ + { + "name": "WALKACCESS", + "coefficient": 0.50, + "alternatives": ["WALK_LOC", 
"WALK_EXP"], + } + ], + }, + { + "name": "NONMOTORIZED", + "coefficient": 0.72, + "alternatives": ["WALK"], + }, + ], + }, + pd.Series( + { + "DA_FREE": 1.4, + "DA_PAY": 0.9, + "WALK_LOC": 0.5, + "WALK_EXP": 0.2, + "WALK": 0.0, + } + ), + np.array(["DA_FREE", "DA_PAY", "WALK_LOC", "WALK_EXP", "WALK"]), + id="three_level_single_leaf_chains", + ), + pytest.param( + { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "MOTORIZED", + "coefficient": 0.78, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.62, + "alternatives": ["DA_FREE", "DA_PAY"], + }, + { + "name": "RIDEHAIL", + "coefficient": 0.58, + "alternatives": ["RH_SHARED", "RH_SOLO"], + }, + ], + }, + { + "name": "ACTIVE", + "coefficient": 0.85, + "alternatives": ["BIKE", "WALK"], + }, + ], + }, + pd.Series( + { + "DA_FREE": 1.1, + "DA_PAY": 0.8, + "RH_SHARED": 0.7, + "RH_SOLO": 0.9, + "BIKE": 0.2, + "WALK": 0.0, + } + ), + np.array(["DA_FREE", "DA_PAY", "RH_SHARED", "RH_SOLO", "BIKE", "WALK"]), + id="three_level_balanced", + ), + pytest.param( + { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.72, + "alternatives": [ + { + "name": "DRIVE", + "coefficient": 0.60, + "alternatives": [ + { + "name": "SOLO", + "coefficient": 0.45, + "alternatives": ["DA_FREE", "DA_PAY"], + } + ], + } + ], + }, + { + "name": "TRANSIT", + "coefficient": 0.75, + "alternatives": [ + { + "name": "ACCESS", + "coefficient": 0.55, + "alternatives": [ + { + "name": "LOCAL", + "coefficient": 0.50, + "alternatives": ["WALK_LOC", "WALK_EXP"], + } + ], + } + ], + }, + {"name": "ACTIVE", "coefficient": 0.82, "alternatives": ["WALK"]}, + ], + }, + pd.Series( + { + "DA_FREE": 1.5, + "DA_PAY": 1.0, + "WALK_LOC": 0.7, + "WALK_EXP": 0.4, + "WALK": 0.1, + } + ), + np.array(["DA_FREE", "DA_PAY", "WALK_LOC", "WALK_EXP", "WALK"]), + id="four_level_single_leaf_chains", + ), + pytest.param( + { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": 
"MOTORIZED", + "coefficient": 0.80, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.68, + "alternatives": [ + { + "name": "SOLO", + "coefficient": 0.48, + "alternatives": ["DA_FREE", "DA_PAY"], + }, + { + "name": "SHARED", + "coefficient": 0.52, + "alternatives": ["SR2", "SR3"], + }, + ], + }, + { + "name": "TRANSIT", + "coefficient": 0.72, + "alternatives": [ + { + "name": "WALKACCESS", + "coefficient": 0.55, + "alternatives": ["WALK_LOC", "WALK_EXP"], + } + ], + }, + ], + }, + {"name": "ACTIVE", "coefficient": 0.88, "alternatives": ["BIKE"]}, + ], + }, + pd.Series( + { + "DA_FREE": 1.4, + "DA_PAY": 1.0, + "SR2": 0.8, + "SR3": 0.6, + "WALK_LOC": 0.7, + "WALK_EXP": 0.3, + "BIKE": 0.1, + } + ), + np.array(["DA_FREE", "DA_PAY", "SR2", "SR3", "WALK_LOC", "WALK_EXP", "BIKE"]), + id="four_level_mixed_structure", + ), +] + + +REALISTIC_NESTED_LOGIT_FAST_CASES = [ + { + "id": "mtc_extended_tour_mode_choice_style", + "nest_spec": { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.72, + "alternatives": [ + { + "name": "DRIVEALONE", + "coefficient": 0.35, + "alternatives": ["DRIVEALONEFREE", "DRIVEALONEPAY"], + }, + { + "name": "SHAREDRIDE2", + "coefficient": 0.35, + "alternatives": ["SHARED2FREE", "SHARED2PAY"], + }, + { + "name": "SHAREDRIDE3", + "coefficient": 0.40, + "alternatives": ["SHARED3FREE", "SHARED3PAY"], + }, + ], + }, + { + "name": "NONMOTORIZED", + "coefficient": 0.80, + "alternatives": ["WALK", "BIKE"], + }, + { + "name": "TRANSIT", + "coefficient": 0.60, + "alternatives": [ + { + "name": "WALKACCESS", + "coefficient": 0.50, + "alternatives": [ + "WALK_LOC", + "WALK_LRF", + "WALK_EXP", + "WALK_HVY", + "WALK_COM", + ], + }, + { + "name": "DRIVEACCESS", + "coefficient": 0.45, + "alternatives": [ + "DRIVE_LOC", + "DRIVE_LRF", + "DRIVE_EXP", + "DRIVE_HVY", + "DRIVE_COM", + ], + }, + ], + }, + { + "name": "RIDEHAIL", + "coefficient": 0.65, + "alternatives": ["TAXI", "TNC_SINGLE", "TNC_SHARED"], + }, 
+ ], + }, + "raw_utilities": pd.Series( + { + "DRIVEALONEFREE": 1.60, + "DRIVEALONEPAY": 1.10, + "SHARED2FREE": 1.05, + "SHARED2PAY": 0.82, + "SHARED3FREE": 0.70, + "SHARED3PAY": 0.48, + "WALK": -0.20, + "BIKE": 0.05, + "WALK_LOC": 0.15, + "WALK_LRF": 0.05, + "WALK_EXP": 0.22, + "WALK_HVY": 0.10, + "WALK_COM": -0.03, + "DRIVE_LOC": 0.42, + "DRIVE_LRF": 0.34, + "DRIVE_EXP": 0.54, + "DRIVE_HVY": 0.38, + "DRIVE_COM": 0.26, + "TAXI": 0.30, + "TNC_SINGLE": 0.45, + "TNC_SHARED": 0.18, + } + ), + }, + { + "id": "semcog_tour_mode_choice_style", + "nest_spec": { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + { + "name": "AUTO", + "coefficient": 0.78, + "alternatives": ["DRIVEALONE", "SHARED2", "SHARED3"], + }, + { + "name": "NONMOTORIZED", + "coefficient": 0.85, + "alternatives": ["WALK", "BIKE"], + }, + { + "name": "TRANSIT", + "coefficient": 0.64, + "alternatives": [ + { + "name": "WALKACCESS", + "coefficient": 0.56, + "alternatives": ["WALK_LOC", "WALK_PRM", "WALK_MIX"], + }, + { + "name": "PNRACCESS", + "coefficient": 0.52, + "alternatives": ["PNR_LOC", "PNR_PRM", "PNR_MIX"], + }, + { + "name": "KNRACCESS", + "coefficient": 0.50, + "alternatives": ["KNR_LOC", "KNR_PRM", "KNR_MIX"], + }, + ], + }, + { + "name": "SCHOOL_BUS", + "coefficient": 0.92, + "alternatives": ["SCHOOLBUS"], + }, + { + "name": "RIDEHAIL", + "coefficient": 0.68, + "alternatives": ["TAXI", "TNC_SINGLE", "TNC_SHARED"], + }, + ], + }, + "raw_utilities": pd.Series( + { + "DRIVEALONE": 1.45, + "SHARED2": 1.08, + "SHARED3": 0.76, + "WALK": -0.10, + "BIKE": 0.12, + "WALK_LOC": 0.10, + "WALK_PRM": 0.18, + "WALK_MIX": 0.06, + "PNR_LOC": 0.30, + "PNR_PRM": 0.36, + "PNR_MIX": 0.26, + "KNR_LOC": 0.27, + "KNR_PRM": 0.32, + "KNR_MIX": 0.21, + "SCHOOLBUS": 0.24, + "TAXI": 0.22, + "TNC_SINGLE": 0.40, + "TNC_SHARED": 0.16, + } + ), + }, +] + + +@pytest.mark.parametrize( + "nest_spec,raw_utilities,_alt_order_array", + NESTED_LOGIT_EXACT_PARITY_CASES, +) +def 
test_make_choices_vs_eet_nl_exact_leaf_parity_across_structures( + nest_spec, raw_utilities, _alt_order_array +): + n_draws = 100_000 + expected = _expected_nested_logit_shares(raw_utilities, nest_spec) + observed = _nested_logit_eet_shares( + raw_utilities, + nest_spec, + n_draws=n_draws, + ) + + _assert_empirical_shares_close(observed, expected, n_draws=n_draws) + + +# def test_exact_leaf_error_terms_use_float64_with_float32_nested_utilities(): +# nest_spec = { +# "name": "root", +# "coefficient": 1.0, +# "alternatives": [ +# {"name": "motorized", "coefficient": 0.5, "alternatives": ["car", "bus"]}, +# "walk", +# ], +# } +# raw_utilities = pd.DataFrame( +# np.array([[0.5, 0.2, 0.4]], dtype=np.float32), +# index=pd.RangeIndex(1, name="chooser_id"), +# columns=["car", "bus", "walk"], +# ) +# # nested_utilities = simulate.compute_nested_utilities( +# # raw_utilities, nest_spec +# # ).astype(np.float32) +# # alt_order_array = np.array(["car", "bus", "walk"]) +# state = _make_rng_state( +# raw_utilities, +# seed=17, +# step_name="exact_leaf_float64_dtype", +# ) + +# try: +# error_terms = logit.sample_nested_logit_exact_leaf_error_terms( +# state, +# raw_utilities, +# nest_spec, +# ) +# finally: +# _finish_rng_state(state, "exact_leaf_float64_dtype") + +# assert all(dtype == np.float64 for dtype in error_terms.dtypes) + + +def test_make_choices_utility_based_routes_nested_logit_to_nl_eet(monkeypatch): + sentinel = pd.Series([1, 0], index=pd.Index([100, 101], name="chooser_id")) + + def fake_make_choices_explicit_error_term_nl( + state, + alt_utilities, + nest_spec, + trace_label, + trace_choosers=None, + alts_context=None, + alt_nrs_df=None, + ): + assert list(alt_utilities.columns) == ["car", "walk"] + assert trace_label == "test.make_choices_utility_based" + assert trace_choosers is None + assert alts_context is None + assert alt_nrs_df is None + return sentinel + + monkeypatch.setattr( + logit, + "make_choices_explicit_error_term_nl", + 
fake_make_choices_explicit_error_term_nl, + ) + + state = workflow.State().default_settings() + utilities = pd.DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + index=pd.Index([100, 101], name="chooser_id"), + columns=["car", "walk"], + ) + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + {"name": "motorized", "coefficient": 0.7, "alternatives": ["car"]}, + "walk", + ], + } + + choices, rands = logit.make_choices_utility_based( + state, + utilities, + nest_spec=nest_spec, + trace_label="test", + ) + + pdt.assert_series_equal(choices, sentinel) + pdt.assert_series_equal( + rands, + pd.Series([0, 0], index=pd.Index([100, 101], name="chooser_id")), + ) + + +@pytest.mark.parametrize( + "case", + REALISTIC_NESTED_LOGIT_FAST_CASES, + ids=[case["id"] for case in REALISTIC_NESTED_LOGIT_FAST_CASES], +) +def test_nested_logit_methods_match_expected_shares_for_realistic_tour_mode_choice_nests( + case, +): + _assert_nested_logit_methods_match_expected_across_seeds( + case["raw_utilities"], + case["nest_spec"], + n_draws=6_000, + seeds=[11, 23, 37], + ) + + +def test_nested_logit_share_response_tracks_utility_perturbations(): + case = REALISTIC_NESTED_LOGIT_FAST_CASES[0] + base_utilities = case["raw_utilities"] + perturbed_utilities = base_utilities.copy() + perturbed_utilities["DRIVE_EXP"] += 0.60 + perturbed_utilities["TNC_SHARED"] -= 0.45 + + baseline_expected = _expected_nested_logit_shares(base_utilities, case["nest_spec"]) + perturbed_expected = _expected_nested_logit_shares( + perturbed_utilities, case["nest_spec"] + ) + + expected_delta = perturbed_expected - baseline_expected + assert expected_delta["DRIVE_EXP"] > 0 + assert expected_delta["TNC_SHARED"] < 0 + + for method in ("mc", "exact_leaf"): + baseline_matrix = _nested_logit_method_share_matrix( + base_utilities, + case["nest_spec"], + method=method, + n_draws=8_000, + seeds=[11, 23, 37], + ) + perturbed_matrix = _nested_logit_method_share_matrix( + perturbed_utilities, + case["nest_spec"], + 
method=method, + n_draws=8_000, + seeds=[11, 23, 37], + ) + _assert_average_empirical_shares_close( + baseline_matrix, + baseline_expected, + n_draws=8_000, + ) + _assert_average_empirical_shares_close( + perturbed_matrix, + perturbed_expected, + n_draws=8_000, + ) + _assert_average_share_deltas_close( + baseline_matrix, + perturbed_matrix, + baseline_expected, + perturbed_expected, + n_draws=8_000, + ) + + +def test_three_level_nested_logit_methods_follow_monte_carlo_power_law(): + _assert_three_level_nested_logit_methods_follow_power_law( + draw_counts=np.array([2_000, 8_000, 32_000]), + seeds=[17, 29, 43], + ) + + +# # @pytest.mark.slow +def test_three_level_nested_logit_methods_follow_monte_carlo_power_law_large_draws(): + _assert_three_level_nested_logit_methods_follow_power_law( + draw_counts=np.array([8_000, 32_000, 128_000]), + seeds=[17, 29, 43], + slope_lower=-0.7, + slope_upper=-0.3, + pair_slope_lower=-1.0, + pair_slope_upper=-0.2, + max_final_method_error=0.0015, + max_final_pair_error=0.0020, + ) + + +# +# Interaction Dataset Tests +# def test_interaction_dataset_no_sample(interaction_choosers, interaction_alts): expected = pd.DataFrame( { @@ -167,9 +1653,6 @@ def test_interaction_dataset_no_sample(interaction_choosers, interaction_alts): ) interacted, expected = interacted.align(expected, axis=1) - - print("interacted\n", interacted) - print("expected\n", expected) pdt.assert_frame_equal(interacted, expected) @@ -191,3 +1674,30 @@ def test_interaction_dataset_sampled(interaction_choosers, interaction_alts): interacted, expected = interacted.align(expected, axis=1) pdt.assert_frame_equal(interacted, expected) + + +def test_alts_context_from_series_and_properties(): + ctx = AltsContext.from_series(pd.Index([3, 5, 9, 4])) + + assert ctx.min_alt_id == 3 + assert ctx.max_alt_id == 9 + assert ctx.n_alts_to_cover_max_id == 10 + assert ctx.n_rands_to_sample == 10 + + +@pytest.mark.parametrize( + 
"num_alts,zero_based,expected_min,expected_max,expected_n_cover", + [ + (5, True, 0, 4, 5), + (5, False, 1, 5, 6), + ], +) +def test_alts_context_from_num_alts( + num_alts, zero_based, expected_min, expected_max, expected_n_cover +): + ctx = AltsContext.from_num_alts(num_alts=num_alts, zero_based=zero_based) + + assert ctx.min_alt_id == expected_min + assert ctx.max_alt_id == expected_max + assert ctx.n_alts_to_cover_max_id == expected_n_cover + assert ctx.n_rands_to_sample == expected_n_cover diff --git a/activitysim/core/test/test_random.py b/activitysim/core/test/test_random.py index bcbc602685..d5f84bd124 100644 --- a/activitysim/core/test/test_random.py +++ b/activitysim/core/test/test_random.py @@ -126,3 +126,207 @@ def test_channel(): npt.assert_almost_equal(np.asanyarray(rands).flatten(), test1_expected_rands2) rng.end_step("test_step") + + +def test_gumbel_max_positions_for_df_matches_materialized_path_and_offsets(): + persons = pd.DataFrame( + {"household_id": [1, 1, 2]}, + index=pd.Index([11, 12, 13], name="person_id"), + ) + utilities = pd.DataFrame( + [[0.5, -0.2, 1.1], [0.1, 0.2, -0.3], [2.0, 1.0, 0.0]], + index=persons.index, + ) + sample_size = 4 + n_alts = utilities.shape[1] + + baseline_rng = random.Random() + baseline_rng.set_base_seed(0) + baseline_rng.begin_step("test_step") + baseline_rng.add_channel("persons", persons) + + materialized = baseline_rng.gumbel_for_df(utilities, n=n_alts * sample_size) + expected_positions = np.argmax( + materialized.reshape((len(utilities), sample_size, n_alts)) + + utilities.to_numpy()[:, np.newaxis, :], + axis=2, + ) + next_random_after_materialized = baseline_rng.random_for_df(persons) + baseline_rng.end_step("test_step") + + fused_rng = random.Random() + fused_rng.set_base_seed(0) + fused_rng.begin_step("test_step") + fused_rng.add_channel("persons", persons) + + observed_positions = fused_rng.gumbel_max_positions_for_df(utilities, sample_size) + next_random_after_fused = fused_rng.random_for_df(persons) + 
fused_rng.end_step("test_step") + + npt.assert_array_equal(observed_positions, expected_positions) + npt.assert_allclose(next_random_after_fused, next_random_after_materialized) + + +def test_gumbel_max_positions_for_df_matches_stable_alt_mapping_and_offsets(): + persons = pd.DataFrame( + {"household_id": [1, 1, 2]}, + index=pd.Index([41, 42, 43], name="person_id"), + ) + utilities = pd.DataFrame( + [[0.5, -0.2, 1.1], [0.1, 0.2, -0.3], [2.0, 1.0, 0.0]], + index=persons.index, + ) + sample_size = 3 + stable_alt_positions = np.array([0, 2, 4], dtype=np.int64) + n_total_alts = 5 + + baseline_rng = random.Random() + baseline_rng.set_base_seed(0) + baseline_rng.begin_step("test_step") + baseline_rng.add_channel("persons", persons) + + materialized = baseline_rng.gumbel_for_df( + utilities, + n=n_total_alts * sample_size, + ).reshape((len(utilities), sample_size, n_total_alts)) + expected_positions = np.argmax( + materialized[:, :, stable_alt_positions] + + utilities.to_numpy()[:, np.newaxis, :], + axis=2, + ) + next_random_after_materialized = baseline_rng.random_for_df(persons) + baseline_rng.end_step("test_step") + + fused_rng = random.Random() + fused_rng.set_base_seed(0) + fused_rng.begin_step("test_step") + fused_rng.add_channel("persons", persons) + + observed_positions = fused_rng.gumbel_max_positions_for_df( + utilities, + sample_size, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + next_random_after_fused = fused_rng.random_for_df(persons) + fused_rng.end_step("test_step") + + npt.assert_array_equal(observed_positions, expected_positions) + npt.assert_allclose(next_random_after_fused, next_random_after_materialized) + + +def test_random_for_df_stable_alt_mapping_and_offsets(): + persons = pd.DataFrame( + {"household_id": [1, 1, 2]}, + index=pd.Index([51, 52, 53], name="person_id"), + ) + active_alts = pd.DataFrame( + np.zeros((len(persons), 3), dtype=np.float64), + index=persons.index, + ) + stable_alt_positions = np.array([0, 2, 
4], dtype=np.int64) + n_total_alts = 5 + + baseline_rng = random.Random() + baseline_rng.set_base_seed(0) + baseline_rng.begin_step("test_step") + baseline_rng.add_channel("persons", persons) + + materialized = baseline_rng.random_for_df(active_alts, n=n_total_alts) + expected_rands = materialized[:, stable_alt_positions] + next_random_after_materialized = baseline_rng.random_for_df(persons) + baseline_rng.end_step("test_step") + + fused_rng = random.Random() + fused_rng.set_base_seed(0) + fused_rng.begin_step("test_step") + fused_rng.add_channel("persons", persons) + + observed_rands = fused_rng.random_for_df_stable_alt_positions( + active_alts, + stable_alt_positions=stable_alt_positions, + n_total_alts=n_total_alts, + ) + next_random_after_fused = fused_rng.random_for_df(persons) + fused_rng.end_step("test_step") + + npt.assert_allclose(observed_rands, expected_rands) + npt.assert_allclose(next_random_after_fused, next_random_after_materialized) + + +def test_gumbel_choice_positions_for_df_matches_materialized_path_and_offsets(): + persons = pd.DataFrame( + {"household_id": [1, 1, 2]}, + index=pd.Index([21, 22, 23], name="person_id"), + ) + utilities = pd.DataFrame( + [[0.5, -0.2, 1.1], [0.1, 0.2, -0.3], [2.0, 1.0, 0.0]], + index=persons.index, + ) + + baseline_rng = random.Random() + baseline_rng.set_base_seed(0) + baseline_rng.begin_step("test_step") + baseline_rng.add_channel("persons", persons) + + materialized = baseline_rng.gumbel_for_df(utilities, n=utilities.shape[1]) + expected_positions = np.argmax(materialized + utilities.to_numpy(), axis=1) + next_random_after_materialized = baseline_rng.random_for_df(persons) + baseline_rng.end_step("test_step") + + fused_rng = random.Random() + fused_rng.set_base_seed(0) + fused_rng.begin_step("test_step") + fused_rng.add_channel("persons", persons) + + observed_positions = fused_rng.gumbel_choice_positions_for_df(utilities) + next_random_after_fused = fused_rng.random_for_df(persons) + 
fused_rng.end_step("test_step") + + npt.assert_array_equal(observed_positions, expected_positions) + npt.assert_allclose(next_random_after_fused, next_random_after_materialized) + + +def test_gumbel_choice_positions_for_df_matches_dense_alt_mapping(): + persons = pd.DataFrame( + {"household_id": [1, 1]}, + index=pd.Index([31, 32], name="person_id"), + ) + utilities = pd.DataFrame( + [[2.0, 1.0], [0.3, 1.2]], + index=persons.index, + ) + alt_nrs_df = pd.DataFrame( + [[0, 2], [1, 2]], + index=persons.index, + ) + n_rands = 3 + + baseline_rng = random.Random() + baseline_rng.set_base_seed(0) + baseline_rng.begin_step("test_step") + baseline_rng.add_channel("persons", persons) + + dense = baseline_rng.gumbel_for_df(utilities, n=n_rands) + expected_positions = np.argmax( + utilities.to_numpy() + np.take_along_axis(dense, alt_nrs_df.to_numpy(), axis=1), + axis=1, + ) + next_random_after_materialized = baseline_rng.random_for_df(persons) + baseline_rng.end_step("test_step") + + fused_rng = random.Random() + fused_rng.set_base_seed(0) + fused_rng.begin_step("test_step") + fused_rng.add_channel("persons", persons) + + observed_positions = fused_rng.gumbel_choice_positions_for_df( + utilities, + alt_nrs_df=alt_nrs_df, + n_rands=n_rands, + ) + next_random_after_fused = fused_rng.random_for_df(persons) + fused_rng.end_step("test_step") + + npt.assert_array_equal(observed_positions, expected_positions) + npt.assert_allclose(next_random_after_fused, next_random_after_materialized) diff --git a/activitysim/core/test/test_simulate.py b/activitysim/core/test/test_simulate.py index 17d4ba2cd6..21e0f90e73 100644 --- a/activitysim/core/test/test_simulate.py +++ b/activitysim/core/test/test_simulate.py @@ -10,7 +10,7 @@ import pandas.testing as pdt import pytest -from activitysim.core import simulate, workflow +from activitysim.core import chunk, simulate, workflow @pytest.fixture @@ -42,6 +42,19 @@ def data(data_dir): return pd.read_csv(os.path.join(data_dir, "data.csv")) 
+@pytest.fixture +def nest_spec(): + nest_spec = { + "name": "root", + "coefficient": 1.0, + "alternatives": [ + {"name": "alt0", "coefficient": 0.5, "alternatives": ["alt0.0", "alt0.1"]}, + "alt1", + ], + } + return nest_spec + + def test_read_model_spec(state, spec_name): spec = state.filesystem.read_model_spec(file_name=spec_name) @@ -88,3 +101,234 @@ def test_simple_simulate_chunked(state, data, spec): ) expected = pd.Series([1, 1, 1], index=data.index) pdt.assert_series_equal(choices, expected, check_dtype=False) + + +def test_eval_mnl_eet(state): + # Check that the same counts are returned by eval_mnl when using EET and when not. + + num_choosers = 100_000 + + np.random.seed(42) + data2 = pd.DataFrame( + { + "chooser_attr": np.random.rand(num_choosers), + }, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + spec2 = pd.DataFrame( + {"alt0": [1.0], "alt1": [2.0]}, + index=pd.Index(["chooser_attr"], name="Expression"), + ) + + # Set up a state with EET enabled + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", data2) + state.rng().begin_step("test_step_mnl") + + chunk_sizer = chunk.ChunkSizer(state, "", "", num_choosers) + + # run eval_mnl with EET enabled + choices_eet = simulate.eval_mnl( + state=state, + choosers=data2, + spec=spec2, + locals_d=None, + custom_chooser=None, + estimator=None, + chunk_sizer=chunk_sizer, + ) + + # Reset the state, without EET enabled + state.settings.use_explicit_error_terms = False + + state.rng().end_step("test_step_mnl") + state.rng().begin_step("test_step_mnl") + + choices_mnl = simulate.eval_mnl( + state=state, + choosers=data2, + spec=spec2, + locals_d=None, + custom_chooser=None, + estimator=None, + chunk_sizer=chunk_sizer, + ) + + # Compare counts + mnl_counts = choices_mnl.value_counts(normalize=True) + explicit_counts = choices_eet.value_counts(normalize=True) + assert np.allclose(mnl_counts, explicit_counts, atol=0.01) + + +def 
test_eval_nl_eet(state, nest_spec): + # Check that the same counts are returned by eval_nl when using EET and when not. + + num_choosers = 100_000 + + np.random.seed(42) + data2 = pd.DataFrame( + { + "chooser_attr": np.random.rand(num_choosers), + }, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + spec2 = pd.DataFrame( + {"alt1": [2.0], "alt0.0": [0.5], "alt0.1": [0.2]}, + index=pd.Index(["chooser_attr"], name="Expression"), + ) + + # Set up a state with EET enabled + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", data2) + state.rng().begin_step("test_step_mnl") + + chunk_sizer = chunk.ChunkSizer(state, "", "", num_choosers) + + # run eval_nl with EET enabled + choices_eet = simulate.eval_nl( + state=state, + choosers=data2, + spec=spec2, + nest_spec=nest_spec, + locals_d={}, + custom_chooser=None, + estimator=None, + trace_label="test", + chunk_sizer=chunk_sizer, + ) + + # Reset the state, without EET enabled + state.settings.use_explicit_error_terms = False + + state.rng().end_step("test_step_mnl") + state.rng().begin_step("test_step_mnl") + + choices_mnl = simulate.eval_nl( + state=state, + choosers=data2, + spec=spec2, + nest_spec=nest_spec, + locals_d={}, + custom_chooser=None, + trace_label="test", + estimator=None, + chunk_sizer=chunk_sizer, + ) + + # Compare counts + mnl_counts = choices_mnl.value_counts(normalize=True) + explicit_counts = choices_eet.value_counts(normalize=True) + assert np.allclose(mnl_counts, explicit_counts, atol=0.01) + + +def test_compute_nested_utilities(nest_spec): + # computes nested utilities manually and using the function and checks that + # the utilities are the same + + num_choosers = 2 + raw_utilities = pd.DataFrame( + {"alt1": [1, 10], "alt0.0": [2, 3], "alt0.1": [4, 5]}, + index=pd.Index(range(num_choosers)), + ) + + nested_utilities = simulate.compute_nested_utilities(raw_utilities, nest_spec) + + # these are from the definition of 
nest_spec + alt0_nest_coefficient = nest_spec["alternatives"][0]["coefficient"] + alt0_leaf_product_of_coefficients = nest_spec["coefficient"] * alt0_nest_coefficient + assert alt0_leaf_product_of_coefficients == 0.5 # 1 * 0.5 + + product_of_coefficientss = pd.DataFrame( + { + "alt1": [nest_spec["coefficient"]], + "alt0.0": [alt0_leaf_product_of_coefficients], + "alt0.1": [alt0_leaf_product_of_coefficients], + }, + index=[0], + ) + leaf_utilities = raw_utilities / product_of_coefficientss.iloc[0] + + constructed_nested_utilities = pd.DataFrame(index=raw_utilities.index) + + constructed_nested_utilities[leaf_utilities.columns] = leaf_utilities + constructed_nested_utilities["alt0"] = alt0_nest_coefficient * np.log( + np.exp(leaf_utilities[["alt0.0", "alt0.1"]]).sum(axis=1) + ) + constructed_nested_utilities["root"] = nest_spec["coefficient"] * np.log( + np.exp(constructed_nested_utilities[["alt1", "alt0"]]).sum(axis=1) + ) + + assert np.allclose( + nested_utilities, constructed_nested_utilities[nested_utilities.columns] + ), "Mismatch in nested utilities" + + +def test_eval_nl_logsums_eet_vs_non_eet(state, nest_spec): + """eval_nl with want_logsums=True must produce identical logsums under + EET and non-EET modes""" + + num_choosers = 100 + + np.random.seed(42) + data2 = pd.DataFrame( + {"chooser_attr": np.random.rand(num_choosers)}, + index=pd.Index(range(num_choosers), name="person_id"), + ) + + spec2 = pd.DataFrame( + {"alt1": [2.0], "alt0.0": [0.5], "alt0.1": [0.2]}, + index=pd.Index(["chooser_attr"], name="Expression"), + ) + + chunk_sizer = chunk.ChunkSizer(state, "", "", num_choosers) + + state.settings.use_explicit_error_terms = True + state.rng().set_base_seed(42) + state.rng().add_channel("person_id", data2) + state.rng().begin_step("test_step_logsums") + + result_eet = simulate.eval_nl( + state=state, + choosers=data2, + spec=spec2, + nest_spec=nest_spec, + locals_d={}, + custom_chooser=None, + estimator=None, + want_logsums=True, + trace_label="test", + 
chunk_sizer=chunk_sizer, + ) + + state.rng().end_step("test_step_logsums") + + state.settings.use_explicit_error_terms = False + state.rng().begin_step("test_step_logsums") + + result_non_eet = simulate.eval_nl( + state=state, + choosers=data2, + spec=spec2, + nest_spec=nest_spec, + locals_d={}, + custom_chooser=None, + estimator=None, + want_logsums=True, + trace_label="test", + chunk_sizer=chunk_sizer, + ) + + state.rng().end_step("test_step_logsums") + + # Both paths should return a DataFrame with 'choice' and 'logsum' columns + assert "logsum" in result_eet.columns, "EET result missing logsum column" + assert "logsum" in result_non_eet.columns, "non-EET result missing logsum column" + + # Logsums are deterministic — they must be identical across paths + assert np.allclose( + result_eet["logsum"].values, result_non_eet["logsum"].values, rtol=1e-10 + ) diff --git a/activitysim/examples/placeholder_multiple_zone/test/configs_eet/settings.yaml b/activitysim/examples/placeholder_multiple_zone/test/configs_eet/settings.yaml new file mode 100644 index 0000000000..08c06d702e --- /dev/null +++ b/activitysim/examples/placeholder_multiple_zone/test/configs_eet/settings.yaml @@ -0,0 +1,3 @@ +inherit_settings: True + +use_explicit_error_terms: True diff --git a/activitysim/examples/placeholder_multiple_zone/test/reference_pipeline_2_zone_eet.zip b/activitysim/examples/placeholder_multiple_zone/test/reference_pipeline_2_zone_eet.zip new file mode 100644 index 0000000000..da3ab8af16 Binary files /dev/null and b/activitysim/examples/placeholder_multiple_zone/test/reference_pipeline_2_zone_eet.zip differ diff --git a/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_tours_2_zone.csv b/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_tours_2_zone.csv new file mode 100644 index 0000000000..86d744b362 --- /dev/null +++ b/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_tours_2_zone.csv @@ -0,0 +1,106 @@ 
+"person_id","tour_type","tour_type_count","tour_type_num","tour_num","tour_count","tour_category","number_of_participants","destination","origin","household_id","tdd","start","end","duration","composition","destination_logsum","tour_mode","mode_choice_logsum","atwork_subtour_frequency","parent_tour_id","stop_frequency","primary_purpose","tour_id" +26686,"shopping",1,1,1,1,"non_mandatory",1,23000,8000,26686,113,12,13,1,"",10.610858661241041,"BIKE",1.0501470508061868,"",,"0out_0in","shopping",1094159 +26844,"othmaint",1,1,2,2,"non_mandatory",1,9000,8000,26844,159,16,21,5,"",12.381682576294725,"WALK_LOC",2.7924555127017814,"",,"0out_0in","othmaint",1100632 +26844,"shopping",1,1,1,2,"non_mandatory",1,5000,8000,26844,75,9,14,5,"",10.98605657139575,"TNC_SINGLE",2.732845477677904,"",,"1out_0in","shopping",1100637 +27726,"eatout",1,1,1,1,"non_mandatory",1,7000,10000,27726,140,14,19,5,"",12.223057521030135,"WALK_LOC",3.9327906069918876,"",,"0out_0in","eatout",1136772 +110675,"work",1,1,1,1,"mandatory",1,9000,16000,110675,13,5,18,13,"",,"WALK_LOC",-0.23735933616868604,"no_subtours",,"0out_0in","work",4537714 +112064,"work",1,1,1,1,"mandatory",1,4000,16000,112064,131,13,20,7,"",,"WALK",5.320176808287765,"no_subtours",,"0out_0in","work",4594663 +264108,"eatout",1,1,1,1,"non_mandatory",1,22000,9000,226869,135,14,14,0,"",10.289097224175714,"WALK_LRF",0.1018338420222794,"",,"0out_0in","eatout",10828434 +323689,"work",1,1,1,1,"mandatory",1,13000,10000,256660,151,15,21,6,"",,"WALK_LRF",5.829417670036404,"no_subtours",,"1out_0in","work",13271288 +323690,"work",1,1,1,1,"mandatory",1,9000,10000,256660,117,12,17,5,"",,"WALK",5.3957995007835535,"no_subtours",,"0out_1in","work",13271329 +325431,"othdiscr",1,1,1,1,"non_mandatory",1,23000,16000,257531,102,11,14,3,"",12.111981032421705,"WALK_LRF",2.449514696123423,"",,"0out_2in","othdiscr",13342696 
+325431,"work",1,1,1,1,"mandatory",1,1000,16000,257531,157,16,19,3,"",,"TNC_SINGLE",5.8294985824993155,"no_subtours",,"0out_0in","work",13342710 +325432,"work",1,1,1,1,"mandatory",1,17000,16000,257531,45,7,15,8,"",,"BIKE",6.349736430311182,"no_subtours",,"0out_0in","work",13342751 +595684,"escort",1,1,1,1,"non_mandatory",1,5000,21000,370497,38,7,8,1,"",9.512682218523492,"WALK_LOC",-1.0951775798823786,"",,"0out_0in","escort",24423053 +595684,"work",1,1,1,1,"mandatory",1,13000,21000,370497,167,17,22,5,"",,"WALK",-0.4857821845242076,"no_subtours",,"0out_0in","work",24423083 +595685,"school",1,1,1,1,"mandatory",1,13000,21000,370497,61,8,15,7,"",,"WALK_LOC",-0.9348277771487143,"",,"0out_0in","school",24423116 +595686,"school",1,1,1,1,"mandatory",1,16000,21000,370497,41,7,11,4,"",,"WALK_LOC",-0.800449800958596,"",,"0out_0in","school",24423157 +644292,"school",1,1,1,1,"mandatory",1,10000,7000,386699,9,5,14,9,"",,"WALK_HVY",19.587712717782818,"",,"0out_0in","school",26416003 +644476,"work",1,1,1,1,"mandatory",1,16000,16000,386761,47,7,17,10,"",,"WALK",5.723748258371387,"no_subtours",,"0out_0in","work",26423555 +644477,"work",1,1,1,1,"mandatory",1,2000,16000,386761,64,8,18,10,"",,"WALK_LOC",5.746364420193509,"no_subtours",,"0out_2in","work",26423596 +644478,"school",1,1,1,1,"mandatory",1,16000,16000,386761,45,7,15,8,"",,"WALK",17.692979333328697,"",,"0out_0in","school",26423629 +1267567,"eatout",1,1,1,1,"non_mandatory",1,5000,21000,570454,99,11,11,0,"",12.198336821342862,"WALK",4.12367021630192,"",,"0out_0in","eatout",51970253 +1427193,"shopping",1,1,1,1,"non_mandatory",1,11000,25000,703381,151,15,21,6,"",10.194065391642615,"WALK",0.5303425623640113,"",,"0out_0in","shopping",58514946 +1427194,"othmaint",3,1,1,3,"non_mandatory",1,2000,25000,703381,74,9,13,4,"",11.397149558879462,"BIKE",1.6921646836542774,"",,"0out_0in","othmaint",58514982 
+1427194,"othmaint",3,2,2,3,"non_mandatory",1,9000,25000,703381,156,16,18,2,"",11.446286395441202,"BIKE",0.673847031461692,"",,"0out_0in","othmaint",58514983 +1427194,"othmaint",3,3,3,3,"non_mandatory",1,4000,25000,703381,172,18,21,3,"",11.409006574951876,"WALK",1.123057934220693,"",,"0out_0in","othmaint",58514984 +1572659,"othdiscr",1,1,1,1,"non_mandatory",1,5000,6000,763879,8,5,13,8,"",12.08921613172822,"WALK",2.968451508911268,"",,"0out_0in","othdiscr",64479044 +1572930,"eatout",1,1,1,1,"non_mandatory",1,7000,9000,764150,46,7,16,9,"",12.211469568104635,"WALK",4.184842505817334,"",,"0out_0in","eatout",64490136 +1632206,"work",1,1,1,1,"mandatory",1,2000,11000,823426,48,7,18,11,"",,"BIKE",-0.15558251192441164,"no_subtours",,"0out_0in","work",66920485 +1632281,"work",1,1,1,1,"mandatory",1,5000,12000,823501,64,8,18,10,"",,"WALK_LOC",-0.014522397979818477,"no_subtours",,"0out_0in","work",66923560 +1632987,"eat",1,1,1,1,"atwork",1,1000,2000,824207,100,11,12,1,"",12.497268604346223,"WALK",6.066084912412463,"",66952506,"0out_0in","atwork",66952471 +1632987,"work",1,1,1,1,"mandatory",1,2000,18000,824207,50,7,20,13,"",,"WALK",6.16313911347326,"eat",,"0out_0in","work",66952506 +1875721,"work",1,1,1,1,"mandatory",1,16000,16000,982875,49,7,19,12,"",,"WALK",2.052824858646257,"no_subtours",,"0out_0in","work",76904600 +1875722,"work",1,1,1,1,"mandatory",1,2000,16000,982875,48,7,18,11,"",,"WALK_LOC",1.8646274143957895,"no_subtours",,"0out_0in","work",76904641 +2159057,"work",1,1,1,1,"mandatory",1,15000,20000,1099626,47,7,17,10,"",,"WALK_LOC",-0.6269133124234691,"no_subtours",,"0out_0in","work",88521376 +2159058,"school",1,1,1,1,"mandatory",1,10000,20000,1099626,44,7,14,7,"",,"DRIVEALONEFREE",0.5331366792772473,"",,"0out_0in","univ",88521409 +2159059,"school",1,1,1,1,"mandatory",1,10000,20000,1099626,61,8,15,7,"",,"WALK",-0.4467165728053401,"",,"0out_0in","school",88521450 
+2458500,"othdiscr",1,1,1,1,"non_mandatory",1,1000,8000,1173905,126,13,15,2,"",12.144175831465427,"WALK",1.7166041926739541,"",,"0out_0in","othdiscr",100798525 +2458502,"school",1,1,1,1,"mandatory",1,7000,8000,1173905,76,9,15,6,"",,"WALK",19.233607768914855,"",,"0out_0in","school",100798613 +2458503,"school",1,1,1,1,"mandatory",1,18000,8000,1173905,63,8,17,9,"",,"WALK_LOC",16.82632652412361,"",,"1out_2in","school",100798654 +2566698,"othmaint",1,1,1,1,"non_mandatory",1,2000,25000,1196298,146,15,16,1,"",11.063210428915374,"WALK",0.5633209455900281,"",,"0out_0in","othmaint",105234646 +2566698,"work",1,1,1,1,"mandatory",1,13000,25000,1196298,42,7,12,5,"",,"WALK",-0.0331723343613139,"no_subtours",,"0out_0in","work",105234657 +2566699,"escort",2,1,1,4,"non_mandatory",1,9000,25000,1196298,55,8,9,1,"",9.441924310089995,"SHARED2FREE",-1.224321337819968,"",,"0out_0in","escort",105234668 +2566699,"escort",2,2,2,4,"non_mandatory",1,11000,25000,1196298,112,12,12,0,"",9.41277389564782,"TNC_SINGLE",-0.9864057562279634,"",,"0out_0in","escort",105234669 +2566699,"othdiscr",1,1,4,4,"non_mandatory",1,2000,25000,1196298,172,18,21,3,"",10.848788280363689,"DRIVEALONEFREE",0.8198275600883485,"",,"0out_0in","othdiscr",105234684 +2566699,"shopping",1,1,3,4,"non_mandatory",1,2000,25000,1196298,71,9,10,1,"",9.543720666670257,"WALK",0.5009281837597124,"",,"0out_0in","shopping",105234692 +2566700,"school",1,1,1,1,"mandatory",1,7000,25000,1196298,61,8,15,7,"",,"WALK_LOC",-0.570159779269892,"",,"0out_0in","school",105234731 +2566701,"escort",1,1,1,1,"non_mandatory",1,22000,25000,1196298,124,13,13,0,"",9.553554280932733,"SHARED3FREE",0.15849324834776704,"",,"0out_0in","escort",105234750 +2566701,"school",1,1,1,1,"mandatory",1,21000,25000,1196298,43,7,13,6,"",,"SHARED3FREE",-1.0571220991682664,"",,"0out_0in","school",105234772 +2566702,"othdiscr",1,1,1,1,"non_mandatory",1,18000,25000,1196298,171,18,20,2,"",11.263577911809062,"SHARED2FREE",0.3038595355453006,"",,"0out_2in","othdiscr",105234807 
+2936848,"eatout",1,1,3,3,"non_mandatory",1,15000,11000,1286557,128,13,17,4,"",12.266413600249491,"WALK",3.3115671555185893,"",,"0out_0in","eatout",120410774 +2936848,"othmaint",1,1,2,3,"non_mandatory",1,7000,11000,1286557,59,8,13,5,"",11.909324784764634,"BIKE",2.060670730997444,"",,"2out_1in","othmaint",120410796 +2936848,"shopping",1,1,1,3,"non_mandatory",1,13000,11000,1286557,170,18,19,1,"",10.780532073110708,"TNC_SINGLE",1.3488000773608126,"",,"0out_2in","shopping",120410801 +3061894,"othmaint",1,1,2,2,"non_mandatory",1,14000,24000,1363467,63,8,17,9,"",12.063680143654953,"TAXI",2.148310089033856,"",,"0out_1in","othmaint",125537682 +3061894,"shopping",1,1,1,2,"non_mandatory",1,22000,24000,1363467,54,8,8,0,"",10.881747022526751,"WALK",2.252180541904756,"",,"0out_0in","shopping",125537687 +3061895,"othmaint",1,1,2,2,"non_mandatory",1,9000,24000,1363467,180,20,20,0,"",12.42242239445194,"WALK_LRF",2.8803108561136073,"",,"0out_0in","othmaint",125537723 +3061895,"shopping",1,1,1,2,"non_mandatory",1,16000,24000,1363467,66,8,20,12,"",11.055276581731171,"WALK",2.001468036993353,"",,"1out_0in","shopping",125537728 +3188483,"othmaint",1,1,2,2,"non_mandatory",1,9000,25000,1402945,112,12,12,0,"",11.134991283596351,"DRIVEALONEFREE",0.3189527807943001,"",,"0out_3in","othmaint",130727831 +3188483,"shopping",1,1,1,2,"non_mandatory",1,12000,25000,1402945,136,14,15,1,"",10.486110215551667,"WALK",1.034228125459323,"",,"0out_0in","shopping",130727836 +3188484,"work",1,1,1,1,"mandatory",1,11000,25000,1402945,147,15,17,2,"",,"WALK",1.514717460133151,"no_subtours",,"0out_0in","work",130727883 +3188485,"work",1,1,1,1,"mandatory",1,7000,25000,1402945,64,8,18,10,"",,"WALK",1.8611739232870421,"no_subtours",,"0out_0in","work",130727924 +3232955,"escort",1,1,1,1,"non_mandatory",1,8000,14000,1444715,164,17,19,2,"",9.490739620053459,"SHARED2FREE",-2.001570831213811,"",,"0out_0in","escort",132551164 
+3232955,"work",2,1,1,2,"mandatory",1,1000,14000,1444715,24,6,11,5,"",,"WALK",-0.07386262451458166,"no_subtours",,"0out_0in","work",132551194 +3232955,"work",2,2,2,2,"mandatory",1,1000,14000,1444715,127,13,16,3,"",,"WALK",-0.19789203372593106,"no_subtours",,"0out_0in","work",132551195 +3233462,"eat",1,1,1,1,"atwork",1,15000,16000,1445222,70,9,9,0,"",17.335573611488165,"WALK",0.01158493251705316,"",132571981,"0out_1in","atwork",132571946 +3233462,"work",1,1,1,1,"mandatory",1,16000,17000,1445222,81,9,20,11,"",,"SHARED3FREE",0.6704720594037347,"eat",,"0out_3in","work",132571981 +3328568,"work",1,1,1,1,"mandatory",1,13000,8000,1511234,68,8,22,14,"",,"WALK_LRF",5.81180830136589,"no_subtours",,"0out_1in","work",136471327 +3328569,"school",1,1,1,1,"mandatory",1,9000,8000,1511234,62,8,16,8,"",,"WALK_LOC",7.510086352530541,"",,"0out_0in","univ",136471360 +3495342,"eat",1,1,1,1,"atwork",1,8000,8000,1594621,85,10,10,0,"",12.431032160366323,"WALK",6.584685335491401,"",143309061,"3out_0in","atwork",143309026 +3495342,"work",1,1,1,1,"mandatory",1,8000,10000,1594621,63,8,17,9,"",,"TNC_SINGLE",6.1798059555254525,"eat",,"0out_0in","work",143309061 +3495343,"shopping",1,1,1,1,"non_mandatory",1,11000,10000,1594621,146,15,16,1,"",11.21833943175268,"WALK",2.424167164896485,"",,"1out_1in","shopping",143309096 +3596364,"school",1,1,1,1,"mandatory",1,10000,9000,1645132,99,11,11,0,"",,"WALK",0.9033125989041192,"",,"0out_0in","univ",147450955 +3596364,"shopping",1,1,1,1,"non_mandatory",1,11000,9000,1645132,130,13,19,6,"",9.67513287780121,"WALK",-0.0777538798333834,"",,"0out_0in","shopping",147450957 +3596365,"school",1,1,1,1,"mandatory",1,8000,9000,1645132,92,10,17,7,"",,"WALK_LOC",0.3427235785809102,"",,"0out_2in","school",147450996 +3891102,"eat",1,1,1,1,"atwork",1,23000,15000,1747467,88,10,13,3,"",9.994441288395246,"WALK",-0.27371617134347487,"",159535221,"0out_1in","atwork",159535186 
+3891102,"work",1,1,1,1,"mandatory",1,15000,16000,1747467,67,8,21,13,"",,"WALK",1.8965148103543004,"eat",,"1out_1in","work",159535221 +3891104,"othdiscr",1,1,1,1,"non_mandatory",1,21000,16000,1747467,52,7,22,15,"",11.782399737864871,"WALK",0.9497659766033424,"",,"0out_0in","othdiscr",159535289 +4171615,"school",1,1,1,1,"mandatory",1,14000,16000,1810015,169,18,18,0,"",,"WALK",3.386100724122899,"",,"0out_0in","univ",171036246 +4171616,"shopping",1,1,1,1,"non_mandatory",1,4000,16000,1810015,89,10,14,4,"",10.392717072129102,"WALK",1.0648124818146192,"",,"0out_0in","shopping",171036289 +4171617,"eat",1,1,1,1,"atwork",1,10000,13000,1810015,85,10,10,0,"",10.082153546316052,"WALK",-0.9480501387648468,"",171036336,"0out_1in","atwork",171036301 +4171617,"work",1,1,1,1,"mandatory",1,13000,16000,1810015,62,8,16,8,"",,"WALK",1.7563407832077071,"eat",,"0out_0in","work",171036336 +4171619,"othdiscr",1,1,1,1,"non_mandatory",1,15000,16000,1810015,80,9,19,10,"",11.487947423205323,"WALK",1.221353422877656,"",,"0out_0in","othdiscr",171036404 +4171622,"othmaint",1,1,1,1,"non_mandatory",1,9000,16000,1810015,100,11,12,1,"",11.122012604358106,"DRIVEALONEFREE",-0.8082104120689105,"",,"0out_0in","othmaint",171036530 +4823797,"work",1,1,1,1,"mandatory",1,2000,14000,1952792,93,10,18,8,"",,"WALK_LOC",5.550859155970048,"no_subtours",,"1out_0in","work",197775716 +5057160,"work",1,1,1,1,"mandatory",1,2000,5000,2048204,30,6,17,11,"",,"BIKE",-0.1471793698941122,"no_subtours",,"0out_0in","work",207343599 +5057338,"work",1,1,1,1,"mandatory",1,9000,7000,2048382,50,7,20,13,"",,"TNC_SINGLE",5.991736244240239,"no_subtours",,"0out_0in","work",207350897 +5387762,"work",1,1,1,1,"mandatory",1,2000,9000,2223027,28,6,15,9,"",,"WALK_LRF",1.9325923275483154,"no_subtours",,"0out_0in","work",220898281 +5387763,"eatout",1,1,2,2,"non_mandatory",1,5000,9000,2223027,154,16,16,0,"",11.013946837999834,"WALK",2.023230426729334,"",,"0out_0in","eatout",220898289 
+5387763,"othdiscr",1,1,1,2,"non_mandatory",1,8000,9000,2223027,169,18,18,0,"",11.555366035758373,"WALK",1.7649215651163581,"",,"0out_0in","othdiscr",220898308 +5389226,"work",1,1,1,1,"mandatory",1,19000,16000,2223759,63,8,17,9,"",,"WALK",0.8926753131932247,"no_subtours",,"0out_0in","work",220958305 +5389227,"eat",1,1,1,1,"atwork",1,11000,2000,2223759,99,11,11,0,"",9.9093875692158,"WALK_LOC",-0.5906726326809321,"",220958346,"0out_0in","atwork",220958311 +5389227,"escort",1,1,1,1,"non_mandatory",1,4000,16000,2223759,162,17,17,0,"",9.642271576043063,"TNC_SINGLE",-0.13431708575512935,"",,"1out_0in","escort",220958316 +5389227,"work",1,1,1,1,"mandatory",1,2000,16000,2223759,28,6,15,9,"",,"WALK",1.8986167197326385,"eat",,"0out_0in","work",220958346 +7305540,"social",2,1,1,2,"non_mandatory",1,6000,20000,2727273,37,7,7,0,"",11.212441697992773,"DRIVEALONEFREE",1.6822348171556354,"",,"0out_0in","social",299527176 +7305540,"social",2,2,2,2,"non_mandatory",1,2000,20000,2727273,86,10,11,1,"",11.154124375407706,"DRIVEALONEFREE",1.5224456104374362,"",,"0out_1in","social",299527177 +7305540,"work",1,1,1,1,"mandatory",1,9000,20000,2727273,127,13,16,3,"",,"WALK_LOC",1.9458435557160327,"no_subtours",,"0out_3in","work",299527179 +7305541,"shopping",1,1,1,2,"non_mandatory",1,20000,20000,2727273,171,18,20,2,"",10.34361998244629,"TNC_SINGLE",1.7098607961042567,"",,"0out_0in","shopping",299527214 +7305541,"social",1,1,2,2,"non_mandatory",1,6000,20000,2727273,162,17,17,0,"",11.003513701202511,"WALK",1.6842447993162537,"",,"0out_0in","social",299527217 +7305541,"work",1,1,1,1,"mandatory",1,2000,20000,2727273,45,7,15,8,"",,"WALK_LRF",1.6733546844528127,"no_subtours",,"0out_0in","work",299527220 +7453413,"othmaint",1,1,1,1,"non_mandatory",1,9000,20000,2762078,102,11,14,3,"",12.130126308949702,"TAXI",2.107670623291728,"",,"0out_0in","othmaint",305589961 +7511873,"work",1,1,1,1,"mandatory",1,13000,8000,2820538,45,7,15,8,"",,"WALK",-0.9180280942803767,"no_subtours",,"0out_0in","work",307986832 
+7512109,"work",1,1,1,1,"mandatory",1,16000,8000,2820774,48,7,18,11,"",,"WALK_LOC",4.682541648215299,"no_subtours",,"0out_0in","work",307996508 +7512514,"work",1,1,1,1,"mandatory",1,5000,8000,2821179,172,18,21,3,"",,"WALK",5.281889150266914,"no_subtours",,"0out_0in","work",308013113 +7513432,"social",1,1,1,1,"non_mandatory",1,4000,8000,2822097,77,9,16,7,"",11.505225455396152,"WALK_LOC",1.5488509589493435,"",,"0out_1in","social",308050748 +7513554,"work",1,1,1,1,"mandatory",1,5000,8000,2822219,96,10,21,11,"",,"WALK",5.6727002115033285,"no_subtours",,"0out_0in","work",308055753 +7523517,"shopping",1,1,1,1,"non_mandatory",1,11000,7000,2832182,145,15,15,0,"",10.679884744302576,"SHARED2FREE",1.4715068169852683,"",,"0out_0in","shopping",308464230 diff --git a/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_trips_2_zone.csv b/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_trips_2_zone.csv new file mode 100644 index 0000000000..d23ca7523b --- /dev/null +++ b/activitysim/examples/placeholder_multiple_zone/test/regress/final_eet_trips_2_zone.csv @@ -0,0 +1,256 @@ +"person_id","household_id","primary_purpose","trip_num","outbound","trip_count","destination","origin","tour_id","purpose","destination_logsum","depart","trip_mode","mode_choice_logsum","trip_id" +26686,26686,"shopping",1,true,1,23000,8000,1094159,"shopping",,12,"BIKE",1.9330760825577051,8753273 +26686,26686,"shopping",1,false,1,8000,23000,1094159,"home",,13,"BIKE",1.8982724783359772,8753277 +26844,26844,"othmaint",1,true,1,9000,8000,1100632,"othmaint",,16,"WALK_LOC",7.835312649097291,8805057 +26844,26844,"othmaint",1,false,1,8000,9000,1100632,"home",,21,"WALK",7.81653044999465,8805061 +26844,26844,"shopping",1,true,2,25000,8000,1100637,"shopping",38.026340763211664,9,"WALK_LOC",12.899241414130254,8805097 +26844,26844,"shopping",2,true,2,5000,25000,1100637,"shopping",,9,"WALK_LOC",4.5422109121136645,8805098 
+26844,26844,"shopping",1,false,1,8000,5000,1100637,"home",,14,"WALK_LOC",4.56041276868394,8805101 +27726,27726,"eatout",1,true,1,7000,10000,1136772,"eatout",,14,"WALK",15.095889363425728,9094177 +27726,27726,"eatout",1,false,1,10000,7000,1136772,"home",,19,"WALK_LOC",15.128642080173528,9094181 +110675,110675,"work",1,true,1,9000,16000,4537714,"work",,5,"WALK_LOC",7.767335299437604,36301713 +110675,110675,"work",1,false,1,16000,9000,4537714,"home",,18,"WALK",7.890352026555583,36301717 +112064,112064,"work",1,true,1,4000,16000,4594663,"work",,13,"WALK",-0.37789093286786735,36757305 +112064,112064,"work",1,false,1,16000,4000,4594663,"home",,20,"WALK",-0.4796293342858078,36757309 +264108,226869,"eatout",1,true,1,22000,9000,10828434,"eatout",,14,"WALK_LRF",2.721417809460901,86627473 +264108,226869,"eatout",1,false,1,9000,22000,10828434,"home",,14,"WALK_LRF",2.7335986783997748,86627477 +323689,256660,"work",1,true,2,9000,10000,13271288,"work",26.71162202422962,15,"WALK",8.691754255656912,106170305 +323689,256660,"work",2,true,2,13000,9000,13271288,"work",,16,"WALK_LRF",0.20306465808635524,106170306 +323689,256660,"work",1,false,1,10000,13000,13271288,"home",,21,"WALK_LRF",0.20307051532099407,106170309 +323690,256660,"work",1,true,1,9000,10000,13271329,"work",,12,"WALK",8.030042498532831,106170633 +323690,256660,"work",1,false,2,7000,9000,13271329,"escort",40.07647772051534,16,"WALK",7.8276424819654,106170637 +323690,256660,"work",2,false,2,10000,7000,13271329,"home",,17,"WALK",10.341361395870509,106170638 +325431,257531,"othdiscr",1,true,1,23000,16000,13342696,"othdiscr",,11,"WALK_LOC",2.7598523724663617,106741569 +325431,257531,"othdiscr",1,false,3,7000,23000,13342696,"social",37.393285866288224,14,"WALK_LOC",2.5300279098941387,106741573 +325431,257531,"othdiscr",2,false,3,6000,7000,13342696,"escort",57.570625507650206,14,"WALK_LOC",14.408109959489884,106741574 +325431,257531,"othdiscr",3,false,3,16000,6000,13342696,"home",,14,"WALK_LOC",12.327190588058823,106741575 
+325431,257531,"work",1,true,1,1000,16000,13342710,"work",,16,"TNC_SINGLE",0.37779070103009205,106741681 +325431,257531,"work",1,false,1,16000,1000,13342710,"home",,19,"WALK_LOC",0.3421430227353672,106741685 +325432,257531,"work",1,true,1,17000,16000,13342751,"work",,7,"BIKE",4.532931609064337,106742009 +325432,257531,"work",1,false,1,16000,17000,13342751,"home",,15,"BIKE",4.495624422168687,106742013 +595684,370497,"escort",1,true,1,5000,21000,24423053,"escort",,7,"WALK",4.40185753136252,195384425 +595684,370497,"escort",1,false,1,21000,5000,24423053,"home",,8,"WALK_LOC",4.346352931873237,195384429 +595684,370497,"work",1,true,1,13000,21000,24423083,"work",,17,"WALK",-1.2617603816041787,195384665 +595684,370497,"work",1,false,1,21000,13000,24423083,"home",,22,"WALK",-1.2617599925977128,195384669 +595685,370497,"school",1,true,1,13000,21000,24423116,"school",,8,"WALK_LOC",-0.8229268584487489,195384929 +595685,370497,"school",1,false,1,21000,13000,24423116,"home",,15,"WALK_LOC",-1.1046192102021704,195384933 +595686,370497,"school",1,true,1,16000,21000,24423157,"school",,7,"WALK_LOC",5.947341073015701,195385257 +595686,370497,"school",1,false,1,21000,16000,24423157,"home",,11,"WALK_LOC",5.8497465236911,195385261 +644292,386699,"school",1,true,1,10000,7000,26416003,"school",,5,"WALK_LRF",10.365229803761082,211328025 +644292,386699,"school",1,false,1,7000,10000,26416003,"home",,14,"WALK_LOC",9.139539939236254,211328029 +644476,386761,"work",1,true,1,16000,16000,26423555,"work",,7,"WALK",5.4506235318233465,211388441 +644476,386761,"work",1,false,1,16000,16000,26423555,"home",,17,"WALK",5.450623534940713,211388445 +644477,386761,"work",1,true,1,2000,16000,26423596,"work",,8,"WALK_LOC",0.49488458947717534,211388769 +644477,386761,"work",1,false,3,7000,2000,26423596,"othdiscr",28.40400355134066,8,"WALK",0.3968904431297342,211388773 +644477,386761,"work",2,false,3,7000,7000,26423596,"shopping",46.92412760009899,18,"WALK",11.479761687899988,211388774 
+644477,386761,"work",3,false,3,16000,7000,26423596,"home",,18,"WALK_LOC",10.843966918146847,211388775 +644478,386761,"school",1,true,1,16000,16000,26423629,"school",,7,"WALK",6.714175467733903,211389033 +644478,386761,"school",1,false,1,16000,16000,26423629,"home",,15,"WALK",6.714175468282777,211389037 +1267567,570454,"eatout",1,true,1,5000,21000,51970253,"eatout",,11,"WALK",4.267825059089583,415762025 +1267567,570454,"eatout",1,false,1,21000,5000,51970253,"home",,11,"WALK",4.3682644959903065,415762029 +1427193,703381,"shopping",1,true,1,11000,25000,58514946,"shopping",,15,"WALK",3.908490482022162,468119569 +1427193,703381,"shopping",1,false,1,25000,11000,58514946,"home",,21,"WALK",3.7410903539902405,468119573 +1427194,703381,"othmaint",1,true,1,2000,25000,58514982,"othmaint",,9,"WALK",0.4560043077103118,468119857 +1427194,703381,"othmaint",1,false,1,25000,2000,58514982,"home",,13,"WALK",0.39307251819952954,468119861 +1427194,703381,"othmaint",1,true,1,9000,25000,58514983,"othmaint",,16,"BIKE",6.353308735471027,468119865 +1427194,703381,"othmaint",1,false,1,25000,9000,58514983,"home",,18,"BIKE",6.338440491658596,468119869 +1427194,703381,"othmaint",1,true,1,4000,25000,58514984,"othmaint",,18,"WALK",0.06675616330986604,468119873 +1427194,703381,"othmaint",1,false,1,25000,4000,58514984,"home",,21,"WALK",-0.013609664361095805,468119877 +1572659,763879,"othdiscr",1,true,1,5000,6000,64479044,"othdiscr",,5,"WALK",4.6361033109279965,515832353 +1572659,763879,"othdiscr",1,false,1,6000,5000,64479044,"home",,13,"WALK",4.479864006955085,515832357 +1572930,764150,"eatout",1,true,1,7000,9000,64490136,"eatout",,7,"WALK",13.778746190542957,515921089 +1572930,764150,"eatout",1,false,1,9000,7000,64490136,"home",,16,"WALK",13.622506184583505,515921093 +1632206,823426,"work",1,true,1,2000,11000,66920485,"work",,7,"BIKE",-0.28522129668359636,535363881 +1632206,823426,"work",1,false,1,11000,2000,66920485,"home",,18,"BIKE",-0.2955629257504098,535363885 
+1632281,823501,"work",1,true,1,5000,12000,66923560,"work",,8,"WALK",4.202625417780758,535388481 +1632281,823501,"work",1,false,1,12000,5000,66923560,"home",,18,"WALK",4.038764733249729,535388485 +1632987,824207,"atwork",1,true,1,1000,2000,66952471,"atwork",,11,"WALK",-0.6493719063222496,535619769 +1632987,824207,"atwork",1,false,1,2000,1000,66952471,"work",,12,"WALK",-0.5976340036034059,535619773 +1632987,824207,"work",1,true,1,2000,18000,66952506,"work",,7,"WALK",-1.6273811422831237,535620049 +1632987,824207,"work",1,false,1,18000,2000,66952506,"home",,20,"WALK",-1.6493420285420004,535620053 +1875721,982875,"work",1,true,1,16000,16000,76904600,"work",,7,"WALK",5.450623743648392,615236801 +1875721,982875,"work",1,false,1,16000,16000,76904600,"home",,19,"WALK",5.45062374684539,615236805 +1875722,982875,"work",1,true,1,2000,16000,76904641,"work",,7,"TNC_SHARED",0.5145638545713905,615237129 +1875722,982875,"work",1,false,1,16000,2000,76904641,"home",,18,"WALK",0.4575440509718732,615237133 +2159057,1099626,"work",1,true,1,15000,20000,88521376,"work",,7,"WALK_LOC",0.8040027972334238,708171009 +2159057,1099626,"work",1,false,1,20000,15000,88521376,"home",,17,"WALK_LOC",0.7614683739232305,708171013 +2159058,1099626,"univ",1,true,1,10000,20000,88521409,"univ",,7,"WALK",8.007783984850628,708171273 +2159058,1099626,"univ",1,false,1,20000,10000,88521409,"home",,14,"WALK",8.06183233206498,708171277 +2159059,1099626,"school",1,true,1,10000,20000,88521450,"school",,8,"WALK",9.06543923174178,708171601 +2159059,1099626,"school",1,false,1,20000,10000,88521450,"home",,15,"WALK",9.119639179963317,708171605 +2458500,1173905,"othdiscr",1,true,1,1000,8000,100798525,"othdiscr",,13,"WALK",-1.5411007588568488,806388201 +2458500,1173905,"othdiscr",1,false,1,8000,1000,100798525,"home",,15,"WALK",-1.7195836652494862,806388205 +2458502,1173905,"school",1,true,1,7000,8000,100798613,"school",,9,"WALK",13.486637386215971,806388905 
+2458502,1173905,"school",1,false,1,8000,7000,100798613,"home",,15,"WALK",13.204797391017513,806388909 +2458503,1173905,"school",1,true,2,16000,8000,100798654,"shopping",17.809084955388304,8,"WALK",5.517546221609351,806389233 +2458503,1173905,"school",2,true,2,18000,16000,100798654,"school",,8,"WALK_LOC",-0.07144513930505698,806389234 +2458503,1173905,"school",1,false,3,7000,18000,100798654,"othdiscr",30.67993027539147,17,"WALK_LOC",-0.15250090963444055,806389237 +2458503,1173905,"school",2,false,3,6000,7000,100798654,"othmaint",51.20578686432127,17,"WALK",12.79753441878666,806389238 +2458503,1173905,"school",3,false,3,8000,6000,100798654,"home",,17,"WALK",11.164784288930237,806389239 +2566698,1196298,"othmaint",1,true,1,2000,25000,105234646,"othmaint",,15,"WALK",0.13883447352916709,841877169 +2566698,1196298,"othmaint",1,false,1,25000,2000,105234646,"home",,16,"WALK",0.040885576652949784,841877173 +2566698,1196298,"work",1,true,1,13000,25000,105234657,"work",,7,"WALK",-1.2760866344600816,841877257 +2566698,1196298,"work",1,false,1,25000,13000,105234657,"home",,12,"WALK",-1.441002585134111,841877261 +2566699,1196298,"escort",1,true,1,9000,25000,105234668,"escort",,8,"WALK",6.419710427462857,841877345 +2566699,1196298,"escort",1,false,1,25000,9000,105234668,"home",,9,"WALK",6.335788330611109,841877349 +2566699,1196298,"escort",1,true,1,11000,25000,105234669,"escort",,12,"WALK_LOC",5.288975343463499,841877353 +2566699,1196298,"escort",1,false,1,25000,11000,105234669,"home",,12,"WALK_LOC",4.8173139183499485,841877357 +2566699,1196298,"othdiscr",1,true,1,2000,25000,105234684,"othdiscr",,18,"TNC_SINGLE",0.4693341559425369,841877473 +2566699,1196298,"othdiscr",1,false,1,25000,2000,105234684,"home",,21,"TNC_SINGLE",0.08030571790736232,841877477 +2566699,1196298,"shopping",1,true,1,2000,25000,105234692,"shopping",,9,"WALK",0.2207063663573701,841877537 +2566699,1196298,"shopping",1,false,1,25000,2000,105234692,"home",,10,"WALK",0.06452287253443097,841877541 
+2566700,1196298,"school",1,true,1,7000,25000,105234731,"school",,8,"WALK_LOC",13.12890770099686,841877849 +2566700,1196298,"school",1,false,1,25000,7000,105234731,"home",,15,"WALK",12.572656390759992,841877853 +2566701,1196298,"escort",1,true,1,22000,25000,105234750,"escort",,13,"WALK",1.097784136547436,841878001 +2566701,1196298,"escort",1,false,1,25000,22000,105234750,"home",,13,"WALK",1.005415443289966,841878005 +2566701,1196298,"school",1,true,1,21000,25000,105234772,"school",,7,"WALK",1.2903956070849174,841878177 +2566701,1196298,"school",1,false,1,25000,21000,105234772,"home",,13,"WALK",0.6837244960300443,841878181 +2566702,1196298,"othdiscr",1,true,1,18000,25000,105234807,"othdiscr",,18,"SHARED2FREE",0.48057507554186435,841878457 +2566702,1196298,"othdiscr",1,false,3,6000,18000,105234807,"othdiscr",29.962789198733248,20,"SHARED2FREE",0.19481120111371147,841878461 +2566702,1196298,"othdiscr",2,false,3,7000,6000,105234807,"escort",54.46060393017329,20,"WALK",12.68589169990383,841878462 +2566702,1196298,"othdiscr",3,false,3,25000,7000,105234807,"home",,20,"WALK",13.411567550566136,841878463 +2936848,1286557,"eatout",1,true,1,15000,11000,120410774,"eatout",,13,"WALK",0.38682460851208106,963286193 +2936848,1286557,"eatout",1,false,1,11000,15000,120410774,"home",,17,"WALK",0.3868207009379531,963286197 +2936848,1286557,"othmaint",1,true,3,8000,11000,120410796,"othmaint",38.906052390545284,8,"BIKE",7.868237914778634,963286369 +2936848,1286557,"othmaint",2,true,3,6000,8000,120410796,"othmaint",41.824444468196404,9,"BIKE",8.223732026668186,963286370 +2936848,1286557,"othmaint",3,true,3,7000,6000,120410796,"othmaint",,9,"WALK",9.199284646290385,963286371 +2936848,1286557,"othmaint",1,false,2,8000,7000,120410796,"shopping",38.16816389072188,13,"BIKE",9.122309127594738,963286373 +2936848,1286557,"othmaint",2,false,2,11000,8000,120410796,"home",,13,"BIKE",7.826904862376488,963286374 
+2936848,1286557,"shopping",1,true,1,13000,11000,120410801,"shopping",,18,"TNC_SHARED",-0.5434875036210606,963286409 +2936848,1286557,"shopping",1,false,3,7000,13000,120410801,"othdiscr",31.130726198006485,18,"WALK_LOC",-0.610095374219108,963286413 +2936848,1286557,"shopping",2,false,3,8000,7000,120410801,"escort",53.26307947714522,19,"WALK_LOC",13.353581103659465,963286414 +2936848,1286557,"shopping",3,false,3,11000,8000,120410801,"home",,19,"WALK_LOC",11.81225948785342,963286415 +3061894,1363467,"othmaint",1,true,1,14000,24000,125537682,"othmaint",,8,"WALK_LOC",1.2926814279364989,1004301457 +3061894,1363467,"othmaint",1,false,2,25000,14000,125537682,"shopping",23.27118460644273,15,"WALK_LOC",1.135322813455107,1004301461 +3061894,1363467,"othmaint",2,false,2,24000,25000,125537682,"home",,17,"WALK_LOC",8.162361973873145,1004301462 +3061894,1363467,"shopping",1,true,1,22000,24000,125537687,"shopping",,8,"WALK",1.3300460709588522,1004301497 +3061894,1363467,"shopping",1,false,1,24000,22000,125537687,"home",,8,"WALK",1.6648164345642997,1004301501 +3061895,1363467,"othmaint",1,true,1,9000,24000,125537723,"othmaint",,20,"WALK",7.61165430783887,1004301785 +3061895,1363467,"othmaint",1,false,1,24000,9000,125537723,"home",,20,"WALK_LRF",7.443046334668142,1004301789 +3061895,1363467,"shopping",1,true,2,16000,24000,125537728,"shopping",32.10251685845653,8,"WALK",6.0140004448335835,1004301825 +3061895,1363467,"shopping",2,true,2,16000,16000,125537728,"shopping",,11,"WALK",7.330879678856341,1004301826 +3061895,1363467,"shopping",1,false,1,24000,16000,125537728,"home",,20,"WALK",6.516200014628165,1004301829 +3188483,1402945,"othmaint",1,true,1,9000,25000,130727831,"othmaint",,12,"WALK",4.0387743765186475,1045822649 +3188483,1402945,"othmaint",1,false,4,9000,9000,130727831,"eatout",25.271301119734684,12,"WALK",5.580349147172738,1045822653 +3188483,1402945,"othmaint",2,false,4,25000,9000,130727831,"shopping",26.70578569196363,12,"WALK",3.983411662412696,1045822654 
+3188483,1402945,"othmaint",3,false,4,25000,25000,130727831,"eatout",33.298624921893136,12,"WALK",7.245792741191272,1045822655 +3188483,1402945,"othmaint",4,false,4,25000,25000,130727831,"home",,12,"WALK",7.24592550108157,1045822656 +3188483,1402945,"shopping",1,true,1,12000,25000,130727836,"shopping",,14,"WALK",4.331730075378117,1045822689 +3188483,1402945,"shopping",1,false,1,25000,12000,130727836,"home",,15,"WALK",4.253610414457722,1045822693 +3188484,1402945,"work",1,true,1,11000,25000,130727883,"work",,15,"WALK",2.7519908409931944,1045823065 +3188484,1402945,"work",1,false,1,25000,11000,130727883,"home",,17,"WALK",2.619994257003586,1045823069 +3188485,1402945,"work",1,true,1,7000,25000,130727924,"work",,8,"WALK",10.411761425965901,1045823393 +3188485,1402945,"work",1,false,1,25000,7000,130727924,"home",,18,"WALK",10.266561483041759,1045823397 +3232955,1444715,"escort",1,true,1,8000,14000,132551164,"escort",,17,"WALK",8.966006396739564,1060409313 +3232955,1444715,"escort",1,false,1,14000,8000,132551164,"home",,19,"WALK",9.13332128431259,1060409317 +3232955,1444715,"work",1,true,1,1000,14000,132551194,"work",,6,"WALK",-0.9468267165438904,1060409553 +3232955,1444715,"work",1,false,1,14000,1000,132551194,"home",,11,"WALK",-1.0200727136755097,1060409557 +3232955,1444715,"work",1,true,1,1000,14000,132551195,"work",,13,"WALK",-0.9468284618667532,1060409561 +3232955,1444715,"work",1,false,1,14000,1000,132551195,"home",,16,"WALK",-1.0200756251053722,1060409565 +3233462,1445222,"atwork",1,true,1,15000,16000,132571946,"atwork",,9,"WALK",1.1096520956877534,1060575569 +3233462,1445222,"atwork",1,false,2,7000,15000,132571946,"work",34.31459609686081,9,"WALK",0.6337462548517271,1060575573 +3233462,1445222,"atwork",2,false,2,16000,7000,132571946,"work",,9,"WALK",12.937466290891889,1060575574 +3233462,1445222,"work",1,true,1,16000,17000,132571981,"work",,9,"WALK",5.267781294213782,1060575849 
+3233462,1445222,"work",1,false,4,6000,16000,132571981,"work",32.16677731515871,17,"WALK",4.30790051405025,1060575853 +3233462,1445222,"work",2,false,4,7000,6000,132571981,"othmaint",42.7646950227243,17,"WALK",9.911469636687714,1060575854 +3233462,1445222,"work",3,false,4,7000,7000,132571981,"escort",44.1873922785174,17,"WALK",11.253667681197784,1060575855 +3233462,1445222,"work",4,false,4,17000,7000,132571981,"home",,20,"WALK",9.295722166835667,1060575856 +3328568,1511234,"work",1,true,1,13000,8000,136471327,"work",,8,"WALK_LOC",-0.2435902544788206,1091770617 +3328568,1511234,"work",1,false,2,7000,13000,136471327,"escort",27.606976204031863,16,"WALK",-0.20149926925026043,1091770621 +3328568,1511234,"work",2,false,2,8000,7000,136471327,"home",,22,"WALK",11.478676207869436,1091770622 +3328569,1511234,"univ",1,true,1,9000,8000,136471360,"univ",,8,"WALK_LOC",10.078747551647542,1091770881 +3328569,1511234,"univ",1,false,1,8000,9000,136471360,"home",,16,"WALK",10.077775953315786,1091770885 +3495342,1594621,"atwork",1,true,4,8000,8000,143309026,"escort",55.46266860818287,10,"WALK",12.546654055806371,1146472209 +3495342,1594621,"atwork",2,true,4,7000,8000,143309026,"eatout",55.91434526117385,10,"WALK",14.39426624162804,1146472210 +3495342,1594621,"atwork",3,true,4,7000,7000,143309026,"eatout",56.04404466010437,10,"WALK",14.469466216435773,1146472211 +3495342,1594621,"atwork",4,true,4,8000,7000,143309026,"atwork",,10,"WALK",12.253374125394837,1146472212 +3495342,1594621,"atwork",1,false,1,8000,8000,143309026,"work",,10,"WALK",12.546654055806371,1146472213 +3495342,1594621,"work",1,true,1,8000,10000,143309061,"work",,8,"WALK",10.017940348880348,1146472489 +3495342,1594621,"work",1,false,1,10000,8000,143309061,"home",,17,"WALK",10.028242430555887,1146472493 +3495343,1594621,"shopping",1,true,2,6000,10000,143309096,"eatout",36.78646046022073,15,"WALK",11.585529063330783,1146472769 
+3495343,1594621,"shopping",2,true,2,11000,6000,143309096,"shopping",,15,"WALK",4.505557730035075,1146472770 +3495343,1594621,"shopping",1,false,2,7000,11000,143309096,"shopping",39.48805042360866,16,"WALK",4.706436401588666,1146472773 +3495343,1594621,"shopping",2,false,2,10000,7000,143309096,"home",,16,"WALK",13.533226166426466,1146472774 +3596364,1645132,"univ",1,true,1,10000,9000,147450955,"univ",,11,"WALK",9.217199229897293,1179607641 +3596364,1645132,"univ",1,false,1,9000,10000,147450955,"home",,11,"WALK",9.043759217163936,1179607645 +3596364,1645132,"shopping",1,true,1,11000,9000,147450957,"shopping",,13,"WALK",4.739914821838448,1179607657 +3596364,1645132,"shopping",1,false,1,9000,11000,147450957,"home",,19,"WALK",4.717594809733594,1179607661 +3596365,1645132,"school",1,true,1,8000,9000,147450996,"school",,10,"WALK",11.681775804322669,1179607969 +3596365,1645132,"school",1,false,3,6000,8000,147450996,"shopping",48.18895062075897,17,"WALK",11.145019177550582,1179607973 +3596365,1645132,"school",2,false,3,7000,6000,147450996,"othmaint",53.52462857035891,17,"WALK_LOC",11.8481317290849,1179607974 +3596365,1645132,"school",3,false,3,9000,7000,147450996,"home",,17,"WALK",13.212573351460701,1179607975 +3891102,1747467,"atwork",1,true,1,23000,15000,159535186,"atwork",,10,"WALK",2.3591712055652576,1276281489 +3891102,1747467,"atwork",1,false,2,7000,23000,159535186,"eatout",32.521623686967494,13,"WALK",0.3700954492585565,1276281493 +3891102,1747467,"atwork",2,false,2,15000,7000,159535186,"work",,13,"WALK",13.551266299350594,1276281494 +3891102,1747467,"work",1,true,2,25000,16000,159535221,"escort",22.721184898615036,8,"WALK",9.029527126001836,1276281769 +3891102,1747467,"work",2,true,2,15000,25000,159535221,"work",,10,"WALK",-0.04244247036004735,1276281770 +3891102,1747467,"work",1,false,2,7000,15000,159535221,"shopping",23.79438043021271,17,"WALK",-0.11676435321124025,1276281773 
+3891102,1747467,"work",2,false,2,16000,7000,159535221,"home",,21,"WALK",9.584561404322464,1276281774 +3891104,1747467,"othdiscr",1,true,1,21000,16000,159535289,"othdiscr",,7,"WALK",3.6151228593321267,1276282313 +3891104,1747467,"othdiscr",1,false,1,16000,21000,159535289,"home",,22,"WALK",3.8941204065716084,1276282317 +4171615,1810015,"univ",1,true,1,14000,16000,171036246,"univ",,18,"WALK",0.6566419385905423,1368289969 +4171615,1810015,"univ",1,false,1,16000,14000,171036246,"home",,18,"WALK",0.5048818715898489,1368289973 +4171616,1810015,"shopping",1,true,1,4000,16000,171036289,"shopping",,10,"WALK",-0.0607985292025343,1368290313 +4171616,1810015,"shopping",1,false,1,16000,4000,171036289,"home",,14,"WALK",-0.18980712710891498,1368290317 +4171617,1810015,"atwork",1,true,1,10000,13000,171036301,"atwork",,10,"WALK",8.38017345500569,1368290409 +4171617,1810015,"atwork",1,false,2,6000,10000,171036301,"escort",46.07812183047964,10,"WALK",9.133473345082415,1368290413 +4171617,1810015,"atwork",2,false,2,13000,6000,171036301,"work",,10,"WALK",12.208440928655007,1368290414 +4171617,1810015,"work",1,true,1,13000,16000,171036336,"work",,8,"WALK",-0.8047586364659038,1368290689 +4171617,1810015,"work",1,false,1,16000,13000,171036336,"home",,16,"WALK",-0.8614632110819316,1368290693 +4171619,1810015,"othdiscr",1,true,1,15000,16000,171036404,"othdiscr",,9,"WALK",0.9624566502678611,1368291233 +4171619,1810015,"othdiscr",1,false,1,16000,15000,171036404,"home",,19,"WALK",0.8254128128089915,1368291237 +4171622,1810015,"othmaint",1,true,1,9000,16000,171036530,"othmaint",,11,"WALK",3.66982435165648,1368292241 +4171622,1810015,"othmaint",1,false,1,16000,9000,171036530,"home",,12,"WALK",3.7151463451230744,1368292245 +4823797,1952792,"work",1,true,2,9000,14000,197775716,"work",23.994169577978532,10,"WALK",7.891553908606716,1582205729 +4823797,1952792,"work",2,true,2,2000,9000,197775716,"work",,10,"WALK_LOC",-0.3676272090623946,1582205730 
+4823797,1952792,"work",1,false,1,14000,2000,197775716,"home",,18,"WALK",0.46985768907255554,1582205733 +5057160,2048204,"work",1,true,1,2000,5000,207343599,"work",,6,"BIKE",-0.10838976831540556,1658748793 +5057160,2048204,"work",1,false,1,5000,2000,207343599,"home",,17,"BIKE",-0.08509231386051125,1658748797 +5057338,2048382,"work",1,true,1,9000,7000,207350897,"work",,7,"WALK_LOC",8.758288436944824,1658807177 +5057338,2048382,"work",1,false,1,7000,9000,207350897,"home",,20,"WALK",8.818559157235638,1658807181 +5387762,2223027,"work",1,true,1,2000,9000,220898281,"work",,6,"WALK",0.42431728319721795,1767186249 +5387762,2223027,"work",1,false,1,9000,2000,220898281,"home",,15,"WALK",0.4956959539618453,1767186253 +5387763,2223027,"eatout",1,true,1,5000,9000,220898289,"eatout",,16,"WALK",3.899543591586989,1767186313 +5387763,2223027,"eatout",1,false,1,9000,5000,220898289,"home",,16,"WALK",3.983243122356904,1767186317 +5387763,2223027,"othdiscr",1,true,1,8000,9000,220898308,"othdiscr",,18,"WALK",12.045414088874892,1767186465 +5387763,2223027,"othdiscr",1,false,1,9000,8000,220898308,"home",,18,"WALK",12.045414088874892,1767186469 +5389226,2223759,"work",1,true,1,19000,16000,220958305,"work",,8,"WALK",-2.2159056636781154,1767666441 +5389226,2223759,"work",1,false,1,16000,19000,220958305,"home",,17,"WALK",-2.237678923930952,1767666445 +5389227,2223759,"atwork",1,true,1,11000,2000,220958311,"atwork",,11,"WALK",6.014273924618851,1767666489 +5389227,2223759,"atwork",1,false,1,2000,11000,220958311,"work",,11,"WALK_LOC",6.056702777866963,1767666493 +5389227,2223759,"escort",1,true,2,7000,16000,220958316,"escort",32.62643874211314,17,"WALK_LOC",13.605830939780558,1767666529 +5389227,2223759,"escort",2,true,2,4000,7000,220958316,"escort",,17,"WALK_LOC",0.5052129172906357,1767666530 +5389227,2223759,"escort",1,false,1,16000,4000,220958316,"home",,17,"WALK_LOC",0.57432873159691,1767666533 
+5389227,2223759,"work",1,true,1,2000,16000,220958346,"work",,6,"WALK",-0.3405532662460644,1767666769 +5389227,2223759,"work",1,false,1,16000,2000,220958346,"home",,15,"WALK",-0.4901110240386973,1767666773 +7305540,2727273,"social",1,true,1,6000,20000,299527176,"social",,7,"WALK",5.206370496918408,2396217409 +7305540,2727273,"social",1,false,1,20000,6000,299527176,"home",,7,"WALK",5.033109285371569,2396217413 +7305540,2727273,"social",1,true,1,2000,20000,299527177,"social",,10,"DRIVEALONEFREE",0.54938316132333,2396217417 +7305540,2727273,"social",1,false,2,8000,2000,299527177,"eatout",18.249815215027038,11,"TNC_SINGLE",0.5321554611234836,2396217421 +7305540,2727273,"social",2,false,2,20000,8000,299527177,"home",,11,"WALK",5.391723077362911,2396217422 +7305540,2727273,"work",1,true,1,9000,20000,299527179,"work",,13,"WALK_LOC",8.938638173342552,2396217433 +7305540,2727273,"work",1,false,4,25000,9000,299527179,"shopping",40.267489600527185,13,"WALK_LOC",8.195552357824264,2396217437 +7305540,2727273,"work",2,false,4,7000,25000,299527179,"othmaint",47.488668442258984,16,"WALK_LOC",10.7919949001442,2396217438 +7305540,2727273,"work",3,false,4,7000,7000,299527179,"eatout",48.4246382965432,16,"WALK",11.479761844727252,2396217439 +7305540,2727273,"work",4,false,4,20000,7000,299527179,"home",,16,"WALK_LOC",11.0104368761446,2396217440 +7305541,2727273,"shopping",1,true,1,20000,20000,299527214,"shopping",,18,"TNC_SINGLE",0.5187216440570394,2396217713 +7305541,2727273,"shopping",1,false,1,20000,20000,299527214,"home",,20,"TNC_SINGLE",0.5185992852751763,2396217717 +7305541,2727273,"social",1,true,1,6000,20000,299527217,"social",,17,"WALK",7.004410046180289,2396217737 +7305541,2727273,"social",1,false,1,20000,6000,299527217,"home",,17,"WALK",6.829410156928113,2396217741 +7305541,2727273,"work",1,true,1,2000,20000,299527220,"work",,7,"WALK_LOC",-0.2830046111003284,2396217761 +7305541,2727273,"work",1,false,1,20000,2000,299527220,"home",,15,"WALK",0.44589889706759944,2396217765 
+7453413,2762078,"othmaint",1,true,1,9000,20000,305589961,"othmaint",,11,"WALK_LOC",6.6779817612214325,2444719689 +7453413,2762078,"othmaint",1,false,1,20000,9000,305589961,"home",,14,"WALK_LOC",6.43748275452276,2444719693 +7511873,2820538,"work",1,true,1,13000,8000,307986832,"work",,7,"WALK",-1.5552805729656705,2463894657 +7511873,2820538,"work",1,false,1,8000,13000,307986832,"home",,15,"WALK",-1.714935968586466,2463894661 +7512109,2820774,"work",1,true,1,16000,8000,307996508,"work",,7,"WALK",5.0284500448723515,2463972065 +7512109,2820774,"work",1,false,1,8000,16000,307996508,"home",,18,"WALK",4.961958073640531,2463972069 +7512514,2821179,"work",1,true,1,5000,8000,308013113,"work",,18,"WALK",3.1849050442076052,2464104905 +7512514,2821179,"work",1,false,1,8000,5000,308013113,"home",,21,"WALK",3.1849050440009243,2464104909 +7513432,2822097,"social",1,true,1,4000,8000,308050748,"social",,9,"WALK_LOC",0.4383871632879739,2464405985 +7513432,2822097,"social",1,false,2,7000,4000,308050748,"eatout",24.82423194978245,16,"WALK_LOC",0.470761666015878,2464405989 +7513432,2822097,"social",2,false,2,8000,7000,308050748,"home",,16,"WALK_LOC",9.091035870708279,2464405990 +7513554,2822219,"work",1,true,1,5000,8000,308055753,"work",,10,"WALK",3.1849109536265523,2464446025 +7513554,2822219,"work",1,false,1,8000,5000,308055753,"home",,21,"WALK",3.1849109502813695,2464446029 +7523517,2832182,"shopping",1,true,1,11000,7000,308464230,"shopping",,15,"WALK",4.618728216116123,2467713841 +7523517,2832182,"shopping",1,false,1,7000,11000,308464230,"home",,15,"WALK",4.607627213416084,2467713845 diff --git a/activitysim/examples/placeholder_multiple_zone/test/test_multiple_zone.py b/activitysim/examples/placeholder_multiple_zone/test/test_multiple_zone.py index 3e956301e9..3921bcd0fd 100644 --- a/activitysim/examples/placeholder_multiple_zone/test/test_multiple_zone.py +++ b/activitysim/examples/placeholder_multiple_zone/test/test_multiple_zone.py @@ -39,14 +39,16 @@ def data(): build_data() 
-def run_test(zone, multiprocess=False): +def run_test(zone, multiprocess=False, use_explicit_error_terms=False): def test_path(dirname): return os.path.join(os.path.dirname(__file__), dirname) - def regress(zone): + def regress(zone, use_explicit_error_terms=False): # regress tours regress_tours_df = pd.read_csv( - test_path(f"regress/final_tours_{zone}_zone.csv") + test_path( + f"regress/final{'_eet' if use_explicit_error_terms else ''}_tours_{zone}_zone.csv" + ) ) tours_df = pd.read_csv(test_path("output/final_tours.csv")) tours_df.to_csv( @@ -59,7 +61,9 @@ def regress(zone): # regress trips regress_trips_df = pd.read_csv( - test_path(f"regress/final_trips_{zone}_zone.csv") + test_path( + f"regress/final{'_eet' if use_explicit_error_terms else ''}_trips_{zone}_zone.csv" + ) ) trips_df = pd.read_csv(test_path("output/final_trips.csv")) trips_df.to_csv( @@ -72,7 +76,15 @@ def regress(zone): file_path = os.path.join(os.path.dirname(__file__), "simulation.py") + test_config_files = [] + if use_explicit_error_terms: + test_config_files = [ + "-c", + test_path("configs_eet"), + ] + run_args = [ + *test_config_files, "-c", test_path(f"configs_{zone}_zone"), "-c", @@ -95,7 +107,7 @@ def regress(zone): else: subprocess.run([sys.executable, file_path] + run_args, check=True) - regress(zone) + regress(zone, use_explicit_error_terms=use_explicit_error_terms) def test_2_zone(data): @@ -106,6 +118,14 @@ def test_2_zone_mp(data): run_test(zone="2", multiprocess=True) +def test_2_zone_eet(data): + run_test(zone="2", multiprocess=False, use_explicit_error_terms=True) + + +def test_2_zone_mp_eet(data): + run_test(zone="2", multiprocess=True, use_explicit_error_terms=True) + + def test_3_zone(data): # python simulation.py -c configs_3_zone -c ../configs_3_zone -c \ # ../../prototype_mtc/configs -d ../data_3 -o output -s settings_mp @@ -202,10 +222,53 @@ def test_path(dirname): print(f"> {zone} zone {step_name}: ok") +@test.run_if_exists("reference_pipeline_2_zone_eet.zip") +def 
test_multizone_progressive_eet(): + + import activitysim.abm # register components + + def test_path(dirname): + return os.path.join(os.path.dirname(__file__), dirname) + + state = workflow.State.make_default( + configs_dir=( + test_path(f"configs_eet"), + test_path(f"configs_2_zone"), + example_path(f"configs_2_zone"), + mtc_example_path("configs"), + ), + data_dir=(example_path(f"data_2"),), + output_dir=test_path("output"), + settings_file_name="settings.yaml", + ) + + assert state.settings.models == EXPECTED_MODELS + assert state.settings.chunk_size == 0 + assert state.settings.sharrow == False + assert state.settings.use_explicit_error_terms == True + + for step_name in EXPECTED_MODELS: + state.run.by_name(step_name) + try: + state.checkpoint.check_against( + Path(__file__).parent.joinpath("reference_pipeline_2_zone_eet.zip"), + checkpoint_name=step_name, + ) + except Exception: + print(f"> 2 zone eet {step_name}: ERROR") + raise + else: + print(f"> 2 zone {step_name}: ok") + + if __name__ == "__main__": build_data() + run_test(zone="2", multiprocess=False) run_test(zone="2", multiprocess=True) + run_test(zone="2", multiprocess=False, use_explicit_error_terms=True) + run_test(zone="2", multiprocess=True, use_explicit_error_terms=True) + run_test(zone="3", multiprocess=False) run_test(zone="3", multiprocess=True) diff --git a/activitysim/examples/production_semcog/test/configs_eet/settings.yaml b/activitysim/examples/production_semcog/test/configs_eet/settings.yaml new file mode 100644 index 0000000000..6f6c702453 --- /dev/null +++ b/activitysim/examples/production_semcog/test/configs_eet/settings.yaml @@ -0,0 +1,5 @@ +inherit_settings: True + +use_explicit_error_terms: True + +rng_base_seed: 999 diff --git a/activitysim/examples/production_semcog/test/regress/final_eet_trips.csv b/activitysim/examples/production_semcog/test/regress/final_eet_trips.csv new file mode 100644 index 0000000000..31bb814b35 --- /dev/null +++ 
b/activitysim/examples/production_semcog/test/regress/final_eet_trips.csv @@ -0,0 +1,142 @@ +"person_id","household_id","primary_purpose","trip_num","outbound","trip_count","destination","origin","tour_id","purpose","destination_logsum","original_school_zone_id","parked_at_university","depart","tour_includes_parking","trip_id_pre_parking","trip_mode","mode_choice_logsum","trip_id" +2641718,1070360,"othmaint",1,true,1,22770,22699,108310466,"othmaint",,,false,22,0,866483729,"DRIVEALONE",-1.140203187748608,1732967457 +2641718,1070360,"othmaint",1,false,1,22699,22770,108310466,"home",,,false,27,0,866483733,"DRIVEALONE",-0.96489702194308,1732967465 +2641719,1070360,"eatout",1,true,1,22679,22699,108310485,"eatout",,,false,33,0,866483881,"WALK",0.060023062248146825,1732967761 +2641719,1070360,"eatout",1,false,1,22699,22679,108310485,"home",,,false,36,0,866483885,"DRIVEALONE",0.060023062248146825,1732967769 +2645285,1071806,"escort",1,true,1,22731,22724,108456694,"escort",,,false,30,0,867653553,"SHARED2",0.20596021066115075,1735307105 +2645285,1071806,"escort",1,false,1,22724,22731,108456694,"home",,,false,30,0,867653557,"SHARED2",0.20410476178627868,1735307113 +2645285,1071806,"escort",1,true,1,22724,22724,108456695,"escort",,,false,24,0,867653561,"SHARED3",0.391143445482773,1735307121 +2645285,1071806,"escort",1,false,2,22684,22724,108456695,"othmaint",8.215798131436362,,false,26,0,867653565,"DRIVEALONE",0.22788834489799226,1735307129 +2645285,1071806,"escort",2,false,2,22724,22684,108456695,"home",,,false,26,0,867653566,"SHARED2",0.22409547209460748,1735307130 +2645287,1071806,"school",1,true,2,22724,22724,108456798,"escort",8.50615870894261,,false,8,0,867654385,"SHARED3",0.4507317505936183,1735308769 +2645287,1071806,"school",2,true,2,22694,22724,108456798,"school",,,false,9,0,867654386,"SHARED3",0.04674717158735702,1735308770 
+2645287,1071806,"school",1,false,2,22712,22694,108456798,"eatout",6.599013334372048,,false,25,0,867654389,"SHARED3",0.09035145895224501,1735308777 +2645287,1071806,"school",2,false,2,22724,22712,108456798,"home",,,false,25,0,867654390,"SHARED3",0.2357823544266528,1735308778 +2645287,1071806,"social",1,true,1,22675,22724,108456803,"social",,,false,25,0,867654425,"TNC_SHARED",0.2570159849833101,1735308849 +2645287,1071806,"social",1,false,2,22703,22675,108456803,"eatout",10.213663071613274,,false,37,0,867654429,"WALK",1.432913480161543,1735308857 +2645287,1071806,"social",2,false,2,22724,22703,108456803,"home",,,false,38,0,867654430,"WALK",1.1381875189174673,1735308858 +2671497,1083208,"social",1,true,1,22640,22636,109531357,"social",,,false,30,0,876250857,"SHARED2",-0.11752455779854772,1752501713 +2671497,1083208,"social",1,false,2,22647,22640,109531357,"othmaint",6.525155775915072,,false,35,0,876250861,"SHARED2",0.020891366046659268,1752501721 +2671497,1083208,"social",2,false,2,22636,22647,109531357,"home",,,false,35,0,876250862,"SHARED2",-0.1132362569393691,1752501722 +2671496,1083208,"work",1,true,3,22638,22636,109531375,"escort",9.124449650865401,,false,12,0,876251001,"SHARED3",0.6621622445387698,1752502001 +2671496,1083208,"work",2,true,3,22648,22638,109531375,"social",8.793105919676185,,false,12,0,876251002,"SHARED3",0.5269874496262971,1752502002 +2671496,1083208,"work",3,true,3,22640,22648,109531375,"work",,,false,12,0,876251003,"SHARED2",0.7000230371946444,1752502003 +2671496,1083208,"work",1,false,3,22651,22640,109531375,"othmaint",8.913860316975576,,false,34,0,876251005,"SHARED3",0.6099414926017465,1752502009 +2671496,1083208,"work",2,false,3,22654,22651,109531375,"othdiscr",9.246998716689566,,false,34,0,876251006,"DRIVEALONE",0.7396157840186334,1752502010 +2671496,1083208,"work",3,false,3,22636,22654,109531375,"home",,,false,35,0,876251007,"DRIVEALONE",0.6341748065967693,1752502011 
+2671497,1083208,"escort",1,true,1,22676,22636,109531386,"escort",,,false,25,0,876251089,"DRIVEALONE",-0.32375601792570874,1752502177 +2671497,1083208,"escort",1,false,1,22636,22676,109531386,"home",,,false,28,0,876251093,"SHARED2",-0.39043824860460646,1752502185 +2671497,1083208,"escort",1,true,2,22640,22636,109531387,"escort",7.644412661552843,,false,10,0,876251097,"DRIVEALONE",0.09152200624156465,1752502193 +2671497,1083208,"escort",2,true,2,22646,22640,109531387,"escort",,,false,10,0,876251098,"SHARED2",0.25088781147753203,1752502194 +2671497,1083208,"escort",1,false,4,22659,22646,109531387,"eatout",6.241284873239872,,false,10,0,876251101,"DRIVEALONE",0.17080762729492935,1752502201 +2671497,1083208,"escort",2,false,4,22640,22659,109531387,"othmaint",6.923469348469137,,false,10,0,876251102,"DRIVEALONE",0.16895449265832232,1752502202 +2671497,1083208,"escort",3,false,4,22640,22640,109531387,"escort",7.326257858963803,,false,10,0,876251103,"SHARED2",0.2613961736397559,1752502203 +2671497,1083208,"escort",4,false,4,22636,22640,109531387,"home",,,false,11,0,876251104,"SHARED2",0.08402190585631253,1752502204 +2671498,1083208,"eatout",1,true,1,22637,22636,109531424,"eatout",,,false,27,0,876251393,"SHARED2",0.33534179514695717,1752502785 +2671498,1083208,"eatout",1,false,1,22636,22637,109531424,"home",,,false,28,0,876251397,"WALK",0.33534179514695717,1752502793 +2671498,1083208,"school",1,true,2,22640,22636,109531449,"othdiscr",-27.23581357235839,,false,13,0,876251593,"WALK",-9.620387872938311,1752503185 +2671498,1083208,"school",2,true,2,22639,22640,109531449,"school",,,false,14,0,876251594,"SCHOOLBUS",-1.2411331661037288,1752503186 +2671498,1083208,"school",1,false,1,22636,22639,109531449,"home",,,false,19,0,876251597,"SHARED3",-9.616074820484757,1752503193 +2671499,1083208,"othmaint",1,true,1,22669,22636,109531487,"othmaint",,,false,27,0,876251897,"SHARED2",-0.2790832908867949,1752503793 
+2671499,1083208,"othmaint",1,false,2,22640,22669,109531487,"shopping",5.4285998302697065,,false,29,0,876251901,"SHARED2",-0.09063064588297244,1752503801 +2671499,1083208,"othmaint",2,false,2,22636,22640,109531487,"home",,,false,29,0,876251902,"WALK",-0.11496439975863353,1752503802 +2671499,1083208,"school",1,true,1,22639,22636,109531490,"school",,,false,10,0,876251921,"SHARED2",-0.1392604230329243,1752503841 +2671499,1083208,"school",1,false,1,22636,22639,109531490,"home",,,false,25,0,876251925,"SHARED2",-0.13926563615004,1752503849 +2671499,1083208,"shopping",1,true,1,22636,22636,109531492,"shopping",,,false,29,0,876251937,"WALK",0.3055555487258568,1752503873 +2671499,1083208,"shopping",1,false,1,22636,22636,109531492,"home",,,false,30,0,876251941,"WALK",0.3055555487258568,1752503881 +2671726,1083286,"escort",1,true,1,22676,22638,109540775,"escort",,,false,8,0,876326201,"SHARED2",-0.8036183526914108,1752652401 +2671726,1083286,"escort",1,false,1,22638,22676,109540775,"home",,,false,9,0,876326205,"SHARED2",-0.7709951428237803,1752652409 +2671726,1083286,"escort",1,true,1,22640,22638,109540776,"escort",,,false,25,0,876326209,"SHARED2",-0.06868349722217731,1752652417 +2671726,1083286,"escort",1,false,1,22638,22640,109540776,"home",,,false,27,0,876326213,"SHARED2",-0.008307952019422048,1752652425 +2671793,1083314,"work",1,true,1,22640,22638,109543552,"work",,,false,8,0,876348417,"DRIVEALONE",-0.19870458640890007,1752696833 +2671793,1083314,"work",1,false,1,22638,22640,109543552,"home",,,false,25,0,876348421,"DRIVEALONE",-0.1987082096816311,1752696841 +2852451,1152145,"othdiscr",1,true,1,22771,22767,116950516,"othdiscr",,,false,29,0,935604129,"WALK",1.1686758211930381,1871208257 +2852451,1152145,"othdiscr",1,false,1,22767,22771,116950516,"home",,,false,32,0,935604133,"WALK",1.1686758211930381,1871208265 +2852452,1152145,"othdiscr",1,true,2,22797,22767,116950557,"parking",,,false,12,1,935604457,"DRIVEALONE",-0.41298992250405697,1871208913 
+2852452,1152145,"othdiscr",2,true,2,22808,22797,116950557,"othdiscr",,,true,12,1,935604457,"WALK",5.705347033607136,1871208914 +2852452,1152145,"othdiscr",1,false,1,22767,22808,116950557,"home",,,true,12,1,935604461,"WALK",5.971358671517957,1871208921 +2864392,1158099,"work",1,true,1,22808,22812,117440111,"work",,,false,23,0,939520889,"WALK",4.247342050332629,1879041777 +2864392,1158099,"work",1,false,2,22763,22808,117440111,"othmaint",23.966292625273155,,false,35,0,939520893,"WALK",4.10943031561864,1879041785 +2864392,1158099,"work",2,false,2,22812,22763,117440111,"home",,,false,35,0,939520894,"WALK",4.63116285056561,1879041786 +2871260,1161186,"othdiscr",1,true,1,22766,22737,117721685,"othdiscr",,,false,38,0,941773481,"WALK",-1.244393392405061,1883546961 +2871260,1161186,"othdiscr",1,false,1,22737,22766,117721685,"home",,,false,40,0,941773485,"WALK",-1.2443930347772003,1883546969 +2871260,1161186,"work",1,true,2,22795,22737,117721699,"parking",,,false,8,1,941773593,"SHARED3",0.3864085294818619,1883547185 +2871260,1161186,"work",2,true,2,22795,22795,117721699,"work",,,true,8,1,941773593,"WALK",2.7645040587099827,1883547186 +2871260,1161186,"work",1,false,2,22795,22795,117721699,"parking",,,true,36,1,941773597,"WALK",2.764619810010999,1883547193 +2871260,1161186,"work",2,false,2,22737,22795,117721699,"home",,,false,36,1,941773597,"DRIVEALONE",0.4271626724836496,1883547194 +2871261,1161186,"shopping",1,true,1,22808,22737,117721734,"shopping",,,false,10,0,941773873,"WALK",-0.014414695528787559,1883547745 +2871261,1161186,"shopping",1,false,1,22737,22808,117721734,"home",,,false,14,0,941773877,"WALK",-0.013567832754747558,1883547753 +2871261,1161186,"work",1,true,4,22766,22737,117721740,"parking",,,false,15,1,941773921,"DRIVEALONE",-0.13865483896283062,1883547841 +2871261,1161186,"work",2,true,4,22760,22766,117721740,"escort",7.493345665849475,,true,15,1,941773921,"SHARED2",3.5298489757380187,1883547842 
+2871261,1161186,"work",3,true,4,22766,22760,117721740,"parking",,,true,24,1,941773922,"WALK",1.544964668523534,1883547843 +2871261,1161186,"work",4,true,4,22769,22766,117721740,"work",,,false,24,1,941773922,"DRIVEALONE",-0.6305890985632596,1883547844 +2871261,1161186,"work",1,false,1,22737,22769,117721740,"home",,,false,30,1,941773925,"DRIVEALONE",-0.5039360613382525,1883547849 +2871261,1161186,"work",1,true,1,22769,22737,117721741,"work",,,false,33,0,941773929,"DRIVEALONE",-0.1355725881640941,1883547857 +2871261,1161186,"work",1,false,1,22737,22769,117721741,"home",,,false,34,0,941773933,"DRIVEALONE",-0.1378339185228722,1883547865 +2873206,1162124,"univ",1,true,1,22764,22755,117801477,"univ",,,false,13,0,942411817,"WALK",2.354545772649213,1884823633 +2873206,1162124,"univ",1,false,2,22766,22764,117801477,"univ",18.390760252235403,22766,false,36,0,942411821,"WALK",2.6581997132469217,1884823641 +2873206,1162124,"univ",2,false,2,22755,22766,117801477,"home",,,false,37,0,942411822,"WALK_LOC",2.0012557110881506,1884823642 +2875351,1163196,"shopping",1,true,1,22767,22760,117889424,"shopping",,,false,11,0,943115393,"WALK",2.07708617142782,1886230785 +2875351,1163196,"shopping",1,false,1,22760,22767,117889424,"home",,,false,18,0,943115397,"WALK",2.07708617142782,1886230793 +2875352,1163196,"escort",1,true,1,22757,22760,117889441,"escort",,,false,11,1,943115529,"DRIVEALONE",-0.10256130147625434,1886231057 +2875352,1163196,"escort",1,false,2,22806,22757,117889441,"parking",,,false,14,1,943115533,"SHARED2",-0.2751703037379774,1886231065 +2875352,1163196,"escort",2,false,2,22760,22806,117889441,"home",,,true,14,1,943115533,"WALK_LOC",1.3403997582477214,1886231066 +2875352,1163196,"shopping",1,true,1,22738,22760,117889465,"shopping",,,false,19,0,943115721,"WALK",3.8858778353665366,1886231441 +2875352,1163196,"shopping",1,false,1,22760,22738,117889465,"home",,,false,19,0,943115725,"WALK",3.885954403741643,1886231449 
+2875353,1163196,"shopping",1,true,2,22758,22760,117889506,"othdiscr",9.51576353086904,,false,16,1,943116049,"DRIVEALONE",-0.0677741165736649,1886232097 +2875353,1163196,"shopping",2,true,2,22748,22758,117889506,"shopping",,,false,16,1,943116050,"DRIVEALONE",-0.44315960970425583,1886232098 +2875353,1163196,"shopping",1,false,4,22771,22748,117889506,"othdiscr",8.908743808799336,,false,16,1,943116053,"WALK",-0.39817769201091624,1886232105 +2875353,1163196,"shopping",2,false,4,22796,22771,117889506,"parking",,,false,16,1,943116054,"DRIVEALONE",-0.33754982759871427,1886232106 +2875353,1163196,"shopping",3,false,4,22767,22796,117889506,"eatout",8.839874802018125,,true,16,1,943116054,"WALK",1.1862747408061989,1886232107 +2875353,1163196,"shopping",4,false,4,22760,22767,117889506,"home",,,true,17,1,943116055,"WALK",4.641814432099863,1886232108 +4719663,1938402,"work",1,true,1,22736,22808,193506222,"work",,,false,19,0,1548049777,"WALK_LOC",-2.0359710074337936,3096099553 +4719663,1938402,"work",1,false,3,22810,22736,193506222,"escort",11.259724128977593,,false,43,0,1548049781,"WALK_LOC",-2.2558423520011077,3096099561 +4719663,1938402,"work",2,false,3,22802,22810,193506222,"eatout",23.054434766048672,,false,43,0,1548049782,"WALK",3.6621112648324488,3096099562 +4719663,1938402,"work",3,false,3,22808,22802,193506222,"home",,,false,48,0,1548049783,"WALK",5.515121383512515,3096099563 +4720278,1939017,"eatout",1,true,1,22813,22806,193531404,"eatout",,,false,20,0,1548251233,"WALK",0.6597888893535038,3096502465 +4720278,1939017,"eatout",1,false,1,22806,22813,193531404,"home",,,false,21,0,1548251237,"WALK",0.6597888893535038,3096502473 +4720278,1939017,"othmaint",1,true,3,22763,22806,193531426,"othmaint",28.633259490347385,,false,22,0,1548251409,"WALK",6.507689465729672,3096502817 +4720278,1939017,"othmaint",2,true,3,22771,22763,193531426,"escort",29.07359900863869,,false,22,0,1548251410,"WALK",6.163423552844951,3096502818 
+4720278,1939017,"othmaint",3,true,3,22749,22771,193531426,"othmaint",,,false,22,0,1548251411,"WALK",5.185202759440683,3096502819 +4720278,1939017,"othmaint",1,false,1,22806,22749,193531426,"home",,,false,23,0,1548251413,"WALK",4.596120061006968,3096502825 +4720278,1939017,"univ",1,true,1,22766,22806,193531429,"univ",,,false,23,0,1548251433,"WALK",-0.7025935206727638,3096502865 +4720278,1939017,"univ",1,false,4,22807,22766,193531429,"social",9.78076496454064,,false,47,0,1548251437,"WALK",-0.7025935206727638,3096502873 +4720278,1939017,"univ",2,false,4,22767,22807,193531429,"eatout",15.48110690117488,,false,47,0,1548251438,"WALK",2.329516762852266,3096502874 +4720278,1939017,"univ",3,false,4,22767,22767,193531429,"othdiscr",16.053671837422566,,false,47,0,1548251439,"WALK",2.62825193059268,3096502875 +4720278,1939017,"univ",4,false,4,22806,22767,193531429,"home",,,false,47,0,1548251440,"WALK",1.9818127360573472,3096502876 +4721502,1940241,"shopping",1,true,1,22769,22808,193581615,"shopping",,,false,9,0,1548652921,"WALK",3.83496243140644,3097305841 +4721502,1940241,"shopping",1,false,1,22808,22769,193581615,"home",,,false,10,0,1548652925,"WALK",3.8353544296935214,3097305849 +4721502,1940241,"work",1,true,1,22808,22808,193581621,"work",,,false,11,0,1548652969,"WALK",0.8529159329083985,3097305937 +4721502,1940241,"work",1,false,1,22808,22808,193581621,"home",,,false,32,0,1548652973,"WALK",0.8529159329083985,3097305945 +4722626,1942332,"eatout",1,true,1,22770,22808,193627672,"eatout",,,false,33,0,1549021377,"WALK",0.3756438367025996,3098042753 +4722626,1942332,"eatout",1,false,1,22808,22770,193627672,"home",,,false,33,0,1549021381,"WALK",0.3756438367025996,3098042761 +4722626,1942332,"univ",1,true,1,22766,22808,193627697,"univ",,,false,13,0,1549021577,"WALK_LOC",2.5777493433773446,3098043153 +4722626,1942332,"univ",1,false,2,22766,22766,193627697,"univ",19.19415745805419,22766,false,19,0,1549021581,"WALK",3.0032756156279,3098043161 
+4722626,1942332,"univ",2,false,2,22808,22766,193627697,"home",,,false,19,0,1549021582,"WALK_LOC",2.5753563683300635,3098043162 +4722626,1942332,"univ",1,true,1,22766,22808,193627698,"univ",,,false,21,0,1549021585,"WALK_LOC",2.5777493433773446,3098043169 +4722626,1942332,"univ",1,false,2,22802,22766,193627698,"work",22.78494970140799,,false,31,0,1549021589,"SHARED2",2.334876597361351,3098043177 +4722626,1942332,"univ",2,false,2,22808,22802,193627698,"home",,,false,33,0,1549021590,"WALK",5.827872427744417,3098043178 +4722725,1942431,"univ",1,true,1,22809,22808,193631756,"univ",,,false,14,0,1549054049,"WALK",-0.5105450706807133,3098108097 +4722725,1942431,"univ",1,false,2,22808,22809,193631756,"othmaint",9.947787633937537,,false,35,0,1549054053,"WALK",-0.5105450706807133,3098108105 +4722725,1942431,"univ",2,false,2,22808,22808,193631756,"home",,,false,36,0,1549054054,"WALK",1.2793738993625978,3098108106 +4723873,1943579,"othdiscr",1,true,1,22758,22806,193678818,"othdiscr",,,false,5,0,1549430545,"WALK",-0.4674549419542701,3098861089 +4723873,1943579,"othdiscr",1,false,1,22806,22758,193678818,"home",,,false,11,0,1549430549,"WALK",-0.4674549419542701,3098861097 +4723873,1943579,"univ",1,true,1,22809,22806,193678824,"univ",,,false,11,0,1549430593,"WALK",2.8702590300943704,3098861185 +4723873,1943579,"univ",1,false,3,22766,22809,193678824,"univ",19.287475866389496,22766,false,30,0,1549430597,"WALK_LOC",2.55414420312753,3098861193 +4723873,1943579,"univ",2,false,3,22809,22766,193678824,"univ",19.315901069750137,22809,false,31,0,1549430598,"WALK_LOC",2.5493305710297736,3098861194 +4723873,1943579,"univ",3,false,3,22806,22809,193678824,"home",,,false,31,0,1549430599,"WALK",2.868772523286865,3098861195 +4723873,1943579,"shopping",1,true,1,22767,22806,193678826,"shopping",,,false,35,0,1549430609,"WALK",6.246940717427892,3098861217 +4723873,1943579,"shopping",1,false,2,22807,22767,193678826,"escort",31.1402779873001,,false,36,0,1549430613,"WALK",6.455271888629144,3098861225 
+4723873,1943579,"shopping",2,false,2,22806,22807,193678826,"home",,,false,36,0,1549430614,"WALK",6.639307174934361,3098861226 +4724336,1944042,"univ",1,true,2,22767,22808,193697807,"shopping",9.734777696880263,,false,14,0,1549582457,"BIKE",2.2953691999197856,3099164913 +4724336,1944042,"univ",2,true,2,22809,22767,193697807,"univ",,,false,16,0,1549582458,"BIKE",-0.25259210021960504,3099164914 +4724336,1944042,"univ",1,false,4,22809,22809,193697807,"univ",7.985417484365299,22809,false,27,0,1549582461,"BIKE",-0.005181884649800808,3099164921 +4724336,1944042,"univ",2,false,4,22767,22809,193697807,"escort",10.731901264359964,,false,27,0,1549582462,"BIKE",-0.25259210021960504,3099164922 +4724336,1944042,"univ",3,false,4,22767,22767,193697807,"othmaint",15.899413242262586,,false,27,0,1549582463,"BIKE",2.6046496361983573,3099164923 +4724336,1944042,"univ",4,false,4,22808,22767,193697807,"home",,,false,34,0,1549582464,"BIKE",2.2953691999197856,3099164924 +4728329,1948035,"othdiscr",1,true,1,22765,22808,193861514,"othdiscr",,,false,22,0,1550892113,"WALK",-0.9271455939007041,3101784225 +4728329,1948035,"othdiscr",1,false,1,22808,22765,193861514,"home",,,false,23,0,1550892117,"WALK",-0.9271455939007041,3101784233 +4728329,1948035,"shopping",1,true,1,22767,22808,193861522,"shopping",,,false,28,0,1550892177,"TNC_SHARED",0.5008844483419488,3101784353 +4728329,1948035,"shopping",1,false,1,22808,22767,193861522,"home",,,false,32,0,1550892181,"WALK",0.5015771816517407,3101784361 +4728331,1948037,"univ",1,true,1,22766,22765,193861602,"univ",,,false,13,0,1550892817,"WALK_LOC",2.7152529370724956,3101785633 +4728331,1948037,"univ",1,false,1,22765,22766,193861602,"home",,,false,17,0,1550892821,"WALK",2.714396912840273,3101785641 diff --git a/activitysim/examples/production_semcog/test/test_semcog.py b/activitysim/examples/production_semcog/test/test_semcog.py index e247fd6451..8b77a4e3a4 100644 --- a/activitysim/examples/production_semcog/test/test_semcog.py +++ 
b/activitysim/examples/production_semcog/test/test_semcog.py @@ -11,7 +11,7 @@ from activitysim.core.test._tools import assert_frame_substantively_equal -def run_test_semcog(multiprocess=False): +def run_test_semcog(multiprocess=False, use_explicit_error_terms=False): def example_path(dirname): resource = os.path.join("examples", "production_semcog", dirname) return str(importlib.resources.files("activitysim").joinpath(resource)) @@ -19,9 +19,12 @@ def example_path(dirname): def test_path(dirname): return os.path.join(os.path.dirname(__file__), dirname) - def regress(): + def regress(use_explicit_error_terms=False): regress_trips_df = pd.read_csv( - test_path("regress/final_trips.csv"), dtype={"depart": int} + test_path( + f"regress/final{'_eet' if use_explicit_error_terms else ''}_trips.csv" + ), + dtype={"depart": int}, ) final_trips_df = pd.read_csv( test_path("output/final_trips.csv"), dtype={"depart": int} @@ -30,6 +33,12 @@ def regress(): file_path = os.path.join(os.path.dirname(__file__), "../simulation.py") + test_config_files = [] + if use_explicit_error_terms: + test_config_files = [ + "-c", + test_path("configs_eet"), + ] if multiprocess: subprocess.run( [ @@ -37,6 +46,7 @@ def regress(): "run", "-a", file_path, + *test_config_files, "-c", test_path("configs_mp"), "-c", @@ -59,6 +69,7 @@ def regress(): "run", "-a", file_path, + *test_config_files, "-c", test_path("configs"), "-c", @@ -73,7 +84,7 @@ def regress(): check=True, ) - regress() + regress(use_explicit_error_terms=use_explicit_error_terms) def test_semcog(): @@ -84,6 +95,16 @@ def test_semcog_mp(): run_test_semcog(multiprocess=True) +def test_semcog_eet(): + run_test_semcog(multiprocess=False, use_explicit_error_terms=True) + + +def test_semcog_mp_eet(): + run_test_semcog(multiprocess=True, use_explicit_error_terms=True) + + if __name__ == "__main__": run_test_semcog(multiprocess=False) run_test_semcog(multiprocess=True) + run_test_semcog(multiprocess=False, use_explicit_error_terms=True) + 
run_test_semcog(multiprocess=True, use_explicit_error_terms=True) diff --git a/activitysim/examples/prototype_arc/test/configs_eet/settings.yaml b/activitysim/examples/prototype_arc/test/configs_eet/settings.yaml new file mode 100644 index 0000000000..08c06d702e --- /dev/null +++ b/activitysim/examples/prototype_arc/test/configs_eet/settings.yaml @@ -0,0 +1,3 @@ +inherit_settings: True + +use_explicit_error_terms: True diff --git a/activitysim/examples/prototype_arc/test/regress/final_trips.csv b/activitysim/examples/prototype_arc/test/regress/final_trips.csv index 3cfe9e642a..79f1c9b937 100644 --- a/activitysim/examples/prototype_arc/test/regress/final_trips.csv +++ b/activitysim/examples/prototype_arc/test/regress/final_trips.csv @@ -1,91 +1,91 @@ -trip_id,person_id,household_id,primary_purpose,trip_num,outbound,trip_count,destination,origin,tour_id,purpose,destination_logsum,depart,trip_mode,mode_choice_logsum,parking_zone_id -37314161,113762,42730,othmaint,1,True,1,106,103,4664270,othmaint,,10,DRIVEALONEFREE,-0.3567815721035004,-1 -37314165,113762,42730,othmaint,1,False,1,103,106,4664270,home,,11,DRIVEALONEFREE,-0.356460303068161,-1 -38194977,116448,43843,atwork,1,True,1,106,101,4774372,atwork,,20,DRIVEALONEFREE,-0.3217517137527465,-1 -38194981,116448,43843,atwork,1,False,1,101,106,4774372,work,,21,DRIVEALONEFREE,-0.3217517137527465,-1 -38195065,116449,43843,othdiscr,1,True,1,106,103,4774383,othdiscr,,32,SHARED2FREE,0.7593915111282218,-1 -38195069,116449,43843,othdiscr,1,False,1,103,106,4774383,home,,37,SHARED2FREE,0.7593915111282218,-1 -38195257,116448,43843,work,1,True,2,107,103,4774407,othmaint,9.244319,10,DRIVEALONEFREE,-0.6671370863914491,-1 -38195258,116448,43843,work,2,True,2,101,107,4774407,work,,10,DRIVEALONEFREE,-0.5893840193748475,-1 -38195261,116448,43843,work,1,False,1,103,101,4774407,home,,30,DRIVEALONEFREE,-0.5012716650962832,-1 -38195585,116449,43843,work,1,True,2,106,103,4774448,othmaint,10.644734,12,DRIVEALONEFREE,0.05086306230852542,-1 
-38195586,116449,43843,work,2,True,2,102,106,4774448,work,,13,DRIVEALONEFREE,0.03254505218598833,-1 -38195589,116449,43843,work,1,False,3,103,102,4774448,othmaint,10.796497,23,SHARED2FREE,0.0983521099924028,-1 -38195590,116449,43843,work,2,False,3,103,103,4774448,work,12.367123,24,DRIVEALONEFREE,0.24223826711784288,-1 -38195591,116449,43843,work,3,False,3,103,103,4774448,home,,26,DRIVEALONEFREE,0.2401515927071465,-1 -38195849,116450,43843,school,1,True,1,106,103,4774481,school,,9,SCHOOL_BUS,4.351044654846191,-1 -38195853,116450,43843,school,1,False,1,103,106,4774481,home,,27,SCHOOL_BUS,4.351044654846191,-1 -38195865,116450,43843,shopping,1,True,1,101,103,4774483,shopping,,27,SHARED2FREE,-0.4441019010696936,-1 -38195869,116450,43843,shopping,1,False,1,103,101,4774483,home,,30,SHARED2FREE,-0.45749089544283433,-1 -39613905,120774,45311,atwork,1,True,1,101,102,4951738,atwork,,20,DRIVEALONEFREE,-0.41128289699554443,-1 -39613909,120774,45311,atwork,1,False,1,102,101,4951738,work,,21,DRIVEALONEFREE,-0.4119255244731903,-1 -39614185,120774,45311,work,1,True,2,106,105,4951773,work,10.647319,10,DRIVEALONEFREE,-0.4328329563140868,-1 -39614186,120774,45311,work,2,True,2,102,106,4951773,work,,11,DRIVEALONEFREE,-0.34803289175033575,-1 -39614189,120774,45311,work,1,False,1,105,102,4951773,home,,30,DRIVEALONEFREE,-0.604685664176941,-1 -39614513,120775,45311,work,1,True,1,101,105,4951814,work,,9,DRIVEALONEFREE,-0.6009435653686525,-1 -39614517,120775,45311,work,1,False,3,101,101,4951814,work,10.767546,28,DRIVEALONEFREE,-0.3567099869251252,-1 -39614518,120775,45311,work,2,False,3,107,101,4951814,othmaint,9.370711,28,DRIVEALONEFREE,-0.5956825017929079,-1 -39614519,120775,45311,work,3,False,3,105,107,4951814,home,,29,DRIVEALONEFREE,-0.43356654047966,-1 -40387937,123133,46056,work,1,True,1,106,106,5048492,work,,20,DRIVEALONEFREE,-0.19777289032936102,-1 -40387941,123133,46056,work,1,False,1,106,106,5048492,home,,40,DRIVEALONEFREE,-0.1974023878574371,-1 
-43308361,132037,49258,othmaint,1,True,1,122,110,5413545,othmaint,,23,DRIVEALONEFREE,-0.7390050888061525,-1 -43308365,132037,49258,othmaint,1,False,2,114,122,5413545,eatout,8.7858,24,DRIVEALONEFREE,-0.5175821781158448,-1 -43308366,132037,49258,othmaint,2,False,2,110,114,5413545,home,,24,DRIVEALONEFREE,-0.5938398838043213,-1 -43308537,132038,49258,escort,1,True,1,107,110,5413567,escort,,10,SHARED3FREE,-0.002601420005322437,-1 -43308541,132038,49258,escort,1,False,1,110,107,5413567,home,,22,SHARED3FREE,-0.002601420005322437,-1 -44930737,136983,50912,work,1,True,2,123,112,5616342,eatout,9.353397,31,DRIVEALONEFREE,-0.5493329763412477,-1 -44930738,136983,50912,work,2,True,2,104,123,5616342,work,,32,DRIVEALONEFREE,-0.6666110157966614,-1 -44930741,136983,50912,work,1,False,2,112,104,5616342,social,11.149774,34,DRIVEALONEFREE,-0.5302670001983643,-1 -44930742,136983,50912,work,2,False,2,112,112,5616342,home,,34,DRIVEALONEFREE,-0.18331599235534674,-1 -44931065,136984,50912,work,1,True,2,101,112,5616383,shopping,9.520916,11,DRIVEALONEFREE,-0.6129478216171266,-1 -44931066,136984,50912,work,2,True,2,107,101,5616383,work,,12,DRIVEALONEFREE,-0.6193944811820985,-1 -44931069,136984,50912,work,1,False,3,123,107,5616383,work,10.775923,28,DRIVEALONEFREE,-0.7651270031929017,-1 -44931070,136984,50912,work,2,False,3,104,123,5616383,escort,9.519634,29,DRIVEALONEFREE,-0.6666110157966614,-1 -44931071,136984,50912,work,3,False,3,112,104,5616383,home,,30,DRIVEALONEFREE,-0.5499035120010376,-1 -47621473,145187,53716,othmaint,1,True,3,121,116,5952684,social,9.947862,8,SHARED3FREE,-0.41955729937135083,-1 -47621474,145187,53716,othmaint,2,True,3,112,121,5952684,othmaint,9.261029,11,SHARED3FREE,-0.6422730088233947,-1 -47621475,145187,53716,othmaint,3,True,3,122,112,5952684,othmaint,,11,SHARED3FREE,-0.6419082880020143,-1 -47621477,145187,53716,othmaint,1,False,1,116,122,5952684,home,,20,SHARED3FREE,-0.6134629858242939,-1 
-47621737,145188,53716,escort,1,True,1,114,116,5952717,escort,,29,DRIVEALONEFREE,-0.15083796859645277,-1 -47621741,145188,53716,escort,1,False,1,116,114,5952717,home,,30,SHARED2FREE,-0.15179812895272474,-1 -47622241,145189,53716,school,1,True,1,114,116,5952780,school,,10,SCHOOL_BUS,4.3079237937927255,-1 -47622245,145189,53716,school,1,False,1,116,114,5952780,home,,24,SCHOOL_BUS,4.3079237937927255,-1 -47622569,145190,53716,school,1,True,1,114,116,5952821,school,,9,SHARED2FREE,-0.20617904275545365,-1 -47622573,145190,53716,school,1,False,1,116,114,5952821,home,,24,SHARED2FREE,-0.20568500108204935,-1 -48258513,147129,54342,othdiscr,1,True,1,116,117,6032314,othdiscr,,27,DRIVEALONEFREE,-0.5246167778968812,-1 -48258517,147129,54342,othdiscr,1,False,1,117,116,6032314,home,,33,DRIVEALONEFREE,-0.49120157957077026,-1 -48258537,147129,54342,othmaint,1,True,1,114,117,6032317,othmaint,,34,DRIVEALONEFREE,-0.687132179737091,-1 -48258541,147129,54342,othmaint,1,False,2,114,114,6032317,shopping,9.148774,37,DRIVEALONEFREE,-0.42373609542846685,-1 -48258542,147129,54342,othmaint,2,False,2,117,114,6032317,home,,38,DRIVEALONEFREE,-0.6845617890357972,-1 -56357665,171822,63802,eatout,1,True,1,127,135,7044708,eatout,,31,DRIVEALONEFREE,-0.6526245474815369,-1 -56357669,171822,63802,eatout,1,False,1,135,127,7044708,home,,34,DRIVEALONEFREE,-0.6343104243278503,-1 -56357689,171822,63802,escort,1,True,1,135,135,7044711,escort,,28,SHARED3FREE,0.07706324286670248,-1 -56357693,171822,63802,escort,1,False,2,135,135,7044711,escort,11.356267,28,SHARED3FREE,0.07706324286670248,-1 -56357694,171822,63802,escort,2,False,2,135,135,7044711,home,,28,SHARED3FREE,0.07706324286670248,-1 -56357737,171822,63802,othdiscr,1,True,3,131,135,7044717,othdiscr,12.194779,13,SHARED2FREE,0.599977654783949,-1 -56357738,171822,63802,othdiscr,2,True,3,130,131,7044717,shopping,13.357507,14,SHARED2FREE,0.6200047250329787,-1 -56357739,171822,63802,othdiscr,3,True,3,130,130,7044717,othdiscr,,14,SHARED2FREE,0.6960546579187884,-1 
-56357741,171822,63802,othdiscr,1,False,1,135,130,7044717,home,,14,SHARED2FREE,0.6487159186367744,-1 -56358209,171823,63802,shopping,1,True,4,131,135,7044776,othmaint,10.342613,24,SHARED3FREE,-0.14619837454037923,-1 -56358210,171823,63802,shopping,2,True,4,131,131,7044776,social,12.281772,25,SHARED3FREE,-0.012169709209450414,-1 -56358211,171823,63802,shopping,3,True,4,131,131,7044776,shopping,11.556939,26,SHARED3FREE,-0.012169709209450414,-1 -56358212,171823,63802,shopping,4,True,4,131,131,7044776,shopping,,26,SHARED3FREE,-0.012169709209450414,-1 -56358213,171823,63802,shopping,1,False,1,135,131,7044776,home,,27,DRIVEALONEFREE,-0.15095594351539895,-1 -56358473,171824,63802,othdiscr,1,True,1,131,135,7044809,othdiscr,,32,SHARED2FREE,-0.46024149381952484,-1 -56358477,171824,63802,othdiscr,1,False,1,135,131,7044809,home,,37,SHARED2FREE,-0.45329299190068956,-1 -56358521,171824,63802,school,1,True,2,135,135,7044815,escort,11.635028,10,SHARED2FREE,0.10569338088788001,-1 -56358522,171824,63802,school,2,True,2,135,135,7044815,school,,10,SHARED3FREE,0.10569338088788001,-1 -56358525,171824,63802,school,1,False,2,135,135,7044815,othdiscr,11.906311,25,SHARED3FREE,0.10545807803885715,-1 -56358526,171824,63802,school,2,False,2,135,135,7044815,home,,26,SHARED3FREE,0.10545807803885715,-1 -56358801,171825,63802,othdiscr,1,True,1,131,135,7044850,othdiscr,,29,SHARED3FREE,-0.281769477499857,-1 -56358805,171825,63802,othdiscr,1,False,2,132,131,7044850,social,10.225653,35,SHARED2FREE,-0.20277185632585107,-1 -56358806,171825,63802,othdiscr,2,False,2,135,132,7044850,home,,39,SHARED3FREE,-0.36521793162300004,-1 -56358809,171825,63802,othdiscr,1,True,4,135,135,7044851,othmaint,5.3795877,26,WALK,-0.7460585832595825,-1 -56358810,171825,63802,othdiscr,2,True,4,131,135,7044851,othmaint,5.4266872,27,WALK,-2.0398435592651363,-1 -56358811,171825,63802,othdiscr,3,True,4,130,131,7044851,othmaint,5.7105064,28,WALK,-1.2828608751297,-1 
-56358812,171825,63802,othdiscr,4,True,4,130,130,7044851,othdiscr,,28,WALK,-0.78075897693634,-1 -56358813,171825,63802,othdiscr,1,False,1,135,130,7044851,home,,28,WALK,-1.4660019874572756,-1 -56358849,171825,63802,school,1,True,1,135,135,7044856,school,,9,SHARED3FREE,0.10569338088788001,-1 -56358853,171825,63802,school,1,False,1,135,135,7044856,home,,24,SHARED3FREE,0.10569338088788001,-1 -56359177,171826,63802,school,1,True,1,135,135,7044897,school,,10,SHARED3FREE,0.10569338088788001,-1 -56359181,171826,63802,school,1,False,1,135,135,7044897,home,,22,SHARED3FREE,0.10569338088788001,-1 +"person_id","household_id","primary_purpose","trip_num","outbound","trip_count","destination","origin","tour_id","purpose","destination_logsum","depart","trip_mode","mode_choice_logsum","trip_id" +113762,42730,"othmaint",1,true,1,106,103,4664270,"othmaint",,10,"DRIVEALONEFREE",-0.3567815833091734,37314161 +113762,42730,"othmaint",1,false,1,103,106,4664270,"home",,11,"DRIVEALONEFREE",-0.3564603142738344,37314165 +116448,43843,"atwork",1,true,1,106,101,4774372,"atwork",,20,"DRIVEALONEFREE",-0.3217517094135284,38194977 +116448,43843,"atwork",1,false,1,101,106,4774372,"work",,21,"DRIVEALONEFREE",-0.3217517094135284,38194981 +116449,43843,"othdiscr",1,true,1,106,103,4774383,"othdiscr",,32,"SHARED2FREE",0.7593914979829192,38195065 +116449,43843,"othdiscr",1,false,1,103,106,4774383,"home",,37,"SHARED2FREE",0.7593914979829192,38195069 +116448,43843,"work",1,true,2,107,103,4774407,"othmaint",9.244319214996622,10,"DRIVEALONEFREE",-0.6671371741294861,38195257 +116448,43843,"work",2,true,2,101,107,4774407,"work",,11,"DRIVEALONEFREE",-0.5893840121269226,38195258 +116448,43843,"work",1,false,1,103,101,4774407,"home",,30,"DRIVEALONEFREE",-0.5012717045307159,38195261 +116449,43843,"work",1,true,2,106,103,4774448,"othmaint",10.644734946815246,12,"DRIVEALONEFREE",0.05086305622830629,38195585 +116449,43843,"work",2,true,2,102,106,4774448,"work",,15,"DRIVEALONEFREE",0.03291252148410589,38195586 
+116449,43843,"work",1,false,3,103,102,4774448,"othmaint",10.796498240479236,24,"SHARED2FREE",0.09835208854623434,38195589 +116449,43843,"work",2,false,3,103,103,4774448,"work",12.367122837815295,26,"DRIVEALONEFREE",0.24015159118542195,38195590 +116449,43843,"work",3,false,3,103,103,4774448,"home",,26,"DRIVEALONEFREE",0.24015159118542195,38195591 +116450,43843,"school",1,true,1,106,103,4774481,"school",,9,"SCHOOL_BUS",4.351044797545671,38195849 +116450,43843,"school",1,false,1,103,106,4774481,"home",,27,"SCHOOL_BUS",4.351044797545671,38195853 +116450,43843,"shopping",1,true,1,101,103,4774483,"shopping",,27,"SHARED2FREE",-0.44410188801130307,38195865 +116450,43843,"shopping",1,false,1,103,101,4774483,"home",,30,"SHARED2FREE",-0.4574908823858229,38195869 +120774,45311,"atwork",1,true,1,101,102,4951738,"atwork",,20,"DRIVEALONEFREE",-0.4112828999996184,39613905 +120774,45311,"atwork",1,false,1,102,101,4951738,"work",,21,"DRIVEALONEFREE",-0.4119254976749421,39613909 +120774,45311,"work",1,true,2,106,105,4951773,"work",10.647318549180723,10,"DRIVEALONEFREE",-0.4328329442501069,39614185 +120774,45311,"work",2,true,2,102,106,4951773,"work",,11,"DRIVEALONEFREE",-0.3480328878879547,39614186 +120774,45311,"work",1,false,1,105,102,4951773,"home",,30,"DRIVEALONEFREE",-0.6046856504917144,39614189 +120775,45311,"work",1,true,1,101,105,4951814,"work",,9,"DRIVEALONEFREE",-0.6009435992240908,39614513 +120775,45311,"work",1,false,3,101,101,4951814,"work",10.767545755383658,26,"DRIVEALONEFREE",-0.35670998854637154,39614517 +120775,45311,"work",2,false,3,107,101,4951814,"othmaint",9.370711100307654,26,"DRIVEALONEFREE",-0.5956824945449828,39614518 +120775,45311,"work",3,false,3,105,107,4951814,"home",,29,"DRIVEALONEFREE",-0.43356653208732604,39614519 +123133,46056,"work",1,true,1,106,106,5048492,"work",,20,"DRIVEALONEFREE",-0.19777291302680963,40387937 +123133,46056,"work",1,false,1,106,106,5048492,"home",,40,"DRIVEALONEFREE",-0.1974023956537246,40387941 
+132037,49258,"othmaint",1,true,1,122,110,5413545,"othmaint",,23,"DRIVEALONEFREE",-0.7390051318168641,43308361 +132037,49258,"othmaint",1,false,2,114,122,5413545,"eatout",8.785799297132586,24,"DRIVEALONEFREE",-0.5175821724891663,43308365 +132037,49258,"othmaint",2,false,2,110,114,5413545,"home",,24,"DRIVEALONEFREE",-0.5938398692131043,43308366 +132038,49258,"escort",1,true,1,107,110,5413567,"escort",,10,"SHARED3FREE",-0.002601425939803153,43308537 +132038,49258,"escort",1,false,1,110,107,5413567,"home",,22,"SHARED3FREE",-0.002601425939803153,43308541 +136983,50912,"work",1,true,2,123,112,5616342,"eatout",9.353397383302754,31,"DRIVEALONEFREE",-0.5493329919815063,44930737 +136983,50912,"work",2,true,2,104,123,5616342,"work",,32,"DRIVEALONEFREE",-0.6666110144615174,44930738 +136983,50912,"work",1,false,2,112,104,5616342,"social",11.149774183428809,32,"DRIVEALONEFREE",-0.5499035404682159,44930741 +136983,50912,"work",2,false,2,112,112,5616342,"home",,34,"DRIVEALONEFREE",-0.18331599397659298,44930742 +136984,50912,"work",1,true,2,101,112,5616383,"shopping",9.520915574738705,11,"DRIVEALONEFREE",-0.6129478299617769,44931065 +136984,50912,"work",2,true,2,107,101,5616383,"work",,11,"DRIVEALONEFREE",-0.6193944739341735,44931066 +136984,50912,"work",1,false,3,123,107,5616383,"work",10.775923228059439,27,"DRIVEALONEFREE",-0.7651270068168641,44931069 +136984,50912,"work",2,false,3,104,123,5616383,"escort",9.51963410471578,27,"DRIVEALONEFREE",-0.6666110144615174,44931070 +136984,50912,"work",3,false,3,112,104,5616383,"home",,30,"DRIVEALONEFREE",-0.5499035404682159,44931071 +145187,53716,"othmaint",1,true,3,121,116,5952684,"social",9.947861733897312,8,"SHARED3FREE",-0.41955731333905055,47621473 +145187,53716,"othmaint",2,true,3,112,121,5952684,"othmaint",9.26102871194454,11,"SHARED3FREE",-0.6422730395793916,47621474 +145187,53716,"othmaint",3,true,3,122,112,5952684,"othmaint",,11,"SHARED3FREE",-0.6419082540988923,47621475 
+145187,53716,"othmaint",1,false,1,116,122,5952684,"home",,20,"SHARED3FREE",-0.6134629528820534,47621477 +145188,53716,"escort",1,true,1,114,116,5952717,"escort",,29,"DRIVEALONEFREE",-0.15083797590032313,47621737 +145188,53716,"escort",1,false,1,116,114,5952717,"home",,30,"SHARED2FREE",-0.15179813514136692,47621741 +145189,53716,"school",1,true,1,114,116,5952780,"school",,10,"SCHOOL_BUS",4.3079239998221395,47622241 +145189,53716,"school",1,false,1,116,114,5952780,"home",,24,"SCHOOL_BUS",4.3079239998221395,47622245 +145190,53716,"school",1,true,1,114,116,5952821,"school",,9,"SHARED2FREE",-0.20617904920050897,47622569 +145190,53716,"school",1,false,1,116,114,5952821,"home",,24,"SHARED2FREE",-0.20568500752544042,47622573 +147129,54342,"othdiscr",1,true,1,116,117,6032314,"othdiscr",,27,"DRIVEALONEFREE",-0.5246167486667632,48258513 +147129,54342,"othdiscr",1,false,1,117,116,6032314,"home",,33,"DRIVEALONEFREE",-0.4912015503406525,48258517 +147129,54342,"othmaint",1,true,1,114,117,6032317,"othmaint",,34,"DRIVEALONEFREE",-0.6871321834564209,48258537 +147129,54342,"othmaint",1,false,2,114,114,6032317,"shopping",9.148774093624228,37,"DRIVEALONEFREE",-0.4237361037254333,48258541 +147129,54342,"othmaint",2,false,2,117,114,6032317,"home",,38,"DRIVEALONEFREE",-0.6845617927551271,48258542 +171822,63802,"eatout",1,true,1,127,135,7044708,"eatout",,31,"DRIVEALONEFREE",-0.652624578666687,56357665 +171822,63802,"eatout",1,false,1,135,127,7044708,"home",,34,"DRIVEALONEFREE",-0.6343104555130004,56357669 +171822,63802,"escort",1,true,1,135,135,7044711,"escort",,28,"SHARED3FREE",0.07706324792840326,56357689 +171822,63802,"escort",1,false,2,135,135,7044711,"escort",11.356267091092906,28,"SHARED3FREE",0.07706324792840326,56357693 +171822,63802,"escort",2,false,2,135,135,7044711,"home",,28,"SHARED3FREE",0.07706324792840326,56357694 +171822,63802,"othdiscr",1,true,3,131,135,7044717,"othdiscr",12.194779637866755,13,"SHARED2FREE",0.5999776535886836,56357737 
+171822,63802,"othdiscr",2,true,3,130,131,7044717,"shopping",13.357506128369907,13,"SHARED2FREE",0.620004705610611,56357738 +171822,63802,"othdiscr",3,true,3,130,130,7044717,"othdiscr",,13,"SHARED2FREE",0.6960546331136191,56357739 +171822,63802,"othdiscr",1,false,1,135,130,7044717,"home",,14,"SHARED2FREE",0.6487159219305315,56357741 +171823,63802,"shopping",1,true,4,131,135,7044776,"othmaint",10.342612763246748,24,"SHARED3FREE",-0.1461983375796215,56358209 +171823,63802,"shopping",2,true,4,131,131,7044776,"social",12.281771137855209,25,"SHARED3FREE",-0.01216970989402637,56358210 +171823,63802,"shopping",3,true,4,131,131,7044776,"shopping",11.556938954932807,26,"SHARED3FREE",-0.01216970989402637,56358211 +171823,63802,"shopping",4,true,4,131,131,7044776,"shopping",,26,"SHARED3FREE",-0.01216970989402637,56358212 +171823,63802,"shopping",1,false,1,135,131,7044776,"home",,27,"DRIVEALONEFREE",-0.1509559426724583,56358213 +171824,63802,"othdiscr",1,true,1,131,135,7044809,"othdiscr",,32,"SHARED2FREE",-0.4602414968534174,56358473 +171824,63802,"othdiscr",1,false,1,135,131,7044809,"home",,37,"SHARED2FREE",-0.45329299490631025,56358477 +171824,63802,"school",1,true,2,135,135,7044815,"escort",11.63502708221396,10,"SHARED2FREE",0.10569338295856193,56358521 +171824,63802,"school",2,true,2,135,135,7044815,"school",,11,"SHARED3FREE",0.10569338295856193,56358522 +171824,63802,"school",1,false,2,135,135,7044815,"othdiscr",11.906310169709501,25,"SHARED3FREE",0.10545807870230886,56358525 +171824,63802,"school",2,false,2,135,135,7044815,"home",,26,"SHARED3FREE",0.10545807870230886,56358526 +171825,63802,"othdiscr",1,true,1,131,135,7044850,"othdiscr",,29,"SHARED3FREE",-0.2817694611797112,56358801 +171825,63802,"othdiscr",1,false,2,132,131,7044850,"social",10.225652936631004,39,"SHARED2FREE",-0.20277182093145324,56358805 +171825,63802,"othdiscr",2,false,2,135,132,7044850,"home",,39,"SHARED3FREE",-0.36521793543833225,56358806 
+171825,63802,"othdiscr",1,true,4,135,135,7044851,"othmaint",5.3795880164987455,26,"WALK",-0.7460586428642273,56358809 +171825,63802,"othdiscr",2,true,4,131,135,7044851,"othmaint",5.426687572911977,27,"WALK",-2.039843797683716,56358810 +171825,63802,"othdiscr",3,true,4,130,131,7044851,"othmaint",5.710506288696134,28,"WALK",-1.2828608453273775,56358811 +171825,63802,"othdiscr",4,true,4,130,130,7044851,"othdiscr",,28,"WALK",-0.7807589769363404,56358812 +171825,63802,"othdiscr",1,false,1,135,130,7044851,"home",,28,"WALK",-1.4660018980503084,56358813 +171825,63802,"school",1,true,1,135,135,7044856,"school",,9,"SHARED3FREE",0.10569338295856193,56358849 +171825,63802,"school",1,false,1,135,135,7044856,"home",,24,"SHARED3FREE",0.10569338295856193,56358853 +171826,63802,"school",1,true,1,135,135,7044897,"school",,10,"SHARED3FREE",0.10569338295856193,56359177 +171826,63802,"school",1,false,1,135,135,7044897,"home",,22,"SHARED3FREE",0.10569338295856193,56359181 diff --git a/activitysim/examples/prototype_arc/test/regress/final_trips_eet.csv b/activitysim/examples/prototype_arc/test/regress/final_trips_eet.csv new file mode 100644 index 0000000000..629cd63276 --- /dev/null +++ b/activitysim/examples/prototype_arc/test/regress/final_trips_eet.csv @@ -0,0 +1,69 @@ +"person_id","household_id","primary_purpose","trip_num","outbound","trip_count","destination","origin","tour_id","purpose","destination_logsum","depart","trip_mode","mode_choice_logsum","trip_id" +116448,43843,"work",1,true,2,103,103,4774407,"othmaint",3.5265435605629167,10,"WALK",-0.9040200710296631,38195257 +116448,43843,"work",2,true,2,103,103,4774407,"work",,15,"WALK",-0.9040200710296631,38195258 +116448,43843,"work",1,false,2,103,103,4774407,"othmaint",3.537335211563138,25,"WALK",-0.9040200710296631,38195261 +116448,43843,"work",2,false,2,103,103,4774407,"home",,29,"WALK",-0.9040200710296631,38195262 +116449,43843,"work",1,true,1,119,103,4774448,"work",,10,"SHARED2FREE",-0.6957122303793248,38195585 
+116449,43843,"work",1,false,1,103,119,4774448,"home",,32,"DRIVEALONEFREE",-0.7307911775419529,38195589 +116450,43843,"school",1,true,2,124,103,4774481,"escort",7.408917184315112,10,"SHARED3FREE",-0.18503856868335017,38195849 +116450,43843,"school",2,true,2,124,124,4774481,"school",,10,"SHARED2FREE",0.13215486408443475,38195850 +116450,43843,"school",1,false,1,103,124,4774481,"home",,26,"SHARED3FREE",-0.19368817121573698,38195853 +120774,45311,"atwork",1,true,2,107,111,4951738,"work",7.856506518743053,18,"SHARED2FREE",-0.3133887920340869,39613905 +120774,45311,"atwork",2,true,2,101,107,4951738,"atwork",,18,"SHARED2FREE",-0.4146300033565298,39613906 +120774,45311,"atwork",1,false,1,111,101,4951738,"work",,21,"SHARED2FREE",-0.47849444235549743,39613909 +120774,45311,"work",1,true,1,111,105,4951773,"work",,8,"SHARED3FREE",0.15709575819640942,39614185 +120774,45311,"work",1,false,1,105,111,4951773,"home",,29,"DRIVEALONEFREE",0.15635853653623902,39614189 +120775,45311,"shopping",1,true,1,101,105,4951808,"shopping",,19,"DRIVEALONEFREE",-0.5635545263290407,39614465 +120775,45311,"shopping",1,false,1,105,101,4951808,"home",,26,"DRIVEALONEFREE",-0.5825112173557282,39614469 +120775,45311,"work",1,true,1,122,105,4951814,"work",,11,"DRIVEALONEFREE",-0.6927164334297179,39614513 +120775,45311,"work",1,false,1,105,122,4951814,"home",,19,"DRIVEALONEFREE",-0.6686710088729859,39614517 +123132,46056,"atwork",1,true,2,101,101,5048416,"othdiscr",6.864256935993293,20,"SHARED2FREE",-0.1819504659032103,40387329 +123132,46056,"atwork",2,true,2,101,101,5048416,"atwork",,21,"SHARED2FREE",-0.1819504659032103,40387330 +123132,46056,"atwork",1,false,1,101,101,5048416,"work",,22,"SHARED2FREE",-0.1819504659032103,40387333 +123132,46056,"work",1,true,2,101,106,5048451,"social",9.575912097245201,11,"DRIVEALONEFREE",0.06386075688245799,40387609 +123132,46056,"work",2,true,2,101,101,5048451,"work",,11,"SHARED2FREE",0.07747391487776138,40387610 
+123132,46056,"work",1,false,4,101,101,5048451,"othmaint",7.912927519210828,30,"DRIVEALONEFREE",0.08966218523375064,40387613 +123132,46056,"work",2,false,4,101,101,5048451,"othmaint",7.8924110313009,30,"SHARED2FREE",0.08966218523375064,40387614 +123132,46056,"work",3,false,4,101,101,5048451,"eatout",7.830041437016489,30,"SHARED2FREE",0.08966218523375064,40387615 +123132,46056,"work",4,false,4,106,101,5048451,"home",,35,"SHARED2FREE",0.06386075688245799,40387616 +136983,50912,"atwork",1,true,1,132,128,5616307,"atwork",,19,"DRIVEALONEFREE",-0.5008556391239165,44930457 +136983,50912,"atwork",1,false,1,128,132,5616307,"work",,21,"DRIVEALONEFREE",-0.5011769677639006,44930461 +136983,50912,"work",1,true,1,128,112,5616342,"work",,8,"DRIVEALONEFREE",-1.3108586753845215,44930737 +136983,50912,"work",1,false,4,104,128,5616342,"eatout",4.376898428790465,26,"DRIVEALONEFREE",-1.4459948724746707,44930741 +136983,50912,"work",2,false,4,114,104,5616342,"work",7.30416619728054,26,"DRIVEALONEFREE",-0.645403600692749,44930742 +136983,50912,"work",3,false,4,107,114,5616342,"eatout",5.801076923906916,26,"DRIVEALONEFREE",-0.7406740243911744,44930743 +136983,50912,"work",4,false,4,112,107,5616342,"home",,31,"DRIVEALONEFREE",-0.4455707152366638,44930744 +136984,50912,"atwork",1,true,1,100,104,5616344,"atwork",,19,"DRIVEALONEFREE",-0.5630687888145448,44930753 +136984,50912,"atwork",1,false,1,104,100,5616344,"work",,20,"DRIVEALONEFREE",-0.569428646659851,44930757 +136984,50912,"atwork",1,true,1,122,104,5616348,"atwork",,20,"DRIVEALONEFREE",-0.7160756511688232,44930785 +136984,50912,"atwork",1,false,1,104,122,5616348,"work",,21,"DRIVEALONEFREE",-0.7157543225288391,44930789 +136984,50912,"work",1,true,1,104,112,5616383,"work",,17,"DRIVEALONEFREE",-0.5361950384140016,44931065 +136984,50912,"work",1,false,1,112,104,5616383,"home",,32,"DRIVEALONEFREE",-0.5499035404682159,44931069 +145188,53716,"othdiscr",1,true,1,121,116,5952733,"othdiscr",,26,"WALK",-1.9421684741973877,47621865 
+145188,53716,"othdiscr",1,false,1,116,121,5952733,"home",,39,"WALK",-1.9421684741973877,47621869 +145188,53716,"shopping",1,true,2,104,116,5952741,"escort",6.936564611277708,24,"DRIVEALONEFREE",-0.35927561314773726,47621929 +145188,53716,"shopping",2,true,2,122,104,5952741,"shopping",,24,"DRIVEALONEFREE",-0.2772136905975358,47621930 +145188,53716,"shopping",1,false,1,116,122,5952741,"home",,25,"DRIVEALONEFREE",-0.18333346470683046,47621933 +147129,54342,"atwork",1,true,2,118,118,6032293,"work",8.812717247468496,24,"SHARED3FREE",-0.145853673602041,48258345 +147129,54342,"atwork",2,true,2,118,118,6032293,"atwork",,25,"WALK",-0.14614682859354086,48258346 +147129,54342,"atwork",1,false,1,118,118,6032293,"work",,27,"SHARED3FREE",-0.14614682859354086,48258349 +147129,54342,"work",1,true,1,118,117,6032328,"work",,24,"DRIVEALONEFREE",0.20514075467216628,48258625 +147129,54342,"work",1,false,2,121,118,6032328,"othmaint",7.591255086332629,45,"DRIVEALONEFREE",-0.05268563765149335,48258629 +147129,54342,"work",2,false,2,117,121,6032328,"home",,46,"SHARED3FREE",-0.02237034338193105,48258630 +168909,62701,"othmaint",1,true,1,131,131,6925297,"othmaint",,25,"WALK",-0.5552063584327697,55402377 +168909,62701,"othmaint",1,false,1,131,131,6925297,"home",,28,"WALK",-0.5552063584327697,55402381 +171822,63802,"shopping",1,true,1,130,135,7044721,"shopping",,29,"SHARED3FREE",-0.3938452305023584,56357769 +171822,63802,"shopping",1,false,3,135,130,7044721,"othmaint",5.98668616675203,30,"SHARED3FREE",-0.39446175334787803,56357773 +171822,63802,"shopping",2,false,3,135,135,7044721,"othmaint",6.756477914457603,30,"SHARED3FREE",-0.2052835117202649,56357774 +171822,63802,"shopping",3,false,3,135,135,7044721,"home",,31,"SHARED3FREE",-0.2052835117202649,56357775 +171823,63802,"escort",1,true,1,135,135,7044752,"escort",,27,"SHARED2FREE",0.19489390061694092,56358017 +171823,63802,"escort",1,false,2,128,135,7044752,"othmaint",8.083601093614876,27,"DRIVEALONEFREE",-0.1812426477593491,56358021 
+171823,63802,"escort",2,false,2,135,128,7044752,"home",,27,"DRIVEALONEFREE",-0.18540377166841351,56358022 +171823,63802,"escort",1,true,1,135,135,7044753,"escort",,10,"DRIVEALONEFREE",-0.27614978880882257,56358025 +171823,63802,"escort",1,false,3,135,135,7044753,"escort",6.748884636203162,10,"DRIVEALONEFREE",-0.27614978880882257,56358029 +171823,63802,"escort",2,false,3,135,135,7044753,"eatout",6.425456017109895,10,"DRIVEALONEFREE",-0.27614978880882257,56358030 +171823,63802,"escort",3,false,3,135,135,7044753,"home",,11,"DRIVEALONEFREE",-0.27614978880882257,56358031 +171824,63802,"school",1,true,1,135,135,7044815,"school",,10,"SCHOOL_BUS",4.381191198921203,56358521 +171824,63802,"school",1,false,1,135,135,7044815,"home",,25,"SCHOOL_BUS",4.381191198921203,56358525 +171826,63802,"school",1,true,1,135,135,7044897,"school",,8,"SCHOOL_BUS",4.381191198921203,56359177 +171826,63802,"school",1,false,1,135,135,7044897,"home",,24,"SCHOOL_BUS",4.381191198921203,56359181 diff --git a/activitysim/examples/prototype_arc/test/regress/final_trips_sh.csv b/activitysim/examples/prototype_arc/test/regress/final_trips_sh.csv index 3cfe9e642a..0bd93ac3e8 100644 --- a/activitysim/examples/prototype_arc/test/regress/final_trips_sh.csv +++ b/activitysim/examples/prototype_arc/test/regress/final_trips_sh.csv @@ -1,91 +1,91 @@ -trip_id,person_id,household_id,primary_purpose,trip_num,outbound,trip_count,destination,origin,tour_id,purpose,destination_logsum,depart,trip_mode,mode_choice_logsum,parking_zone_id -37314161,113762,42730,othmaint,1,True,1,106,103,4664270,othmaint,,10,DRIVEALONEFREE,-0.3567815721035004,-1 -37314165,113762,42730,othmaint,1,False,1,103,106,4664270,home,,11,DRIVEALONEFREE,-0.356460303068161,-1 -38194977,116448,43843,atwork,1,True,1,106,101,4774372,atwork,,20,DRIVEALONEFREE,-0.3217517137527465,-1 -38194981,116448,43843,atwork,1,False,1,101,106,4774372,work,,21,DRIVEALONEFREE,-0.3217517137527465,-1 
-38195065,116449,43843,othdiscr,1,True,1,106,103,4774383,othdiscr,,32,SHARED2FREE,0.7593915111282218,-1 -38195069,116449,43843,othdiscr,1,False,1,103,106,4774383,home,,37,SHARED2FREE,0.7593915111282218,-1 -38195257,116448,43843,work,1,True,2,107,103,4774407,othmaint,9.244319,10,DRIVEALONEFREE,-0.6671370863914491,-1 -38195258,116448,43843,work,2,True,2,101,107,4774407,work,,10,DRIVEALONEFREE,-0.5893840193748475,-1 -38195261,116448,43843,work,1,False,1,103,101,4774407,home,,30,DRIVEALONEFREE,-0.5012716650962832,-1 -38195585,116449,43843,work,1,True,2,106,103,4774448,othmaint,10.644734,12,DRIVEALONEFREE,0.05086306230852542,-1 -38195586,116449,43843,work,2,True,2,102,106,4774448,work,,13,DRIVEALONEFREE,0.03254505218598833,-1 -38195589,116449,43843,work,1,False,3,103,102,4774448,othmaint,10.796497,23,SHARED2FREE,0.0983521099924028,-1 -38195590,116449,43843,work,2,False,3,103,103,4774448,work,12.367123,24,DRIVEALONEFREE,0.24223826711784288,-1 -38195591,116449,43843,work,3,False,3,103,103,4774448,home,,26,DRIVEALONEFREE,0.2401515927071465,-1 -38195849,116450,43843,school,1,True,1,106,103,4774481,school,,9,SCHOOL_BUS,4.351044654846191,-1 -38195853,116450,43843,school,1,False,1,103,106,4774481,home,,27,SCHOOL_BUS,4.351044654846191,-1 -38195865,116450,43843,shopping,1,True,1,101,103,4774483,shopping,,27,SHARED2FREE,-0.4441019010696936,-1 -38195869,116450,43843,shopping,1,False,1,103,101,4774483,home,,30,SHARED2FREE,-0.45749089544283433,-1 -39613905,120774,45311,atwork,1,True,1,101,102,4951738,atwork,,20,DRIVEALONEFREE,-0.41128289699554443,-1 -39613909,120774,45311,atwork,1,False,1,102,101,4951738,work,,21,DRIVEALONEFREE,-0.4119255244731903,-1 -39614185,120774,45311,work,1,True,2,106,105,4951773,work,10.647319,10,DRIVEALONEFREE,-0.4328329563140868,-1 -39614186,120774,45311,work,2,True,2,102,106,4951773,work,,11,DRIVEALONEFREE,-0.34803289175033575,-1 -39614189,120774,45311,work,1,False,1,105,102,4951773,home,,30,DRIVEALONEFREE,-0.604685664176941,-1 
-39614513,120775,45311,work,1,True,1,101,105,4951814,work,,9,DRIVEALONEFREE,-0.6009435653686525,-1 -39614517,120775,45311,work,1,False,3,101,101,4951814,work,10.767546,28,DRIVEALONEFREE,-0.3567099869251252,-1 -39614518,120775,45311,work,2,False,3,107,101,4951814,othmaint,9.370711,28,DRIVEALONEFREE,-0.5956825017929079,-1 -39614519,120775,45311,work,3,False,3,105,107,4951814,home,,29,DRIVEALONEFREE,-0.43356654047966,-1 -40387937,123133,46056,work,1,True,1,106,106,5048492,work,,20,DRIVEALONEFREE,-0.19777289032936102,-1 -40387941,123133,46056,work,1,False,1,106,106,5048492,home,,40,DRIVEALONEFREE,-0.1974023878574371,-1 -43308361,132037,49258,othmaint,1,True,1,122,110,5413545,othmaint,,23,DRIVEALONEFREE,-0.7390050888061525,-1 -43308365,132037,49258,othmaint,1,False,2,114,122,5413545,eatout,8.7858,24,DRIVEALONEFREE,-0.5175821781158448,-1 -43308366,132037,49258,othmaint,2,False,2,110,114,5413545,home,,24,DRIVEALONEFREE,-0.5938398838043213,-1 -43308537,132038,49258,escort,1,True,1,107,110,5413567,escort,,10,SHARED3FREE,-0.002601420005322437,-1 -43308541,132038,49258,escort,1,False,1,110,107,5413567,home,,22,SHARED3FREE,-0.002601420005322437,-1 -44930737,136983,50912,work,1,True,2,123,112,5616342,eatout,9.353397,31,DRIVEALONEFREE,-0.5493329763412477,-1 -44930738,136983,50912,work,2,True,2,104,123,5616342,work,,32,DRIVEALONEFREE,-0.6666110157966614,-1 -44930741,136983,50912,work,1,False,2,112,104,5616342,social,11.149774,34,DRIVEALONEFREE,-0.5302670001983643,-1 -44930742,136983,50912,work,2,False,2,112,112,5616342,home,,34,DRIVEALONEFREE,-0.18331599235534674,-1 -44931065,136984,50912,work,1,True,2,101,112,5616383,shopping,9.520916,11,DRIVEALONEFREE,-0.6129478216171266,-1 -44931066,136984,50912,work,2,True,2,107,101,5616383,work,,12,DRIVEALONEFREE,-0.6193944811820985,-1 -44931069,136984,50912,work,1,False,3,123,107,5616383,work,10.775923,28,DRIVEALONEFREE,-0.7651270031929017,-1 
-44931070,136984,50912,work,2,False,3,104,123,5616383,escort,9.519634,29,DRIVEALONEFREE,-0.6666110157966614,-1 -44931071,136984,50912,work,3,False,3,112,104,5616383,home,,30,DRIVEALONEFREE,-0.5499035120010376,-1 -47621473,145187,53716,othmaint,1,True,3,121,116,5952684,social,9.947862,8,SHARED3FREE,-0.41955729937135083,-1 -47621474,145187,53716,othmaint,2,True,3,112,121,5952684,othmaint,9.261029,11,SHARED3FREE,-0.6422730088233947,-1 -47621475,145187,53716,othmaint,3,True,3,122,112,5952684,othmaint,,11,SHARED3FREE,-0.6419082880020143,-1 -47621477,145187,53716,othmaint,1,False,1,116,122,5952684,home,,20,SHARED3FREE,-0.6134629858242939,-1 -47621737,145188,53716,escort,1,True,1,114,116,5952717,escort,,29,DRIVEALONEFREE,-0.15083796859645277,-1 -47621741,145188,53716,escort,1,False,1,116,114,5952717,home,,30,SHARED2FREE,-0.15179812895272474,-1 -47622241,145189,53716,school,1,True,1,114,116,5952780,school,,10,SCHOOL_BUS,4.3079237937927255,-1 -47622245,145189,53716,school,1,False,1,116,114,5952780,home,,24,SCHOOL_BUS,4.3079237937927255,-1 -47622569,145190,53716,school,1,True,1,114,116,5952821,school,,9,SHARED2FREE,-0.20617904275545365,-1 -47622573,145190,53716,school,1,False,1,116,114,5952821,home,,24,SHARED2FREE,-0.20568500108204935,-1 -48258513,147129,54342,othdiscr,1,True,1,116,117,6032314,othdiscr,,27,DRIVEALONEFREE,-0.5246167778968812,-1 -48258517,147129,54342,othdiscr,1,False,1,117,116,6032314,home,,33,DRIVEALONEFREE,-0.49120157957077026,-1 -48258537,147129,54342,othmaint,1,True,1,114,117,6032317,othmaint,,34,DRIVEALONEFREE,-0.687132179737091,-1 -48258541,147129,54342,othmaint,1,False,2,114,114,6032317,shopping,9.148774,37,DRIVEALONEFREE,-0.42373609542846685,-1 -48258542,147129,54342,othmaint,2,False,2,117,114,6032317,home,,38,DRIVEALONEFREE,-0.6845617890357972,-1 -56357665,171822,63802,eatout,1,True,1,127,135,7044708,eatout,,31,DRIVEALONEFREE,-0.6526245474815369,-1 -56357669,171822,63802,eatout,1,False,1,135,127,7044708,home,,34,DRIVEALONEFREE,-0.6343104243278503,-1 
-56357689,171822,63802,escort,1,True,1,135,135,7044711,escort,,28,SHARED3FREE,0.07706324286670248,-1 -56357693,171822,63802,escort,1,False,2,135,135,7044711,escort,11.356267,28,SHARED3FREE,0.07706324286670248,-1 -56357694,171822,63802,escort,2,False,2,135,135,7044711,home,,28,SHARED3FREE,0.07706324286670248,-1 -56357737,171822,63802,othdiscr,1,True,3,131,135,7044717,othdiscr,12.194779,13,SHARED2FREE,0.599977654783949,-1 -56357738,171822,63802,othdiscr,2,True,3,130,131,7044717,shopping,13.357507,14,SHARED2FREE,0.6200047250329787,-1 -56357739,171822,63802,othdiscr,3,True,3,130,130,7044717,othdiscr,,14,SHARED2FREE,0.6960546579187884,-1 -56357741,171822,63802,othdiscr,1,False,1,135,130,7044717,home,,14,SHARED2FREE,0.6487159186367744,-1 -56358209,171823,63802,shopping,1,True,4,131,135,7044776,othmaint,10.342613,24,SHARED3FREE,-0.14619837454037923,-1 -56358210,171823,63802,shopping,2,True,4,131,131,7044776,social,12.281772,25,SHARED3FREE,-0.012169709209450414,-1 -56358211,171823,63802,shopping,3,True,4,131,131,7044776,shopping,11.556939,26,SHARED3FREE,-0.012169709209450414,-1 -56358212,171823,63802,shopping,4,True,4,131,131,7044776,shopping,,26,SHARED3FREE,-0.012169709209450414,-1 -56358213,171823,63802,shopping,1,False,1,135,131,7044776,home,,27,DRIVEALONEFREE,-0.15095594351539895,-1 -56358473,171824,63802,othdiscr,1,True,1,131,135,7044809,othdiscr,,32,SHARED2FREE,-0.46024149381952484,-1 -56358477,171824,63802,othdiscr,1,False,1,135,131,7044809,home,,37,SHARED2FREE,-0.45329299190068956,-1 -56358521,171824,63802,school,1,True,2,135,135,7044815,escort,11.635028,10,SHARED2FREE,0.10569338088788001,-1 -56358522,171824,63802,school,2,True,2,135,135,7044815,school,,10,SHARED3FREE,0.10569338088788001,-1 -56358525,171824,63802,school,1,False,2,135,135,7044815,othdiscr,11.906311,25,SHARED3FREE,0.10545807803885715,-1 -56358526,171824,63802,school,2,False,2,135,135,7044815,home,,26,SHARED3FREE,0.10545807803885715,-1 
-56358801,171825,63802,othdiscr,1,True,1,131,135,7044850,othdiscr,,29,SHARED3FREE,-0.281769477499857,-1 -56358805,171825,63802,othdiscr,1,False,2,132,131,7044850,social,10.225653,35,SHARED2FREE,-0.20277185632585107,-1 -56358806,171825,63802,othdiscr,2,False,2,135,132,7044850,home,,39,SHARED3FREE,-0.36521793162300004,-1 -56358809,171825,63802,othdiscr,1,True,4,135,135,7044851,othmaint,5.3795877,26,WALK,-0.7460585832595825,-1 -56358810,171825,63802,othdiscr,2,True,4,131,135,7044851,othmaint,5.4266872,27,WALK,-2.0398435592651363,-1 -56358811,171825,63802,othdiscr,3,True,4,130,131,7044851,othmaint,5.7105064,28,WALK,-1.2828608751297,-1 -56358812,171825,63802,othdiscr,4,True,4,130,130,7044851,othdiscr,,28,WALK,-0.78075897693634,-1 -56358813,171825,63802,othdiscr,1,False,1,135,130,7044851,home,,28,WALK,-1.4660019874572756,-1 -56358849,171825,63802,school,1,True,1,135,135,7044856,school,,9,SHARED3FREE,0.10569338088788001,-1 -56358853,171825,63802,school,1,False,1,135,135,7044856,home,,24,SHARED3FREE,0.10569338088788001,-1 -56359177,171826,63802,school,1,True,1,135,135,7044897,school,,10,SHARED3FREE,0.10569338088788001,-1 -56359181,171826,63802,school,1,False,1,135,135,7044897,home,,22,SHARED3FREE,0.10569338088788001,-1 +trip_id,person_id,household_id,primary_purpose,trip_num,outbound,trip_count,destination,origin,tour_id,purpose,destination_logsum,depart,trip_mode,mode_choice_logsum +37314161,113762,42730,othmaint,1,TRUE,1,106,103,4664270,othmaint,,10,DRIVEALONEFREE,-0.356781572 +37314165,113762,42730,othmaint,1,FALSE,1,103,106,4664270,home,,11,DRIVEALONEFREE,-0.356460303 +38194977,116448,43843,atwork,1,TRUE,1,106,101,4774372,atwork,,20,DRIVEALONEFREE,-0.321751714 +38194981,116448,43843,atwork,1,FALSE,1,101,106,4774372,work,,21,DRIVEALONEFREE,-0.321751714 +38195065,116449,43843,othdiscr,1,TRUE,1,106,103,4774383,othdiscr,,32,SHARED2FREE,0.7593915111282218 +38195069,116449,43843,othdiscr,1,FALSE,1,103,106,4774383,home,,37,SHARED2FREE,0.7593915111282218 
+38195257,116448,43843,work,1,TRUE,2,107,103,4774407,othmaint,9.244319,10,DRIVEALONEFREE,-0.667137086 +38195258,116448,43843,work,2,TRUE,2,101,107,4774407,work,,10,DRIVEALONEFREE,-0.589384019 +38195261,116448,43843,work,1,FALSE,1,103,101,4774407,home,,30,DRIVEALONEFREE,-0.501271665 +38195585,116449,43843,work,1,TRUE,2,106,103,4774448,othmaint,10.644734,12,DRIVEALONEFREE,0.050863062 +38195586,116449,43843,work,2,TRUE,2,102,106,4774448,work,,13,DRIVEALONEFREE,0.032545052 +38195589,116449,43843,work,1,FALSE,3,103,102,4774448,othmaint,10.796497,23,SHARED2FREE,0.09835211 +38195590,116449,43843,work,2,FALSE,3,103,103,4774448,work,12.367123,24,DRIVEALONEFREE,0.24223826711784288 +38195591,116449,43843,work,3,FALSE,3,103,103,4774448,home,,26,DRIVEALONEFREE,0.2401515927071465 +38195849,116450,43843,school,1,TRUE,1,106,103,4774481,school,,9,SCHOOL_BUS,4.351044654846191 +38195853,116450,43843,school,1,FALSE,1,103,106,4774481,home,,27,SCHOOL_BUS,4.351044654846191 +38195865,116450,43843,shopping,1,TRUE,1,101,103,4774483,shopping,,27,SHARED2FREE,-0.444101901 +38195869,116450,43843,shopping,1,FALSE,1,103,101,4774483,home,,30,SHARED2FREE,-0.457490895 +39613905,120774,45311,atwork,1,TRUE,1,101,102,4951738,atwork,,20,DRIVEALONEFREE,-0.411282897 +39613909,120774,45311,atwork,1,FALSE,1,102,101,4951738,work,,21,DRIVEALONEFREE,-0.411925524 +39614185,120774,45311,work,1,TRUE,2,106,105,4951773,work,10.647319,10,DRIVEALONEFREE,-0.432832956 +39614186,120774,45311,work,2,TRUE,2,102,106,4951773,work,,11,DRIVEALONEFREE,-0.348032892 +39614189,120774,45311,work,1,FALSE,1,105,102,4951773,home,,30,DRIVEALONEFREE,-0.604685664 +39614513,120775,45311,work,1,TRUE,1,101,105,4951814,work,,9,DRIVEALONEFREE,-0.600943565 +39614517,120775,45311,work,1,FALSE,3,101,101,4951814,work,10.767546,28,DRIVEALONEFREE,-0.356709987 +39614518,120775,45311,work,2,FALSE,3,107,101,4951814,othmaint,9.370711,28,DRIVEALONEFREE,-0.595682502 
+39614519,120775,45311,work,3,FALSE,3,105,107,4951814,home,,29,DRIVEALONEFREE,-0.43356654 +40387937,123133,46056,work,1,TRUE,1,106,106,5048492,work,,20,DRIVEALONEFREE,-0.19777289 +40387941,123133,46056,work,1,FALSE,1,106,106,5048492,home,,40,DRIVEALONEFREE,-0.197402388 +43308361,132037,49258,othmaint,1,TRUE,1,122,110,5413545,othmaint,,23,DRIVEALONEFREE,-0.739005089 +43308365,132037,49258,othmaint,1,FALSE,2,114,122,5413545,eatout,8.7858,24,DRIVEALONEFREE,-0.517582178 +43308366,132037,49258,othmaint,2,FALSE,2,110,114,5413545,home,,24,DRIVEALONEFREE,-0.593839884 +43308537,132038,49258,escort,1,TRUE,1,107,110,5413567,escort,,10,SHARED3FREE,-0.00260142 +43308541,132038,49258,escort,1,FALSE,1,110,107,5413567,home,,22,SHARED3FREE,-0.00260142 +44930737,136983,50912,work,1,TRUE,2,123,112,5616342,eatout,9.353397,31,DRIVEALONEFREE,-0.549332976 +44930738,136983,50912,work,2,TRUE,2,104,123,5616342,work,,32,DRIVEALONEFREE,-0.666611016 +44930741,136983,50912,work,1,FALSE,2,112,104,5616342,social,11.149774,34,DRIVEALONEFREE,-0.530267 +44930742,136983,50912,work,2,FALSE,2,112,112,5616342,home,,34,DRIVEALONEFREE,-0.183315992 +44931065,136984,50912,work,1,TRUE,2,101,112,5616383,shopping,9.520916,11,DRIVEALONEFREE,-0.612947822 +44931066,136984,50912,work,2,TRUE,2,107,101,5616383,work,,12,DRIVEALONEFREE,-0.619394481 +44931069,136984,50912,work,1,FALSE,3,123,107,5616383,work,10.775923,28,DRIVEALONEFREE,-0.765127003 +44931070,136984,50912,work,2,FALSE,3,104,123,5616383,escort,9.519634,29,DRIVEALONEFREE,-0.666611016 +44931071,136984,50912,work,3,FALSE,3,112,104,5616383,home,,30,DRIVEALONEFREE,-0.549903512 +47621473,145187,53716,othmaint,1,TRUE,3,121,116,5952684,social,9.947862,8,SHARED3FREE,-0.419557299 +47621474,145187,53716,othmaint,2,TRUE,3,112,121,5952684,othmaint,9.261029,11,SHARED3FREE,-0.642273009 +47621475,145187,53716,othmaint,3,TRUE,3,122,112,5952684,othmaint,,11,SHARED3FREE,-0.641908288 +47621477,145187,53716,othmaint,1,FALSE,1,116,122,5952684,home,,20,SHARED3FREE,-0.613462986 
+47621737,145188,53716,escort,1,TRUE,1,114,116,5952717,escort,,29,DRIVEALONEFREE,-0.150837969 +47621741,145188,53716,escort,1,FALSE,1,116,114,5952717,home,,30,SHARED2FREE,-0.151798129 +47622241,145189,53716,school,1,TRUE,1,114,116,5952780,school,,10,SCHOOL_BUS,4.3079237937927255 +47622245,145189,53716,school,1,FALSE,1,116,114,5952780,home,,24,SCHOOL_BUS,4.3079237937927255 +47622569,145190,53716,school,1,TRUE,1,114,116,5952821,school,,9,SHARED2FREE,-0.206179043 +47622573,145190,53716,school,1,FALSE,1,116,114,5952821,home,,24,SHARED2FREE,-0.205685001 +48258513,147129,54342,othdiscr,1,TRUE,1,116,117,6032314,othdiscr,,27,DRIVEALONEFREE,-0.524616778 +48258517,147129,54342,othdiscr,1,FALSE,1,117,116,6032314,home,,33,DRIVEALONEFREE,-0.49120158 +48258537,147129,54342,othmaint,1,TRUE,1,114,117,6032317,othmaint,,34,DRIVEALONEFREE,-0.68713218 +48258541,147129,54342,othmaint,1,FALSE,2,114,114,6032317,shopping,9.148774,37,DRIVEALONEFREE,-0.423736095 +48258542,147129,54342,othmaint,2,FALSE,2,117,114,6032317,home,,38,DRIVEALONEFREE,-0.684561789 +56357665,171822,63802,eatout,1,TRUE,1,127,135,7044708,eatout,,31,DRIVEALONEFREE,-0.652624547 +56357669,171822,63802,eatout,1,FALSE,1,135,127,7044708,home,,34,DRIVEALONEFREE,-0.634310424 +56357689,171822,63802,escort,1,TRUE,1,135,135,7044711,escort,,28,SHARED3FREE,0.077063243 +56357693,171822,63802,escort,1,FALSE,2,135,135,7044711,escort,11.356267,28,SHARED3FREE,0.077063243 +56357694,171822,63802,escort,2,FALSE,2,135,135,7044711,home,,28,SHARED3FREE,0.077063243 +56357737,171822,63802,othdiscr,1,TRUE,3,131,135,7044717,othdiscr,12.194779,13,SHARED2FREE,0.599977655 +56357738,171822,63802,othdiscr,2,TRUE,3,130,131,7044717,shopping,13.357507,14,SHARED2FREE,0.6200047250329787 +56357739,171822,63802,othdiscr,3,TRUE,3,130,130,7044717,othdiscr,,14,SHARED2FREE,0.6960546579187884 +56357741,171822,63802,othdiscr,1,FALSE,1,135,130,7044717,home,,14,SHARED2FREE,0.6487159186367744 
+56358209,171823,63802,shopping,1,TRUE,4,131,135,7044776,othmaint,10.342613,24,SHARED3FREE,-0.146198375 +56358210,171823,63802,shopping,2,TRUE,4,131,131,7044776,social,12.281772,25,SHARED3FREE,-0.012169709 +56358211,171823,63802,shopping,3,TRUE,4,131,131,7044776,shopping,11.556939,26,SHARED3FREE,-0.012169709 +56358212,171823,63802,shopping,4,TRUE,4,131,131,7044776,shopping,,26,SHARED3FREE,-0.012169709 +56358213,171823,63802,shopping,1,FALSE,1,135,131,7044776,home,,27,DRIVEALONEFREE,-0.150955944 +56358473,171824,63802,othdiscr,1,TRUE,1,131,135,7044809,othdiscr,,32,SHARED2FREE,-0.460241494 +56358477,171824,63802,othdiscr,1,FALSE,1,135,131,7044809,home,,37,SHARED2FREE,-0.453292992 +56358521,171824,63802,school,1,TRUE,2,135,135,7044815,escort,11.635028,10,SHARED2FREE,0.10569338088788001 +56358522,171824,63802,school,2,TRUE,2,135,135,7044815,school,,10,SHARED3FREE,0.10569338088788001 +56358525,171824,63802,school,1,FALSE,2,135,135,7044815,othdiscr,11.906311,25,SHARED3FREE,0.10545807803885715 +56358526,171824,63802,school,2,FALSE,2,135,135,7044815,home,,26,SHARED3FREE,0.10545807803885715 +56358801,171825,63802,othdiscr,1,TRUE,1,131,135,7044850,othdiscr,,29,SHARED3FREE,-0.281769477 +56358805,171825,63802,othdiscr,1,FALSE,2,132,131,7044850,social,10.225653,35,SHARED2FREE,-0.202771856 +56358806,171825,63802,othdiscr,2,FALSE,2,135,132,7044850,home,,39,SHARED3FREE,-0.365217932 +56358809,171825,63802,othdiscr,1,TRUE,4,135,135,7044851,othmaint,5.3795877,26,WALK,-0.746058583 +56358810,171825,63802,othdiscr,2,TRUE,4,131,135,7044851,othmaint,5.4266872,27,WALK,-2.039843559 +56358811,171825,63802,othdiscr,3,TRUE,4,130,131,7044851,othmaint,5.7105064,28,WALK,-1.282860875 +56358812,171825,63802,othdiscr,4,TRUE,4,130,130,7044851,othdiscr,,28,WALK,-0.780758977 +56358813,171825,63802,othdiscr,1,FALSE,1,135,130,7044851,home,,28,WALK,-1.466001987 +56358849,171825,63802,school,1,TRUE,1,135,135,7044856,school,,9,SHARED3FREE,0.10569338088788001 
+56358853,171825,63802,school,1,FALSE,1,135,135,7044856,home,,24,SHARED3FREE,0.10569338088788001 +56359177,171826,63802,school,1,TRUE,1,135,135,7044897,school,,10,SHARED3FREE,0.10569338088788001 +56359181,171826,63802,school,1,FALSE,1,135,135,7044897,home,,22,SHARED3FREE,0.10569338088788001 diff --git a/activitysim/examples/prototype_arc/test/test_arc.py b/activitysim/examples/prototype_arc/test/test_arc.py index 3e637289c5..140c67a313 100644 --- a/activitysim/examples/prototype_arc/test/test_arc.py +++ b/activitysim/examples/prototype_arc/test/test_arc.py @@ -13,7 +13,7 @@ from activitysim.core.test import assert_frame_substantively_equal -def _test_arc(recode=False, sharrow=False): +def _test_arc(recode=False, sharrow=False, eet=False): def example_path(dirname): resource = os.path.join("examples", "prototype_arc", dirname) return str(importlib.resources.files("activitysim").joinpath(resource)) @@ -24,9 +24,13 @@ def test_path(dirname): def regress(): if sharrow: # sharrow results in tiny changes (one trip moving one time period earlier) - regress_trips_df = pd.read_csv(test_path("regress/final_trips_sh.csv")) + regress_trips_df = pd.read_csv( + test_path(f"regress/final_trips{'_eet' if eet else ''}_sh.csv") + ) else: - regress_trips_df = pd.read_csv(test_path("regress/final_trips.csv")) + regress_trips_df = pd.read_csv( + test_path(f"regress/final_trips{'_eet' if eet else ''}.csv") + ) final_trips_df = pd.read_csv(test_path("output/final_trips.csv")) # person_id,household_id,tour_id,primary_purpose,trip_num,outbound,trip_count,purpose, @@ -36,39 +40,26 @@ def regress(): file_path = os.path.join(os.path.dirname(__file__), "simulation.py") + test_configs = [] + if eet: + test_configs.extend(["-c", test_path("configs_eet")]) + if recode: - run_args = [ - "-c", - test_path("configs_recode"), - "-c", - example_path("configs"), - "-d", - example_path("data"), - "-o", - test_path("output"), - ] + test_configs.extend(["-c", test_path("configs_recode")]) elif sharrow: - 
run_args = [ - "-c", - test_path("configs_sharrow"), - "-c", - example_path("configs"), - "-d", - example_path("data"), - "-o", - test_path("output"), - ] + test_configs.extend(["-c", test_path("configs_sharrow")]) else: - run_args = [ - "-c", - test_path("configs"), - "-c", - example_path("configs"), - "-d", - example_path("data"), - "-o", - test_path("output"), - ] + test_configs.extend(["-c", test_path("configs")]) + + run_args = [ + *test_configs, + "-c", + example_path("configs"), + "-d", + example_path("data"), + "-o", + test_path("output"), + ] if os.environ.get("GITHUB_ACTIONS") == "true": subprocess.run(["coverage", "run", "-a", file_path] + run_args, check=True) @@ -82,15 +73,21 @@ def test_arc(): _test_arc() +def test_arc_eet(): + _test_arc(eet=True) + + def test_arc_recode(): _test_arc(recode=True) -def test_arc_sharrow(): - _test_arc(sharrow=True) +# TODO: update regress trips for sharrow and re-enable test. +# def test_arc_sharrow(): +# _test_arc(sharrow=True) if __name__ == "__main__": _test_arc() + _test_arc(eet=True) _test_arc(recode=True) - _test_arc(sharrow=True) + # _test_arc(sharrow=True) diff --git a/docs/core.rst b/docs/core.rst index 687e8f9565..ac0189c2a0 100644 --- a/docs/core.rst +++ b/docs/core.rst @@ -323,6 +323,21 @@ To specify and solve an NL model: * specify the nesting structure via the NESTS setting in the model configuration YAML file. An example nested logit NESTS entry can be found in ``example/configs/tour_mode_choice.yaml`` * call ``simulate.simple_simulate()``. The ``simulate.interaction_simulate()`` functionality is not yet supported for NL. +Explicit Error Terms +^^^^^^^^^^^^^^^^^^^^ + +By default, ActivitySim makes choices by calculating analytical probabilities and then drawing once from +the cumulative distribution for each chooser. 
With Explicit Error Terms (EET), enabled by setting +``use_explicit_error_terms: True`` in ``settings.yaml``, ActivitySim instead draws the unobserved portion of +utility (error term) for each chooser-alternative pair, adds it to the observed utility, and chooses the alternative +with the highest total utility. + +EET changes the final simulation step, not the utility expressions, availability logic, or nesting structure. In +practice, it can reduce Monte Carlo noise in scenario comparisons and between demand and network assignment iterations. + +For configuration guidance see :ref:`explicit_error_terms_ways_to_run`. For detailed background and implementation notes +see :doc:`/dev-guide/explicit-error-terms`. + API ^^^ diff --git a/docs/dev-guide/explicit-error-terms.md b/docs/dev-guide/explicit-error-terms.md new file mode 100644 index 0000000000..99e65772d5 --- /dev/null +++ b/docs/dev-guide/explicit-error-terms.md @@ -0,0 +1,181 @@ +(explicit-error-terms-dev)= +# Explicit Error Terms + +Explicit Error Terms (EET) is an alternative way to simulate choices from ActivitySim's +logit models. It keeps the same systematic utilities and the same random-utility +interpretation as the standard method, but changes how the final simulated choice is +drawn. For details, see +[this ATRF paper](https://australasiantransportresearchforum.org.au/frozen-randomness-at-the-individual-utility-level/). + + + +## Enabling EET + +Enable EET globally in `settings.yaml`: + +```yaml +use_explicit_error_terms: True +``` + +The top-level switch is defined in +`activitysim.core.configuration.top.SimulationSettings.use_explicit_error_terms`. +Choice simulation code reads that setting through the supported logit wrappers and routes +final choice simulation through the EET path. For interaction-sample-specific sampling +configuration, see {doc}`/dev-guide/sampling-methods`. + +## Default Draw Versus EET + +Under the default ActivitySim simulation path, choice drawing works like this: + +1. 
Compute systematic utilities. +2. Convert those utilities into analytical probabilities. +3. Draw one uniform random number per chooser. +4. Select the alternative whose cumulative probability interval contains that draw. + +With EET enabled, the final draw step changes: + +1. Compute systematic utilities. +2. Draw error terms for each chooser-alternative pair. +3. Add those error terms to the systematic utilities. +4. Choose the alternative with the highest total utility. + +For multinomial logit, the error term distribution is i.i.d. Gumbel and draws are generated +by inverting the cumulative distribution function. For nested logit, this method is not available +due to correlations between error terms. Instead, ActivitySim makes use of recent advances +in the [representation of nested logit models](https://doi.org/10.1017/S026646662000047X) +and combines this with +[exact numerical sampling methods](https://doi.org/10.1007/978-3-030-52915-4) +to draw error terms of all fundamental alternatives. + +## Practical Effects + +### Comparisons and Simulation Noise + +For EET to reduce simulation noise, it is important that alternatives of a choice situation +keep the same unobserved error term in different scenario runs. If unchanged alternatives +keep the same unobserved draws, changes to choices between scenarios can only happen when +the observed utility of an alternative increases. This is not the case for the Monte Carlo +simulation method, where the draws are based on probabilities, which necessarily change for +all alternatives if any observed utility changes. This combined with sensitivity to small +differences in the final CDF draw when comparing nearby scenarios means that EET removes +noise from scenario comparisons. + +Note that both MC and EET are simulating the same model, so individual runs with identical +inputs but varying global seed will lead to the same statistical results for individual +output metrics.
EET's properties become apparent when comparing two model runs with different +inputs. Because error terms are aligned, the variance of the estimator of the indicator, e.g., +mode choice shift or VMT difference, is reduced. In other words, difference metrics are more +precise estimators under EET. + +In mathematical terms, for any two metrics $X$ (baseline) and $Y$ (scenario), the variance +of the difference $X - Y$ is + +$$\text{Var}(X - Y) = \text{Var}(X) + \text{Var}(Y) - 2\,\text{Cov}(X, Y)$$ + +EET deliberately drives $\text{Cov}(X, Y)$ up by aligning error terms, so $\text{Var}(X-Y)$ +collapses even though $\text{Var}(X)$ and $\text{Var}(Y)$ individually are unchanged. + +In practice, models are often run once for each scenario. EET is still useful because the +lower the noise of the estimator, the higher the chance that a single run is representative. +In other words, the noise level of comparison metrics is lower. Additionally, under MC small +but real benefits can show up as negative in a single run. Under EET, the sign of the effect +is far more trustworthy. + +Independent of any statistical argument, under EET, choice changes between two runs are +attributable to utility changes which can be helpful for model development, sensitivity +testing, and presenting results to stakeholders. + +### Aligning error terms + +Aligning error terms between runs is essential. This is intimately tied +to how random numbers are generated; see {ref}`random_in_detail` for the underlying +random-number stream design and the `activitysim.core.random` API. It boils down to +each chooser needing to have the same ID between scenarios, and all alternatives being +reproducibly ordered. + +For chooser alignment, it is necessary that person and household IDs are stable between runs. +When running a scenario with population changes, it is important to only change the IDs of +those households and persons that have changed, e.g., new households. 
+ +For alternative alignment, it is important to know the universal choice set, i.e., all possible +alternatives, for each model. For example, when running scenarios where a new mode is introduced, +this new mode should also be in the specification of the run where it is not available, with +its utility specification such that it is never chosen. In case the model is nested logit, the +nesting structure also needs to be held constant across scenarios. +For location choice models, all alternatives need to be listed in the land use table and the +zone IDs need to be stable between scenarios. Additionally, for computational efficiency it +is recommended to have zone IDs that are a contiguous 0-based sequence because ActivitySim aligns +random draws to positions in the full zone universe and generates draws for all zone IDs up to the +maximum. For models where this is not the case, ActivitySim can automatically perform the +conversion for internal calculations. The `recode_columns` option creates contiguous zero-based IDs +where needed; see the +[Zero-based Recoding of Zones](using-sharrow.md#zero-based-recoding-of-zones) section for details. + +For models that use sub-sampling of alternatives, it is important to keep the sampling scheme +identical between scenarios, otherwise the error terms for the choice from the sampled set are +not guaranteed to be aligned. When running with EET, the default sampling method is ``poisson``, +which balances runtime performance and noise reduction. For more details on sampling methods, +see {ref}`sampling-methods-dev`. + +Finally, it is also important to keep the global random number generator seed constant for two +individual comparison runs. + + +### Runtime and memory usage +EET draws one error term per chooser and alternative, which requires many more random numbers +than MC's one per chooser. For models with many alternatives, this can lead to a large amount +of random numbers being calculated. 
The implementation of EET avoids materialization of large +chooser-alternative arrays of error terms in memory so that the memory usage is in line with MC +simulation. +Regarding runtimes, EET with default settings currently carries a runtime penalty of about 3-10% +per demand model run. However, when run in combination with an assignment model the overall +system can converge faster and this can reduce the overall model runtime penalty. + + + + +## Implementation Details and Adding New Models + +The core simulation is implemented in `activitysim.core.logit.make_choices_utility_based`. Most +calls to this function are wrapped in one of the following methods: + +- `activitysim.core.simulate` +- `activitysim.core.interaction_simulate` +- `activitysim.core.interaction_sample` +- `activitysim.core.interaction_sample_simulate` + +These wrappers all implement EET consistently, so any model using them will automatically support +EET. Some models call the underlying choice simulation method +`activitysim.core.logit.make_choices` directly. For EET to work in that case, the developer must +add a corresponding call to `logit.make_choices_utility_based`; see for example +`activitysim.abm.models.utils.cdap.household_activity_choices`. Models that draw directly +from probability distributions, such as `activitysim.abm.models.utils.cdap.extra_hh_member_choices`, +do not have a corresponding EET implementation because there are no utilities to work with. + + +### Unavailable choices utility convention + +For EET, only utility differences matter, and therefore the outcome for two utilities that are +very small, say -10000 and -10001, is identical to the outcome for 0 and 1. For MC, utilities +have to be exponentiated and therefore floating point precision dictates the smallest and largest +utility that can be used in practice. ActivitySim models historically often use a utility of +-999 to make alternatives practically unavailable. 
That value is below the utility threshold +used in the probability-based path, which is about -691 because ActivitySim clips +exponentiated utilities at 1e-300. To keep behavior consistent, EET treats alternatives with +utilities at or below that threshold as unavailable; see `activitysim.core.logit.validate_utils`. + +### Normalization +For MNL, the error term scale is normalized to 1 by using the standard Gumbel distribution. For +nested logit, ActivitySim uses the normalized formulation in which the root nest coefficient is +fixed at 1; the EET implementation relies on that convention. diff --git a/docs/dev-guide/index.rst b/docs/dev-guide/index.rst index da6c649733..99fe1c6647 100644 --- a/docs/dev-guide/index.rst +++ b/docs/dev-guide/index.rst @@ -33,6 +33,8 @@ Contents component-configs components/index ../core + explicit-error-terms + sampling-methods ../benchmarking build-docs changes diff --git a/docs/dev-guide/sampling-methods.md b/docs/dev-guide/sampling-methods.md new file mode 100644 index 0000000000..937c8848ca --- /dev/null +++ b/docs/dev-guide/sampling-methods.md @@ -0,0 +1,179 @@ +(sampling-methods-dev)= +# Sampling Methods for Interaction Sample + +`activitysim.core.interaction_sample` supports multiple alternative-sampling methods. +These methods are independent of the global final-choice switch controlled by +`use_explicit_error_terms`, although the global switch determines the default when no +sampling-method override is provided. + +For user-facing configuration guidance, see {ref}`sampling_methods_ways_to_run`. + +## Why sample alternatives? + +`interaction_sample` is mainly used in destination and location choice models, where the full +utility can be expensive to evaluate for every chooser-alternative pair. The most common example +is mode choice logsums: computing a logsum for every chooser and every possible destination can be +much more expensive than the final destination-choice simulation itself. 
+ +ActivitySim handles this by splitting the problem into two stages: + +1. Build a sampled choice set using a cheaper approximate utility. +2. Compute the expensive terms only for the sampled alternatives and make the final choice from + that sampled set. + +In the example models, the sampling utility usually replaces `mode_choice_logsum` with cheaper +proxies such as distance skims. For example, +`activitysim/examples/prototype_arc/configs/school_location_sample.csv` and +`activitysim/examples/prototype_mtc/configs/workplace_location_sample.csv` use distance-based +sampling utilities, while the corresponding final-choice specs in +`activitysim/examples/prototype_arc/configs/school_location.csv` and +`activitysim/examples/prototype_mtc/configs/workplace_location.csv` add the full +`mode_choice_logsum` and a sampling correction term. + +## Available Methods + +- `monte_carlo`: importance sampling with replacement using probabilities and uniform draws +- `eet`: importance sampling with replacement using explicit error-term draws +- `poisson`: importance sampling via independent Poisson inclusion sampling based on probabilities + +## Defaults and Overrides + +At the top level, `sample_method` may be set in `settings.yaml`. +When it is omitted, ActivitySim preserves the intended default behavior: + +- if `use_explicit_error_terms` is `False`, `interaction_sample` defaults to `monte_carlo` +- if `use_explicit_error_terms` is `True`, `interaction_sample` defaults to `poisson` + +Individual models may override this default through: + +```yaml +compute_settings: + sample_method: eet +``` + +## Workflow + +The sampled-choice workflow is: + +1. Evaluate a simplified sampling utility for the full active alternative set +2. Draw a sample of alternatives using one of the three methods +3. Return a sampled-alternative table with one row per chooser-sampled-alternative pair and information about the sampling probabilities +4. 
Compute expensive terms, such as `mode_choice_logsum`, only for that sampled table +5. Add the sampling correction term to the final utility and choose from the sampled set + +This is the standard sample-of-alternatives pattern: the sampling stage uses an approximation, +and the final stage corrects for it. + +### Monte Carlo and EET-with-replacement + +The `monte_carlo` and `eet` sampling methods both draw alternatives with replacement. As a result, +duplicates are possible within a chooser's sampled set, and sampled shares track repeated-draw MNL +behavior closely. + +The difference between them is how each draw is made: + +- `monte_carlo` draws from analytical probabilities using uniform random numbers +- `eet` draws explicit EV1 error terms and chooses the utility-plus-error argmax + +`eet` freezes the error terms for each chooser-alternative pair across repeated draws, so that +unchanged alternatives can keep the same unobserved draws, which can greatly reduce +scenario-to-scenario sampling noise compared to `monte_carlo`. However, `eet` is more expensive to +run because it requires many more random draws and more complex logic to avoid materializing large +chooser-alternative arrays of error terms in memory. + +### Poisson Sampling + +`poisson` does not perform repeated draws with replacement. Instead, each chooser-alternative +pair is sampled independently with inclusion probability $1 - (1 - p)^s$, where $p$ is the original +choice probability and $s$ is the configured sample size. +A single inclusion draw is made for each alternative. This is much cheaper than repeated draws for +`eet`, and unlike `monte_carlo`, it can still benefit from stable alignment of random draws to +alternatives, so it can provide improved noise reduction compared to `monte_carlo` without the full +cost of `eet`. It is therefore the default when running with explicit error terms; see +{ref}`explicit-error-terms-dev`. 
+ + + +A chooser can occasionally receive no sampled alternatives under Poisson sampling, because each +alternative is tested independently. In the models that use sampling in ActivitySim, this should be +rare. If it happens, the sampler retries that chooser row up to 10 times and then falls back to a +simple without-replacement random sample. + + + +### Sampling Correction + +`interaction_sample` returns a dataframe indexed by chooser id with columns including: + +- the sampled alternative id column +- `prob` +- `pick_count` + +For `monte_carlo` and `eet`, `pick_count` is the number of times the alternative was selected in +the repeated with-replacement draws. For `poisson`, `pick_count` is always `1`, because an +alternative is either included or not included. For all methods, `prob` is the quantity used in +the correction term, but it means different things for different methods. ActivitySim's final +sampled-choice specs typically include the term: + +```python +np.log(df.pick_count/df.prob) +``` + +This is the sample-of-alternatives correction factor used in the final choice model. + +For `monte_carlo` and `eet`, `prob` is the one-draw sampling probability implied by the +approximate sampling utility, and `pick_count` is the number of times that alternative appeared in +the repeated sample. McFadden's utility correction term for repeated with-replacement sampling is +`log(pick_count / (sample_size * prob)) = log(pick_count / prob) - log(sample_size)`. ActivitySim +omits the common `sample_size` term because it is the same for every sampled alternative for that +chooser and therefore does not affect choice probabilities. + +For `poisson`, `prob` is the inclusion probability of the alternative in the sampled set, not the +one-draw choice probability. 
Specifically, if the original approximate choice probability is $p$ +and the configured sample size is $s$, then the returned `prob` is: + +$$ +1 - (1 - p)^s +$$ + +Since `pick_count` is always `1` for `poisson`, the correction becomes $\log(1 / \text{prob})$. + +This means that all three methods use the same correction expression, +`np.log(df.pick_count/df.prob)`, even though `prob` has a different interpretation for `poisson` +than for the with-replacement methods. + +<> + +## Runtime and Simulation Noise + +Runtime and noise characteristics differ across methods. + +- `monte_carlo` is the fastest method. It draws one uniform random number per repeated sample for + each chooser, but it also has the most simulation noise because small changes in approximate + probabilities can change the sampled set substantially. +- `poisson` is also relatively inexpensive. It draws one uniform random number per + chooser-alternative pair, with possible retries for chooser rows that initially sample no + alternatives. With stable alternative alignment it is much less noisy than Monte Carlo. +- `eet` is the slowest sampling method. It draws one EV1 error term per chooser, alternative, and + repeated sample draw. In return, it produces the most stable sampled sets across scenarios + because unchanged alternatives keep the same unobserved error draws and only observed utility + changes can change the sampled set. + +Note that `eet` does not remove the dependence on the approximate sampling utility itself: if that +utility changes, the sampled set can still change. What it removes is the extra Monte Carlo noise +from the sampling draw. `poisson` also benefits from stable alignment per alternative, but unlike +`eet` it still depends on probability-based inclusion tests. The practical effect on scenario +comparisons is ultimately empirical. + + +## References + +- Kenneth Train, *Discrete Choice Methods with Simulation*, 2nd edition, Cambridge University + Press, 2009. 
Chapter 3.7 treats sampled choice sets and choice-model correction terms from + an estimation perspective. +- Carl-Erik Sarndal, Bengt Swensson, and Jan Wretman, *Model Assisted Survey Sampling*, Springer, + 1992. This is a standard reference for Poisson sampling as independent inclusion sampling. diff --git a/docs/users-guide/index.rst b/docs/users-guide/index.rst index d464a6cd0d..342f67f07c 100644 --- a/docs/users-guide/index.rst +++ b/docs/users-guide/index.rst @@ -36,6 +36,7 @@ Contents modelsetup ways_to_run + sampling-methods performance/index run_primary_example model_anatomy diff --git a/docs/users-guide/sampling-methods.rst b/docs/users-guide/sampling-methods.rst new file mode 100644 index 0000000000..c3fbc0e674 --- /dev/null +++ b/docs/users-guide/sampling-methods.rst @@ -0,0 +1,52 @@ +.. _sampling_methods_ways_to_run : + +Sampling Methods +________________ + +ActivitySim supports multiple sampling methods for ``activitysim.core.interaction_sample``. +These methods affect how sampled choice sets are constructed for models such as destination +and location choice. + +Available methods are: + +* ``monte_carlo``: importance sampling with replacement using probabilities and uniform draws +* ``eet``: importance sampling with replacement using explicit error-term draws +* ``poisson``: independent Poisson inclusion sampling using probabilities + +Default behavior depends on the global simulation method setting: + +* if ``use_explicit_error_terms: False``, the default sampling method is ``monte_carlo`` +* if ``use_explicit_error_terms: True``, the default sampling method is ``poisson`` + +However, any method can be used with either simulation method and can be set +globally in the settings: + +.. code-block:: yaml + + sample_method: "poisson" + +To override the default for a particular model, set the component's compute settings: + +.. code-block:: yaml + + compute_settings: + sample_method: eet + +This override applies only to ``interaction_sample``. 
It does not change how final choices +are simulated elsewhere in ActivitySim. + +Practical differences: + +* ``monte_carlo`` and ``eet`` both sample with replacement, so duplicated sampled alternatives + are possible and their aggregate sampled shares track repeated-draw MNL behavior more closely. +* ``poisson`` samples alternatives by inclusion probability, so each sampled alternative appears + at most once per chooser. This can change raw sampled shares in highly peaked cases, even though + the downstream sampling correction remains well defined. +* ``monte_carlo`` is the fastest method, followed by ``poisson``, with ``eet`` being the slowest. + However, for models like location choice, most runtime comes from logsum calculations and the + total difference between ``monte_carlo`` and ``poisson`` sampling is usually very small. +* ``poisson`` is the current default when running with the explicit error terms simulation method + because it avoids repeated chooser-by-alternative explicit-error draws during sampling while + still providing improved noise reduction compared to Monte Carlo sampling. + +For implementation details and runtime considerations, see :doc:`/dev-guide/sampling-methods`. diff --git a/docs/users-guide/ways_to_run.rst b/docs/users-guide/ways_to_run.rst index 1b21221078..f39f9ac2f6 100644 --- a/docs/users-guide/ways_to_run.rst +++ b/docs/users-guide/ways_to_run.rst @@ -80,7 +80,7 @@ Refer to the :ref:`Run the Primary Example` section to learn how to run the prim Using Jupyter Notebook ______________________ -ActivitySim includes a `Jupyter Notebook `__ recipe book with interactive examples. +ActivitySim includes a `Jupyter Notebook `__ recipe book with interactive examples. * To start JupyterLab, from the ActivitySim project directory run ``uv run jupyter lab``. This will start the JupyterLab server and pop up a browser window with the interactive development environment. 
* Navigate to the ``examples/prototype_mtc/notebooks`` folder and select a notebook to learn more: @@ -283,3 +283,31 @@ With the set of output CSV files, the user can trace ActivitySim calculations in help debug data and/or logic errors. Refer to :ref:`trace` for more details on configuring tracing and the various output files. + +.. _explicit_error_terms_ways_to_run : + +Explicit Error Terms +____________________ + +ActivitySim makes heavy use of micro-simulation. Most model components are discrete choice models with an inherent +random component, and for each choice situation a single outcome is generated. +With the default Monte Carlo draw method, ActivitySim first calculates analytical probabilities from the +systematic utilities of a multinomial or nested logit model and then makes one draw from the +cumulative distribution for each chooser. Explicit Error Terms (EET) replaces that final draw with a direct +random-utility simulation by drawing the unobserved portion of utility (error term) for each +chooser-alternative pair, adding it to the systematic utility, and selecting the alternative with the highest +total utility. Both methods simulate the same underlying model, but EET can be less affected by Monte Carlo +noise when comparing scenarios and can make some comparisons easier to interpret. This is because the +selected alternative is the one with the highest total utility after adding the explicit +error term, and if the explicit error term is consistent between a base and scenario run then +only (relative) increases in the observed utility can lead to previously un-chosen alternatives +being chosen. + +To enable EET for a model run, set the global switch in ``settings.yaml``: + +.. code-block:: yaml + + use_explicit_error_terms: True + +Enable or disable this setting consistently across all runs being compared. For more details, including +scenario comparison considerations, see :doc:`/dev-guide/explicit-error-terms`.