Source code for fasttrips.PathSet

from __future__ import division
from builtins import str
from builtins import range
from builtins import object

__copyright__ = "Copyright 2015-2016 Contributing Entities"
__license__   = """
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
"""
import datetime
import os
import sys

import numpy as np
import pandas as pd

from .Error     import NotImplementedError, UnexpectedError
from .Logger    import FastTripsLogger
from .Passenger import Passenger
from .Route     import Route
from .TAZ       import TAZ
from .Trip      import Trip
from .Util      import Util


#: Default user class: just one class called "all"
def generic_user_class(row_series):
    """
    Default user class function: places every trip in the single class "all".

    Receives one row of the trip list (a :py:class:`pandas.Series`) and ignores it.
    """
    return "all"

class PathSet(object):
    """
    Represents a path set for a passenger from an origin :py:class:`TAZ` to a destination
    :py:class:`TAZ` through a set of stops.
    """
    #: Paths output file
    PATHS_OUTPUT_FILE               = 'ft_output_passengerPaths.txt'

    #: Path times output file
    PATH_TIMES_OUTPUT_FILE          = 'ft_output_passengerTimes.txt'

    #: Configured functions, indexed by name
    CONFIGURED_FUNCTIONS            = { 'generic_user_class':generic_user_class }

    #: Path configuration: Name of the function that defines user class
    #: (a key into :py:attr:`PathSet.CONFIGURED_FUNCTIONS`)
    USER_CLASS_FUNCTION             = None

    #: File with weights file. Space delimited table.
    WEIGHTS_FILE                    = 'pathweight_ft.txt'
    #: Path weights (a :py:class:`pandas.DataFrame` once read)
    WEIGHTS_DF                      = None
    #: Read weights file as fixed-width format. If false, standard CSV format is read.
    WEIGHTS_FIXED_WIDTH             = False

    #: Configuration: Minimum transfer penalty. Safeguard against having no transfer penalty
    #: which can result in terrible paths with excessive transfers.
    MIN_TRANSFER_PENALTY            = None

    #: Configuration: Overlap scale parameter.
    OVERLAP_SCALE_PARAMETER         = None

    #: Configuration: Overlap variable. Can be "None", "count", "distance", "time".
    OVERLAP_VARIABLE                = None

    #: Overlap variable option: None. Don't use overlap pathsize correction.
    OVERLAP_NONE                    = "None"
    #: Overlap variable option: count. Use leg count overlap pathsize correction.
    OVERLAP_COUNT                   = "count"
    #: Overlap variable option: distance. Use leg distance overlap pathsize correction.
    OVERLAP_DISTANCE                = "distance"
    #: Overlap variable option: time. Use leg time overlap pathsize correction.
    OVERLAP_TIME                    = "time"
    #: Valid values for OVERLAP_VARIABLE
    OVERLAP_VARIABLE_OPTIONS        = [OVERLAP_NONE,
                                       OVERLAP_COUNT,
                                       OVERLAP_DISTANCE,
                                       OVERLAP_TIME]

    #: Overlap chunk size. How many person's trips to process at a time in overlap calculations
    #: in python simulation
    OVERLAP_CHUNK_SIZE              = None

    #: Overlap option: Split transit leg into component parts? e.g. split A-E
    #: into A-B-C-D-E for overlap calculations?
    OVERLAP_SPLIT_TRANSIT           = None

    # Route-learning configuration
    LEARN_ROUTES                    = False
    LEARN_ROUTES_RATE               = 0.05

    # Column names used to flag path success / bumping
    SUCCESS_FLAG_COLUMN             = 'success_flag'
    BUMP_FLAG_COLUMN                = 'bump_flag'

    #: Allow departures and arrivals before / after preferred time
    ARRIVE_LATE_ALLOWED_MIN         = datetime.timedelta(minutes = 0)
    DEPART_EARLY_ALLOWED_MIN        = datetime.timedelta(minutes = 0)

    # Growth-model names for penalty functions
    CONSTANT_GROWTH_MODEL           = 'constant'
    EXP_GROWTH_MODEL                = 'exponential'
    LOGARITHMIC_GROWTH_MODEL        = 'logarithmic'
    LOGISTIC_GROWTH_MODEL           = 'logistic'
    PENALTY_GROWTH_MODELS           = [
        CONSTANT_GROWTH_MODEL,
        EXP_GROWTH_MODEL,
        LOGARITHMIC_GROWTH_MODEL,
        LOGISTIC_GROWTH_MODEL,
    ]

    #: Weights column: User Class
    WEIGHTS_COLUMN_USER_CLASS       = "user_class"
    #: Weights column: Purpose
    WEIGHTS_COLUMN_PURPOSE          = "purpose"
    #: Weights column: Demand Mode Type
    WEIGHTS_COLUMN_DEMAND_MODE_TYPE = "demand_mode_type"
    #: Weights column: Demand Mode
    WEIGHTS_COLUMN_DEMAND_MODE      = "demand_mode"
    #: Weights column: Supply Mode
    WEIGHTS_COLUMN_SUPPLY_MODE      = "supply_mode"
    #: Weights column: Weight Name
    WEIGHTS_COLUMN_WEIGHT_NAME      = "weight_name"
    #: Weights column: Weight Value
    WEIGHTS_COLUMN_WEIGHT_VALUE     = "weight_value"
    #: Weights column: Growth Type
    WEIGHTS_GROWTH_TYPE             = "growth_type"
    #: Weights column: Log Base for logarithmic growth function
    WEIGHTS_GROWTH_LOG_BASE         = "log_base"
    #: Weights column: Max value for logistic growth function
    WEIGHTS_GROWTH_LOGISTIC_MAX     = "logistic_max"
    #: Weights column: Midpoint value for logistic growth function
    WEIGHTS_GROWTH_LOGISTIC_MID     = "logistic_mid"

    # Schedule-deviation weight names
    WEIGHT_NAME_DEPART_EARLY_MIN    = "depart_early_min"
    WEIGHT_NAME_ARRIVE_LATE_MIN     = "arrive_late_min"
    WEIGHT_NAME_DEPART_LATE_MIN     = 'depart_late_min'
    WEIGHT_NAME_ARRIVE_EARLY_MIN    = 'arrive_early_min'
    WEIGHT_NAME_VALID_NAMES         = [
        WEIGHT_NAME_DEPART_EARLY_MIN,
        WEIGHT_NAME_DEPART_LATE_MIN,
        WEIGHT_NAME_ARRIVE_EARLY_MIN,
        WEIGHT_NAME_ARRIVE_LATE_MIN,
    ]

    # ========== Added by fasttrips =======================================================
    #: Weights column: Supply Mode number
    WEIGHTS_COLUMN_SUPPLY_MODE_NUM  = "supply_mode_num"

    #: File with weights for c++
    OUTPUT_WEIGHTS_FILE             = "ft_intermediate_weights.txt"

    DIR_OUTBOUND    = 1  #: Trips outbound from home have preferred arrival times
    DIR_INBOUND     = 2  #: Trips inbound to home have preferred departure times

    # Keys into the per-path dictionaries in :py:attr:`PathSet.pathdict`
    PATH_KEY_COST           = "pf_cost"         #: path cost according to pathfinder
    PATH_KEY_FARE           = "pf_fare"         #: path fare according to pathfinder
    PATH_KEY_PROBABILITY    = "pf_probability"  #: path probability according to pathfinder
    PATH_KEY_INIT_COST      = "pf_initcost"     #: initial cost (in pathfinding, before path was finalized)
    PATH_KEY_INIT_FARE      = "pf_initfare"     #: initial fare (in pathfinding, before path was finalized)
    PATH_KEY_STATES         = "states"

    # Indices into each state tuple stored under PATH_KEY_STATES
    STATE_IDX_LABEL         = 0   #: :py:class:`datetime.timedelta` instance
    STATE_IDX_DEPARR        = 1   #: :py:class:`datetime.datetime` instance. Departure if outbound/backwards, arrival if inbound/forwards.
    STATE_IDX_DEPARRMODE    = 2   #: mode id
    STATE_IDX_TRIP          = 3   #: trip id
    STATE_IDX_SUCCPRED      = 4   #: stop identifier or TAZ identifier
    STATE_IDX_SEQ           = 5   #: sequence (for trip)
    STATE_IDX_SEQ_SUCCPRED  = 6   #: sequence for successor/predecessor
    STATE_IDX_LINKTIME      = 7   #: :py:class:`datetime.timedelta` instance
    STATE_IDX_LINKFARE      = 8   #: fare cost, float
    STATE_IDX_LINKCOST      = 9   #: link generalized cost, float for hyperpath/stochastic
    STATE_IDX_LINKDIST      = 10  #: link distance, float
    STATE_IDX_COST          = 11  #: cost float, for hyperpath/stochastic assignment
    STATE_IDX_ARRDEP        = 12  #: :py:class:`datetime.datetime` instance. Arrival if outbound/backwards, departure if inbound/forwards.

    # these are also the demand_mode_type values
    STATE_MODE_ACCESS       = "access"
    STATE_MODE_EGRESS       = "egress"
    STATE_MODE_TRANSFER     = "transfer"
    # new
    STATE_MODE_TRIP         = "transit"  # onboard

    # Sentinel costs: cost assigned to a bumped passenger's experienced path,
    # and a huge (but not bump-level) cost used e.g. for at_capacity weights
    BUMP_EXPERIENCED_COST   = 999999
    HUGE_COST               = 9999
[docs] def __init__(self, trip_list_dict): """ Constructor from dictionary mapping attribute to value. """ self.__dict__.update(trip_list_dict) #: Direction is one of :py:attr:`PathSet.DIR_OUTBOUND` or :py:attr:`PathSet.DIR_INBOUND` #: Preferred time is a datetime.time object if trip_list_dict[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == "arrival": self.direction = PathSet.DIR_OUTBOUND self.pref_time = trip_list_dict[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME] self.pref_time_min = trip_list_dict[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME_MIN] elif trip_list_dict[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == "departure": self.direction = PathSet.DIR_INBOUND self.pref_time = trip_list_dict[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME] self.pref_time_min = trip_list_dict[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME_MIN] else: raise Exception("Don't understand trip_list %s: %s" % (Passenger.TRIP_LIST_COLUMN_TIME_TARGET, str(trip_list_dict))) #: Dict of path-num -> { cost:, probability:, states: [List of (stop_id, stop_state)]} self.pathdict = {}
[docs] def goes_somewhere(self): """ Does this path go somewhere? Does the destination differ from the origin? """ return (self.__dict__[Passenger.TRIP_LIST_COLUMN_ORIGIN_TAZ_ID] != self.__dict__[Passenger.TRIP_LIST_COLUMN_DESTINATION_TAZ_ID])
[docs] def path_found(self): """ Was a a transit path found from the origin to the destination with the constraints? """ return len(self.pathdict) > 0
[docs] def num_paths(self): """ Number of paths in the PathSet """ return len(self.pathdict)
[docs] def reset(self): """ Delete my states, something went wrong and it won't work out. """ self.pathdict = []
[docs] @staticmethod def set_user_class(trip_list_df, new_colname): """ Adds a column called user_class by applying the configured user class function. """ trip_list_df[new_colname] = trip_list_df.apply(PathSet.CONFIGURED_FUNCTIONS[PathSet.USER_CLASS_FUNCTION], axis=1)
[docs] @staticmethod def verify_weight_config(modes_df, output_dir, routes, capacity_constraint, trip_list_df): """ Verify that we have complete weight configurations for the user classes and modes in the given DataFrame. Trips with invalid weight configurations will be dropped from the trip list and warned about. The parameter mode_df is a dataframe with the user_class, demand_mode_type and demand_mode combinations found in the demand file. If *capacity_constraint* is true, make sure there's an at_capacity weight on the transit supply mode links to enforce it. Returns updated trip_list_df. """ (verify, error_str) = PathSet.verify_weights(PathSet.WEIGHTS_DF) # Join - make sure that all demand combinations (user class, purpose, demand mode type and demand mode) are configured weight_check = pd.merge(left=modes_df, right=PathSet.WEIGHTS_DF, on=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE], how='left') FastTripsLogger.debug("demand_modes x weights: \n%s" % weight_check.to_string()) FastTripsLogger.debug("trip_list_df head=\n%s" % str(trip_list_df.head())) # If something is missing, warn and remove those trips null_supply_mode_weights = weight_check.loc[pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_SUPPLY_MODE])] if len(null_supply_mode_weights) > 0: # warn FastTripsLogger.warn("The following user_class, demand_mode_type, demand_mode combinations exist in the demand file but are missing from the weight configuration:") FastTripsLogger.warn("\n%s" % null_supply_mode_weights.to_string()) # remove those trips -- need to do it one demand mode type at a time null_supply_mode_weights = null_supply_mode_weights[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE]] null_supply_mode_weights["to_remove"] = 1 for demand_mode_type in [PathSet.STATE_MODE_ACCESS, PathSet.STATE_MODE_EGRESS, 
PathSet.STATE_MODE_TRIP]: remove_trips = null_supply_mode_weights.loc[null_supply_mode_weights[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE]==demand_mode_type].copy() if len(remove_trips) == 0: continue remove_trips.rename(columns={PathSet.WEIGHTS_COLUMN_DEMAND_MODE:"%s_mode" % demand_mode_type}, inplace=True) remove_trips.drop([PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE], axis=1, inplace=True) FastTripsLogger.debug("Removing for \n%s" % remove_trips) trip_list_df = pd.merge(left = trip_list_df, right = remove_trips, how = "left") FastTripsLogger.debug("Removing\n%s" % trip_list_df.loc[pd.notnull(trip_list_df["to_remove"])]) # keep only those not flagged to_remove trip_list_df = trip_list_df.loc[pd.isnull(trip_list_df["to_remove"])] trip_list_df.drop(["to_remove"], axis=1, inplace=True) # demand_mode_type and demand_modes implicit to all travel : xfer walk, xfer wait, initial wait user_classes = modes_df[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE]].drop_duplicates().reset_index() implicit_df = pd.DataFrame({ PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE:[ 'transfer'], PathSet.WEIGHTS_COLUMN_DEMAND_MODE :[ 'transfer'], PathSet.WEIGHTS_COLUMN_SUPPLY_MODE :[ 'transfer'] }) user_classes['key'] = 1 implicit_df['key'] = 1 implicit_df = pd.merge(left=user_classes, right=implicit_df, on='key') implicit_df.drop(['index','key'], axis=1, inplace=True) # FastTripsLogger.debug("implicit_df: \n%s" % implicit_df) weight_check = pd.merge(left=implicit_df, right=PathSet.WEIGHTS_DF, on=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE], how='left') FastTripsLogger.debug("implicit demand_modes x weights: \n%s" % weight_check.to_string()) if pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]).sum() > 0: error_str += "\nThe following user_class, purpose, demand_mode_type, demand_mode, supply_mode combinations exist in the demand file but 
are missing from the weight configuration:\n" error_str += weight_check.loc[pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME])].to_string() error_str += "\n\n" # transfer penalty check tp_index = pd.DataFrame({ PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE:['transfer'], PathSet.WEIGHTS_COLUMN_DEMAND_MODE :['transfer'], PathSet.WEIGHTS_COLUMN_SUPPLY_MODE :['transfer'], PathSet.WEIGHTS_COLUMN_WEIGHT_NAME :['transfer_penalty']}) uc_purp_index = PathSet.WEIGHTS_DF[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE]].drop_duplicates() FastTripsLogger.debug("uc_purp_index: \n%s" % uc_purp_index) # these are all the transfer penalties we have transfer_penaltes = pd.merge(left=tp_index, right=PathSet.WEIGHTS_DF, how='left') FastTripsLogger.debug("transfer_penaltes: \n%s" % transfer_penaltes) transfer_penalty_check = pd.merge(left=uc_purp_index, right=transfer_penaltes, how='left') FastTripsLogger.debug("transfer_penalty_check: \n%s" % transfer_penalty_check) # missing transfer penalty if pd.isnull(transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]).sum() > 0: error_str += "\nThe following user class x purpose are missing a transfer penalty:\n" error_str += transfer_penalty_check.loc[pd.isnull(transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME])].to_string() error_str += "\n\n" bad_pen = transfer_penalty_check.loc[transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE] < PathSet.MIN_TRANSFER_PENALTY] if len(bad_pen) > 0: error_str += "\nThe following user class x purpose path weights have invalid (too small) transfer penalties. MIN=(%f)\n" % PathSet.MIN_TRANSFER_PENALTY error_str += bad_pen.to_string() error_str += "\nConfigure smaller min_transfer_penalty AT YOUR OWN RISK since this will make path generation slow/unreliable.\n\n" # If *capacity_constraint* is true, make sure there's an at_capacity weight on the transit supply mode links # to enforce it. 
if capacity_constraint: # see if it's here already -- we don't know how to handle that... at_capacity = PathSet.WEIGHTS_DF.loc[ PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "at_capacity" ] if len(at_capacity) > 0: error_str += "\nFound at_capacity path weights explicitly set when about to set these for hard capacity constraints.\n" error_str += at_capacity.to_string() error_str += "\n\n" else: # set it for all user_class x transit x demand_mode x supply_mode transit_weights_df = PathSet.WEIGHTS_DF.loc[PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE] == PathSet.STATE_MODE_TRIP, [PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE]].copy() transit_weights_df.drop_duplicates(inplace=True) transit_weights_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ] = "at_capacity" transit_weights_df[PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE] = PathSet.HUGE_COST transit_weights_df[PathSet.WEIGHTS_GROWTH_TYPE] = PathSet.CONSTANT_GROWTH_MODEL FastTripsLogger.debug("Adding capacity-constraint weights:\n%s" % transit_weights_df.to_string()) PathSet.WEIGHTS_DF = pd.concat([PathSet.WEIGHTS_DF, transit_weights_df], axis=0) PathSet.WEIGHTS_DF.sort_values(by=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME], inplace=True) if len(error_str) > 0: FastTripsLogger.fatal(error_str) sys.exit(2) # add mode numbers to weights DF for relevant rows PathSet.WEIGHTS_DF = routes.add_numeric_mode_id(PathSet.WEIGHTS_DF, id_colname=PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, numeric_newcolname=PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, warn=True) # don't fail if some supply modes are configured but not used, they may be for future runs FastTripsLogger.debug("PathSet weights: \n%s" % PathSet.WEIGHTS_DF) export_columns = 
[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE, PathSet.WEIGHTS_GROWTH_TYPE, PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID] PathSet.WEIGHTS_DF.reindex(columns=export_columns).to_csv(os.path.join(output_dir,PathSet.OUTPUT_WEIGHTS_FILE), columns=export_columns, sep=" ", index=False) # add placeholder weights (ivt weight) for fares - one for each user_class, purpose, transit demand mode # these will be updated based on the person's value of time in calculate_cost() fare_weights = PathSet.WEIGHTS_DF.loc[ (PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE]==PathSet.STATE_MODE_TRIP) & (PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ]== "in_vehicle_time_min")] fare_weights = fare_weights[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE]].copy().drop_duplicates() fare_weights[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ] = "fare" # SIM_COL_PAX_FARE PathSet.WEIGHTS_DF = PathSet.WEIGHTS_DF.append(fare_weights) FastTripsLogger.debug("PathSet.WEIGHTS_DF with fare weights: \n%s" % PathSet.WEIGHTS_DF) return trip_list_df
[docs] @staticmethod def verify_weights(weights): # First, verify required columns are found error_str = "" weight_cols = list(weights.columns.values) FastTripsLogger.debug("verify_weight_config:\n%s" % weights.to_string()) if (PathSet.WEIGHTS_COLUMN_USER_CLASS not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_USER_CLASS) if (PathSet.WEIGHTS_COLUMN_PURPOSE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_PURPOSE) if (PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE) if (PathSet.WEIGHTS_COLUMN_DEMAND_MODE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_DEMAND_MODE) if (PathSet.WEIGHTS_COLUMN_SUPPLY_MODE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_SUPPLY_MODE) if (PathSet.WEIGHTS_COLUMN_WEIGHT_NAME not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_WEIGHT_NAME) if (PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE) if (PathSet.WEIGHTS_GROWTH_TYPE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_GROWTH_TYPE) constant_exp_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE].isin( [PathSet.CONSTANT_GROWTH_MODEL, PathSet.EXP_GROWTH_MODEL]), ] logarithmic_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE] == PathSet.LOGARITHMIC_GROWTH_MODEL, ] logistic_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE] == PathSet.LOGISTIC_GROWTH_MODEL, ] # Verify that no extraneous values are set for constant and exponential functions if not pd.isnull(constant_exp_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Linear or Exponential qualifier 
includes unnecessary modifier(s)\n' if not pd.isnull(logarithmic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Logarithmic qualifier includes unnecessary modifier(s)\n' if not pd.isnull(logistic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, ], axis='columns')).values.all(): error_str += 'Logistic qualifier includes log_base modifier\n' if not pd.notnull(logarithmic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, ], axis='columns')).values.all(): error_str += 'Logarithmic qualifier missing necessary log_base modifier\n' if not pd.notnull(logistic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Logistic qualifier missing necessary modifiers\n' if error_str: error_str = '\n-------Errors: pathweight_ft.txt---------------\n' + error_str return (not error_str), error_str
def __str__(self): """ Readable string version of the path. Note: If inbound trip, then the states are in reverse order (egress to access) """ ret_str = "Dict vars:\n" for k,v in self.__dict__.items(): ret_str += "%30s => %-30s %s\n" % (str(k), str(v), str(type(v))) # ret_str += PathSet.states_to_str(self.states, self.direction) return ret_str
    @staticmethod
    def write_paths(passengers_df, output_dir):
        """
        Write the assigned paths to the given output file
        (:py:attr:`PathSet.PATHS_OUTPUT_FILE`), one tab-delimited row per
        person trip with comma-joined boarding stops/trips, alighting stops
        and walk-leg times.

        :param passengers_df: Passenger paths assignment results
        :type passengers_df: :py:class:`pandas.DataFrame` instance
        :param output_dir: Output directory
        :type output_dir: string
        """
        # get trip information -- board stops, board trips and alight stops
        passenger_trips = passengers_df.loc[passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP].copy()
        ptrip_group     = passenger_trips.groupby([Passenger.PERSONS_COLUMN_PERSON_ID,
                                                   Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID])
        # these are Series: comma-joined stop/trip ids per (person, person trip)
        board_stops_str  = ptrip_group.A_id.apply(lambda x:','.join(x))
        board_trips_str  = ptrip_group.trip_id.apply(lambda x:','.join(x))
        alight_stops_str = ptrip_group.B_id.apply(lambda x:','.join(x))
        # name them so the concat below produces usable column names
        board_stops_str.name  = 'board_stop_str'
        board_trips_str.name  = 'board_trip_str'
        alight_stops_str.name = 'alight_stop_str'

        # get walking times (access, transfer and egress legs)
        walk_links = passengers_df.loc[(passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_ACCESS  )| \
                                       (passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRANSFER)| \
                                       (passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_EGRESS  )].copy()
        # link time in minutes, formatted to 2 decimal places
        walk_links['linktime_str'] = walk_links.pf_linktime.apply(lambda x: "%.2f" % (x/np.timedelta64(1,'m')))
        # NOTE(review): this groups by trip_list_id_num while the trip legs above group
        # by person_trip_id -- presumably these keys align one-to-one; verify upstream.
        walklink_group = walk_links[['person_id','trip_list_id_num','linktime_str']].groupby(['person_id','trip_list_id_num'])
        walktimes_str  = walklink_group.linktime_str.apply(lambda x:','.join(x))

        # aggregate to one line per person_id, trip_list_id
        print_passengers_df = passengers_df[['person_id','trip_list_id_num','pathmode','A_id','B_id',Passenger.PF_COL_PAX_A_TIME]].groupby(['person_id','trip_list_id_num']).agg(
            {'pathmode'                  :'first',  # path mode
             'A_id'                      :'first',  # origin
             'B_id'                      :'last',   # destination
             Passenger.PF_COL_PAX_A_TIME :'first'   # start time
            })

        # put them all together (aligned on the groupby index)
        print_passengers_df = pd.concat([print_passengers_df,
                                         board_stops_str,
                                         board_trips_str,
                                         alight_stops_str,
                                         walktimes_str], axis=1)

        print_passengers_df.reset_index(inplace=True)
        print_passengers_df.sort_values(by=['trip_list_id_num'], inplace=True)

        # rename to the legacy output column names
        print_passengers_df.rename(columns=
            {'pathmode'                  :'mode',
             'A_id'                      :'originTaz',
             'B_id'                      :'destinationTaz',
             Passenger.PF_COL_PAX_A_TIME :'startTime_time',
             'board_stop_str'            :'boardingStops',
             'board_trip_str'            :'boardingTrips',
             'alight_stop_str'           :'alightingStops',
             'linktime_str'              :'walkingTimes'}, inplace=True)

        # format the start time for output
        print_passengers_df['startTime'] = print_passengers_df['startTime_time'].apply(Util.datetime64_formatter)

        # select/reorder the output columns
        print_passengers_df = print_passengers_df[['trip_list_id_num','person_id','mode','originTaz','destinationTaz','startTime',
                                                   'boardingStops','boardingTrips','alightingStops','walkingTimes']]

        print_passengers_df.to_csv(os.path.join(output_dir, PathSet.PATHS_OUTPUT_FILE), sep="\t", index=False)
# passengerId mode originTaz destinationTaz startTime boardingStops boardingTrips alightingStops walkingTimes
[docs] @staticmethod def write_path_times(passengers_df, output_dir): """ Write the assigned path times to the given output file. :param passengers_df: Passenger path links :type passengers_df: :py:class:`pandas.DataFrame` instance :param output_dir: Output directory :type output_dir: string """ passenger_trips = passengers_df.loc[passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP].copy() ###### TODO: this is really catering to output format; an alternative might be more appropriate from .Assignment import Assignment passenger_trips.loc[:, 'board_time_str'] = passenger_trips[Assignment.SIM_COL_PAX_BOARD_TIME ].apply(Util.datetime64_formatter) passenger_trips.loc[:,'arrival_time_str'] = passenger_trips[Passenger.PF_COL_PAX_A_TIME].apply(Util.datetime64_formatter) passenger_trips.loc[:, 'alight_time_str'] = passenger_trips[Assignment.SIM_COL_PAX_ALIGHT_TIME].apply(Util.datetime64_formatter) # Aggregate (by joining) across each passenger + path ptrip_group = passenger_trips.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # these are Series board_time_str = ptrip_group['board_time_str' ].apply(lambda x:','.join(x)) arrival_time_str = ptrip_group['arrival_time_str'].apply(lambda x:','.join(x)) alight_time_str = ptrip_group['alight_time_str' ].apply(lambda x:','.join(x)) # Aggregate other fields across each passenger + path pax_exp_df = passengers_df.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]).agg( {# 'pathmode' :'first', # path mode 'A_id' :'first', # origin 'B_id' :'last', # destination Passenger.PF_COL_PAX_A_TIME :'first', # start time Passenger.PF_COL_PAX_B_TIME :'last', # end time # TODO: cost needs to be updated for updated dwell & travel time # 'cost' :'first', # total travel cost is calculated for the whole path }) # Put them together and return assert(len(pax_exp_df) == len(board_time_str)) pax_exp_df = pd.concat([pax_exp_df, board_time_str, 
arrival_time_str, alight_time_str], axis=1) # print pax_exp_df.to_string(formatters={'A_time':Assignment.datetime64_min_formatter, # 'B_time':Assignment.datetime64_min_formatter} # reset columns print_pax_exp_df = pax_exp_df.reset_index() print_pax_exp_df.sort_values(by=[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID], inplace=True) print_pax_exp_df['A_time_str'] = print_pax_exp_df[Passenger.PF_COL_PAX_A_TIME].apply(Util.datetime64_formatter) print_pax_exp_df['B_time_str'] = print_pax_exp_df[Passenger.PF_COL_PAX_B_TIME].apply(Util.datetime64_formatter) # rename columns print_pax_exp_df.rename(columns= {#'pathmode' :'mode', 'A_id' :'originTaz', 'B_id' :'destinationTaz', 'A_time_str' :'startTime', 'B_time_str' :'endTime', 'arrival_time_str' :'arrivalTimes', 'board_time_str' :'boardingTimes', 'alight_time_str' :'alightingTimes', # TODO: cost needs to be updated for updated dwell & travel time # 'cost' :'travelCost', }, inplace=True) # reorder print_pax_exp_df = print_pax_exp_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, #'mode', 'originTaz', 'destinationTaz', 'startTime', 'endTime', 'arrivalTimes', 'boardingTimes', 'alightingTimes', # 'travelCost', ]] times_out = open(os.path.join(output_dir, PathSet.PATH_TIMES_OUTPUT_FILE), 'w') print_pax_exp_df.to_csv(times_out, sep="\t", float_format="%.2f", index=False)
[docs] @staticmethod def calculate_cost(STOCH_DISPERSION, pathset_paths_df, pathset_links_df, veh_trips_df, trip_list_df, routes, tazs, transfers, stops=None, reset_bump_iter=False): """ This is equivalent to the C++ Path::calculateCost() method. Would it be faster to do it in C++? It would require us to package up the networks and paths and send back and forth. :p I think if we can do it using vectorized pandas operations, it should be fast, but we can compare/test. It's also messier to have this in two places. Maybe we should delete it from the C++; the overlap calcs are only in here right now. Returns pathset_paths_df with additional columns, Assignment.SIM_COL_PAX_FARE, Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_PROBABILITY, Assignment.SIM_COL_PAX_LOGSUM And pathset_links_df with additional columns, Assignment.SIM_COL_PAX_FARE, Assignment.SIM_COL_PAX_FARE_PERIOD, Assignment.SIM_COL_PAX_COST and Assignment.SIM_COL_PAX_DISTANCE """ from .Assignment import Assignment # if these are here already, remove them since we'll recalculate them if Assignment.SIM_COL_PAX_COST in list(pathset_paths_df.columns.values): pathset_paths_df.drop([Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_LNPS, Assignment.SIM_COL_PAX_PROBABILITY, Assignment.SIM_COL_PAX_LOGSUM], axis=1, inplace=True) pathset_links_df.drop([Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_DISTANCE], axis=1, inplace=True) # leaving this in for writing to CSV for debugging but I could take it out pathset_paths_df.drop(["logsum_component"], axis=1, inplace=True) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_df trace\n%s" % str(pathset_links_df.loc[pathset_links_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) FastTripsLogger.debug("calculate_cost: trip_list_df trace\n%s" % str(trip_list_df.loc[trip_list_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Add fares -- need stop zones first if they're not there. # We only need to do this once per pathset. 
# todo -- could remove non-transit links for this? FastTripsLogger.debug("calculate_cost columns:\n%s" % str(list(pathset_links_df.columns.values))) if "A_zone_id" not in list(pathset_links_df.columns.values): assert(stops is not None) pathset_links_df = stops.add_stop_zone_id(pathset_links_df, "A_id", "A_zone_id") pathset_links_df = stops.add_stop_zone_id(pathset_links_df, "B_id", "B_zone_id") # This needs to be done fresh each time since simulation might change the board times and therefore the fare periods pathset_links_df = routes.add_fares(pathset_links_df) # base this on pathfinding distance pathset_links_df[Assignment.SIM_COL_PAX_DISTANCE] = pathset_links_df[Passenger.PF_COL_LINK_DIST] pathset_links_to_use = pathset_links_df if PathSet.OVERLAP_SPLIT_TRANSIT: pathset_links_to_use = PathSet.split_transit_links(pathset_links_df, veh_trips_df, stops) else: pathset_links_to_use["split_first"] = True # all transit links are first # First, we need user class, purpose, demand modes, and value of time pathset_links_cost_df = pd.merge(left =pathset_links_to_use, right=trip_list_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.TRIP_LIST_COLUMN_VOT, Passenger.TRIP_LIST_COLUMN_ACCESS_MODE, Passenger.TRIP_LIST_COLUMN_EGRESS_MODE, Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE, ]], how ="left", on =[Passenger.PERSONS_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # linkmode = demand_mode_type. 
Set demand_mode for the links pathset_links_cost_df[PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = None pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_ACCESS , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_ACCESS_MODE ] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_EGRESS , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_EGRESS_MODE ] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_TRIP , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_TRANSFER, PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = "transfer" # Verify that it's set for every link missing_demand_mode = pd.isnull(pathset_links_cost_df[PathSet.WEIGHTS_COLUMN_DEMAND_MODE]).sum() assert(missing_demand_mode == 0) # drop the individual mode columns, we have what we need pathset_links_cost_df.drop([Passenger.TRIP_LIST_COLUMN_ACCESS_MODE, Passenger.TRIP_LIST_COLUMN_EGRESS_MODE, Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE], axis=1, inplace=True) # if bump_iter doesn't exist or if it needs to be reset if reset_bump_iter or Assignment.SIM_COL_PAX_BUMP_ITER not in pathset_links_cost_df: pathset_links_cost_df[Assignment.SIM_COL_PAX_BUMP_ITER] = -1 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_cost_df trace\n%s" % str(pathset_links_cost_df.loc[pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Inner join with the weights - now each weight has a row cost_df = pd.merge(left =pathset_links_cost_df, right =PathSet.WEIGHTS_DF, left_on =[Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.PF_COL_LINK_MODE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, Passenger.TRIP_LIST_COLUMN_MODE], 
right_on=[Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE], how ="inner") # update the fare weight placeholder (ivt pathweight - utils per min)) based on value of time (currency per hour) # since generalized cost is in utils, (ivt utils/min)x(60 min/1 hour)x(hour/vot currency) is the weight (utils/currency) cost_df.loc[ cost_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]==Assignment.SIM_COL_PAX_FARE, "weight_value" ] *= (60.0/cost_df[Passenger.TRIP_LIST_COLUMN_VOT]) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_df\n%s" % str(cost_df.loc[cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].sort_values([ Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.PF_COL_PATH_NUM,Passenger.PF_COL_LINK_NUM]).head(20))) # NOW we split it into 3 lists -- access/egress, transit, and transfer # This is because they will each be joined to tables specific to those kinds of mode categories, and so we don't want all the transit nulls on the other tables, etc. 
cost_columns = list(cost_df.columns.values) cost_df["var_value"] = np.nan # This means unset cost_accegr_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_ACCESS )|(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_EGRESS)] cost_trip_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP )] cost_transfer_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRANSFER)] del cost_df ##################### First, handle Access/Egress link costs for accegr_type in ["walk","bike","drive"]: # make copies; we don't want to mess with originals if accegr_type == "walk": link_df = tazs.walk_df.copy() mode_list = TAZ.WALK_MODE_NUMS elif accegr_type == "bike": mode_list = TAZ.BIKE_MODE_NUMS # not supported yet continue else: link_df = tazs.drive_df.copy() mode_list = TAZ.DRIVE_MODE_NUMS FastTripsLogger.debug("Access/egress link_df %s\n%s" % (accegr_type, link_df.head().to_string())) if len(link_df) == 0: continue # format these with A & B instead of TAZ and Stop link_df.reset_index(inplace=True) link_df["A_id_num"] = -1 link_df["B_id_num"] = -1 link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.ACCESS_MODE_NUMS), "A_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_TAZ_NUM ] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.ACCESS_MODE_NUMS), "B_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_STOP_NUM] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "A_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_STOP_NUM] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "B_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_TAZ_NUM ] link_df.drop([TAZ.WALK_ACCESS_COLUMN_TAZ_NUM, TAZ.WALK_ACCESS_COLUMN_STOP_NUM], axis=1, inplace=True) assert(len(link_df.loc[link_df["A_id_num"] < 0]) == 0) FastTripsLogger.debug("%s link_df =\n%s" % (accegr_type, link_df.head().to_string())) # Merge access/egress with walk|bike|drive access/egress 
information cost_accegr_df = pd.merge(left = cost_accegr_df, right = link_df, on = ["A_id_num", PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, "B_id_num"], how = "left") # rename new columns so it's clear it's for walk|bike|drive for colname in list(link_df.select_dtypes(include=['float64','int64']).columns.values): # don't worry about join columns if colname in ["A_id_num", PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, "B_id_num"]: continue # rename the rest new_colname = "%s %s" % (colname, accegr_type) cost_accegr_df.rename(columns={colname:new_colname}, inplace=True) # use it, if relevant cost_accegr_df.loc[ (cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == colname)& (cost_accegr_df[PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM].isin(mode_list)), "var_value"] = cost_accegr_df[new_colname] # Access/egress needs passenger trip departure, arrival and time_target cost_accegr_df = pd.merge(left =cost_accegr_df, right=trip_list_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME, Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME, Passenger.TRIP_LIST_COLUMN_TIME_TARGET, ]], how ="left", on =[Passenger.PERSONS_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # drop links that are irrelevant based on departure time for access links, or arrival time for egress links cost_accegr_df["check_time"] = cost_accegr_df[Assignment.SIM_COL_PAX_A_TIME] # departure time for access cost_accegr_df.loc[ cost_accegr_df[TAZ.MODE_COLUMN_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "check_time" ] = cost_accegr_df[Assignment.SIM_COL_PAX_B_TIME] # arrival time for egress cost_accegr_df["check_time"] = (cost_accegr_df["check_time"] - Assignment.NETWORK_BUILD_DATE_START_TIME)/np.timedelta64(1,'m') # it's only drive links we need to check cost_accegr_df["to_drop"] = False if "%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_START_TIME_MIN, "drive") in cost_accegr_df.columns.values: cost_accegr_df.loc[ 
cost_accegr_df[TAZ.MODE_COLUMN_MODE_NUM].isin(TAZ.DRIVE_MODE_NUMS)& ((cost_accegr_df["check_time"] < cost_accegr_df["%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_START_TIME_MIN, "drive")])| (cost_accegr_df["check_time"] >= cost_accegr_df["%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_END_TIME_MIN, "drive")])), "to_drop"] = True # if len(Assignment.TRACE_IDS) > 0: # FastTripsLogger.debug("cost_accegr_df=\n%s\ndtypes=\n%s" % (cost_accegr_df.loc[cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]].to_string(), str(cost_accegr_df.dtypes))) FastTripsLogger.debug("Dropping %d rows from cost_accegr_df" % cost_accegr_df["to_drop"].sum()) cost_accegr_df = cost_accegr_df.loc[ cost_accegr_df["to_drop"]==False ] cost_accegr_df.drop(["check_time","to_drop"], axis=1, inplace=True) # penalty for arriving before preferred arrival time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN )& (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS) & (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'departure'), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS)& \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'arrival'), "var_value"] = (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME] - cost_accegr_df[Passenger.PF_COL_PAX_B_TIME])/np.timedelta64(1,'m') # arrive early is not negative - that would be arriving late cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN)&(cost_accegr_df["var_value"] < 0), "var_value"] = 0.0 # penalty for departing after preferred departure time. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN )& (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS)& (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'arrival'), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS) & (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'departure'), "var_value"] = (cost_accegr_df[Passenger.PF_COL_PAX_A_TIME] - cost_accegr_df[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME])/ np.timedelta64(1, 'm') # depart late is not negative - that would be departing early cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN)&(cost_accegr_df["var_value"] < 0), "var_value"] = 0.0 # constant growth = exponential growth with 0 percent growth rate # depart before preferred or arrive after preferred means the passenger just missed something important # Arrive late only impacts the egress link, so set the var_value equal to zero for the access link cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN ) & \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS), "var_value"] = 0.0 # Arrive late only impacts those that have a preferred arrival time. If preferred departure time, # set arrive late equal to zero. --This could have been done with previous line, but it would # look ugly mixing and matching 'and' and 'or'. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_DEPARTURE), "var_value"] = 0.0 # Calculate how late the person arrives after preferred time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS)& \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_ARRIVAL), "var_value"] = \ (cost_accegr_df[Passenger.PF_COL_PAX_B_TIME] - cost_accegr_df[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME])/np.timedelta64(1,'m') # If arrived before preferred time, set the arrive late field to zero. You don't get a # discount for arriving early. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN) & \ (cost_accegr_df['var_value'] < 0), "var_value"] = 0 # preferred delay_min - departure means want to depart after that time # Depart early only impacts the access link, so set the var_value equal to zero for the egress link cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS), "var_value"] = 0.0 # Depart early only impacts those that have a preferred departure time. If preferred arrive time, # set depart early equal to zero. --This could have been done with previous line, but it would # look ugly mixing and matching 'and' and 'or'. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_ARRIVAL), "var_value"] = 0.0 # Calculate how early the person departs before the preferred time. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_DEPARTURE), "var_value"] = \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME] - cost_accegr_df[Passenger.PF_COL_PAX_A_TIME])/ np.timedelta64(1, 'm') # If departing after preferred time, set the depart early field to zero. You don't get a # discount for taking your time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df['var_value'] < 0), "var_value"] = 0 assert 0 == cost_accegr_df[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME].isin([PathSet.WEIGHT_NAME_DEPART_EARLY_MIN, PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN])) & \ (cost_accegr_df['var_value'].isnull())].shape[0] assert 0 == cost_accegr_df[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME].isin([PathSet.WEIGHT_NAME_DEPART_EARLY_MIN, PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN])) & \ (cost_accegr_df['var_value']<0)].shape[0] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_accegr_df trace\n%s\ndtypes=\n%s" % (cost_accegr_df.loc[cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_accegr_df.dtypes))) missing_accegr_costs = cost_accegr_df.loc[ pd.isnull(cost_accegr_df["var_value"]) ] error_accegr_msg = "Missing %d out of %d access/egress var_value values" % (len(missing_accegr_costs), len(cost_accegr_df)) FastTripsLogger.debug(error_accegr_msg) if len(missing_accegr_costs) > 0: error_accegr_msg += "\n%s" % missing_accegr_costs.head(10).to_string() FastTripsLogger.fatal(error_accegr_msg) ##################### Next, handle Transit Trip link costs # set the fare var_values for split_first only cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "fare")&(cost_trip_df["split_first"]==True), "var_value"] = 
cost_trip_df[Assignment.SIM_COL_PAX_FARE] cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "fare")&(cost_trip_df["split_first"]==False), "var_value"] = 0 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_trip_df trace\n%s\ndtypes=\n%s" % (cost_trip_df.loc[cost_trip_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_trip_df.dtypes))) # if there's a board time, in_vehicle_time = new_B_time - board_time # otherwise, in_vehicle_time = B time - A time (for when we split) cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")&pd.notnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_B_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME])/np.timedelta64(1,'m') cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")& pd.isnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_B_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_A_TIME])/np.timedelta64(1,'m') # if in vehicle time is less than 0 then off by 1 day error cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")&(cost_trip_df["var_value"]<0), "var_value"] = cost_trip_df["var_value"] + (24*60) # if there's a board time, wait time = board_time - A time # otherwise, wait time = 0 (for when we split transit links) cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "wait_time_min")&pd.notnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_A_TIME])/np.timedelta64(1,'m') cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "wait_time_min")& pd.isnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = 0 # which overcap column to use? 
overcap_col = Trip.SIM_COL_VEH_OVERCAP if Assignment.MSA_RESULTS and Trip.SIM_COL_VEH_MSA_OVERCAP in list(cost_trip_df.columns.values): overcap_col = Trip.SIM_COL_VEH_MSA_OVERCAP # at cap is a binary, 1 if overcap >= 0 and they're not one of the lucky few that boarded cost_trip_df["at_capacity"] = 0.0 if Assignment.SIM_COL_PAX_BOARD_STATE in list(cost_trip_df.columns.values): cost_trip_df.loc[ (cost_trip_df[overcap_col] >= 0)& (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_STATE] != "board_easy")& (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_STATE] != "boarded"), "at_capacity" ] = 1.0 else: cost_trip_df.loc[ (cost_trip_df[overcap_col] >= 0) , "at_capacity" ] = 1.0 cost_trip_df.loc[cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "at_capacity" , "var_value"] = cost_trip_df["at_capacity"] cost_trip_df.loc[cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "overcap" , "var_value"] = cost_trip_df[overcap_col] # overcap shouldn't be negative cost_trip_df.loc[ (cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "overcap")&(cost_trip_df["var_value"]<0), "var_value"] = 0.0 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_trip_df trace\n%s\ndtypes=\n%s" % (cost_trip_df.loc[cost_trip_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_trip_df.dtypes))) missing_trip_costs = cost_trip_df.loc[ pd.isnull(cost_trip_df["var_value"]) ] error_trip_msg = "Missing %d out of %d transit trip var_value values" % (len(missing_trip_costs), len(cost_trip_df)) FastTripsLogger.debug(error_trip_msg) if len(missing_trip_costs) > 0: error_trip_msg += "\n%s" % missing_trip_costs.head(10).to_string() FastTripsLogger.fatal(error_trip_msg) ##################### Finally, handle Transfer link costs cost_transfer_df = transfers.add_transfer_attributes(cost_transfer_df, pathset_links_df) cost_transfer_df.loc[cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "walk_time_min", "var_value"] = cost_transfer_df[Passenger.PF_COL_LINK_TIME]/np.timedelta64(1,'m') # any numeric 
column can be used for colname in list(cost_transfer_df.select_dtypes(include=['float64','int64']).columns.values): FastTripsLogger.debug("Using numeric column %s" % colname) cost_transfer_df.loc[cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == colname, "var_value"] = cost_transfer_df[colname] # make zero walk transfers have default var_values 0 cost_transfer_df.loc[ (cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] != "transfer_penalty")& (cost_transfer_df["A_id_num"]==cost_transfer_df["B_id_num"]), "var_value"] = 0.0 # zero walk transfers have a transfer penalty although they're not otherwise configured cost_transfer_df.loc[ (cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "transfer_penalty")& (pd.isnull(cost_transfer_df["var_value"])), "var_value"] = 1.0 # FastTripsLogger.debug("cost_transfer_df=\n%s\ndtypes=\n%s" % (cost_transfer_df.head().to_string(), str(cost_transfer_df.dtypes))) missing_transfer_costs = cost_transfer_df.loc[ pd.isnull(cost_transfer_df["var_value"]) ] error_transfer_msg = "Missing %d out of %d transfer var_value values" % (len(missing_transfer_costs), len(cost_transfer_df)) FastTripsLogger.debug(error_transfer_msg) if len(missing_transfer_costs) > 0: error_transfer_msg += "\n%s" % missing_transfer_costs.head(10).to_string() FastTripsLogger.fatal(error_transfer_msg) # abort here if we're missing anything if len(missing_accegr_costs) + len(missing_trip_costs) + len(missing_transfer_costs) > 0: abort_error_msg = "\nMissing %d accegr_costs\nMissing %d trip_costs\nMissing %d transfer_costs" % (len(missing_accegr_costs), len(missing_trip_costs), len(missing_transfer_costs)) FastTripsLogger.debug(abort_error_msg) raise NotImplementedError("Missing var_values; See log") ##################### Put them back together into a single dataframe cost_columns = [Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, 
Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.TRIP_LIST_COLUMN_VOT, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE, PathSet.WEIGHTS_GROWTH_TYPE, PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, "var_value", Assignment.SIM_COL_MISSED_XFER, Assignment.SIM_COL_PAX_BUMP_ITER, Assignment.SIM_COL_PAX_FARE] cost_accegr_df = cost_accegr_df.loc[:, cost_accegr_df.columns.isin(cost_columns)] cost_trip_df = cost_trip_df.loc[:, cost_trip_df.columns.isin(cost_columns)] cost_transfer_df = cost_transfer_df.loc[:, cost_transfer_df.columns.isin(cost_columns)] cost_df = pd.concat([cost_accegr_df, cost_trip_df, cost_transfer_df], axis=0, ignore_index=True) # FastTripsLogger.debug("calculate_cost: cost_df=\n%s\ndtypes=\n%s" % (cost_df.to_string(), str(cost_df.dtypes))) # calculate link cost a function of the variable, weight and weight type Util.calculate_pathweight_costs(cost_df, Assignment.SIM_COL_PAX_COST) cost_df.sort_values([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM], inplace=True) FastTripsLogger.debug("calculate_cost: cost_df\n%s" % str(cost_df.loc[cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # verify all costs are non-negative if cost_df[Assignment.SIM_COL_PAX_COST].min() < 0: msg = "calculate_cost: Negative costs found:\n%s" % cost_df.loc[ cost_df[Assignment.SIM_COL_PAX_COST]<0 ].to_string() FastTripsLogger.fatal(msg) raise UnexpectedError(msg) ###################### sum linkcost to links cost_link_df = cost_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, 
Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM, Assignment.SIM_COL_PAX_COST]].groupby( [Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # sort by this first Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM]).aggregate('sum').reset_index() if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_link_df trace\n%s" % str(cost_link_df.loc[cost_link_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # join to pathset_links_df pathset_links_df = pd.merge(left =pathset_links_df, right=cost_link_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM]) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_df trace\n%s" % str(pathset_links_df.loc[pathset_links_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) ###################### overlap calcs full_overlap_df = None if PathSet.OVERLAP_VARIABLE != PathSet.OVERLAP_NONE: full_overlap_df = PathSet.calculate_overlap(pathset_links_to_use) ###################### sum linkcost to paths cost_link_df.drop([Passenger.PF_COL_LINK_NUM], axis=1, inplace=True) cost_path_df = cost_link_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # order by this Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]).aggregate('sum').reset_index() if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_path_df trace\n%s" % str(cost_path_df.loc[cost_path_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # join to pathset_paths_df pathset_paths_df = pd.merge(left =pathset_paths_df, right=cost_path_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, 
Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]) if PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_NONE: pathset_paths_df[Assignment.SIM_COL_PAX_LNPS] = 0 else: pathset_paths_df = pd.merge(left =pathset_paths_df, right=full_overlap_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]) if PathSet.LEARN_ROUTES: #'learn_discount': Exponential decay function pathset_paths_df['learn_discount'] = np.exp(-PathSet.LEARN_ROUTES_RATE * pathset_paths_df[PathSet.SUCCESS_FLAG_COLUMN]) pathset_paths_df['orig_sim_cost'] = pathset_paths_df[Assignment.SIM_COL_PAX_COST] pathset_paths_df[Assignment.SIM_COL_PAX_COST] = pathset_paths_df[Assignment.SIM_COL_PAX_COST] * pathset_paths_df['learn_discount'] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_paths_df trace\n%s" % str(pathset_paths_df.loc[pathset_paths_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) ###################### logsum and probabilities pathset_paths_df["logsum_component"] = np.exp((-1.0*pathset_paths_df[Assignment.SIM_COL_PAX_COST] + pathset_paths_df[Assignment.SIM_COL_PAX_LNPS])/STOCH_DISPERSION) # sum across all paths pathset_logsum_df = pathset_paths_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,"logsum_component"]].groupby( [Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # sort by this Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]).aggregate('sum').reset_index() pathset_logsum_df.rename(columns={"logsum_component":"logsum"}, inplace=True) pathset_paths_df = pd.merge(left=pathset_paths_df, right=pathset_logsum_df, how="left") pathset_paths_df[Assignment.SIM_COL_PAX_PROBABILITY] = 
pathset_paths_df["logsum_component"]/pathset_paths_df["logsum"] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_paths_df trace\n%s" % str(pathset_paths_df.loc[pathset_paths_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Note: the path finding costs won't match the costs here because missed transfers are already calculated here # It would be good to have some sanity checking that theyre aligned otherwise though to make sure we're # calculating costs consistently return (pathset_paths_df, pathset_links_df)
@staticmethod
def calculate_overlap(pathset_links_to_use):
    """
    Given a set of pathset links, returns the results of the overlap (path-size) calculations.

    The returned dataframe has columns person_id, person_trip_id, trip_list_id_num, trace,
    pathnum, and ln_PS -- the natural log of the path-size term used for the overlap
    correction in the path-choice logsum.

    The overlap variable is chosen by :py:attr:`PathSet.OVERLAP_VARIABLE` ("count", "time"
    or "distance") and the (L_i/L_j) ratio is raised to :py:attr:`PathSet.OVERLAP_SCALE_PARAMETER`.
    """
    from .Assignment import Assignment
    FastTripsLogger.debug("calculate_overlap() pathset_links_to_use (%d) head=\n%s" % (len(pathset_links_to_use),
                          str(pathset_links_to_use.head(30))))

    # CHUNKING because we run into memory problems
    # TODO: figure out more sophisticated chunk size
    chunk_list = pathset_links_to_use[[Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM]].drop_duplicates().reset_index(drop=True)
    # ceil(len/chunk_size) -- this matches the chunk_num assignment below exactly.
    # (The previous form, len//size + 1, produced one extra *empty* chunk whenever
    #  len was an exact multiple of OVERLAP_CHUNK_SIZE.)
    num_chunks = (len(chunk_list) + PathSet.OVERLAP_CHUNK_SIZE - 1)//PathSet.OVERLAP_CHUNK_SIZE
    chunk_list["chunk_num"] = np.floor_divide(chunk_list.index, PathSet.OVERLAP_CHUNK_SIZE)
    FastTripsLogger.debug("calculate_overlap() chunk_list size=%d head=\n%s\ntail=\n%s" % (len(chunk_list),
                          chunk_list.head().to_string(), chunk_list.tail().to_string()))
    # tag each pathset link with its owning chunk
    pathset_links_to_use = pd.merge(left  =pathset_links_to_use,
                                    right =chunk_list,
                                    how   ='left')
    FastTripsLogger.debug("calculate_overlap() mem_use=%s pathset_links_to_use has length %d, head=\n%s" %
                          (Util.get_process_mem_use_str(), len(pathset_links_to_use), pathset_links_to_use.head().to_string()))

    full_overlap_df = pd.DataFrame()
    for chunk_num in range(num_chunks):

        # get the person trips in the chunk
        overlap_df = pathset_links_to_use.loc[ pathset_links_to_use["chunk_num"] == chunk_num]
        FastTripsLogger.info(" Calculating overlap for chunk %4d/%4d (len %6d); mem_use=%8s" %
                             (chunk_num+1, num_chunks, len(overlap_df), Util.get_process_mem_use_str()))

        # keep only the columns the overlap math needs
        overlap_df = overlap_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                 Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                 Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                 Passenger.TRIP_LIST_COLUMN_TRACE,
                                 Passenger.PF_COL_PATH_NUM,
                                 Passenger.PF_COL_LINK_NUM,
                                 "A_id_num","B_id_num",
                                 Route.ROUTES_COLUMN_MODE,
                                 "new_linktime",
                                 Assignment.SIM_COL_PAX_DISTANCE]].copy()

        # get ready to count, time, dist to path and add path sum version to overlap_df -- this is L
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: mem_use=%s overlap_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_df), overlap_df.head().to_string()))

        # sum to path
        overlap_df["count"] = 1
        overlap_path_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # sort by this
                                              Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                              Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                              Passenger.TRIP_LIST_COLUMN_TRACE,
                                              Passenger.PF_COL_PATH_NUM]).aggregate(
                                                  {'count':'sum','new_linktime':'sum',Assignment.SIM_COL_PAX_DISTANCE:'sum'}).reset_index(drop=False)
        overlap_path_df.rename(columns={"count":"path_count",
                                        "new_linktime":"path_time",
                                        Assignment.SIM_COL_PAX_DISTANCE:"path_distance"}, inplace=True)
        overlap_df.drop(["count"], axis=1, inplace=True)
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: Added path summed count,time,distance. mem_use=%s overlap_path_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_path_df), overlap_path_df.head().to_string()))

        # add the path summed variables to the link dataframe
        overlap_df = pd.merge(overlap_df, overlap_path_df, how="left",
                              on=[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                  Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                  Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                  Passenger.TRIP_LIST_COLUMN_TRACE,
                                  Passenger.PF_COL_PATH_NUM])
        del overlap_path_df  # we're done with this

        # outer join on trip_list_id_num means when they match, we'll get a cartesian product of the links
        overlap_df = pd.merge(overlap_df, overlap_df.copy(),
                              on=[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                  Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                  Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                  Passenger.TRIP_LIST_COLUMN_TRACE], how="outer")

        # count matches -- matching A,B,mode
        overlap_df["match"] = 0
        overlap_df.loc[ (overlap_df["A_id_num_x"]==overlap_df["A_id_num_y"])&
                        (overlap_df["B_id_num_x"]==overlap_df["B_id_num_y"])&
                        (overlap_df["mode_x"    ]==overlap_df["mode_y"    ]), "match"] = 1

        if PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_COUNT:
            overlap_df["link_prop_x"] = 1.0/overlap_df["path_count_x"]                               # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_count_x"]/overlap_df["path_count_y"]        # L_i/L_j
        elif PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_TIME:
            overlap_df["link_prop_x"] = overlap_df["new_linktime_x"]/overlap_df["path_time_x"]       # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_time_x"]/overlap_df["path_time_y"]          # L_i/L_j
        elif PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_DISTANCE:
            overlap_df["link_prop_x"] = overlap_df["distance_x"]/overlap_df["path_distance_x"]       # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_distance_x"]/overlap_df["path_distance_y"]  # L_i/L_j

        # (L_i/L_j)^gamma -- operate on the Series, not a single-column frame
        overlap_df["pathlen_x_y_scale"] = overlap_df["pathlen_x_y"].pow(PathSet.OVERLAP_SCALE_PARAMETER)
        # zero it out if it's not a match
        overlap_df.loc[overlap_df["match"]==0, "pathlen_x_y_scale"] = 0
        # now pathlen_x_y_scale = (L_i/L_j)^gamma x delta_aj

        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap(): overlap_df trace\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        # debug
        # overlap_df_temp = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, "pathnum_x","linknum_x","link_prop_x","pathnum_y"]).aggregate({"match":"sum", "pathlen_x_y_scale":"sum"})
        # FastTripsLogger.debug("calculate_cost: overlap_df_temp\n%s" % str(overlap_df_temp.head(50)))

        # group by pathnum_x, linknum_x -- so this sums over paths P_j in equation (or pathnum_y here)
        overlap_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # sort by this
                                         Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                         Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                         Passenger.TRIP_LIST_COLUMN_TRACE,
                                         "pathnum_x","linknum_x","link_prop_x"]).aggregate({"pathlen_x_y_scale":"sum"}).reset_index()
        # now pathlen_x_y_scale = SUM_j (L_i/L_j)^gamma x delta_aj
        overlap_df["PS"] = overlap_df["link_prop_x"]/overlap_df["pathlen_x_y_scale"]  # l_a/L_i * 1/(SUM_j (L_i/L_j)^gamma x delta_aj)

        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap() trace overlap_df\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        FastTripsLogger.debug("calculate_overlap() chunk_num %d: Cartesian product of links. mem_use=%s overlap_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_df), overlap_df.head().to_string()))

        # sum across link in path
        overlap_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # preserve ordering
                                         Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                         Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                         Passenger.TRIP_LIST_COLUMN_TRACE,"pathnum_x"]).aggregate({"PS":"sum"}).reset_index(drop=False)

        # Check all pathsizes are in [0,1]
        min_PS = overlap_df["PS"].min()
        max_PS = overlap_df["PS"].max()
        FastTripsLogger.debug("PathSize min=%f max=%f" % (min_PS, max_PS))
        if min_PS < 0:
            FastTripsLogger.fatal("Min pathsize = %f < 0:\n%s" % (min_PS, overlap_df.loc[overlap_df["PS"]==min_PS].to_string()))
        if max_PS > 1.0001:
            FastTripsLogger.fatal("Max pathsize = %f > 1:\n%s" % (max_PS, overlap_df.loc[overlap_df["PS"]==max_PS].to_string()))

        overlap_df[Assignment.SIM_COL_PAX_LNPS] = np.log(overlap_df["PS"])
        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap() overlap_df trace\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        # rename pathnum_x to pathnum and drop PS. Now overlap_df has columns trip_list_id_num, pathnum, ln_PS
        overlap_df.rename(columns={"pathnum_x":Passenger.PF_COL_PATH_NUM}, inplace=True)
        overlap_df.drop(["PS"], axis=1, inplace=True)  # we have ln_PS

        if len(full_overlap_df) == 0:
            full_overlap_df = overlap_df
        else:
            # DataFrame.append() is deprecated (removed in pandas 2.0); pd.concat is the
            # equivalent row-wise concatenation
            full_overlap_df = pd.concat([full_overlap_df, overlap_df])
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: mem_use=%s full_overlap_df has length %d" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(full_overlap_df)))

    FastTripsLogger.debug("calculate_overlap() complete: full_overlap_df head=\n%s" % full_overlap_df.head(30))
    return full_overlap_df