Source code for fasttrips.PathSet

from __future__ import division
from builtins import str
from builtins import range
from builtins import object

__copyright__ = "Copyright 2015-2016 Contributing Entities"
__license__   = """
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
"""
import datetime
import os
import sys

import numpy as np
import pandas as pd

from .Error     import NotImplementedError, UnexpectedError
from .Logger    import FastTripsLogger
from .Passenger import Passenger
from .Route     import Route
from .TAZ       import TAZ
from .Trip      import Trip
from .Util      import Util


#: Default user class: just one class called "all"
def generic_user_class(row_series):
    """
    Default user class function: places every trip in the single class "all".

    Receives one row of the trip list (a :py:class:`pandas.Series`) and ignores it.
    """
    return "all"

class PathSet(object):
    """
    Represents a path set for a passenger from an origin :py:class:`TAZ` to a destination
    :py:class:`TAZ` through a set of stops.
    """
    #: Paths output file
    PATHS_OUTPUT_FILE               = 'ft_output_passengerPaths.txt'

    #: Path times output file
    PATH_TIMES_OUTPUT_FILE          = 'ft_output_passengerTimes.txt'

    #: Configured functions, indexed by name
    CONFIGURED_FUNCTIONS            = { 'generic_user_class':generic_user_class }

    #: Path configuration: Name of the function that defines user class
    #: (a key into :py:attr:`PathSet.CONFIGURED_FUNCTIONS`)
    USER_CLASS_FUNCTION             = None

    #: File with weights file. Space delimited table.
    WEIGHTS_FILE                    = 'pathweight_ft.txt'
    #: Path weights (a :py:class:`pandas.DataFrame` once read)
    WEIGHTS_DF                      = None
    #: Read weights file as fixed-width format. If false, standard CSV format is read.
    WEIGHTS_FIXED_WIDTH             = False

    #: Configuration: Minimum transfer penalty. Safeguard against having no transfer penalty
    #: which can result in terrible paths with excessive transfers.
    MIN_TRANSFER_PENALTY            = None

    #: Configuration: Overlap scale parameter.
    OVERLAP_SCALE_PARAMETER         = None

    #: Configuration: Overlap variable. Can be "None", "count", "distance", "time".
    OVERLAP_VARIABLE                = None

    #: Overlap variable option: None. Don't use overlap pathsize correction.
    OVERLAP_NONE                    = "None"
    #: Overlap variable option: count. Use leg count overlap pathsize correction.
    OVERLAP_COUNT                   = "count"
    #: Overlap variable option: distance. Use leg distance overlap pathsize correction.
    OVERLAP_DISTANCE                = "distance"
    #: Overlap variable option: time. Use leg time overlap pathsize correction.
    OVERLAP_TIME                    = "time"
    #: Valid values for OVERLAP_VARIABLE
    OVERLAP_VARIABLE_OPTIONS        = [OVERLAP_NONE,
                                       OVERLAP_COUNT,
                                       OVERLAP_DISTANCE,
                                       OVERLAP_TIME]

    #: Overlap chunk size. How many person's trips to process at a time in overlap calculations
    #: in python simulation
    OVERLAP_CHUNK_SIZE              = None

    #: Overlap option: Split transit leg into component parts? e.g. split A-E
    #: into A-B-C-D-E for overlap calculations?
    OVERLAP_SPLIT_TRANSIT           = None

    # Route-learning configuration
    LEARN_ROUTES                    = False
    LEARN_ROUTES_RATE               = 0.05

    # Column names used to flag path success / bumping
    SUCCESS_FLAG_COLUMN             = 'success_flag'
    BUMP_FLAG_COLUMN                = 'bump_flag'

    #: Allow departures and arrivals before / after preferred time
    ARRIVE_LATE_ALLOWED_MIN         = datetime.timedelta(minutes = 0)
    DEPART_EARLY_ALLOWED_MIN        = datetime.timedelta(minutes = 0)

    # Growth-model names for penalty functions
    CONSTANT_GROWTH_MODEL           = 'constant'
    EXP_GROWTH_MODEL                = 'exponential'
    LOGARITHMIC_GROWTH_MODEL        = 'logarithmic'
    LOGISTIC_GROWTH_MODEL           = 'logistic'
    PENALTY_GROWTH_MODELS           = [
        CONSTANT_GROWTH_MODEL,
        EXP_GROWTH_MODEL,
        LOGARITHMIC_GROWTH_MODEL,
        LOGISTIC_GROWTH_MODEL,
    ]

    #: Weights column: User Class
    WEIGHTS_COLUMN_USER_CLASS       = "user_class"
    #: Weights column: Purpose
    WEIGHTS_COLUMN_PURPOSE          = "purpose"
    #: Weights column: Demand Mode Type
    WEIGHTS_COLUMN_DEMAND_MODE_TYPE = "demand_mode_type"
    #: Weights column: Demand Mode
    WEIGHTS_COLUMN_DEMAND_MODE      = "demand_mode"
    #: Weights column: Supply Mode
    WEIGHTS_COLUMN_SUPPLY_MODE      = "supply_mode"
    #: Weights column: Weight Name
    WEIGHTS_COLUMN_WEIGHT_NAME      = "weight_name"
    #: Weights column: Weight Value
    WEIGHTS_COLUMN_WEIGHT_VALUE     = "weight_value"
    #: Weights column: Growth Type
    WEIGHTS_GROWTH_TYPE             = "growth_type"
    #: Weights column: Log Base for logarithmic growth function
    WEIGHTS_GROWTH_LOG_BASE         = "log_base"
    #: Weights column: Max value for logistic growth function
    WEIGHTS_GROWTH_LOGISTIC_MAX     = "logistic_max"
    #: Weights column: Midpoint value for logistic growth function
    WEIGHTS_GROWTH_LOGISTIC_MID     = "logistic_mid"

    # Schedule-deviation weight names
    WEIGHT_NAME_DEPART_EARLY_MIN    = "depart_early_min"
    WEIGHT_NAME_ARRIVE_LATE_MIN     = "arrive_late_min"
    WEIGHT_NAME_DEPART_LATE_MIN     = 'depart_late_min'
    WEIGHT_NAME_ARRIVE_EARLY_MIN    = 'arrive_early_min'
    WEIGHT_NAME_VALID_NAMES         = [
        WEIGHT_NAME_DEPART_EARLY_MIN,
        WEIGHT_NAME_DEPART_LATE_MIN,
        WEIGHT_NAME_ARRIVE_EARLY_MIN,
        WEIGHT_NAME_ARRIVE_LATE_MIN,
    ]

    # ========== Added by fasttrips =======================================================
    #: Weights column: Supply Mode number
    WEIGHTS_COLUMN_SUPPLY_MODE_NUM  = "supply_mode_num"

    #: File with weights for c++
    OUTPUT_WEIGHTS_FILE             = "ft_intermediate_weights.txt"

    DIR_OUTBOUND    = 1  #: Trips outbound from home have preferred arrival times
    DIR_INBOUND     = 2  #: Trips inbound to home have preferred departure times

    # Keys into the per-path dictionaries in :py:attr:`PathSet.pathdict`
    PATH_KEY_COST           = "pf_cost"         #: path cost according to pathfinder
    PATH_KEY_FARE           = "pf_fare"         #: path fare according to pathfinder
    PATH_KEY_PROBABILITY    = "pf_probability"  #: path probability according to pathfinder
    PATH_KEY_INIT_COST      = "pf_initcost"     #: initial cost (in pathfinding, before path was finalized)
    PATH_KEY_INIT_FARE      = "pf_initfare"     #: initial fare (in pathfinding, before path was finalized)
    PATH_KEY_STATES         = "states"

    # Indices into each state tuple stored under PATH_KEY_STATES
    STATE_IDX_LABEL         = 0   #: :py:class:`datetime.timedelta` instance
    STATE_IDX_DEPARR        = 1   #: :py:class:`datetime.datetime` instance. Departure if outbound/backwards, arrival if inbound/forwards.
    STATE_IDX_DEPARRMODE    = 2   #: mode id
    STATE_IDX_TRIP          = 3   #: trip id
    STATE_IDX_SUCCPRED      = 4   #: stop identifier or TAZ identifier
    STATE_IDX_SEQ           = 5   #: sequence (for trip)
    STATE_IDX_SEQ_SUCCPRED  = 6   #: sequence for successor/predecessor
    STATE_IDX_LINKTIME      = 7   #: :py:class:`datetime.timedelta` instance
    STATE_IDX_LINKFARE      = 8   #: fare cost, float
    STATE_IDX_LINKCOST      = 9   #: link generalized cost, float for hyperpath/stochastic
    STATE_IDX_LINKDIST      = 10  #: link distance, float
    STATE_IDX_COST          = 11  #: cost float, for hyperpath/stochastic assignment
    STATE_IDX_ARRDEP        = 12  #: :py:class:`datetime.datetime` instance. Arrival if outbound/backwards, departure if inbound/forwards.

    # these are also the demand_mode_type values
    STATE_MODE_ACCESS       = "access"
    STATE_MODE_EGRESS       = "egress"
    STATE_MODE_TRANSFER     = "transfer"
    # new
    STATE_MODE_TRIP         = "transit"  # onboard

    # Sentinel costs: cost assigned to a bumped passenger's experienced path,
    # and a huge (but not bump-level) cost used e.g. for at_capacity weights
    BUMP_EXPERIENCED_COST   = 999999
    HUGE_COST               = 9999
[docs] def __init__(self, trip_list_dict): """ Constructor from dictionary mapping attribute to value. """ self.__dict__.update(trip_list_dict) #: Direction is one of :py:attr:`PathSet.DIR_OUTBOUND` or :py:attr:`PathSet.DIR_INBOUND` #: Preferred time is a datetime.time object if trip_list_dict[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == "arrival": self.direction = PathSet.DIR_OUTBOUND self.pref_time = trip_list_dict[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME] self.pref_time_min = trip_list_dict[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME_MIN] elif trip_list_dict[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == "departure": self.direction = PathSet.DIR_INBOUND self.pref_time = trip_list_dict[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME] self.pref_time_min = trip_list_dict[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME_MIN] else: raise Exception("Don't understand trip_list %s: %s" % (Passenger.TRIP_LIST_COLUMN_TIME_TARGET, str(trip_list_dict))) #: Dict of path-num -> { cost:, probability:, states: [List of (stop_id, stop_state)]} self.pathdict = {}
[docs] def goes_somewhere(self): """ Does this path go somewhere? Does the destination differ from the origin? """ return (self.__dict__[Passenger.TRIP_LIST_COLUMN_ORIGIN_TAZ_ID] != self.__dict__[Passenger.TRIP_LIST_COLUMN_DESTINATION_TAZ_ID])
[docs] def path_found(self): """ Was a a transit path found from the origin to the destination with the constraints? """ return len(self.pathdict) > 0
[docs] def num_paths(self): """ Number of paths in the PathSet """ return len(self.pathdict)
[docs] def reset(self): """ Delete my states, something went wrong and it won't work out. """ self.pathdict = []
[docs] @staticmethod def set_user_class(trip_list_df, new_colname): """ Adds a column called user_class by applying the configured user class function. """ trip_list_df[new_colname] = trip_list_df.apply(PathSet.CONFIGURED_FUNCTIONS[PathSet.USER_CLASS_FUNCTION], axis=1)
[docs] @staticmethod def verify_weight_config(modes_df, output_dir, routes, capacity_constraint, trip_list_df): """ Verify that we have complete weight configurations for the user classes and modes in the given DataFrame. Trips with invalid weight configurations will be dropped from the trip list and warned about. The parameter mode_df is a dataframe with the user_class, demand_mode_type and demand_mode combinations found in the demand file. If *capacity_constraint* is true, make sure there's an at_capacity weight on the transit supply mode links to enforce it. Returns updated trip_list_df. """ (verify, error_str) = PathSet.verify_weights(PathSet.WEIGHTS_DF) # Join - make sure that all demand combinations (user class, purpose, demand mode type and demand mode) are configured weight_check = pd.merge(left=modes_df, right=PathSet.WEIGHTS_DF, on=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE], how='left') FastTripsLogger.debug("demand_modes x weights: \n%s" % weight_check.to_string()) FastTripsLogger.debug("trip_list_df head=\n%s" % str(trip_list_df.head())) # If something is missing, warn and remove those trips null_supply_mode_weights = weight_check.loc[pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_SUPPLY_MODE])] if len(null_supply_mode_weights) > 0: # warn FastTripsLogger.warn("The following user_class, demand_mode_type, demand_mode combinations exist in the demand file but are missing from the weight configuration:") FastTripsLogger.warn("\n%s" % null_supply_mode_weights.to_string()) # remove those trips -- need to do it one demand mode type at a time null_supply_mode_weights = null_supply_mode_weights[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE]] null_supply_mode_weights["to_remove"] = 1 for demand_mode_type in [PathSet.STATE_MODE_ACCESS, PathSet.STATE_MODE_EGRESS, 
PathSet.STATE_MODE_TRIP]: remove_trips = null_supply_mode_weights.loc[null_supply_mode_weights[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE]==demand_mode_type].copy() if len(remove_trips) == 0: continue remove_trips.rename(columns={PathSet.WEIGHTS_COLUMN_DEMAND_MODE:"%s_mode" % demand_mode_type}, inplace=True) remove_trips.drop([PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE], axis=1, inplace=True) FastTripsLogger.debug("Removing for \n%s" % remove_trips) trip_list_df = pd.merge(left = trip_list_df, right = remove_trips, how = "left") FastTripsLogger.debug("Removing\n%s" % trip_list_df.loc[pd.notnull(trip_list_df["to_remove"])]) # keep only those not flagged to_remove trip_list_df = trip_list_df.loc[pd.isnull(trip_list_df["to_remove"])] trip_list_df.drop(["to_remove"], axis=1, inplace=True) # demand_mode_type and demand_modes implicit to all travel : xfer walk, xfer wait, initial wait user_classes = modes_df[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE]].drop_duplicates().reset_index() implicit_df = pd.DataFrame({ PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE:[ 'transfer'], PathSet.WEIGHTS_COLUMN_DEMAND_MODE :[ 'transfer'], PathSet.WEIGHTS_COLUMN_SUPPLY_MODE :[ 'transfer'] }) user_classes['key'] = 1 implicit_df['key'] = 1 implicit_df = pd.merge(left=user_classes, right=implicit_df, on='key') implicit_df.drop(['index','key'], axis=1, inplace=True) # FastTripsLogger.debug("implicit_df: \n%s" % implicit_df) weight_check = pd.merge(left=implicit_df, right=PathSet.WEIGHTS_DF, on=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE], how='left') FastTripsLogger.debug("implicit demand_modes x weights: \n%s" % weight_check.to_string()) if pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]).sum() > 0: error_str += "\nThe following user_class, purpose, demand_mode_type, demand_mode, supply_mode combinations exist in the demand file but 
are missing from the weight configuration:\n" error_str += weight_check.loc[pd.isnull(weight_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME])].to_string() error_str += "\n\n" # transfer penalty check tp_index = pd.DataFrame({ PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE:['transfer'], PathSet.WEIGHTS_COLUMN_DEMAND_MODE :['transfer'], PathSet.WEIGHTS_COLUMN_SUPPLY_MODE :['transfer'], PathSet.WEIGHTS_COLUMN_WEIGHT_NAME :['transfer_penalty']}) uc_purp_index = PathSet.WEIGHTS_DF[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE]].drop_duplicates() FastTripsLogger.debug("uc_purp_index: \n%s" % uc_purp_index) # these are all the transfer penalties we have transfer_penaltes = pd.merge(left=tp_index, right=PathSet.WEIGHTS_DF, how='left') FastTripsLogger.debug("transfer_penaltes: \n%s" % transfer_penaltes) transfer_penalty_check = pd.merge(left=uc_purp_index, right=transfer_penaltes, how='left') FastTripsLogger.debug("transfer_penalty_check: \n%s" % transfer_penalty_check) # missing transfer penalty if pd.isnull(transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]).sum() > 0: error_str += "\nThe following user class x purpose are missing a transfer penalty:\n" error_str += transfer_penalty_check.loc[pd.isnull(transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME])].to_string() error_str += "\n\n" bad_pen = transfer_penalty_check.loc[transfer_penalty_check[PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE] < PathSet.MIN_TRANSFER_PENALTY] if len(bad_pen) > 0: error_str += "\nThe following user class x purpose path weights have invalid (too small) transfer penalties. MIN=(%f)\n" % PathSet.MIN_TRANSFER_PENALTY error_str += bad_pen.to_string() error_str += "\nConfigure smaller min_transfer_penalty AT YOUR OWN RISK since this will make path generation slow/unreliable.\n\n" # If *capacity_constraint* is true, make sure there's an at_capacity weight on the transit supply mode links # to enforce it. 
if capacity_constraint: # see if it's here already -- we don't know how to handle that... at_capacity = PathSet.WEIGHTS_DF.loc[ PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "at_capacity" ] if len(at_capacity) > 0: error_str += "\nFound at_capacity path weights explicitly set when about to set these for hard capacity constraints.\n" error_str += at_capacity.to_string() error_str += "\n\n" else: # set it for all user_class x transit x demand_mode x supply_mode transit_weights_df = PathSet.WEIGHTS_DF.loc[PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE] == PathSet.STATE_MODE_TRIP, [PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE]].copy() transit_weights_df.drop_duplicates(inplace=True) transit_weights_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ] = "at_capacity" transit_weights_df[PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE] = PathSet.HUGE_COST transit_weights_df[PathSet.WEIGHTS_GROWTH_TYPE] = PathSet.CONSTANT_GROWTH_MODEL FastTripsLogger.debug("Adding capacity-constraint weights:\n%s" % transit_weights_df.to_string()) PathSet.WEIGHTS_DF = pd.concat([PathSet.WEIGHTS_DF, transit_weights_df], axis=0) PathSet.WEIGHTS_DF.sort_values(by=[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME], inplace=True) if len(error_str) > 0: FastTripsLogger.fatal(error_str) sys.exit(2) # add mode numbers to weights DF for relevant rows PathSet.WEIGHTS_DF = routes.add_numeric_mode_id(PathSet.WEIGHTS_DF, id_colname=PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, numeric_newcolname=PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, warn=True) # don't fail if some supply modes are configured but not used, they may be for future runs FastTripsLogger.debug("PathSet weights: \n%s" % PathSet.WEIGHTS_DF) export_columns = 
[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE, PathSet.WEIGHTS_GROWTH_TYPE, PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID] PathSet.WEIGHTS_DF.reindex(columns=export_columns).to_csv(os.path.join(output_dir,PathSet.OUTPUT_WEIGHTS_FILE), columns=export_columns, sep=" ", index=False) # add placeholder weights (ivt weight) for fares - one for each user_class, purpose, transit demand mode # these will be updated based on the person's value of time in calculate_cost() fare_weights = PathSet.WEIGHTS_DF.loc[ (PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE]==PathSet.STATE_MODE_TRIP) & (PathSet.WEIGHTS_DF[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ]== "in_vehicle_time_min")] fare_weights = fare_weights[[PathSet.WEIGHTS_COLUMN_USER_CLASS, PathSet.WEIGHTS_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE]].copy().drop_duplicates() fare_weights[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME ] = "fare" # SIM_COL_PAX_FARE PathSet.WEIGHTS_DF = PathSet.WEIGHTS_DF.append(fare_weights) FastTripsLogger.debug("PathSet.WEIGHTS_DF with fare weights: \n%s" % PathSet.WEIGHTS_DF) return trip_list_df
[docs] @staticmethod def verify_weights(weights): # First, verify required columns are found error_str = "" weight_cols = list(weights.columns.values) FastTripsLogger.debug("verify_weight_config:\n%s" % weights.to_string()) if (PathSet.WEIGHTS_COLUMN_USER_CLASS not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_USER_CLASS) if (PathSet.WEIGHTS_COLUMN_PURPOSE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_PURPOSE) if (PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE) if (PathSet.WEIGHTS_COLUMN_DEMAND_MODE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_DEMAND_MODE) if (PathSet.WEIGHTS_COLUMN_SUPPLY_MODE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_SUPPLY_MODE) if (PathSet.WEIGHTS_COLUMN_WEIGHT_NAME not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_WEIGHT_NAME) if (PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE) if (PathSet.WEIGHTS_GROWTH_TYPE not in weight_cols): error_str+='{} not in weight_cols\n'.format(PathSet.WEIGHTS_GROWTH_TYPE) constant_exp_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE].isin( [PathSet.CONSTANT_GROWTH_MODEL, PathSet.EXP_GROWTH_MODEL]), ] logarithmic_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE] == PathSet.LOGARITHMIC_GROWTH_MODEL, ] logistic_slice = weights.loc[ weights[PathSet.WEIGHTS_GROWTH_TYPE] == PathSet.LOGISTIC_GROWTH_MODEL, ] # Verify that no extraneous values are set for constant and exponential functions if not pd.isnull(constant_exp_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Linear or Exponential qualifier 
includes unnecessary modifier(s)\n' if not pd.isnull(logarithmic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Logarithmic qualifier includes unnecessary modifier(s)\n' if not pd.isnull(logistic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, ], axis='columns')).values.all(): error_str += 'Logistic qualifier includes log_base modifier\n' if not pd.notnull(logarithmic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOG_BASE, ], axis='columns')).values.all(): error_str += 'Logarithmic qualifier missing necessary log_base modifier\n' if not pd.notnull(logistic_slice.reindex([ PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, ], axis='columns')).values.all(): error_str += 'Logistic qualifier missing necessary modifiers\n' if error_str: error_str = '\n-------Errors: pathweight_ft.txt---------------\n' + error_str return (not error_str), error_str
def __str__(self): """ Readable string version of the path. Note: If inbound trip, then the states are in reverse order (egress to access) """ ret_str = "Dict vars:\n" for k,v in self.__dict__.items(): ret_str += "%30s => %-30s %s\n" % (str(k), str(v), str(type(v))) # ret_str += PathSet.states_to_str(self.states, self.direction) return ret_str
    @staticmethod
    def write_paths(passengers_df, output_dir):
        """
        Write the assigned paths to the given output file
        (:py:attr:`PathSet.PATHS_OUTPUT_FILE`), one tab-delimited row per
        person trip with comma-joined boarding stops/trips, alighting stops
        and walk-leg times.

        :param passengers_df: Passenger paths assignment results
        :type passengers_df: :py:class:`pandas.DataFrame` instance
        :param output_dir: Output directory
        :type output_dir: string
        """
        # get trip information -- board stops, board trips and alight stops
        passenger_trips = passengers_df.loc[passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP].copy()
        ptrip_group     = passenger_trips.groupby([Passenger.PERSONS_COLUMN_PERSON_ID,
                                                   Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID])
        # these are Series: comma-joined stop/trip ids per (person, person trip)
        board_stops_str  = ptrip_group.A_id.apply(lambda x:','.join(x))
        board_trips_str  = ptrip_group.trip_id.apply(lambda x:','.join(x))
        alight_stops_str = ptrip_group.B_id.apply(lambda x:','.join(x))
        # name them so the concat below produces usable column names
        board_stops_str.name  = 'board_stop_str'
        board_trips_str.name  = 'board_trip_str'
        alight_stops_str.name = 'alight_stop_str'

        # get walking times (access, transfer and egress legs)
        walk_links = passengers_df.loc[(passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_ACCESS  )| \
                                       (passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRANSFER)| \
                                       (passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_EGRESS  )].copy()
        # link time in minutes, formatted to 2 decimal places
        walk_links['linktime_str'] = walk_links.pf_linktime.apply(lambda x: "%.2f" % (x/np.timedelta64(1,'m')))
        # NOTE(review): this groups by trip_list_id_num while the trip legs above group
        # by person_trip_id -- presumably these keys align one-to-one; verify upstream.
        walklink_group = walk_links[['person_id','trip_list_id_num','linktime_str']].groupby(['person_id','trip_list_id_num'])
        walktimes_str  = walklink_group.linktime_str.apply(lambda x:','.join(x))

        # aggregate to one line per person_id, trip_list_id
        print_passengers_df = passengers_df[['person_id','trip_list_id_num','pathmode','A_id','B_id',Passenger.PF_COL_PAX_A_TIME]].groupby(['person_id','trip_list_id_num']).agg(
            {'pathmode'                  :'first',  # path mode
             'A_id'                      :'first',  # origin
             'B_id'                      :'last',   # destination
             Passenger.PF_COL_PAX_A_TIME :'first'   # start time
            })

        # put them all together (aligned on the groupby index)
        print_passengers_df = pd.concat([print_passengers_df,
                                         board_stops_str,
                                         board_trips_str,
                                         alight_stops_str,
                                         walktimes_str], axis=1)

        print_passengers_df.reset_index(inplace=True)
        print_passengers_df.sort_values(by=['trip_list_id_num'], inplace=True)

        # rename to the legacy output column names
        print_passengers_df.rename(columns=
            {'pathmode'                  :'mode',
             'A_id'                      :'originTaz',
             'B_id'                      :'destinationTaz',
             Passenger.PF_COL_PAX_A_TIME :'startTime_time',
             'board_stop_str'            :'boardingStops',
             'board_trip_str'            :'boardingTrips',
             'alight_stop_str'           :'alightingStops',
             'linktime_str'              :'walkingTimes'}, inplace=True)

        # format the start time for output
        print_passengers_df['startTime'] = print_passengers_df['startTime_time'].apply(Util.datetime64_formatter)

        # select/reorder the output columns
        print_passengers_df = print_passengers_df[['trip_list_id_num','person_id','mode','originTaz','destinationTaz','startTime',
                                                   'boardingStops','boardingTrips','alightingStops','walkingTimes']]

        print_passengers_df.to_csv(os.path.join(output_dir, PathSet.PATHS_OUTPUT_FILE), sep="\t", index=False)
# passengerId mode originTaz destinationTaz startTime boardingStops boardingTrips alightingStops walkingTimes
[docs] @staticmethod def write_path_times(passengers_df, output_dir): """ Write the assigned path times to the given output file. :param passengers_df: Passenger path links :type passengers_df: :py:class:`pandas.DataFrame` instance :param output_dir: Output directory :type output_dir: string """ passenger_trips = passengers_df.loc[passengers_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP].copy() ###### TODO: this is really catering to output format; an alternative might be more appropriate from .Assignment import Assignment passenger_trips.loc[:, 'board_time_str'] = passenger_trips[Assignment.SIM_COL_PAX_BOARD_TIME ].apply(Util.datetime64_formatter) passenger_trips.loc[:,'arrival_time_str'] = passenger_trips[Passenger.PF_COL_PAX_A_TIME].apply(Util.datetime64_formatter) passenger_trips.loc[:, 'alight_time_str'] = passenger_trips[Assignment.SIM_COL_PAX_ALIGHT_TIME].apply(Util.datetime64_formatter) # Aggregate (by joining) across each passenger + path ptrip_group = passenger_trips.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # these are Series board_time_str = ptrip_group['board_time_str' ].apply(lambda x:','.join(x)) arrival_time_str = ptrip_group['arrival_time_str'].apply(lambda x:','.join(x)) alight_time_str = ptrip_group['alight_time_str' ].apply(lambda x:','.join(x)) # Aggregate other fields across each passenger + path pax_exp_df = passengers_df.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]).agg( {# 'pathmode' :'first', # path mode 'A_id' :'first', # origin 'B_id' :'last', # destination Passenger.PF_COL_PAX_A_TIME :'first', # start time Passenger.PF_COL_PAX_B_TIME :'last', # end time # TODO: cost needs to be updated for updated dwell & travel time # 'cost' :'first', # total travel cost is calculated for the whole path }) # Put them together and return assert(len(pax_exp_df) == len(board_time_str)) pax_exp_df = pd.concat([pax_exp_df, board_time_str, 
arrival_time_str, alight_time_str], axis=1) # print pax_exp_df.to_string(formatters={'A_time':Assignment.datetime64_min_formatter, # 'B_time':Assignment.datetime64_min_formatter} # reset columns print_pax_exp_df = pax_exp_df.reset_index() print_pax_exp_df.sort_values(by=[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID], inplace=True) print_pax_exp_df['A_time_str'] = print_pax_exp_df[Passenger.PF_COL_PAX_A_TIME].apply(Util.datetime64_formatter) print_pax_exp_df['B_time_str'] = print_pax_exp_df[Passenger.PF_COL_PAX_B_TIME].apply(Util.datetime64_formatter) # rename columns print_pax_exp_df.rename(columns= {#'pathmode' :'mode', 'A_id' :'originTaz', 'B_id' :'destinationTaz', 'A_time_str' :'startTime', 'B_time_str' :'endTime', 'arrival_time_str' :'arrivalTimes', 'board_time_str' :'boardingTimes', 'alight_time_str' :'alightingTimes', # TODO: cost needs to be updated for updated dwell & travel time # 'cost' :'travelCost', }, inplace=True) # reorder print_pax_exp_df = print_pax_exp_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, #'mode', 'originTaz', 'destinationTaz', 'startTime', 'endTime', 'arrivalTimes', 'boardingTimes', 'alightingTimes', # 'travelCost', ]] times_out = open(os.path.join(output_dir, PathSet.PATH_TIMES_OUTPUT_FILE), 'w') print_pax_exp_df.to_csv(times_out, sep="\t", float_format="%.2f", index=False)
[docs] @staticmethod def calculate_cost(STOCH_DISPERSION, pathset_paths_df, pathset_links_df, veh_trips_df, trip_list_df, routes, tazs, transfers, stops=None, reset_bump_iter=False): """ This is equivalent to the C++ Path::calculateCost() method. Would it be faster to do it in C++? It would require us to package up the networks and paths and send back and forth. :p I think if we can do it using vectorized pandas operations, it should be fast, but we can compare/test. It's also messier to have this in two places. Maybe we should delete it from the C++; the overlap calcs are only in here right now. Returns pathset_paths_df with additional columns, Assignment.SIM_COL_PAX_FARE, Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_PROBABILITY, Assignment.SIM_COL_PAX_LOGSUM And pathset_links_df with additional columns, Assignment.SIM_COL_PAX_FARE, Assignment.SIM_COL_PAX_FARE_PERIOD, Assignment.SIM_COL_PAX_COST and Assignment.SIM_COL_PAX_DISTANCE """ from .Assignment import Assignment # if these are here already, remove them since we'll recalculate them if Assignment.SIM_COL_PAX_COST in list(pathset_paths_df.columns.values): pathset_paths_df.drop([Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_LNPS, Assignment.SIM_COL_PAX_PROBABILITY, Assignment.SIM_COL_PAX_LOGSUM], axis=1, inplace=True) pathset_links_df.drop([Assignment.SIM_COL_PAX_COST, Assignment.SIM_COL_PAX_DISTANCE], axis=1, inplace=True) # leaving this in for writing to CSV for debugging but I could take it out pathset_paths_df.drop(["logsum_component"], axis=1, inplace=True) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_df trace\n%s" % str(pathset_links_df.loc[pathset_links_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) FastTripsLogger.debug("calculate_cost: trip_list_df trace\n%s" % str(trip_list_df.loc[trip_list_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Add fares -- need stop zones first if they're not there. # We only need to do this once per pathset. 
# todo -- could remove non-transit links for this? FastTripsLogger.debug("calculate_cost columns:\n%s" % str(list(pathset_links_df.columns.values))) if "A_zone_id" not in list(pathset_links_df.columns.values): assert(stops is not None) pathset_links_df = stops.add_stop_zone_id(pathset_links_df, "A_id", "A_zone_id") pathset_links_df = stops.add_stop_zone_id(pathset_links_df, "B_id", "B_zone_id") # This needs to be done fresh each time since simulation might change the board times and therefore the fare periods pathset_links_df = routes.add_fares(pathset_links_df) # base this on pathfinding distance pathset_links_df[Assignment.SIM_COL_PAX_DISTANCE] = pathset_links_df[Passenger.PF_COL_LINK_DIST] pathset_links_to_use = pathset_links_df if PathSet.OVERLAP_SPLIT_TRANSIT: pathset_links_to_use = PathSet.split_transit_links(pathset_links_df, veh_trips_df, stops) else: pathset_links_to_use["split_first"] = True # all transit links are first # First, we need user class, purpose, demand modes, and value of time pathset_links_cost_df = pd.merge(left =pathset_links_to_use, right=trip_list_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.TRIP_LIST_COLUMN_VOT, Passenger.TRIP_LIST_COLUMN_ACCESS_MODE, Passenger.TRIP_LIST_COLUMN_EGRESS_MODE, Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE, ]], how ="left", on =[Passenger.PERSONS_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # linkmode = demand_mode_type. 
Set demand_mode for the links pathset_links_cost_df[PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = None pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_ACCESS , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_ACCESS_MODE ] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_EGRESS , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_EGRESS_MODE ] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_TRIP , PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE] pathset_links_cost_df.loc[ pathset_links_cost_df[Passenger.PF_COL_LINK_MODE]== PathSet.STATE_MODE_TRANSFER, PathSet.WEIGHTS_COLUMN_DEMAND_MODE] = "transfer" # Verify that it's set for every link missing_demand_mode = pd.isnull(pathset_links_cost_df[PathSet.WEIGHTS_COLUMN_DEMAND_MODE]).sum() assert(missing_demand_mode == 0) # drop the individual mode columns, we have what we need pathset_links_cost_df.drop([Passenger.TRIP_LIST_COLUMN_ACCESS_MODE, Passenger.TRIP_LIST_COLUMN_EGRESS_MODE, Passenger.TRIP_LIST_COLUMN_TRANSIT_MODE], axis=1, inplace=True) # if bump_iter doesn't exist or if it needs to be reset if reset_bump_iter or Assignment.SIM_COL_PAX_BUMP_ITER not in pathset_links_cost_df: pathset_links_cost_df[Assignment.SIM_COL_PAX_BUMP_ITER] = -1 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_cost_df trace\n%s" % str(pathset_links_cost_df.loc[pathset_links_cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Inner join with the weights - now each weight has a row cost_df = pd.merge(left =pathset_links_cost_df, right =PathSet.WEIGHTS_DF, left_on =[Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.PF_COL_LINK_MODE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, Passenger.TRIP_LIST_COLUMN_MODE], 
right_on=[Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE], how ="inner") # update the fare weight placeholder (ivt pathweight - utils per min)) based on value of time (currency per hour) # since generalized cost is in utils, (ivt utils/min)x(60 min/1 hour)x(hour/vot currency) is the weight (utils/currency) cost_df.loc[ cost_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME]==Assignment.SIM_COL_PAX_FARE, "weight_value" ] *= (60.0/cost_df[Passenger.TRIP_LIST_COLUMN_VOT]) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_df\n%s" % str(cost_df.loc[cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].sort_values([ Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.PF_COL_PATH_NUM,Passenger.PF_COL_LINK_NUM]).head(20))) # NOW we split it into 3 lists -- access/egress, transit, and transfer # This is because they will each be joined to tables specific to those kinds of mode categories, and so we don't want all the transit nulls on the other tables, etc. 
cost_columns = list(cost_df.columns.values) cost_df["var_value"] = np.nan # This means unset cost_accegr_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_ACCESS )|(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_EGRESS)] cost_trip_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRIP )] cost_transfer_df = cost_df.loc[(cost_df[Passenger.PF_COL_LINK_MODE]==PathSet.STATE_MODE_TRANSFER)] del cost_df ##################### First, handle Access/Egress link costs for accegr_type in ["walk","bike","drive"]: # make copies; we don't want to mess with originals if accegr_type == "walk": link_df = tazs.walk_df.copy() mode_list = TAZ.WALK_MODE_NUMS elif accegr_type == "bike": mode_list = TAZ.BIKE_MODE_NUMS # not supported yet continue else: link_df = tazs.drive_df.copy() mode_list = TAZ.DRIVE_MODE_NUMS FastTripsLogger.debug("Access/egress link_df %s\n%s" % (accegr_type, link_df.head().to_string())) if len(link_df) == 0: continue # format these with A & B instead of TAZ and Stop link_df.reset_index(inplace=True) link_df["A_id_num"] = -1 link_df["B_id_num"] = -1 link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.ACCESS_MODE_NUMS), "A_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_TAZ_NUM ] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.ACCESS_MODE_NUMS), "B_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_STOP_NUM] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "A_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_STOP_NUM] link_df.loc[link_df[TAZ.WALK_ACCESS_COLUMN_SUPPLY_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "B_id_num"] = link_df[TAZ.WALK_ACCESS_COLUMN_TAZ_NUM ] link_df.drop([TAZ.WALK_ACCESS_COLUMN_TAZ_NUM, TAZ.WALK_ACCESS_COLUMN_STOP_NUM], axis=1, inplace=True) assert(len(link_df.loc[link_df["A_id_num"] < 0]) == 0) FastTripsLogger.debug("%s link_df =\n%s" % (accegr_type, link_df.head().to_string())) # Merge access/egress with walk|bike|drive access/egress 
information cost_accegr_df = pd.merge(left = cost_accegr_df, right = link_df, on = ["A_id_num", PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, "B_id_num"], how = "left") # rename new columns so it's clear it's for walk|bike|drive for colname in list(link_df.select_dtypes(include=['float64','int64']).columns.values): # don't worry about join columns if colname in ["A_id_num", PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, "B_id_num"]: continue # rename the rest new_colname = "%s %s" % (colname, accegr_type) cost_accegr_df.rename(columns={colname:new_colname}, inplace=True) # use it, if relevant cost_accegr_df.loc[ (cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == colname)& (cost_accegr_df[PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM].isin(mode_list)), "var_value"] = cost_accegr_df[new_colname] # Access/egress needs passenger trip departure, arrival and time_target cost_accegr_df = pd.merge(left =cost_accegr_df, right=trip_list_df[[ Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME, Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME, Passenger.TRIP_LIST_COLUMN_TIME_TARGET, ]], how ="left", on =[Passenger.PERSONS_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]) # drop links that are irrelevant based on departure time for access links, or arrival time for egress links cost_accegr_df["check_time"] = cost_accegr_df[Assignment.SIM_COL_PAX_A_TIME] # departure time for access cost_accegr_df.loc[ cost_accegr_df[TAZ.MODE_COLUMN_MODE_NUM].isin(TAZ.EGRESS_MODE_NUMS), "check_time" ] = cost_accegr_df[Assignment.SIM_COL_PAX_B_TIME] # arrival time for egress cost_accegr_df["check_time"] = (cost_accegr_df["check_time"] - Assignment.NETWORK_BUILD_DATE_START_TIME)/np.timedelta64(1,'m') # it's only drive links we need to check cost_accegr_df["to_drop"] = False if "%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_START_TIME_MIN, "drive") in cost_accegr_df.columns.values: cost_accegr_df.loc[ 
cost_accegr_df[TAZ.MODE_COLUMN_MODE_NUM].isin(TAZ.DRIVE_MODE_NUMS)& ((cost_accegr_df["check_time"] < cost_accegr_df["%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_START_TIME_MIN, "drive")])| (cost_accegr_df["check_time"] >= cost_accegr_df["%s %s" % (TAZ.DRIVE_ACCESS_COLUMN_END_TIME_MIN, "drive")])), "to_drop"] = True # if len(Assignment.TRACE_IDS) > 0: # FastTripsLogger.debug("cost_accegr_df=\n%s\ndtypes=\n%s" % (cost_accegr_df.loc[cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]].to_string(), str(cost_accegr_df.dtypes))) FastTripsLogger.debug("Dropping %d rows from cost_accegr_df" % cost_accegr_df["to_drop"].sum()) cost_accegr_df = cost_accegr_df.loc[ cost_accegr_df["to_drop"]==False ] cost_accegr_df.drop(["check_time","to_drop"], axis=1, inplace=True) # penalty for arriving before preferred arrival time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN )& (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS) & (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'departure'), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS)& \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'arrival'), "var_value"] = (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME] - cost_accegr_df[Passenger.PF_COL_PAX_B_TIME])/np.timedelta64(1,'m') # arrive early is not negative - that would be arriving late cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_EARLY_MIN)&(cost_accegr_df["var_value"] < 0), "var_value"] = 0.0 # penalty for departing after preferred departure time. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN )& (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS)& (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'arrival'), "var_value"] = 0.0 cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN) & (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS) & (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == 'departure'), "var_value"] = (cost_accegr_df[Passenger.PF_COL_PAX_A_TIME] - cost_accegr_df[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME])/ np.timedelta64(1, 'm') # depart late is not negative - that would be departing early cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_LATE_MIN)&(cost_accegr_df["var_value"] < 0), "var_value"] = 0.0 # constant growth = exponential growth with 0 percent growth rate # depart before preferred or arrive after preferred means the passenger just missed something important # Arrive late only impacts the egress link, so set the var_value equal to zero for the access link cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN ) & \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS), "var_value"] = 0.0 # Arrive late only impacts those that have a preferred arrival time. If preferred departure time, # set arrive late equal to zero. --This could have been done with previous line, but it would # look ugly mixing and matching 'and' and 'or'. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_DEPARTURE), "var_value"] = 0.0 # Calculate how late the person arrives after preferred time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS)& \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_ARRIVAL), "var_value"] = \ (cost_accegr_df[Passenger.PF_COL_PAX_B_TIME] - cost_accegr_df[Passenger.TRIP_LIST_COLUMN_ARRIVAL_TIME])/np.timedelta64(1,'m') # If arrived before preferred time, set the arrive late field to zero. You don't get a # discount for arriving early. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN) & \ (cost_accegr_df['var_value'] < 0), "var_value"] = 0 # preferred delay_min - departure means want to depart after that time # Depart early only impacts the access link, so set the var_value equal to zero for the egress link cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN )& \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_EGRESS), "var_value"] = 0.0 # Depart early only impacts those that have a preferred departure time. If preferred arrive time, # set depart early equal to zero. --This could have been done with previous line, but it would # look ugly mixing and matching 'and' and 'or'. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_ARRIVAL), "var_value"] = 0.0 # Calculate how early the person departs before the preferred time. 
cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df[Passenger.PF_COL_LINK_MODE] == PathSet.STATE_MODE_ACCESS) & \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TIME_TARGET] == Passenger.TIME_TARGET_DEPARTURE), "var_value"] = \ (cost_accegr_df[Passenger.TRIP_LIST_COLUMN_DEPARTURE_TIME] - cost_accegr_df[Passenger.PF_COL_PAX_A_TIME])/ np.timedelta64(1, 'm') # If departing after preferred time, set the depart early field to zero. You don't get a # discount for taking your time. cost_accegr_df.loc[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == PathSet.WEIGHT_NAME_DEPART_EARLY_MIN) & \ (cost_accegr_df['var_value'] < 0), "var_value"] = 0 assert 0 == cost_accegr_df[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME].isin([PathSet.WEIGHT_NAME_DEPART_EARLY_MIN, PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN])) & \ (cost_accegr_df['var_value'].isnull())].shape[0] assert 0 == cost_accegr_df[(cost_accegr_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME].isin([PathSet.WEIGHT_NAME_DEPART_EARLY_MIN, PathSet.WEIGHT_NAME_ARRIVE_LATE_MIN])) & \ (cost_accegr_df['var_value']<0)].shape[0] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_accegr_df trace\n%s\ndtypes=\n%s" % (cost_accegr_df.loc[cost_accegr_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_accegr_df.dtypes))) missing_accegr_costs = cost_accegr_df.loc[ pd.isnull(cost_accegr_df["var_value"]) ] error_accegr_msg = "Missing %d out of %d access/egress var_value values" % (len(missing_accegr_costs), len(cost_accegr_df)) FastTripsLogger.debug(error_accegr_msg) if len(missing_accegr_costs) > 0: error_accegr_msg += "\n%s" % missing_accegr_costs.head(10).to_string() FastTripsLogger.fatal(error_accegr_msg) ##################### Next, handle Transit Trip link costs # set the fare var_values for split_first only cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "fare")&(cost_trip_df["split_first"]==True), "var_value"] = 
cost_trip_df[Assignment.SIM_COL_PAX_FARE] cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "fare")&(cost_trip_df["split_first"]==False), "var_value"] = 0 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_trip_df trace\n%s\ndtypes=\n%s" % (cost_trip_df.loc[cost_trip_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_trip_df.dtypes))) # if there's a board time, in_vehicle_time = new_B_time - board_time # otherwise, in_vehicle_time = B time - A time (for when we split) cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")&pd.notnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_B_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME])/np.timedelta64(1,'m') cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")& pd.isnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_B_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_A_TIME])/np.timedelta64(1,'m') # if in vehicle time is less than 0 then off by 1 day error cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "in_vehicle_time_min")&(cost_trip_df["var_value"]<0), "var_value"] = cost_trip_df["var_value"] + (24*60) # if there's a board time, wait time = board_time - A time # otherwise, wait time = 0 (for when we split transit links) cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "wait_time_min")&pd.notnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = \ (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME] - cost_trip_df[Assignment.SIM_COL_PAX_A_TIME])/np.timedelta64(1,'m') cost_trip_df.loc[(cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "wait_time_min")& pd.isnull(cost_trip_df[Assignment.SIM_COL_PAX_BOARD_TIME]), "var_value"] = 0 # which overcap column to use? 
overcap_col = Trip.SIM_COL_VEH_OVERCAP if Assignment.MSA_RESULTS and Trip.SIM_COL_VEH_MSA_OVERCAP in list(cost_trip_df.columns.values): overcap_col = Trip.SIM_COL_VEH_MSA_OVERCAP # at cap is a binary, 1 if overcap >= 0 and they're not one of the lucky few that boarded cost_trip_df["at_capacity"] = 0.0 if Assignment.SIM_COL_PAX_BOARD_STATE in list(cost_trip_df.columns.values): cost_trip_df.loc[ (cost_trip_df[overcap_col] >= 0)& (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_STATE] != "board_easy")& (cost_trip_df[Assignment.SIM_COL_PAX_BOARD_STATE] != "boarded"), "at_capacity" ] = 1.0 else: cost_trip_df.loc[ (cost_trip_df[overcap_col] >= 0) , "at_capacity" ] = 1.0 cost_trip_df.loc[cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "at_capacity" , "var_value"] = cost_trip_df["at_capacity"] cost_trip_df.loc[cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "overcap" , "var_value"] = cost_trip_df[overcap_col] # overcap shouldn't be negative cost_trip_df.loc[ (cost_trip_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "overcap")&(cost_trip_df["var_value"]<0), "var_value"] = 0.0 if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("cost_trip_df trace\n%s\ndtypes=\n%s" % (cost_trip_df.loc[cost_trip_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True].to_string(), str(cost_trip_df.dtypes))) missing_trip_costs = cost_trip_df.loc[ pd.isnull(cost_trip_df["var_value"]) ] error_trip_msg = "Missing %d out of %d transit trip var_value values" % (len(missing_trip_costs), len(cost_trip_df)) FastTripsLogger.debug(error_trip_msg) if len(missing_trip_costs) > 0: error_trip_msg += "\n%s" % missing_trip_costs.head(10).to_string() FastTripsLogger.fatal(error_trip_msg) ##################### Finally, handle Transfer link costs cost_transfer_df = transfers.add_transfer_attributes(cost_transfer_df, pathset_links_df) cost_transfer_df.loc[cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "walk_time_min", "var_value"] = cost_transfer_df[Passenger.PF_COL_LINK_TIME]/np.timedelta64(1,'m') # any numeric 
column can be used for colname in list(cost_transfer_df.select_dtypes(include=['float64','int64']).columns.values): FastTripsLogger.debug("Using numeric column %s" % colname) cost_transfer_df.loc[cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == colname, "var_value"] = cost_transfer_df[colname] # make zero walk transfers have default var_values 0 cost_transfer_df.loc[ (cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] != "transfer_penalty")& (cost_transfer_df["A_id_num"]==cost_transfer_df["B_id_num"]), "var_value"] = 0.0 # zero walk transfers have a transfer penalty although they're not otherwise configured cost_transfer_df.loc[ (cost_transfer_df[PathSet.WEIGHTS_COLUMN_WEIGHT_NAME] == "transfer_penalty")& (pd.isnull(cost_transfer_df["var_value"])), "var_value"] = 1.0 # FastTripsLogger.debug("cost_transfer_df=\n%s\ndtypes=\n%s" % (cost_transfer_df.head().to_string(), str(cost_transfer_df.dtypes))) missing_transfer_costs = cost_transfer_df.loc[ pd.isnull(cost_transfer_df["var_value"]) ] error_transfer_msg = "Missing %d out of %d transfer var_value values" % (len(missing_transfer_costs), len(cost_transfer_df)) FastTripsLogger.debug(error_transfer_msg) if len(missing_transfer_costs) > 0: error_transfer_msg += "\n%s" % missing_transfer_costs.head(10).to_string() FastTripsLogger.fatal(error_transfer_msg) # abort here if we're missing anything if len(missing_accegr_costs) + len(missing_trip_costs) + len(missing_transfer_costs) > 0: abort_error_msg = "\nMissing %d accegr_costs\nMissing %d trip_costs\nMissing %d transfer_costs" % (len(missing_accegr_costs), len(missing_trip_costs), len(missing_transfer_costs)) FastTripsLogger.debug(abort_error_msg) raise NotImplementedError("Missing var_values; See log") ##################### Put them back together into a single dataframe cost_columns = [Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, 
Passenger.TRIP_LIST_COLUMN_USER_CLASS, Passenger.TRIP_LIST_COLUMN_PURPOSE, Passenger.TRIP_LIST_COLUMN_VOT, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM, PathSet.WEIGHTS_COLUMN_DEMAND_MODE_TYPE, PathSet.WEIGHTS_COLUMN_DEMAND_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE, PathSet.WEIGHTS_COLUMN_SUPPLY_MODE_NUM, PathSet.WEIGHTS_COLUMN_WEIGHT_NAME, PathSet.WEIGHTS_COLUMN_WEIGHT_VALUE, PathSet.WEIGHTS_GROWTH_TYPE, PathSet.WEIGHTS_GROWTH_LOG_BASE, PathSet.WEIGHTS_GROWTH_LOGISTIC_MAX, PathSet.WEIGHTS_GROWTH_LOGISTIC_MID, "var_value", Assignment.SIM_COL_MISSED_XFER, Assignment.SIM_COL_PAX_BUMP_ITER, Assignment.SIM_COL_PAX_FARE] cost_accegr_df = cost_accegr_df.loc[:, cost_accegr_df.columns.isin(cost_columns)] cost_trip_df = cost_trip_df.loc[:, cost_trip_df.columns.isin(cost_columns)] cost_transfer_df = cost_transfer_df.loc[:, cost_transfer_df.columns.isin(cost_columns)] cost_df = pd.concat([cost_accegr_df, cost_trip_df, cost_transfer_df], axis=0, ignore_index=True) # FastTripsLogger.debug("calculate_cost: cost_df=\n%s\ndtypes=\n%s" % (cost_df.to_string(), str(cost_df.dtypes))) # calculate link cost a function of the variable, weight and weight type Util.calculate_pathweight_costs(cost_df, Assignment.SIM_COL_PAX_COST) cost_df.sort_values([Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM], inplace=True) FastTripsLogger.debug("calculate_cost: cost_df\n%s" % str(cost_df.loc[cost_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # verify all costs are non-negative if cost_df[Assignment.SIM_COL_PAX_COST].min() < 0: msg = "calculate_cost: Negative costs found:\n%s" % cost_df.loc[ cost_df[Assignment.SIM_COL_PAX_COST]<0 ].to_string() FastTripsLogger.fatal(msg) raise UnexpectedError(msg) ###################### sum linkcost to links cost_link_df = cost_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, 
Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM, Assignment.SIM_COL_PAX_COST]].groupby( [Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # sort by this first Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM]).aggregate('sum').reset_index() if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_link_df trace\n%s" % str(cost_link_df.loc[cost_link_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # join to pathset_links_df pathset_links_df = pd.merge(left =pathset_links_df, right=cost_link_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM, Passenger.PF_COL_LINK_NUM]) if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_links_df trace\n%s" % str(pathset_links_df.loc[pathset_links_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) ###################### overlap calcs full_overlap_df = None if PathSet.OVERLAP_VARIABLE != PathSet.OVERLAP_NONE: full_overlap_df = PathSet.calculate_overlap(pathset_links_to_use) ###################### sum linkcost to paths cost_link_df.drop([Passenger.PF_COL_LINK_NUM], axis=1, inplace=True) cost_path_df = cost_link_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # order by this Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]).aggregate('sum').reset_index() if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: cost_path_df trace\n%s" % str(cost_path_df.loc[cost_path_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # join to pathset_paths_df pathset_paths_df = pd.merge(left =pathset_paths_df, right=cost_path_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, 
Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]) if PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_NONE: pathset_paths_df[Assignment.SIM_COL_PAX_LNPS] = 0 else: pathset_paths_df = pd.merge(left =pathset_paths_df, right=full_overlap_df, how ="left", on =[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, Passenger.TRIP_LIST_COLUMN_TRACE, Passenger.PF_COL_PATH_NUM]) if PathSet.LEARN_ROUTES: #'learn_discount': Exponential decay function pathset_paths_df['learn_discount'] = np.exp(-PathSet.LEARN_ROUTES_RATE * pathset_paths_df[PathSet.SUCCESS_FLAG_COLUMN]) pathset_paths_df['orig_sim_cost'] = pathset_paths_df[Assignment.SIM_COL_PAX_COST] pathset_paths_df[Assignment.SIM_COL_PAX_COST] = pathset_paths_df[Assignment.SIM_COL_PAX_COST] * pathset_paths_df['learn_discount'] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_paths_df trace\n%s" % str(pathset_paths_df.loc[pathset_paths_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) ###################### logsum and probabilities pathset_paths_df["logsum_component"] = np.exp((-1.0*pathset_paths_df[Assignment.SIM_COL_PAX_COST] + pathset_paths_df[Assignment.SIM_COL_PAX_LNPS])/STOCH_DISPERSION) # sum across all paths pathset_logsum_df = pathset_paths_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,"logsum_component"]].groupby( [Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM, # sort by this Passenger.TRIP_LIST_COLUMN_PERSON_ID, Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID]).aggregate('sum').reset_index() pathset_logsum_df.rename(columns={"logsum_component":"logsum"}, inplace=True) pathset_paths_df = pd.merge(left=pathset_paths_df, right=pathset_logsum_df, how="left") pathset_paths_df[Assignment.SIM_COL_PAX_PROBABILITY] = 
pathset_paths_df["logsum_component"]/pathset_paths_df["logsum"] if len(Assignment.TRACE_IDS) > 0: FastTripsLogger.debug("calculate_cost: pathset_paths_df trace\n%s" % str(pathset_paths_df.loc[pathset_paths_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True])) # Note: the path finding costs won't match the costs here because missed transfers are already calculated here # It would be good to have some sanity checking that theyre aligned otherwise though to make sure we're # calculating costs consistently return (pathset_paths_df, pathset_links_df)
@staticmethod
def calculate_overlap(pathset_links_to_use):
    """
    Given a set of pathset links, returns the results of the overlap (path-size) calculations.

    The returned dataframe has columns person_id, person_trip_id, trip_list_id_num, trace,
    pathnum, and ln_PS -- the natural log of the path-size term used for the overlap
    correction in the path-choice logsum.

    The overlap variable is chosen by :py:attr:`PathSet.OVERLAP_VARIABLE` ("count", "time"
    or "distance") and the (L_i/L_j) ratio is raised to :py:attr:`PathSet.OVERLAP_SCALE_PARAMETER`.
    """
    from .Assignment import Assignment
    FastTripsLogger.debug("calculate_overlap() pathset_links_to_use (%d) head=\n%s" % (len(pathset_links_to_use),
                          str(pathset_links_to_use.head(30))))

    # CHUNKING because we run into memory problems
    # TODO: figure out more sophisticated chunk size
    chunk_list = pathset_links_to_use[[Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM]].drop_duplicates().reset_index(drop=True)
    # ceil(len/chunk_size) -- this matches the chunk_num assignment below exactly.
    # (The previous form, len//size + 1, produced one extra *empty* chunk whenever
    #  len was an exact multiple of OVERLAP_CHUNK_SIZE.)
    num_chunks = (len(chunk_list) + PathSet.OVERLAP_CHUNK_SIZE - 1)//PathSet.OVERLAP_CHUNK_SIZE
    chunk_list["chunk_num"] = np.floor_divide(chunk_list.index, PathSet.OVERLAP_CHUNK_SIZE)
    FastTripsLogger.debug("calculate_overlap() chunk_list size=%d head=\n%s\ntail=\n%s" % (len(chunk_list),
                          chunk_list.head().to_string(), chunk_list.tail().to_string()))
    # tag each pathset link with its owning chunk
    pathset_links_to_use = pd.merge(left  =pathset_links_to_use,
                                    right =chunk_list,
                                    how   ='left')
    FastTripsLogger.debug("calculate_overlap() mem_use=%s pathset_links_to_use has length %d, head=\n%s" %
                          (Util.get_process_mem_use_str(), len(pathset_links_to_use), pathset_links_to_use.head().to_string()))

    full_overlap_df = pd.DataFrame()
    for chunk_num in range(num_chunks):

        # get the person trips in the chunk
        overlap_df = pathset_links_to_use.loc[ pathset_links_to_use["chunk_num"] == chunk_num]
        FastTripsLogger.info(" Calculating overlap for chunk %4d/%4d (len %6d); mem_use=%8s" %
                             (chunk_num+1, num_chunks, len(overlap_df), Util.get_process_mem_use_str()))

        # keep only the columns the overlap math needs
        overlap_df = overlap_df[[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                 Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                 Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                 Passenger.TRIP_LIST_COLUMN_TRACE,
                                 Passenger.PF_COL_PATH_NUM,
                                 Passenger.PF_COL_LINK_NUM,
                                 "A_id_num","B_id_num",
                                 Route.ROUTES_COLUMN_MODE,
                                 "new_linktime",
                                 Assignment.SIM_COL_PAX_DISTANCE]].copy()

        # get ready to count, time, dist to path and add path sum version to overlap_df -- this is L
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: mem_use=%s overlap_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_df), overlap_df.head().to_string()))

        # sum to path
        overlap_df["count"] = 1
        overlap_path_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # sort by this
                                              Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                              Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                              Passenger.TRIP_LIST_COLUMN_TRACE,
                                              Passenger.PF_COL_PATH_NUM]).aggregate(
                                                  {'count':'sum','new_linktime':'sum',Assignment.SIM_COL_PAX_DISTANCE:'sum'}).reset_index(drop=False)
        overlap_path_df.rename(columns={"count":"path_count",
                                        "new_linktime":"path_time",
                                        Assignment.SIM_COL_PAX_DISTANCE:"path_distance"}, inplace=True)
        overlap_df.drop(["count"], axis=1, inplace=True)
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: Added path summed count,time,distance. mem_use=%s overlap_path_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_path_df), overlap_path_df.head().to_string()))

        # add the path summed variables to the link dataframe
        overlap_df = pd.merge(overlap_df, overlap_path_df, how="left",
                              on=[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                  Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                  Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                  Passenger.TRIP_LIST_COLUMN_TRACE,
                                  Passenger.PF_COL_PATH_NUM])
        del overlap_path_df  # we're done with this

        # outer join on trip_list_id_num means when they match, we'll get a cartesian product of the links
        overlap_df = pd.merge(overlap_df, overlap_df.copy(),
                              on=[Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                  Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                  Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,
                                  Passenger.TRIP_LIST_COLUMN_TRACE], how="outer")

        # count matches -- matching A,B,mode
        overlap_df["match"] = 0
        overlap_df.loc[ (overlap_df["A_id_num_x"]==overlap_df["A_id_num_y"])&
                        (overlap_df["B_id_num_x"]==overlap_df["B_id_num_y"])&
                        (overlap_df["mode_x"    ]==overlap_df["mode_y"    ]), "match"] = 1

        if PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_COUNT:
            overlap_df["link_prop_x"] = 1.0/overlap_df["path_count_x"]                               # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_count_x"]/overlap_df["path_count_y"]        # L_i/L_j
        elif PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_TIME:
            overlap_df["link_prop_x"] = overlap_df["new_linktime_x"]/overlap_df["path_time_x"]       # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_time_x"]/overlap_df["path_time_y"]          # L_i/L_j
        elif PathSet.OVERLAP_VARIABLE == PathSet.OVERLAP_DISTANCE:
            overlap_df["link_prop_x"] = overlap_df["distance_x"]/overlap_df["path_distance_x"]       # l_a/L_i
            overlap_df["pathlen_x_y"] = overlap_df["path_distance_x"]/overlap_df["path_distance_y"]  # L_i/L_j

        # (L_i/L_j)^gamma -- operate on the Series, not a single-column frame
        overlap_df["pathlen_x_y_scale"] = overlap_df["pathlen_x_y"].pow(PathSet.OVERLAP_SCALE_PARAMETER)
        # zero it out if it's not a match
        overlap_df.loc[overlap_df["match"]==0, "pathlen_x_y_scale"] = 0
        # now pathlen_x_y_scale = (L_i/L_j)^gamma x delta_aj

        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap(): overlap_df trace\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        # debug
        # overlap_df_temp = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID, "pathnum_x","linknum_x","link_prop_x","pathnum_y"]).aggregate({"match":"sum", "pathlen_x_y_scale":"sum"})
        # FastTripsLogger.debug("calculate_cost: overlap_df_temp\n%s" % str(overlap_df_temp.head(50)))

        # group by pathnum_x, linknum_x -- so this sums over paths P_j in equation (or pathnum_y here)
        overlap_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # sort by this
                                         Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                         Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                         Passenger.TRIP_LIST_COLUMN_TRACE,
                                         "pathnum_x","linknum_x","link_prop_x"]).aggregate({"pathlen_x_y_scale":"sum"}).reset_index()
        # now pathlen_x_y_scale = SUM_j (L_i/L_j)^gamma x delta_aj
        overlap_df["PS"] = overlap_df["link_prop_x"]/overlap_df["pathlen_x_y_scale"]  # l_a/L_i * 1/(SUM_j (L_i/L_j)^gamma x delta_aj)

        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap() trace overlap_df\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        FastTripsLogger.debug("calculate_overlap() chunk_num %d: Cartesian product of links. mem_use=%s overlap_df has length %d, head=\n%s" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(overlap_df), overlap_df.head().to_string()))

        # sum across link in path
        overlap_df = overlap_df.groupby([Passenger.TRIP_LIST_COLUMN_TRIP_LIST_ID_NUM,  # preserve ordering
                                         Passenger.TRIP_LIST_COLUMN_PERSON_ID,
                                         Passenger.TRIP_LIST_COLUMN_PERSON_TRIP_ID,
                                         Passenger.TRIP_LIST_COLUMN_TRACE,"pathnum_x"]).aggregate({"PS":"sum"}).reset_index(drop=False)

        # Check all pathsizes are in [0,1]
        min_PS = overlap_df["PS"].min()
        max_PS = overlap_df["PS"].max()
        FastTripsLogger.debug("PathSize min=%f max=%f" % (min_PS, max_PS))
        if min_PS < 0:
            FastTripsLogger.fatal("Min pathsize = %f < 0:\n%s" % (min_PS, overlap_df.loc[overlap_df["PS"]==min_PS].to_string()))
        if max_PS > 1.0001:
            FastTripsLogger.fatal("Max pathsize = %f > 1:\n%s" % (max_PS, overlap_df.loc[overlap_df["PS"]==max_PS].to_string()))

        overlap_df[Assignment.SIM_COL_PAX_LNPS] = np.log(overlap_df["PS"])
        if len(Assignment.TRACE_IDS) > 0:
            FastTripsLogger.debug("calculate_overlap() overlap_df trace\n%s" %
                                  str(overlap_df.loc[overlap_df[Passenger.TRIP_LIST_COLUMN_TRACE]==True]))

        # rename pathnum_x to pathnum and drop PS. Now overlap_df has columns trip_list_id_num, pathnum, ln_PS
        overlap_df.rename(columns={"pathnum_x":Passenger.PF_COL_PATH_NUM}, inplace=True)
        overlap_df.drop(["PS"], axis=1, inplace=True)  # we have ln_PS

        if len(full_overlap_df) == 0:
            full_overlap_df = overlap_df
        else:
            # DataFrame.append() is deprecated (removed in pandas 2.0); pd.concat is the
            # equivalent row-wise concatenation
            full_overlap_df = pd.concat([full_overlap_df, overlap_df])
        FastTripsLogger.debug("calculate_overlap() chunk_num %d: mem_use=%s full_overlap_df has length %d" %
                              (chunk_num+1, Util.get_process_mem_use_str(), len(full_overlap_df)))

    FastTripsLogger.debug("calculate_overlap() complete: full_overlap_df head=\n%s" % full_overlap_df.head(30))
    return full_overlap_df