from __future__ import division
from builtins import str
from builtins import object
__copyright__ = "Copyright 2015 Contributing Entities"
__license__ = """
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pandas as pd
from .Logger import FastTripsLogger
from .Trip import Trip
from .Util import Util
class Stop(object):
    """
    Stop class.

    One instance represents all of the Stops as well as their transfer links.

    Stores stop information in :py:attr:`Stop.stops_df`, an instance of :py:class:`pandas.DataFrame`,
    and the string-ID to numeric-ID correspondence in :py:attr:`Stop.stop_id_df`.
    """

    #: File with fasttrips stop information (this extends the
    #: `gtfs stops <https://github.com/osplanning-data-standards/GTFS-PLUS/blob/master/files/stops.md>`_ file).
    #: See `stops_ft specification <https://github.com/osplanning-data-standards/GTFS-PLUS/blob/master/files/stops_ft.md>`_.
    INPUT_STOPS_FILE = "stops_ft.txt"

    #: gtfs Stops column name: Unique identifier (object)
    STOPS_COLUMN_STOP_ID = 'stop_id'
    #: gtfs Stops column name: Stop name (string)
    STOPS_COLUMN_STOP_NAME = 'stop_name'
    #: gtfs Stops column name: Latitude
    STOPS_COLUMN_STOP_LATITUDE = 'stop_lat'
    #: gtfs Stops column name: Longitude
    STOPS_COLUMN_STOP_LONGITUDE = 'stop_lon'
    #: gtfs Stops column name: Zone ID
    STOPS_COLUMN_ZONE_ID = 'zone_id'

    #: fasttrips Stops column name: Shelter
    STOPS_COLUMN_SHELTER = 'shelter'
    #: fasttrips Stops column name: Lighting
    STOPS_COLUMN_LIGHTING = 'lighting'
    #: fasttrips Stops column name: Bike Parking
    STOPS_COLUMN_BIKE_PARKING = 'bike_parking'
    #: fasttrips Stops column name: Bike Share Station
    STOPS_COLUMN_BIKE_SHARE_STATION = 'bike_share_station'
    #: fasttrips Stops column name: Seating
    STOPS_COLUMN_SEATING = 'seating'
    #: fasttrips Stops column name: Platform Height
    STOPS_COLUMN_PLATFORM_HEIGHT = 'platform_height'
    #: fasttrips Stops column name: Level
    STOPS_COLUMN_LEVEL = 'level'
    #: fasttrips Stops column name: Off-Board Payment
    STOPS_COLUMN_OFF_BOARD_PAYMENT = 'off_board_payment'

    # ========== Added by fasttrips =======================================================
    #: fasttrips Stops column name: Stop Numerical Identifier. Int.
    STOPS_COLUMN_STOP_ID_NUM = 'stop_id_num'
    #: fasttrips Stops column name: Zone Numerical Identifier. Int.
    STOPS_COLUMN_ZONE_ID_NUM = 'zone_id_num'

    #: gtfs Transfers column name: Origin stop identifier.
    #: Used by :py:meth:`Stop.get_transfers` and :py:meth:`Stop.is_transfer`.
    TRANSFERS_COLUMN_FROM_STOP = 'from_stop_id'
    #: gtfs Transfers column name: Destination stop identifier.
    #: Used by :py:meth:`Stop.get_transfers` and :py:meth:`Stop.is_transfer`.
    TRANSFERS_COLUMN_TO_STOP = 'to_stop_id'

    #: File with stop ID, stop ID number correspondence
    OUTPUT_STOP_ID_NUM_FILE = 'ft_intermediate_stop_id.txt'

    def __init__(self, input_archive, output_dir, gtfs, today):
        """
        Constructor.  Reads the gtfs stops from *gtfs* and joins the additional
        fast-trips stops data (:py:attr:`Stop.INPUT_STOPS_FILE`) to them.

        :param input_archive: Not used by this constructor; kept for interface compatibility.
        :param output_dir: Directory where :py:attr:`Stop.OUTPUT_STOP_ID_NUM_FILE` is later written.
        :param gtfs: Feed object exposing a ``stops`` dataframe and a ``get(filename)`` method.
        :param today: Not used by this constructor; kept for interface compatibility.
        """
        # keep this for later
        self.output_dir = output_dir

        self.stops_df = gtfs.stops
        FastTripsLogger.info("Read %7d %15s from %25d %25s" %
                             (len(self.stops_df), 'date valid stop', len(gtfs.stops), 'total stops'))

        # Read the fast-trips supplemental stops data file.
        stops_ft_df = gtfs.get(Stop.INPUT_STOPS_FILE)
        assert(len(stops_ft_df) > 0)

        # verify required columns are present
        stops_ft_cols = list(stops_ft_df.columns.values)
        assert(Stop.STOPS_COLUMN_STOP_ID in stops_ft_cols)

        # if more than one column, join to the stops dataframe
        if len(stops_ft_cols) > 1:
            self.stops_df = pd.merge(left=self.stops_df, right=stops_ft_df,
                                     how='left', on=Stop.STOPS_COLUMN_STOP_ID)

        # Stop IDs are strings. Create a unique numeric stop ID.
        self.stop_id_df = Util.add_numeric_column(self.stops_df[[Stop.STOPS_COLUMN_STOP_ID]],
                                                  id_colname=Stop.STOPS_COLUMN_STOP_ID,
                                                  numeric_newcolname=Stop.STOPS_COLUMN_STOP_ID_NUM)
        FastTripsLogger.debug("Stop ID to number correspondence\n" + str(self.stop_id_df.head()))
        FastTripsLogger.debug(str(self.stop_id_df.dtypes))

        #: Note the max stop ID num in :py:attr:`Stop.max_stop_id_num`.
        self.max_stop_id_num = self.stop_id_df[Stop.STOPS_COLUMN_STOP_ID_NUM].max()
        FastTripsLogger.debug("max stop ID number: %d" % self.max_stop_id_num)

        self.stops_df = self.add_numeric_stop_id(self.stops_df,
                                                 id_colname=Stop.STOPS_COLUMN_STOP_ID,
                                                 numeric_newcolname=Stop.STOPS_COLUMN_STOP_ID_NUM)

        # Zone IDs are strings. Add numeric zone ID.
        # Stays an empty dataframe when the stops have no (non-blank) zone IDs.
        self.zone_id_df = pd.DataFrame()
        if Stop.STOPS_COLUMN_ZONE_ID in list(self.stops_df.columns.values):
            # Blank zone IDs should be null
            self.stops_df.loc[self.stops_df[Stop.STOPS_COLUMN_ZONE_ID] == "", Stop.STOPS_COLUMN_ZONE_ID] = None

            zones_df = self.stops_df.loc[pd.notnull(self.stops_df[Stop.STOPS_COLUMN_ZONE_ID])]
            if len(zones_df) > 0:
                self.zone_id_df = Util.add_numeric_column(zones_df[[Stop.STOPS_COLUMN_ZONE_ID]],
                                                          id_colname=Stop.STOPS_COLUMN_ZONE_ID,
                                                          numeric_newcolname=Stop.STOPS_COLUMN_ZONE_ID_NUM)
                # add it to the stops
                self.stops_df = pd.merge(left=self.stops_df, right=self.zone_id_df,
                                         how="left", on=Stop.STOPS_COLUMN_ZONE_ID)

                # and the stop_id_df
                # NOTE(review): the left side is stops_df rather than stop_id_df, so after this
                # merge stop_id_df carries every stops column, not just the ID columns.  Left
                # unchanged because downstream code may depend on it -- confirm the intent.
                self.stop_id_df = pd.merge(left=self.stops_df,
                                           right=self.stops_df[[Stop.STOPS_COLUMN_STOP_ID_NUM,
                                                                Stop.STOPS_COLUMN_ZONE_ID,
                                                                Stop.STOPS_COLUMN_ZONE_ID_NUM]].drop_duplicates(),
                                           how="left")
                FastTripsLogger.debug("Zone ID to number correspondence\n" + str(self.zone_id_df.head()))

        FastTripsLogger.debug("=========== STOPS ===========\n" + str(self.stops_df.head()))
        FastTripsLogger.debug("\n"+str(self.stops_df.index.dtype)+"\n"+str(self.stops_df.dtypes))
        FastTripsLogger.info("Read %7d %15s from %25s, %25s" %
                             (len(self.stops_df), "stops", "stops.txt", Stop.INPUT_STOPS_FILE))

        #: Trips table.  Populated by :py:meth:`Stop.add_trips`.
        self.trip_times_df = None

    def add_daps_tazs_to_stops(self, dap_df, dap_id_colname, taz_df, taz_id_colname):
        """
        Drive access points (PNR lots, KNR lots, etc) and TAZs are like stops.
        Add the DAPs and TAZs to our stop list and their numbering in the
        :py:attr:`Stop.stop_id_df`.

        Pass in dataframes with JUST an ID column.

        DAPs are numbered after the stops, and TAZs are numbered after the DAPs.
        The resulting correspondence is written to :py:attr:`Stop.OUTPUT_STOP_ID_NUM_FILE`
        in the output directory.

        :param dap_df: Single-column dataframe of drive access point IDs.
        :param dap_id_colname: Name of the ID column in *dap_df*.
        :param taz_df: Single-column dataframe of TAZ IDs.
        :param taz_id_colname: Name of the ID column in *taz_df*.
        :raises AssertionError: if either dataframe has more than one column, or if
                                a TAZ ID matches an ID already in :py:attr:`Stop.stop_id_df`.
        """
        assert(len(dap_df.columns) == 1)

        # NOTE: unlike TAZ IDs below, DAP IDs are NOT verified to be distinct from stop IDs;
        # that check was disabled, so a lot ID matching a stop ID gets a second number here.
        daps_unique_df = dap_df.drop_duplicates().reset_index(drop=True)

        # number them starting after self.max_stop_id_num
        daps_unique_df[Stop.STOPS_COLUMN_STOP_ID_NUM] = daps_unique_df.index + self.max_stop_id_num + 1

        # rename DAP lot id to stop id
        daps_unique_df.rename(columns={dap_id_colname: Stop.STOPS_COLUMN_STOP_ID}, inplace=True)

        # append daps to stop ids
        self.stop_id_df = pd.concat([self.stop_id_df, daps_unique_df], axis=0)
        self.max_dap_id_num = self.stop_id_df[Stop.STOPS_COLUMN_STOP_ID_NUM].max()

        ##############################################################################################
        assert(len(taz_df.columns) == 1)

        # make sure the TAZ IDs are unique from Stop IDs (and DAP IDs, which are in stop_id_df now)
        tazs_unique_df = taz_df.drop_duplicates().reset_index(drop=True)
        join_tazs_stops = pd.merge(left=tazs_unique_df, right=self.stop_id_df,
                                   how="left",
                                   left_on=taz_id_colname, right_on=Stop.STOPS_COLUMN_STOP_ID)
        # there should be only NaNs since TAZ IDs need to be unique from Stop IDs
        assert(pd.isnull(join_tazs_stops[Stop.STOPS_COLUMN_STOP_ID]).sum() == len(join_tazs_stops))

        # number them starting after self.max_dap_id_num
        tazs_unique_df[Stop.STOPS_COLUMN_STOP_ID_NUM] = tazs_unique_df.index + self.max_dap_id_num + 1

        # rename TAZ id to stop id
        tazs_unique_df.rename(columns={taz_id_colname: Stop.STOPS_COLUMN_STOP_ID}, inplace=True)

        # append tazs to stop ids
        self.stop_id_df = pd.concat([self.stop_id_df, tazs_unique_df], axis=0)

        ##############################################################################################
        # write the stop ids and zone ids to numbering file
        # Work on a real copy so the NA placeholders below do not leak into self.stop_id_df.
        stop_id_df = self.stop_id_df.copy()
        if Stop.STOPS_COLUMN_ZONE_ID_NUM not in stop_id_df.columns.values:
            stop_id_df[Stop.STOPS_COLUMN_ZONE_ID_NUM] = -1
        # Without zones there is no zone_id column either; to_csv needs it to exist.
        if Stop.STOPS_COLUMN_ZONE_ID not in stop_id_df.columns.values:
            stop_id_df[Stop.STOPS_COLUMN_ZONE_ID] = "None"
        stop_id_df.fillna(value={Stop.STOPS_COLUMN_ZONE_ID_NUM: -1,
                                 Stop.STOPS_COLUMN_ZONE_ID: "None"}, inplace=True)
        stop_id_df[Stop.STOPS_COLUMN_ZONE_ID_NUM] = stop_id_df[Stop.STOPS_COLUMN_ZONE_ID_NUM].astype(int)

        output_path = os.path.join(self.output_dir, Stop.OUTPUT_STOP_ID_NUM_FILE)
        stop_id_df.to_csv(output_path,
                          columns=[Stop.STOPS_COLUMN_STOP_ID_NUM, Stop.STOPS_COLUMN_STOP_ID,
                                   Stop.STOPS_COLUMN_ZONE_ID_NUM, Stop.STOPS_COLUMN_ZONE_ID],
                          sep=" ", index=False)
        FastTripsLogger.debug("Wrote %s" % output_path)

    def add_numeric_stop_id(self, input_df, id_colname, numeric_newcolname, warn=False, warn_msg=None, drop_failures=True):
        """
        Passing a :py:class:`pandas.DataFrame` with a stop ID column called *id_colname*,
        adds the numeric stop id as a column named *numeric_newcolname* and returns it.

        *warn*, *warn_msg* and *drop_failures* are passed through to ``Util.add_new_id``.
        """
        return Util.add_new_id(input_df, id_colname, numeric_newcolname,
                               mapping_df=self.stop_id_df[[Stop.STOPS_COLUMN_STOP_ID, Stop.STOPS_COLUMN_STOP_ID_NUM]],
                               mapping_id_colname=Stop.STOPS_COLUMN_STOP_ID,
                               mapping_newid_colname=Stop.STOPS_COLUMN_STOP_ID_NUM,
                               warn=warn, warn_msg=warn_msg, drop_failures=drop_failures)

    def add_stop_id_for_numeric_id(self, input_df, numeric_id, id_colname):
        """
        Passing a :py:class:`pandas.DataFrame` with a stop ID num column called *numeric_id*,
        adds the string stop id as a column named *id_colname* and returns it.

        This is the inverse lookup of :py:meth:`Stop.add_numeric_stop_id`; it always
        calls ``Util.add_new_id`` with ``warn=True``.
        """
        return Util.add_new_id(input_df, id_colname=numeric_id, newid_colname=id_colname,
                               mapping_df=self.stop_id_df[[Stop.STOPS_COLUMN_STOP_ID, Stop.STOPS_COLUMN_STOP_ID_NUM]],
                               mapping_id_colname=Stop.STOPS_COLUMN_STOP_ID_NUM,
                               mapping_newid_colname=Stop.STOPS_COLUMN_STOP_ID,
                               warn=True, warn_msg=None)

    def add_numeric_stop_zone_id(self, input_df, id_colname, numeric_newcolname, warn=False, warn_msg=None):
        """
        Passing a :py:class:`pandas.DataFrame` with a stop zone ID column called *id_colname*,
        adds the numeric stop zone id as a column named *numeric_newcolname* and returns it.

        Requires :py:attr:`Stop.zone_id_df` to hold zone IDs; it is an empty dataframe
        (and the column selection here fails) when the stops define no zones.
        """
        return Util.add_new_id(input_df, id_colname, numeric_newcolname,
                               mapping_df=self.zone_id_df[[Stop.STOPS_COLUMN_ZONE_ID, Stop.STOPS_COLUMN_ZONE_ID_NUM]],
                               mapping_id_colname=Stop.STOPS_COLUMN_ZONE_ID,
                               mapping_newid_colname=Stop.STOPS_COLUMN_ZONE_ID_NUM,
                               warn=warn, warn_msg=warn_msg)

    def add_stop_lat_lon(self, input_df, id_colname, new_lat_colname, new_lon_colname, new_stop_name_colname=None):
        """
        Passing a :py:class:`pandas.DataFrame` with a stop ID column called *id_colname*,
        adds the stop latitude and longitude as columns named *new_lat_colname* and *new_lon_colname*
        and returns it.

        Pass *new_stop_name_colname* to also get the stop name.

        Rows whose stop ID is not in :py:attr:`Stop.stops_df` are kept (left join) with null values.
        """
        stop_cols = [Stop.STOPS_COLUMN_STOP_ID, Stop.STOPS_COLUMN_STOP_LATITUDE, Stop.STOPS_COLUMN_STOP_LONGITUDE]
        rename_dict = {Stop.STOPS_COLUMN_STOP_LATITUDE: new_lat_colname,
                       Stop.STOPS_COLUMN_STOP_LONGITUDE: new_lon_colname}
        if new_stop_name_colname:
            stop_cols.append(Stop.STOPS_COLUMN_STOP_NAME)
            rename_dict[Stop.STOPS_COLUMN_STOP_NAME] = new_stop_name_colname

        input_df = pd.merge(left=input_df,
                            right=self.stops_df[stop_cols],
                            how="left",
                            left_on=id_colname,
                            right_on=Stop.STOPS_COLUMN_STOP_ID)

        # the join key from the stops side is redundant unless it IS the caller's column
        if Stop.STOPS_COLUMN_STOP_ID != id_colname:
            input_df.drop(Stop.STOPS_COLUMN_STOP_ID, axis=1, inplace=True)

        input_df.rename(columns=rename_dict, inplace=True)
        return input_df

    def add_stop_zone_id(self, input_df, id_colname, zone_colname):
        """
        Passing a :py:class:`pandas.DataFrame` with a stop ID column called *id_colname*,
        adds the stop zone id as a column named *zone_colname* and returns it.

        If no zone_ids specified, this is a no-op.
        """
        if Stop.STOPS_COLUMN_ZONE_ID not in self.stops_df.columns.values:
            return input_df

        input_df = pd.merge(left=input_df,
                            right=self.stops_df[[Stop.STOPS_COLUMN_STOP_ID, Stop.STOPS_COLUMN_ZONE_ID]],
                            how="left",
                            left_on=id_colname,
                            right_on=Stop.STOPS_COLUMN_STOP_ID)

        # the join key from the stops side is redundant unless it IS the caller's column
        if Stop.STOPS_COLUMN_STOP_ID != id_colname:
            input_df.drop(Stop.STOPS_COLUMN_STOP_ID, axis=1, inplace=True)

        input_df.rename(columns={Stop.STOPS_COLUMN_ZONE_ID: zone_colname}, inplace=True)
        return input_df

    def add_trips(self, stop_times_df):
        """
        Stores a copy of the stop times, indexed by (stop id, trip id, sequence),
        in :py:attr:`Stop.trip_times_df`.

        :param stop_times_df: The :py:attr:`Trip.stop_times_df` table
        :type stop_times_df: a :py:class:`pandas.DataFrame` instance
        :raises ValueError: if the (stop id, trip id, sequence) index has duplicates
                            (``verify_integrity=True``).
        """
        self.trip_times_df = stop_times_df.copy()
        self.trip_times_df.reset_index(inplace=True)
        self.trip_times_df.set_index([Trip.STOPTIMES_COLUMN_STOP_ID,
                                      Trip.STOPTIMES_COLUMN_TRIP_ID,
                                      Trip.STOPTIMES_COLUMN_SEQUENCE],
                                     inplace=True, verify_integrity=True)
        FastTripsLogger.debug("Stop trip_times_df\n" + str(self.trip_times_df.head()))

    def get_transfers(self, stop_id, xfer_from):
        """
        Returns the rows of ``self.transfers_df`` that start at *stop_id* (if *xfer_from*)
        or end at *stop_id* (otherwise).

        ``self.transfers_df`` is not populated by this class; it must be attached by the
        caller before this method is used.
        """
        if xfer_from:
            stop_colname = Stop.TRANSFERS_COLUMN_FROM_STOP
        else:
            stop_colname = Stop.TRANSFERS_COLUMN_TO_STOP
        return self.transfers_df.loc[self.transfers_df[stop_colname] == stop_id]

    def get_trips_arriving_within_time(self, stop_id, latest_arrival, time_window):
        """
        Return list of [(trip_id, sequence, arrival_time)] where the arrival time is before *latest_arrival* but within *time_window*.

        :param stop_id: The stop of interest; must be present in :py:attr:`Stop.trip_times_df`.
        :param latest_arrival: The latest time the transit vehicle can arrive.
        :type latest_arrival: a :py:class:`datetime.time` instance
        :param time_window: The time window extending before *latest_arrival* within which an arrival is valid.
        :type time_window: a :py:class:`datetime.timedelta` instance
        """
        # minutes after midnight
        latest_arrival_min = 60.0*latest_arrival.hour + latest_arrival.minute + (latest_arrival.second/60.0)
        earliest_arrival_min = latest_arrival_min - (time_window.total_seconds()/60.0)

        # filter to stop; the remaining index levels are (trip id, sequence)
        df = self.trip_times_df.loc[stop_id]

        # arrive before latest arrival and arrive within time window (both bounds exclusive)
        df = df.loc[(df[Trip.STOPTIMES_COLUMN_ARRIVAL_TIME_MIN] < latest_arrival_min) &
                    (df[Trip.STOPTIMES_COLUMN_ARRIVAL_TIME_MIN] > earliest_arrival_min)]

        df = df[[Trip.STOPTIMES_COLUMN_ARRIVAL_TIME]]
        return [(index[0],                                 # trip id
                 index[1],                                 # sequence
                 row[Trip.STOPTIMES_COLUMN_ARRIVAL_TIME])  # arrival time
                for index, row in df.iterrows()]

    def get_trips_departing_within_time(self, stop_id, earliest_departure, time_window):
        """
        Return list of [(trip_id, sequence, departure_time)] where the departure time is after *earliest_departure* but within *time_window*.

        :param stop_id: The stop of interest; must be present in :py:attr:`Stop.trip_times_df`.
        :param earliest_departure: The earliest time the transit vehicle can depart.
        :type earliest_departure: a :py:class:`datetime.time` instance
        :param time_window: The time window extending after *earliest_departure* within which a departure is valid.
        :type time_window: a :py:class:`datetime.timedelta` instance
        """
        # minutes after midnight
        earliest_departure_min = 60.0*earliest_departure.hour + earliest_departure.minute + (earliest_departure.second/60.0)
        latest_departure_min = earliest_departure_min + (time_window.total_seconds()/60.0)

        # filter to stop; the remaining index levels are (trip id, sequence)
        df = self.trip_times_df.loc[stop_id]

        # depart after the earliest departure and within time window (both bounds exclusive)
        df = df.loc[(df[Trip.STOPTIMES_COLUMN_DEPARTURE_TIME_MIN] > earliest_departure_min) &
                    (df[Trip.STOPTIMES_COLUMN_DEPARTURE_TIME_MIN] < latest_departure_min)]

        df = df[[Trip.STOPTIMES_COLUMN_DEPARTURE_TIME]]
        return [(index[0],                                   # trip id
                 index[1],                                   # sequence
                 row[Trip.STOPTIMES_COLUMN_DEPARTURE_TIME])  # departure time
                for index, row in df.iterrows()]

    def is_transfer(self, stop_id, xfer_from):
        """
        Returns true iff ``self.transfers_df`` has a transfer link starting at *stop_id*
        (if *xfer_from*) or ending at *stop_id* (otherwise).

        ``self.transfers_df`` is not populated by this class; it must be attached by the
        caller before this method is used.
        """
        if xfer_from:
            stop_colname = Stop.TRANSFERS_COLUMN_FROM_STOP
        else:
            stop_colname = Stop.TRANSFERS_COLUMN_TO_STOP
        matching_links = self.transfers_df.loc[self.transfers_df[stop_colname] == stop_id]
        return len(matching_links) > 0

    def stop_min_max_lat_lon(self):
        """
        Returns (min_stop_lat, max_stop_lat,
                 min_stop_lon, max_stop_lon)
        """
        latitudes = self.stops_df[Stop.STOPS_COLUMN_STOP_LATITUDE]
        longitudes = self.stops_df[Stop.STOPS_COLUMN_STOP_LONGITUDE]
        return (latitudes.min(), latitudes.max(),
                longitudes.min(), longitudes.max())