Source code for spopt.locate.p_median
import numpy as np
import pulp
from geopandas import GeoDataFrame
from .base import (
BaseOutputMixin,
LocateSolver,
FacilityModelBuilder,
MeanDistanceMixin,
)
from scipy.spatial.distance import cdist
from typing import Union
import warnings
[docs]class PMedian(LocateSolver, BaseOutputMixin, MeanDistanceMixin):
r"""
Implement the :math:`p`-median optimization model and solve it. The
:math:`p`-median problem, as adapted from :cite:`daskin_2013`,
can be formulated as:
.. math::
\begin{array}{lllll}
\displaystyle \textbf{Minimize} & \displaystyle \sum_{i \in I}\sum_{j \in J}{a_i d_{ij} X_{ij}} && & (1) \\
\displaystyle \textbf{Subject To} & \displaystyle \sum_{j \in J}{X_{ij} = 1} && \forall i \in I & (2) \\
& \displaystyle \sum_{j \in J}{Y_j} = p && & (3) \\
& X_{ij} \leq Y_{j} && \forall i \in I \quad \forall j \in J & (4) \\
& X_{ij} \in \{0, 1\} && \forall i \in I \quad \forall j \in J & (5) \\
& Y_j \in \{0, 1\} && \forall j \in J & (6) \\
& && & \\
\displaystyle \textbf{Where} && i & = & \textrm{index of demand points/areas/objects in set } I \\
&& j & = & \textrm{index of potential facility sites in set } J \\
&& p & = & \textrm{the number of facilities to be sited} \\
&& a_i & = & \textrm{service load or population demand at client location } i \\
&& d_{ij} & = & \textrm{shortest distance or travel time between locations } i \textrm{ and } j \\
&& X_{ij} & = & \begin{cases}
1, \textrm{if client location } i \textrm{ is served by facility } j \\
0, \textrm{otherwise} \\
\end{cases} \\
&& Y_j & = & \begin{cases}
1, \textrm{if a facility is sited at location } j \\
0, \textrm{otherwise} \\
\end{cases} \\
\end{array}
Parameters
----------
name : str
The problem name.
problem : pulp.LpProblem
A ``pulp`` instance of an optimization model that contains
constraints, variables, and an objective function.
aij : numpy.array
A cost matrix in the form of a 2D array between origins and destinations.
Attributes
----------
name : str
The problem name.
problem : pulp.LpProblem
A ``pulp`` instance of an optimization model that contains
constraints, variables, and an objective function.
fac2cli : numpy.array
A 2D array storing facility to client relationships where each
row represents a facility and contains an array of client indices
with which it is associated. An empty client array indicates
the facility is associated with no clients.
cli2fac : numpy.array
The inverse of ``fac2cli`` where client to facility relationships
are shown.
aij : numpy.array
A cost matrix in the form of a 2D array between origins and destinations.
""" # noqa
[docs] def __init__(
self,
name: str,
problem: pulp.LpProblem,
aij: np.array,
weights_sum: Union[int, float],
):
self.aij = aij
self.ai_sum = weights_sum
self.name = name
self.problem = problem
def __add_obj(self, range_clients: range, range_facility: range) -> None:
"""
Add the objective function to the model.
Minimize s0_0 * z0_0 + s0_1 * z0_1 + ... + si_j * zi_j
Parameters
----------
range_clients : range
The range of demand points.
range_facility : range
The range of facility point.
Returns
-------
None
"""
cli_assgn_vars = getattr(self, "cli_assgn_vars")
self.problem += (
pulp.lpSum(
[
self.aij[i, j] * cli_assgn_vars[i, j]
for i in range_clients
for j in range_facility
]
),
"objective function",
)
[docs] @classmethod
def from_cost_matrix(
cls,
cost_matrix: np.array,
weights: np.array,
p_facilities: int,
predefined_facilities_arr: np.array = None,
name: str = "p-median",
):
"""
Create a ``PMedian`` object based on a cost matrix.
Parameters
----------
cost_matrix : numpy.array
A cost matrix in the form of a 2D array between origins and destinations.
weights : numpy.array
A 1D array of service load or population demand.
p_facilities : int
The number of facilities to be located.
predefined_facilities_arr : numpy.array (default None)
Predefined facilities that must appear in the solution.
name : str (default 'p-median')
The problem name.
Returns
-------
spopt.locate.p_median.PMedian
Examples
--------
>>> from spopt.locate import PMedian
>>> from spopt.locate.util import simulated_geo_points
>>> import geopandas
>>> import numpy
>>> import pulp
>>> import spaghetti
Create a regular lattice.
>>> lattice = spaghetti.regular_lattice((0, 0, 10, 10), 9, exterior=True)
>>> ntw = spaghetti.Network(in_data=lattice)
>>> streets = spaghetti.element_as_gdf(ntw, arcs=True)
>>> streets_buffered = geopandas.GeoDataFrame(
... geopandas.GeoSeries(streets["geometry"].buffer(0.2).unary_union),
... crs=streets.crs,
... columns=["geometry"]
... )
Simulate points about the lattice.
>>> demand_points = simulated_geo_points(streets_buffered, needed=100, seed=5)
>>> facility_points = simulated_geo_points(streets_buffered, needed=5, seed=6)
Snap the points to the network of lattice edges.
>>> ntw.snapobservations(demand_points, "clients", attribute=True)
>>> clients_snapped = spaghetti.element_as_gdf(
... ntw, pp_name="clients", snapped=True
... )
>>> ntw.snapobservations(facility_points, "facilities", attribute=True)
>>> facilities_snapped = spaghetti.element_as_gdf(
... ntw, pp_name="facilities", snapped=True
... )
Calculate the cost matrix from origins to destinations.
>>> cost_matrix = ntw.allneighbordistances(
... sourcepattern=ntw.pointpatterns["clients"],
... destpattern=ntw.pointpatterns["facilities"]
... )
Simulate demand weights from ``1`` to ``12``.
>>> ai = numpy.random.randint(1, 12, 100)
Create and solve a ``PMedian`` instance from the cost matrix.
>>> pmedian_from_cost_matrix = PMedian.from_cost_matrix(
... cost_matrix, ai, p_facilities=4
... )
>>> pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(
... pulp.PULP_CBC_CMD(msg=False)
... )
Get the facility-client associations.
>>> for fac, cli in enumerate(pmedian_from_cost_matrix.fac2cli):
... print(f"facility {fac} serving {len(cli)} clients")
facility 0 serving 14 clients
facility 1 serving 29 clients
facility 2 serving 31 clients
facility 3 serving 0 clients
facility 4 serving 26 clients
Get the total and average weighted travel cost.
>>> round(pmedian_from_cost_matrix.problem.objective.value(), 3)
1870.747
>>> round(pmedian_from_cost_matrix.mean_dist, 3)
3.027
"""
n_cli = cost_matrix.shape[0]
r_cli = range(n_cli)
r_fac = range(cost_matrix.shape[1])
model = pulp.LpProblem(name, pulp.LpMinimize)
weights_sum = weights.sum()
weights = np.reshape(weights, (n_cli, 1))
aij = weights * cost_matrix
p_median = PMedian(name, model, aij, weights_sum)
FacilityModelBuilder.add_facility_integer_variable(p_median, r_fac, "y[{i}]")
FacilityModelBuilder.add_client_assign_variable(
p_median, r_cli, r_fac, "z[{i}_{j}]"
)
if predefined_facilities_arr is not None:
FacilityModelBuilder.add_predefined_facility_constraint(
p_median, predefined_facilities_arr
)
p_median.__add_obj(r_cli, r_fac)
FacilityModelBuilder.add_facility_constraint(p_median, p_facilities)
FacilityModelBuilder.add_assignment_constraint(p_median, r_fac, r_cli)
FacilityModelBuilder.add_opening_constraint(p_median, r_fac, r_cli)
return p_median
[docs] @classmethod
def from_geodataframe(
cls,
gdf_demand: GeoDataFrame,
gdf_fac: GeoDataFrame,
demand_col: str,
facility_col: str,
weights_cols: str,
p_facilities: int,
predefined_facility_col: str = None,
distance_metric: str = "euclidean",
name: str = "p-median",
):
"""
Create an ``PMedian`` object from ``geopandas.GeoDataFrame`` objects.
Calculate the cost matrix between demand and facility locations
before building the problem within the ``from_cost_matrix()`` method.
Parameters
----------
gdf_demand : geopandas.GeoDataFrame
Demand locations.
gdf_fac : geopandas.GeoDataFrame
Facility locations.
demand_col : str
Demand sites geometry column name.
facility_col : str
Facility candidate sites geometry column name.
weights_cols : str
The weight column name representing service load or demand.
p_facilities: int
The number of facilities to be located.
predefined_facility_col : str (default None)
Column name representing facilities are already defined.
distance_metric : str (default 'euclidean')
A metric used for the distance calculations supported by
`scipy.spatial.distance.cdist <https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html>`_.
name : str (default 'p-median')
The name of the problem.
Returns
-------
spopt.locate.p_median.PMedian
Examples
--------
>>> from spopt.locate import PMedian
>>> from spopt.locate.util import simulated_geo_points
>>> import geopandas
>>> import numpy
>>> import pulp
>>> import spaghetti
Create a regular lattice.
>>> lattice = spaghetti.regular_lattice((0, 0, 10, 10), 9, exterior=True)
>>> ntw = spaghetti.Network(in_data=lattice)
>>> streets = spaghetti.element_as_gdf(ntw, arcs=True)
>>> streets_buffered = geopandas.GeoDataFrame(
... geopandas.GeoSeries(streets["geometry"].buffer(0.2).unary_union),
... crs=streets.crs,
... columns=["geometry"]
... )
Simulate points about the lattice.
>>> demand_points = simulated_geo_points(streets_buffered, needed=100, seed=5)
>>> facility_points = simulated_geo_points(streets_buffered, needed=5, seed=6)
Snap the points to the network of lattice edges
and extract as ``GeoDataFrame`` objects.
>>> ntw.snapobservations(demand_points, "clients", attribute=True)
>>> clients_snapped = spaghetti.element_as_gdf(
... ntw, pp_name="clients", snapped=True
... )
>>> ntw.snapobservations(facility_points, "facilities", attribute=True)
>>> facilities_snapped = spaghetti.element_as_gdf(
... ntw, pp_name="facilities", snapped=True
... )
Simulate demand weights from ``1`` to ``12``.
>>> ai = numpy.random.randint(1, 12, 100)
>>> clients_snapped['weights'] = ai
Create and solve a ``PMedian`` instance from the ``GeoDataFrame`` object.
>>> pmedian_from_geodataframe = PMedian.from_geodataframe(
... clients_snapped,
... facilities_snapped,
... "geometry",
... "geometry",
... "weights",
... p_facilities=4,
... distance_metric="euclidean"
... )
>>> pmedian_from_geodataframe = pmedian_from_geodataframe.solve(
... pulp.PULP_CBC_CMD(msg=False)
... )
Get the facility-client associations.
>>> for fac, cli in enumerate(pmedian_from_geodataframe.fac2cli):
... print(f"facility {fac} serving {len(cli)} clients")
facility 0 serving 13 clients
facility 1 serving 29 clients
facility 2 serving 31 clients
facility 3 serving 0 clients
facility 4 serving 27 clients
""" # noqa
predefined_facilities_arr = None
if predefined_facility_col is not None:
predefined_facilities_arr = gdf_fac[predefined_facility_col].to_numpy()
service_load = gdf_demand[weights_cols].to_numpy()
dem = gdf_demand[demand_col]
fac = gdf_fac[facility_col]
dem_type_geom = dem.geom_type.unique()
fac_type_geom = fac.geom_type.unique()
_msg = (
" geodataframe contains mixed type geometries or is not a point. Be "
"sure deriving centroid from geometries doesn't affect the results."
)
if len(dem_type_geom) > 1 or not "Point" in dem_type_geom:
warnings.warn(f"Demand{_msg}", UserWarning)
dem = dem.centroid
if len(fac_type_geom) > 1 or not "Point" in fac_type_geom:
warnings.warn(f"Facility{_msg}", UserWarning)
fac = fac.centroid
dem_data = np.array([dem.x.to_numpy(), dem.y.to_numpy()]).T
fac_data = np.array([fac.x.to_numpy(), fac.y.to_numpy()]).T
if gdf_demand.crs != gdf_fac.crs:
raise ValueError(
"Geodataframes crs are different: "
f"gdf_demand-{gdf_demand.crs}, gdf_fac-{gdf_fac.crs}"
)
distances = cdist(dem_data, fac_data, distance_metric)
return cls.from_cost_matrix(
distances, service_load, p_facilities, predefined_facilities_arr, name
)
[docs] def facility_client_array(self) -> None:
"""
Create a 2D array storing **facility to client relationships** where each
row represents a facility and contains an array of client indices
with which it is associated. An empty client array indicates
the facility is associated with no clients.
Returns
-------
None
"""
fac_vars = getattr(self, "fac_vars")
cli_vars = getattr(self, "cli_assgn_vars")
len_fac_vars = len(fac_vars)
self.fac2cli = []
for j in range(len_fac_vars):
array_cli = []
if fac_vars[j].value() > 0:
for i in range(len(cli_vars)):
if cli_vars[i, j].value() > 0:
array_cli.append(i)
self.fac2cli.append(array_cli)
[docs] def solve(self, solver: pulp.LpSolver, results: bool = True):
"""
Solve the ``PMedian`` model.
Parameters
----------
solver : pulp.LpSolver
A solver supported by ``pulp``.
results : bool (default True)
If ``True`` it will create metainfo (which facilities cover
which demand) and vice-versa, and the uncovered demand.
Returns
-------
spopt.locate.p_median.PMedian
"""
self.problem.solve(solver)
self.check_status()
if results:
self.facility_client_array()
self.client_facility_array()
self.get_mean_distance()
return self