From a01099aa274ffded3b86ad707a120f469eaef98d Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:32:38 +0100
Subject: [PATCH 1/8] Add model_dir arg to testing.post_invocations

---
 src/inference_server/testing.py | 13 +++++++---
 tests/test_inference_server.py  | 42 ++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 3ac8efd..0597f09 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -14,11 +14,13 @@
 """
 
 import io
+import pathlib
 from types import ModuleType
 from typing import Any, Callable, Optional, Protocol, Tuple, Type, Union
 
 import botocore.response  # type: ignore[import-untyped]
 import pluggy
+import pytest
 import werkzeug.test
 
 import inference_server
@@ -117,15 +119,20 @@ def client() -> werkzeug.test.Client:
     return werkzeug.test.Client(inference_server.create_app())
 
 
-def post_invocations(**kwargs) -> werkzeug.test.TestResponse:
+def post_invocations(*, model_dir: Optional[pathlib.Path] = None, **kwargs) -> werkzeug.test.TestResponse:
     """
     Send an HTTP POST request to ``/invocations`` using a test HTTP client and return the response
 
     This function should be used to verify an inference request using the full **inference-server** logic.
 
-    :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
+    :param model_dir: Optional. Pass a custom model directory to load the model from. Default is :file:`/opt/ml/model/`.
+    :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
     """
-    response = client().post("/invocations", **kwargs)
+    with pytest.MonkeyPatch.context() as monkeypatch:
+        if model_dir:
+            monkeypatch.setattr(inference_server, "_MODEL_DIR", str(model_dir))
+        response = client().post("/invocations", **kwargs)
+
     assert response.status_code == 200
     return response
 
diff --git a/tests/test_inference_server.py b/tests/test_inference_server.py
index 8f0ccc9..e77341e 100644
--- a/tests/test_inference_server.py
+++ b/tests/test_inference_server.py
@@ -8,7 +8,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
-
+import pathlib
 from typing import Tuple
 
 import botocore.response
@@ -22,6 +22,14 @@ def test_package_has_version():
     assert inference_server.__version__ is not None
 
 
+@pytest.fixture(autouse=True)
+def reset_caches():
+    try:
+        yield
+    finally:
+        inference_server._model.cache_clear()
+
+
 @pytest.fixture
 def client():
     return inference_server.testing.client()
@@ -46,6 +54,26 @@ def ping_fn(model):
     pm.unregister(PingPlugin)
 
 
+@pytest.fixture
+def model_using_dir():
+    class ModelPlugin:
+        """Plugin which just defines a model_fn"""
+
+        @staticmethod
+        @inference_server.plugin_hook()
+        def model_fn(model_dir: str):
+            """Model function for testing that we are passing a custom directory"""
+            assert model_dir != "/opt/ml/model"
+            return lambda data: data
+
+    pm = inference_server.testing.plugin_manager()
+    pm.register(ModelPlugin)
+    try:
+        yield
+    finally:
+        pm.unregister(ModelPlugin)
+
+
 def test_version():
     """Test that the package has a version"""
     assert inference_server.__version__ is not None
@@ -80,6 +108,18 @@ def test_invocations():
     assert response.headers["Content-Type"] == "application/octet-stream"
 
 
+def test_invocations_custom_model_dir(model_using_dir):
+    """Test loading the model from a custom model directory using low-level testing.post_invocations"""
+    data = b"What's the shipping forecast for tomorrow"
+    model_dir = pathlib.Path(__file__).parent
+
+    response = inference_server.testing.post_invocations(
+        data=data, model_dir=model_dir, headers={"Accept": "application/octet-stream"}
+    )
+    assert response.data == data
+    assert response.headers["Content-Type"] == "application/octet-stream"
+
+
 def test_prediction_custom_serializer():
     """Test the default plugin again, now using high-level testing.predict"""

From 670a507a0e9f437cb8721bc0584bccda4da444b6 Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:37:25 +0100
Subject: [PATCH 2/8] Add model_dir arg to testing.predict

---
 src/inference_server/testing.py | 10 ++++++++--
 tests/test_inference_server.py  |  9 ++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 0597f09..4d806a3 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -81,12 +81,18 @@ def deserialize(self, stream: "botocore.response.StreamingBody", content_type: s
 
 
 def predict(
-    data: Any, serializer: Optional[ImplementsSerialize] = None, deserializer: Optional[ImplementsDeserialize] = None
+    data: Any,
+    *,
+    model_dir: Optional[pathlib.Path] = None,
+    serializer: Optional[ImplementsSerialize] = None,
+    deserializer: Optional[ImplementsDeserialize] = None,
 ) -> Any:
     """
     Invoke the model and return a prediction
 
     :param data: Model input data
+    :param model_dir: Optional. Pass a custom model directory to load the model from. Default is
+        :file:`/opt/ml/model/`.
     :param serializer: Optional. A serializer for sending the data as bytes to the model server. Should be compatible
         with :class:`sagemaker.serializers.BaseSerializer`. Default: bytes pass-through.
     :param deserializer: Optional. A deserializer for processing the prediction as sent by the model server. Should be
@@ -100,7 +106,7 @@ def predict(
         "Content-Type": serializer.CONTENT_TYPE,  # The serializer declares the content-type of the input data
         "Accept": ", ".join(deserializer.ACCEPT),  # The deserializer dictates the content-type of the prediction
     }
-    prediction_response = post_invocations(data=serialized_data, headers=http_headers)
+    prediction_response = post_invocations(model_dir=model_dir, data=serialized_data, headers=http_headers)
     prediction_stream = botocore.response.StreamingBody(
         raw_stream=io.BytesIO(prediction_response.data),
         content_length=prediction_response.content_length,
diff --git a/tests/test_inference_server.py b/tests/test_inference_server.py
index e77341e..15e73fd 100644
--- a/tests/test_inference_server.py
+++ b/tests/test_inference_server.py
@@ -117,7 +117,6 @@ def test_invocations_custom_model_dir(model_using_dir):
         data=data, model_dir=model_dir, headers={"Accept": "application/octet-stream"}
     )
     assert response.data == data
-    assert response.headers["Content-Type"] == "application/octet-stream"
 
 
 def test_prediction_custom_serializer():
     """Test the default plugin again, now using high-level testing.predict"""
@@ -155,6 +154,14 @@ def test_prediction_no_serializer():
     assert prediction == input_data
 
 
+def test_prediction_model_dir(model_using_dir):
+    input_data = b"What's the shipping forecast for tomorrow"
+    model_dir = pathlib.Path(__file__).parent
+
+    prediction = inference_server.testing.predict(input_data, model_dir=model_dir)
+    assert prediction == input_data
+
+
 def test_execution_parameters(client):
     response = client.get("/execution-parameters")
     assert response.data == b'{"BatchStrategy":"MultiRecord","MaxConcurrentTransforms":1,"MaxPayloadInMB":6}'

From c59d0664fe0389655f010db5de9a59cf408471be Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:39:23 +0100
Subject: [PATCH 3/8] Add comment about use of pytest

---
 src/inference_server/testing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 4d806a3..03ab783 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -134,6 +134,7 @@ def post_invocations(*, model_dir: Optional[pathlib.Path] = None, **kwargs) -> w
     :param model_dir: Optional. Pass a custom model directory to load the model from. Default is :file:`/opt/ml/model/`.
     :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
     """
+    # pytest should be available when we are using inference_server.testing
     with pytest.MonkeyPatch.context() as monkeypatch:
         if model_dir:
             monkeypatch.setattr(inference_server, "_MODEL_DIR", str(model_dir))

From 181ad5a49ff408ccc8f48eb800b29cc49d94334e Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:42:36 +0100
Subject: [PATCH 4/8] Document default value of '/opt/ml/model'

---
 src/inference_server/_plugin.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/inference_server/_plugin.py b/src/inference_server/_plugin.py
index b0b4e34..b90bfae 100644
--- a/src/inference_server/_plugin.py
+++ b/src/inference_server/_plugin.py
@@ -46,7 +46,8 @@ def model_fn(model_dir: str) -> ModelType:
     This function will be called when the server starts up. Here, ``ModelType`` can be any Python class corresponding
     to the model, for example :class:`sklearn.tree.DecisionTreeClassifier`.
 
-    :param model_dir: Local filesystem directory containing the model files
+    :param model_dir: Local filesystem directory containing the model files. This is always :file:`/opt/ml/model` when
+        invoked by **inference-server**.
""" raise NotImplementedError From b4c53928424275a4f03d6630162a9434eeab717b Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 09:55:17 +0100 Subject: [PATCH 5/8] Document model_dir arg --- docs/testing.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/testing.rst b/docs/testing.rst index 6ce5967..324e24a 100644 --- a/docs/testing.rst +++ b/docs/testing.rst @@ -39,6 +39,20 @@ Here we can use any serializer compatible with :mod:`sagemaker.serializers` and If no serializer or deserializer is configured, bytes data are passed through as is for both input and output. +:func:`testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing the model +artifacts to be loaded. At runtime, this directory is always file:`/opt/ml/model`. For testing purposes however, we may +want to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this:: + + import pathlib + + @pytest.fixture + def model_artifacts_dir(tmp_path) -> pathlib.Path: + dir_ = tmp_path / "model" + dir_.mkdir() + # instantiate a model object and serialize as 1 or more files to the directory + ... + return dir_ + Testing model predictions (low-level API) ----------------------------------------- @@ -63,7 +77,6 @@ Instead of using the high-level testing API, we can also use invoke requests sim assert response.json() == expected_prediction - Verifying plugin registration ----------------------------- From 3b0b0e93a565afe35b1daad42a26305032952815 Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 10:01:07 +0100 Subject: [PATCH 6/8] Add pytest dependency for docs and linting --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 40ba7ce..7e90480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ dependencies = [ [project.optional-dependencies] docs = [ + "pytest", # Because we import this in inference_server.testing "sphinx", "sphinx-rtd-theme", ] @@ -81,6 +82,7 @@ linting = [ "isort", "mypy", "pre-commit", + "pytest", # Because we import this in inference_server.testing ] From 86b99563cdf66a212f218afc4a9d4e002a37087f Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 10:19:51 +0100 Subject: [PATCH 7/8] Tweak docs --- docs/testing.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/testing.rst b/docs/testing.rst index 324e24a..fccde01 100644 --- a/docs/testing.rst +++ b/docs/testing.rst @@ -39,9 +39,9 @@ Here we can use any serializer compatible with :mod:`sagemaker.serializers` and If no serializer or deserializer is configured, bytes data are passed through as is for both input and output. -:func:`testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing the model -artifacts to be loaded. At runtime, this directory is always file:`/opt/ml/model`. For testing purposes however, we may -want to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this:: +:func:`inference_server.testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing +the model artifacts to be loaded. At runtime, this directory is always :file:`/opt/ml/model`. 
+to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this::
 
     import pathlib
 
     @pytest.fixture
     def model_artifacts_dir(tmp_path) -> pathlib.Path:
         dir_ = tmp_path / "model"
         dir_.mkdir()
         # instantiate a model object and serialize as 1 or more files to the directory
         ...
         return dir_

From 3aa8fa32faeeff90c7eb6dd696162a021499ad99 Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 12:34:49 +0100
Subject: [PATCH 8/8] Add inference server as dependency in Docker docs

---
 docs/deployment.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/deployment.rst b/docs/deployment.rst
index 55ea87d..3746b4e 100644
--- a/docs/deployment.rst
+++ b/docs/deployment.rst
@@ -18,6 +18,7 @@ like this:
 
     COPY entrypoint.sh /usr/local/bin/
     RUN python -m pip install \
        gunicorn \
+       inference-server \
        shipping-forecast # Our package implementing the hooks
 
     EXPOSE 8080
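
Taken together, the testing changes above let a test create model artifacts on the fly and point **inference-server**
at them. The sketch below shows one way the pieces could fit together in a test module. It assumes only the hook and
testing APIs added in these patches; the ``EchoPlugin`` class, the ``model.txt`` artifact and the prefix behaviour are
purely illustrative, not part of the patch series::

    import pathlib

    import pytest

    import inference_server
    import inference_server.testing


    @pytest.fixture
    def model_artifacts_dir(tmp_path) -> pathlib.Path:
        # Write a throwaway model artifact instead of relying on /opt/ml/model
        dir_ = tmp_path / "model"
        dir_.mkdir()
        (dir_ / "model.txt").write_text("forecast: ")
        return dir_


    @pytest.fixture
    def echo_plugin():
        class EchoPlugin:
            """Illustrative plugin whose model_fn reads its artifact from model_dir"""

            @staticmethod
            @inference_server.plugin_hook()
            def model_fn(model_dir: str):
                prefix = (pathlib.Path(model_dir) / "model.txt").read_text().encode()
                return lambda data: prefix + data

        pm = inference_server.testing.plugin_manager()
        pm.register(EchoPlugin)
        try:
            yield
        finally:
            pm.unregister(EchoPlugin)


    def test_predict_with_custom_model_dir(echo_plugin, model_artifacts_dir):
        prediction = inference_server.testing.predict(b"rain later", model_dir=model_artifacts_dir)
        assert prediction == b"forecast: rain later"

Because **inference-server** caches the loaded model (see the ``reset_caches`` fixture in
``tests/test_inference_server.py``), a test suite that mixes model directories may also need to clear that cache
between tests.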