From a01099aa274ffded3b86ad707a120f469eaef98d Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:32:38 +0100
Subject: [PATCH 1/8] Add model_dir arg to testing.post_invocations

---
 src/inference_server/testing.py | 13 +++++++---
 tests/test_inference_server.py  | 42 ++++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 3ac8efd..0597f09 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -14,11 +14,13 @@
 """
 
 import io
+import pathlib
 from types import ModuleType
 from typing import Any, Callable, Optional, Protocol, Tuple, Type, Union
 
 import botocore.response  # type: ignore[import-untyped]
 import pluggy
+import pytest
 import werkzeug.test
 
 import inference_server
@@ -117,15 +119,20 @@ def client() -> werkzeug.test.Client:
     return werkzeug.test.Client(inference_server.create_app())
 
 
-def post_invocations(**kwargs) -> werkzeug.test.TestResponse:
+def post_invocations(*, model_dir: Optional[pathlib.Path] = None, **kwargs) -> werkzeug.test.TestResponse:
     """
     Send an HTTP POST request to ``/invocations`` using a test HTTP client and return the response
 
     This function should be used to verify an inference request using the full **inference-server** logic.
 
-    :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
+    :param model_dir: Optional. Pass a custom model directory to load the model from. Default is :file:`/opt/ml/model/`.
+    :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
     """
-    response = client().post("/invocations", **kwargs)
+    with pytest.MonkeyPatch.context() as monkeypatch:
+        if model_dir:
+            monkeypatch.setattr(inference_server, "_MODEL_DIR", str(model_dir))
+        response = client().post("/invocations", **kwargs)
+
     assert response.status_code == 200
     return response
 
diff --git a/tests/test_inference_server.py b/tests/test_inference_server.py
index 8f0ccc9..e77341e 100644
--- a/tests/test_inference_server.py
+++ b/tests/test_inference_server.py
@@ -8,7 +8,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
-
+import pathlib
 from typing import Tuple
 
 import botocore.response
@@ -22,6 +22,14 @@ def test_package_has_version():
     assert inference_server.__version__ is not None
 
 
+@pytest.fixture(autouse=True)
+def reset_caches():
+    try:
+        yield
+    finally:
+        inference_server._model.cache_clear()
+
+
 @pytest.fixture
 def client():
     return inference_server.testing.client()
@@ -46,6 +54,26 @@ def ping_fn(model):
     pm.unregister(PingPlugin)
 
 
+@pytest.fixture
+def model_using_dir():
+    class ModelPlugin:
+        """Plugin which just defines a model_fn"""
+
+        @staticmethod
+        @inference_server.plugin_hook()
+        def model_fn(model_dir: str):
+            """Model function for testing that we are passing a custom directory"""
+            assert model_dir != "/opt/ml/model"
+            return lambda data: data
+
+    pm = inference_server.testing.plugin_manager()
+    pm.register(ModelPlugin)
+    try:
+        yield
+    finally:
+        pm.unregister(ModelPlugin)
+
+
 def test_version():
     """Test that the package has a version"""
     assert inference_server.__version__ is not None
@@ -80,6 +108,18 @@ def test_invocations():
     assert response.headers["Content-Type"] == "application/octet-stream"
 
 
+def test_invocations_custom_model_dir(model_using_dir):
+    """Test loading the model from a custom model directory using low-level testing.post_invocations"""
+    data = b"What's the shipping forecast for tomorrow"
+    model_dir = pathlib.Path(__file__).parent
+
+    response = inference_server.testing.post_invocations(
+        data=data, model_dir=model_dir, headers={"Accept": "application/octet-stream"}
+    )
+    assert response.data == data
+    assert response.headers["Content-Type"] == "application/octet-stream"
+
+
 def test_prediction_custom_serializer():
     """Test the default plugin again, now using high-level testing.predict"""

From 670a507a0e9f437cb8721bc0584bccda4da444b6 Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:37:25 +0100
Subject: [PATCH 2/8] Add model_dir arg to testing.predict

---
 src/inference_server/testing.py | 10 ++++++++--
 tests/test_inference_server.py  |  9 ++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 0597f09..4d806a3 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -81,12 +81,18 @@ def deserialize(self, stream: "botocore.response.StreamingBody", content_type: s
 
 
 def predict(
-    data: Any, serializer: Optional[ImplementsSerialize] = None, deserializer: Optional[ImplementsDeserialize] = None
+    data: Any,
+    *,
+    model_dir: Optional[pathlib.Path] = None,
+    serializer: Optional[ImplementsSerialize] = None,
+    deserializer: Optional[ImplementsDeserialize] = None,
 ) -> Any:
     """
     Invoke the model and return a prediction
 
     :param data: Model input data
+    :param model_dir: Optional. Pass a custom model directory to load the model from. Default is
+        :file:`/opt/ml/model/`.
     :param serializer: Optional. A serializer for sending the data as bytes to the model server. Should be compatible
         with :class:`sagemaker.serializers.BaseSerializer`. Default: bytes pass-through.
     :param deserializer: Optional. A deserializer for processing the prediction as sent by the model server. Should be
@@ -100,7 +106,7 @@ def predict(
         "Content-Type": serializer.CONTENT_TYPE,  # The serializer declares the content-type of the input data
         "Accept": ", ".join(deserializer.ACCEPT),  # The deserializer dictates the content-type of the prediction
     }
-    prediction_response = post_invocations(data=serialized_data, headers=http_headers)
+    prediction_response = post_invocations(model_dir=model_dir, data=serialized_data, headers=http_headers)
     prediction_stream = botocore.response.StreamingBody(
         raw_stream=io.BytesIO(prediction_response.data),
         content_length=prediction_response.content_length,
diff --git a/tests/test_inference_server.py b/tests/test_inference_server.py
index e77341e..15e73fd 100644
--- a/tests/test_inference_server.py
+++ b/tests/test_inference_server.py
@@ -117,7 +117,6 @@ def test_invocations_custom_model_dir(model_using_dir):
         data=data, model_dir=model_dir, headers={"Accept": "application/octet-stream"}
     )
     assert response.data == data
-    assert response.headers["Content-Type"] == "application/octet-stream"
 
 
 def test_prediction_custom_serializer():
     """Test the default plugin again, now using high-level testing.predict"""
@@ -155,6 +154,14 @@ def test_prediction_no_serializer():
     assert prediction == input_data
 
 
+def test_prediction_model_dir(model_using_dir):
+    input_data = b"What's the shipping forecast for tomorrow"
+    model_dir = pathlib.Path(__file__).parent
+
+    prediction = inference_server.testing.predict(input_data, model_dir=model_dir)
+    assert prediction == input_data
+
+
 def test_execution_parameters(client):
     response = client.get("/execution-parameters")
     assert response.data == b'{"BatchStrategy":"MultiRecord","MaxConcurrentTransforms":1,"MaxPayloadInMB":6}'

From c59d0664fe0389655f010db5de9a59cf408471be Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:39:23 +0100
Subject: [PATCH 3/8] Add comment about use of pytest

---
 src/inference_server/testing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/inference_server/testing.py b/src/inference_server/testing.py
index 4d806a3..03ab783 100644
--- a/src/inference_server/testing.py
+++ b/src/inference_server/testing.py
@@ -134,6 +134,7 @@ def post_invocations(*, model_dir: Optional[pathlib.Path] = None, **kwargs) -> w
     :param model_dir: Optional. Pass a custom model directory to load the model from. Default is :file:`/opt/ml/model/`.
     :param kwargs: Keyword arguments passed to :meth:`werkzeug.test.Client.post`
     """
+    # pytest should be available when we are using inference_server.testing
     with pytest.MonkeyPatch.context() as monkeypatch:
         if model_dir:
             monkeypatch.setattr(inference_server, "_MODEL_DIR", str(model_dir))

From 181ad5a49ff408ccc8f48eb800b29cc49d94334e Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 09:42:36 +0100
Subject: [PATCH 4/8] Document default value of '/opt/ml/model'

---
 src/inference_server/_plugin.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/inference_server/_plugin.py b/src/inference_server/_plugin.py
index b0b4e34..b90bfae 100644
--- a/src/inference_server/_plugin.py
+++ b/src/inference_server/_plugin.py
@@ -46,7 +46,8 @@ def model_fn(model_dir: str) -> ModelType:
     This function will be called when the server starts up. Here, ``ModelType`` can be any Python class corresponding
     to the model, for example :class:`sklearn.tree.DecisionTreeClassifier`.
 
-    :param model_dir: Local filesystem directory containing the model files
+    :param model_dir: Local filesystem directory containing the model files. This is always :file:`/opt/ml/model` when
+        invoked by **inference-server**.
""" raise NotImplementedError From b4c53928424275a4f03d6630162a9434eeab717b Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 09:55:17 +0100 Subject: [PATCH 5/8] Document model_dir arg --- docs/testing.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/testing.rst b/docs/testing.rst index 6ce5967..324e24a 100644 --- a/docs/testing.rst +++ b/docs/testing.rst @@ -39,6 +39,20 @@ Here we can use any serializer compatible with :mod:`sagemaker.serializers` and If no serializer or deserializer is configured, bytes data are passed through as is for both input and output. +:func:`testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing the model +artifacts to be loaded. At runtime, this directory is always file:`/opt/ml/model`. For testing purposes however, we may +want to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this:: + + import pathlib + + @pytest.fixture + def model_artifacts_dir(tmp_path) -> pathlib.Path: + dir_ = tmp_path / "model" + dir_.mkdir() + # instantiate a model object and serialize as 1 or more files to the directory + ... + return dir_ + Testing model predictions (low-level API) ----------------------------------------- @@ -63,7 +77,6 @@ Instead of using the high-level testing API, we can also use invoke requests sim assert response.json() == expected_prediction - Verifying plugin registration ----------------------------- From 3b0b0e93a565afe35b1daad42a26305032952815 Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 10:01:07 +0100 Subject: [PATCH 6/8] Add pytest dependency for docs and linting --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 40ba7ce..7e90480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ dependencies = [ [project.optional-dependencies] docs = [ + "pytest", # Because we import this in inference_server.testing "sphinx", "sphinx-rtd-theme", ] @@ -81,6 +82,7 @@ linting = [ "isort", "mypy", "pre-commit", + "pytest", # Because we import this in inference_server.testing ] From 86b99563cdf66a212f218afc4a9d4e002a37087f Mon Sep 17 00:00:00 2001 From: faph Date: Thu, 19 Sep 2024 10:19:51 +0100 Subject: [PATCH 7/8] Tweak docs --- docs/testing.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/testing.rst b/docs/testing.rst index 324e24a..fccde01 100644 --- a/docs/testing.rst +++ b/docs/testing.rst @@ -39,9 +39,9 @@ Here we can use any serializer compatible with :mod:`sagemaker.serializers` and If no serializer or deserializer is configured, bytes data are passed through as is for both input and output. -:func:`testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing the model -artifacts to be loaded. At runtime, this directory is always file:`/opt/ml/model`. For testing purposes however, we may -want to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this:: +:func:`inference_server.testing.predict` accepts a ``model_dir`` argument which can used to set the directory containing +the model artifacts to be loaded. At runtime, this directory is always :file:`/opt/ml/model`. 
+to create model artifacts on the fly, for example in a temporary directory using a Pytest fixture, like this::
 
     import pathlib
 
     @pytest.fixture
     def model_artifacts_dir(tmp_path) -> pathlib.Path:
         dir_ = tmp_path / "model"
         dir_.mkdir()
         # instantiate a model object and serialize as 1 or more files to the directory
         ...
         return dir_

From 3aa8fa32faeeff90c7eb6dd696162a021499ad99 Mon Sep 17 00:00:00 2001
From: faph
Date: Thu, 19 Sep 2024 12:34:49 +0100
Subject: [PATCH 8/8] Add inference server as dependency in Docker docs

---
 docs/deployment.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/deployment.rst b/docs/deployment.rst
index 55ea87d..3746b4e 100644
--- a/docs/deployment.rst
+++ b/docs/deployment.rst
@@ -18,6 +18,7 @@ like this:
 
     COPY entrypoint.sh /usr/local/bin/
     RUN python -m pip install \
        gunicorn \
+       inference-server \
        shipping-forecast # Our package implementing the hooks
 
     EXPOSE 8080
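
Taken together, the testing changes above let a test create model artifacts on the fly and point **inference-server**
at them. The sketch below shows one way the pieces could fit together in a test module. It assumes only the hook and
testing APIs added in these patches; the ``EchoPlugin`` class, the ``model.txt`` artifact and the prefix behaviour are
purely illustrative, not part of the patch series::

    import pathlib

    import pytest

    import inference_server
    import inference_server.testing


    @pytest.fixture
    def model_artifacts_dir(tmp_path) -> pathlib.Path:
        # Write a throwaway model artifact instead of relying on /opt/ml/model
        dir_ = tmp_path / "model"
        dir_.mkdir()
        (dir_ / "model.txt").write_text("forecast: ")
        return dir_


    @pytest.fixture
    def echo_plugin():
        class EchoPlugin:
            """Illustrative plugin whose model_fn reads its artifact from model_dir"""

            @staticmethod
            @inference_server.plugin_hook()
            def model_fn(model_dir: str):
                prefix = (pathlib.Path(model_dir) / "model.txt").read_text().encode()
                return lambda data: prefix + data

        pm = inference_server.testing.plugin_manager()
        pm.register(EchoPlugin)
        try:
            yield
        finally:
            pm.unregister(EchoPlugin)


    def test_predict_with_custom_model_dir(echo_plugin, model_artifacts_dir):
        prediction = inference_server.testing.predict(b"rain later", model_dir=model_artifacts_dir)
        assert prediction == b"forecast: rain later"

Because **inference-server** caches the loaded model (see the ``reset_caches`` fixture in
``tests/test_inference_server.py``), a test suite that mixes model directories may also need to clear that cache
between tests.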