Skip to content

Commit

Permalink
fix: (Temporarily) Re-add support for pre-2.6.0 YAMLs with `PyPDFConve…
Browse files Browse the repository at this point in the history
…rter` (#8443)
  • Loading branch information
shadeMe authored and silvanocerza committed Oct 8, 2024
1 parent 89dc8b8 commit f29ce97
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
8 changes: 7 additions & 1 deletion haystack/components/converters/pypdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack.utils.base_serialization import deserialize_class_instance, serialize_class_instance
from haystack.utils.type_serialization import deserialize_type

with LazyImport("Run 'pip install pypdf'") as pypdf_import:
from pypdf import PdfReader
Expand Down Expand Up @@ -118,7 +119,12 @@ def from_dict(cls, data):
"""
custom_converter_data = data["init_parameters"]["converter"]
if custom_converter_data is not None:
data["init_parameters"]["converter"] = deserialize_class_instance(custom_converter_data)
if "data" in custom_converter_data:
data["init_parameters"]["converter"] = deserialize_class_instance(custom_converter_data)
else:
# TODO: Remove in 2.7.0
converter_class = deserialize_type(custom_converter_data["type"])
data["init_parameters"]["converter"] = converter_class.from_dict(custom_converter_data)
return default_from_dict(cls, data)

def _default_convert(self, reader: "PdfReader") -> Document:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Revert change to PyPDFConverter that broke the deserialization of pre-2.6.0 YAMLs.
11 changes: 10 additions & 1 deletion test/components/converters/test_pypdf_to_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

from haystack import Document, default_from_dict, default_to_dict
from haystack.components.converters.pypdf import PyPDFToDocument
from haystack.components.converters.pypdf import PyPDFToDocument, DefaultConverter
from haystack.dataclasses import ByteStream


Expand Down Expand Up @@ -79,6 +79,15 @@ def test_from_dict_custom_converter(self):
assert isinstance(instance, PyPDFToDocument)
assert isinstance(instance.converter, CustomConverter)

def test_from_dict_pre_2_6_0(self):
    """
    Check that a converter entry carrying only a "type" key (the pre-2.6.0 layout) is still deserialized.
    """
    # Legacy layout: the nested converter dict has no "data" key, only the class path.
    legacy_converter = {"type": "haystack.components.converters.pypdf.DefaultConverter"}
    serialized = {
        "type": "haystack.components.converters.pypdf.PyPDFToDocument",
        "init_parameters": {"converter": legacy_converter},
    }

    component = PyPDFToDocument.from_dict(serialized)

    assert isinstance(component, PyPDFToDocument)
    assert isinstance(component.converter, DefaultConverter)

@pytest.mark.integration
def test_run(self, test_files_path, pypdf_converter):
"""
Expand Down

0 comments on commit f29ce97

Please sign in to comment.