Skip to content

Commit

Permalink
Merge pull request #81 from scrapy/feature/fluent-interface
Browse files Browse the repository at this point in the history
Add support for fluent interface / method chaining
  • Loading branch information
wRAR authored May 6, 2024
2 parents 6c8940d + c35a59a commit 6558315
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 14 deletions.
58 changes: 44 additions & 14 deletions itemloaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def add_value(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Process and then add the given ``value`` for the given field.
Expand All @@ -205,13 +205,17 @@ def add_value(
multiple fields may be added. And the processed value should be a dict
with field_name mapped to values.
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
Examples::
loader.add_value('name', 'Color TV')
loader.add_value('colours', ['white', 'blue'])
loader.add_value('length', '100')
loader.add_value('name', 'name: foo', TakeFirst(), re='name: (.+)')
loader.add_value(None, {'name': 'foo', 'sex': 'male'})
"""
value = self.get_value(value, *processors, re=re, **kw)
if value is None:
Expand All @@ -221,6 +225,7 @@ def add_value(
self._add_value(k, v)
else:
self._add_value(field_name, value)
return self

def replace_value(
self,
Expand All @@ -229,10 +234,13 @@ def replace_value(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`add_value` but replaces the collected data with the
new value instead of adding it.
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
"""
value = self.get_value(value, *processors, re=re, **kw)
if value is None:
Expand All @@ -242,6 +250,7 @@ def replace_value(
self._replace_value(k, v)
else:
self._replace_value(field_name, value)
return self

def _add_value(self, field_name: str, value: Any) -> None:
value = arg_to_iter(value)
Expand Down Expand Up @@ -387,7 +396,7 @@ def add_xpath(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
value, which is used to extract a list of strings from the
Expand All @@ -398,6 +407,9 @@ def add_xpath(
:param xpath: the XPath to extract data from
:type xpath: str
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
Examples::
# HTML snippet: <p class="product-name">Color TV</p>
Expand All @@ -407,7 +419,7 @@ def add_xpath(
"""
values = self._get_xpathvalues(xpath, **kw)
self.add_value(field_name, values, *processors, re=re, **kw)
return self.add_value(field_name, values, *processors, re=re, **kw)

def replace_xpath(
self,
Expand All @@ -416,12 +428,16 @@ def replace_xpath(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`add_xpath` but replaces collected data instead of adding it.
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
"""
values = self._get_xpathvalues(xpath, **kw)
self.replace_value(field_name, values, *processors, re=re, **kw)
return self.replace_value(field_name, values, *processors, re=re, **kw)

def get_xpath(
self,
Expand Down Expand Up @@ -468,7 +484,7 @@ def add_css(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
instead of a value, which is used to extract a list of unicode strings
Expand All @@ -479,15 +495,19 @@ def add_css(
:param css: the CSS selector to extract data from
:type css: str
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
Examples::
# HTML snippet: <p class="product-name">Color TV</p>
loader.add_css('name', 'p.product-name')
# HTML snippet: <p id="price">the price is $1200</p>
loader.add_css('price', 'p#price', re='the price is (.*)')
"""
values = self._get_cssvalues(css)
self.add_value(field_name, values, *processors, re=re, **kw)
return self.add_value(field_name, values, *processors, re=re, **kw)

def replace_css(
self,
Expand All @@ -496,12 +516,16 @@ def replace_css(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`add_css` but replaces collected data instead of adding it.
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
"""
values = self._get_cssvalues(css)
self.replace_value(field_name, values, *processors, re=re, **kw)
return self.replace_value(field_name, values, *processors, re=re, **kw)

def get_css(
self,
Expand Down Expand Up @@ -545,7 +569,7 @@ def add_jmes(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`ItemLoader.add_value` but receives a JMESPath selector
instead of a value, which is used to extract a list of unicode strings
Expand All @@ -556,6 +580,9 @@ def add_jmes(
:param jmes: the JMESPath selector to extract data from
:type jmes: str
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
Examples::
# HTML snippet: {"name": "Color TV"}
Expand All @@ -564,7 +591,7 @@ def add_jmes(
loader.add_jmes('price', 'price', TakeFirst(), re='the price is (.*)')
"""
values = self._get_jmesvalues(jmes)
self.add_value(field_name, values, *processors, re=re, **kw)
return self.add_value(field_name, values, *processors, re=re, **kw)

def replace_jmes(
self,
Expand All @@ -573,12 +600,15 @@ def replace_jmes(
*processors: Callable[..., Any],
re: Union[str, Pattern[str], None] = None,
**kw: Any,
) -> None:
) -> Self:
"""
Similar to :meth:`add_jmes` but replaces collected data instead of adding it.
:returns: The current ItemLoader instance for method chaining.
:rtype: ItemLoader
"""
values = self._get_jmesvalues(jmes)
self.replace_value(field_name, values, *processors, re=re, **kw)
return self.replace_value(field_name, values, *processors, re=re, **kw)

def get_jmes(
self,
Expand Down
16 changes: 16 additions & 0 deletions tests/test_selector_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,19 @@ def test_replace_jmes_re(self):
self.assertEqual(loader.get_output_value("url"), ["http://www.scrapy.org"])
loader.replace_jmes("url", "website.url", re=r"http://www\.(.+)")
self.assertEqual(loader.get_output_value("url"), ["scrapy.org"])

def test_fluent_interface(self):
    # Each add_*/replace_* call hands back the loader itself, so the whole
    # population sequence can be written as one chained expression that
    # ends in load_item().
    expected = {
        "name": ["marta"],
        "description": ["paragraph"],
        "url": ["http://foo"],
    }
    loaded = (
        ItemLoader(selector=self.selector)
        .add_xpath("name", "//body/text()")
        .replace_xpath("name", "//div/text()")
        .add_css("description", "div::text")
        .replace_css("description", "p::text")
        .add_value("url", "http://example.com")
        .replace_value("url", "http://foo")
        .load_item()
    )
    self.assertEqual(loaded, expected)

0 comments on commit 6558315

Please sign in to comment.