Home Assistant Unofficial Reference 2024.12.1
coordinator.py
1 """Data update coordinator for RSS/Atom feeds."""
2 
3 from __future__ import annotations
4 
5 from calendar import timegm
6 from datetime import datetime
7 import html
8 from logging import getLogger
9 from time import gmtime, struct_time
10 from typing import TYPE_CHECKING
11 from urllib.error import URLError
12 
13 import feedparser
14 
15 from homeassistant.config_entries import ConfigEntry
16 from homeassistant.core import HomeAssistant, callback
17 from homeassistant.helpers.storage import Store
18 from homeassistant.helpers.update_coordinator import DataUpdateCoordinator, UpdateFailed
19 from homeassistant.util import dt as dt_util
20 
21 from .const import DEFAULT_SCAN_INTERVAL, DOMAIN, EVENT_FEEDREADER
22 
23 DELAY_SAVE = 30
24 STORAGE_VERSION = 1
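# DELAY_SAVE batches storage writes (seconds); STORAGE_VERSION tags the
# on-disk schema for HA's Store helper.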


_LOGGER = getLogger(__name__)


class FeedReaderCoordinator(
    DataUpdateCoordinator[list[feedparser.FeedParserDict] | None]
):
    """Abstraction over Feedparser module."""

    config_entry: ConfigEntry

    def __init__(
        self,
        hass: HomeAssistant,
        url: str,
        max_entries: int,
        storage: StoredData,
    ) -> None:
        """Initialize the coordinator; poll the feed as per scan interval."""
        super().__init__(
            hass=hass,
            logger=_LOGGER,
            name=f"{DOMAIN} {url}",
            update_interval=DEFAULT_SCAN_INTERVAL,
        )
        self.url = url
        self.feed_author: str | None = None
        self.feed_version: str | None = None
        self._max_entries = max_entries
        self._storage = storage
        self._last_entry_timestamp: struct_time | None = None
        self._event_type = EVENT_FEEDREADER
        self._feed: feedparser.FeedParserDict | None = None
        self._feed_id = url
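        # The feed URL doubles as the storage key for persisted entry timestamps.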

    @callback
    def _log_no_entries(self) -> None:
        """Send no entries log at debug level."""
        _LOGGER.debug("No new entries to be published in feed %s", self.url)

    async def _async_fetch_feed(self) -> feedparser.FeedParserDict:
        """Fetch the feed data."""
        _LOGGER.debug("Fetching new data from feed %s", self.url)

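        # Hand the previous response's ETag / Last-Modified values back to the
        # server so an unchanged feed yields an empty entry list (conditional GET).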
        def _parse_feed() -> feedparser.FeedParserDict:
            return feedparser.parse(
                self.url,
                etag=None if not self._feed else self._feed.get("etag"),
                modified=None if not self._feed else self._feed.get("modified"),
            )

        feed = await self.hass.async_add_executor_job(_parse_feed)

        if not feed:
            raise UpdateFailed(f"Error fetching feed data from {self.url}")

        # The 'bozo' flag really only indicates that there was an issue
        # during the initial parsing of the XML, but it doesn't indicate
        # whether this is an unrecoverable error. In this case the
        # feedparser lib is trying a less strict parsing approach.
        # If an error is detected here, log a warning message but continue
        # processing the feed entries if present.
        if feed.bozo != 0:
            if isinstance(feed.bozo_exception, URLError):
                raise UpdateFailed(
                    f"Error fetching feed data from {self.url}: {feed.bozo_exception}"
                )

            # no connection issue, but parsing issue
            _LOGGER.warning(
                "Possible issue parsing feed %s: %s",
                self.url,
                feed.bozo_exception,
            )
        return feed

    async def async_setup(self) -> None:
        """Set up the feed manager."""
        feed = await self._async_fetch_feed()
        self.logger.debug("Feed data fetched from %s: %s", self.url, feed["feed"])
        if feed_author := feed["feed"].get("author"):
            self.feed_author = html.unescape(feed_author)
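        # feedparser.api.SUPPORTED_VERSIONS maps the parser's version token
        # (e.g. "rss20") to a human-readable name such as "RSS 2.0".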
        self.feed_version = feedparser.api.SUPPORTED_VERSIONS.get(feed["version"])
        self._feed = feed

    async def _async_update_data(self) -> list[feedparser.FeedParserDict] | None:
        """Update the feed and publish new entries to the event bus."""
        assert self._feed is not None
        # _last_entry_timestamp is not yet set during async_setup, but the feed
        # data has already been fetched there, so reuse it instead of fetching again
        if self._last_entry_timestamp:
            self._feed = await self._async_fetch_feed()

        # Using etag and modified, if there's no new data available,
        # the entries list will be empty
        _LOGGER.debug(
            "%s entries available in feed %s",
            len(self._feed.entries),
            self.url,
        )
        if not self._feed.entries:
            self._log_no_entries()
            return None

        if TYPE_CHECKING:
            assert isinstance(self._feed.entries, list)

        self._filter_entries()
        self._publish_new_entries()

        _LOGGER.debug("Fetch from feed %s completed", self.url)

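        # Persist the newest timestamp so already-seen entries are not re-fired
        # after a Home Assistant restart.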
        if self._last_entry_timestamp:
            self._storage.async_put_timestamp(
                self._feed_id, self._last_entry_timestamp
            )

        return self._feed.entries

    @callback
    def _filter_entries(self) -> None:
        """Truncate the entry list in place to at most _max_entries items."""
        assert self._feed is not None
        if len(self._feed.entries) > self._max_entries:
            _LOGGER.debug(
                "Processing only the first %s entries in feed %s",
                self._max_entries,
                self.url,
            )
            self._feed.entries = self._feed.entries[0 : self._max_entries]

    @callback
    def _update_and_fire_entry(self, entry: feedparser.FeedParserDict) -> None:
        """Update last_entry_timestamp and fire entry."""
        # Check if the entry has an updated or published date.
        # Prefer the updated date, since generally `updated` >= `published`.
        if time_stamp := entry.get("updated_parsed") or entry.get("published_parsed"):
            self._last_entry_timestamp = time_stamp
        else:
            _LOGGER.debug(
                "No updated_parsed or published_parsed info available for entry %s",
                entry,
            )
        entry["feed_url"] = self.url
        self.hass.bus.async_fire(self._event_type, entry)
        _LOGGER.debug("New event fired for entry %s", entry.get("link"))

    @callback
    def _publish_new_entries(self) -> None:
        """Publish new entries to the event bus."""
        assert self._feed is not None
        new_entry_count = 0
        firstrun = False
        self._last_entry_timestamp = self._storage.get_timestamp(self._feed_id)
        if not self._last_entry_timestamp:
            firstrun = True
            # Set last entry timestamp as epoch time if not available
            self._last_entry_timestamp = dt_util.utc_from_timestamp(0).timetuple()
        # Locally cache self._last_entry_timestamp so that entries published at
        # identical times can be processed
        last_entry_timestamp = self._last_entry_timestamp
        for entry in self._feed.entries:
            if firstrun or (
                (
                    time_stamp := entry.get("updated_parsed")
                    or entry.get("published_parsed")
                )
                and time_stamp > last_entry_timestamp
            ):
                self._update_and_fire_entry(entry)
                new_entry_count += 1
            else:
                _LOGGER.debug("Already processed entry %s", entry.get("link"))
        if new_entry_count == 0:
            self._log_no_entries()
        else:
            _LOGGER.debug("%d entries published in feed %s", new_entry_count, self.url)
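
# A minimal consumption sketch (assumed usage, not part of this module):
# other code can subscribe to the events fired above, for example:
#
#     from homeassistant.core import Event, callback
#
#     @callback
#     def handle_feed_entry(event: Event) -> None:
#         """React to a newly published feed entry."""
#         print(event.data.get("title"), event.data.get("link"))
#
#     hass.bus.async_listen(EVENT_FEEDREADER, handle_feed_entry)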


class StoredData:
    """Represent a data storage."""

    def __init__(self, hass: HomeAssistant) -> None:
        """Initialize data storage."""
        self._data: dict[str, struct_time] = {}
        self.hass = hass
        self._store: Store[dict[str, str]] = Store(hass, STORAGE_VERSION, DOMAIN)
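        # The Store helper persists this mapping as JSON under .storage/<DOMAIN>.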
        self.is_initialized = False

    async def async_setup(self) -> None:
        """Set up storage."""
        if (store_data := await self._store.async_load()) is not None:
            # Make sure that dst is set to 0 by using gmtime() on the timestamp,
            # so stored values compare consistently with the UTC *_parsed
            # struct_time values produced by feedparser.
            self._data = {
                feed_id: gmtime(datetime.fromisoformat(timestamp_string).timestamp())
                for feed_id, timestamp_string in store_data.items()
            }
        self.is_initialized = True

    def get_timestamp(self, feed_id: str) -> struct_time | None:
        """Return stored timestamp for given feed id."""
        return self._data.get(feed_id)

    @callback
    def async_put_timestamp(self, feed_id: str, timestamp: struct_time) -> None:
        """Update timestamp for given feed id."""
        self._data[feed_id] = timestamp
        self._store.async_delay_save(self._async_save_data, DELAY_SAVE)

    @callback
    def _async_save_data(self) -> dict[str, str]:
        """Save feed data to storage."""
        return {
            feed_id: dt_util.utc_from_timestamp(timegm(struct_utc)).isoformat()
            for feed_id, struct_utc in self._data.items()
        }
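
# A minimal wiring sketch (assumed setup; the integration's real config-entry
# setup lives elsewhere and may differ):
#
#     storage = StoredData(hass)
#     await storage.async_setup()
#     coordinator = FeedReaderCoordinator(
#         hass, "https://example.com/feed.xml", max_entries=20, storage=storage
#     )
#     await coordinator.async_setup()
#     await coordinator.async_config_entry_first_refresh()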