Files
core/homeassistant/util/json.py
J. Nick Koston 8b067e83f7 Initial orjson support take 3 (#73849)
* Initial orjson support take 2

Still need to work out problem building wheels

--

Redux of #72754 / #32153 Now possible since the following is solved:
ijl/orjson#220 (comment)

This implements orjson where we use our default encoder.  This does not implement orjson where `ExtendedJSONEncoder` is used as these areas tend to be called far less frequently.  If it's desired, this could be done in a follow-up, but it seemed like a case of diminishing returns (except maybe for large diagnostics files, or traces, but those are not expected to be downloaded frequently).

Areas where this makes a perceptible difference:
- Anything that subscribes to entities (Initial subscribe_entities payload)
- Initial download of registries on first connection / restore
- History queries
- Saving states to the database
- Large logbook queries
- Anything that subscribes to events (appdaemon)

Caveats:
orjson supports serializing dataclasses natively (and much faster) which
eliminates the need to implement `as_dict` in many places
when the data is already in a dataclass. This works
well as long as all the data in the dataclass can also
be serialized. I audited all places where we have an `as_dict`
for a dataclass and found that only backups needed to be adjusted (support for `Path` needed to be added for backups).  I was a little bit worried about `SensorExtraStoredData` with `Decimal`, but it all seems to work out since it is converted before it gets to the JSON encoding cc @dgomes

If it turns out to be a problem we can disable this
with option |= [orjson.OPT_PASSTHROUGH_DATACLASS](https://github.com/ijl/orjson#opt_passthrough_dataclass) and it
will fall back to `as_dict`

It's quite impressive for history queries
<img width="1271" alt="Screen_Shot_2022-05-30_at_23_46_30" src="https://user-images.githubusercontent.com/663432/171145699-661ad9db-d91d-4b2d-9c1a-9d7866c03a73.png">

* use for views as well

* handle UnicodeEncodeError

* tweak

* DRY

* DRY

* not needed

* fix tests

* Update tests/components/http/test_view.py

* Update tests/components/http/test_view.py

* black

* templates
2022-06-22 21:59:51 +02:00

131 lines
3.9 KiB
Python

"""JSON utility functions."""
from __future__ import annotations
from collections import deque
from collections.abc import Callable
import json
import logging
from typing import Any
import orjson
from homeassistant.core import Event, State
from homeassistant.exceptions import HomeAssistantError
from .file import write_utf8_file, write_utf8_file_atomic
_LOGGER = logging.getLogger(__name__)
class SerializationError(HomeAssistantError):
    """Raised when data cannot be serialized to JSON."""
class WriteError(HomeAssistantError):
    """Raised when the data could not be written."""
def load_json(filename: str, default: list | dict | None = None) -> list | dict:
    """Read a JSON file and return its parsed content.

    A missing file is not an error: it is logged at debug level and
    ``default`` is returned instead, or an empty dict when ``default`` is
    None.

    Raises HomeAssistantError when reading or parsing fails with a
    ValueError or with an OSError other than FileNotFoundError.
    """
    try:
        with open(filename, encoding="utf-8") as json_file:
            raw_content = json_file.read()
        return orjson.loads(raw_content)  # type: ignore[no-any-return]
    except FileNotFoundError:
        # Not fatal: fall through and hand back the default below
        _LOGGER.debug("JSON file not found: %s", filename)
    except ValueError as error:
        _LOGGER.exception("Could not parse JSON content: %s", filename)
        raise HomeAssistantError(error) from error
    except OSError as error:
        _LOGGER.exception("JSON file reading failed: %s", filename)
        raise HomeAssistantError(error) from error

    if default is None:
        return {}
    return default
def save_json(
    filename: str,
    data: list | dict,
    private: bool = False,
    *,
    encoder: type[json.JSONEncoder] | None = None,
    atomic_writes: bool = False,
) -> None:
    """Serialize data to JSON and write it to a file.

    When ``encoder`` is given, the standard ``json`` module is used with
    that encoder class; otherwise the data is serialized with orjson. Both
    paths request two-space indentation.

    ``private`` is forwarded unchanged to the file writer, and
    ``atomic_writes`` selects ``write_utf8_file_atomic`` instead of
    ``write_utf8_file``.

    Returns nothing. Raises SerializationError when serialization fails
    with a TypeError; the message lists the paths of the offending values.
    """
    # Serializer used to locate the offending values when serialization
    # fails. It has to be the one that produced the error: orjson rejects
    # some input the json module accepts (e.g. non-str dict keys) and accepts
    # some input it rejects (e.g. dataclasses), so probing with the other
    # serializer reports misleading paths, or none at all.
    dump: Callable[[Any], Any] = json.dumps
    try:
        if encoder:
            json_data = json.dumps(data, indent=2, cls=encoder)
        else:
            dump = orjson.dumps
            json_data = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode("utf-8")
    except TypeError as error:
        bad_data = format_unserializable_data(
            find_paths_unserializable_data(data, dump=dump)
        )
        msg = f"Failed to serialize to JSON: {filename}. Bad data at {bad_data}"
        _LOGGER.error(msg)
        raise SerializationError(msg) from error

    if atomic_writes:
        write_utf8_file_atomic(filename, json_data, private)
    else:
        write_utf8_file(filename, json_data, private)
def format_unserializable_data(data: dict[str, Any]) -> str:
    """Format the output of find_paths_unserializable_data in a friendly way.

    Each entry is rendered as <path>=<value>(<type>) and the entries are
    joined with ", ". An empty mapping yields an empty string.
    """
    return ", ".join(
        f"{path}={value}({type(value)})" for path, value in data.items()
    )
def find_paths_unserializable_data(
    bad_data: Any, *, dump: Callable[[Any], str] = json.dumps
) -> dict[str, Any]:
    """Find the paths to unserializable data.

    Walks ``bad_data`` breadth-first and calls ``dump`` on each value; any
    value for which ``dump`` raises ValueError or TypeError is either
    descended into (dicts, lists, objects exposing ``as_dict``) or recorded
    as invalid.

    Returns a dict mapping a JSONPath-like location (rooted at ``$``) to the
    offending value. Invalid dict keys are reported as ``<path><key: KEY>``.
    A container that contains itself is reported once, at the path where the
    cycle closes, instead of being walked forever.

    This method is slow! Only use for error handling.
    """
    # Each entry carries the chain of containers it was reached through, so a
    # circular reference can be told apart from a value that is merely shared.
    # Holding the ancestors themselves (not their ids) keeps them alive, which
    # rules out false matches caused by id reuse.
    to_process: deque[tuple[Any, str, tuple[Any, ...]]] = deque(
        [(bad_data, "$", ())]
    )
    invalid: dict[str, Any] = {}

    while to_process:
        obj, obj_path, ancestors = to_process.popleft()

        try:
            dump(obj)
            continue
        except (ValueError, TypeError):
            pass

        # Descending into a container that is its own ancestor would never
        # terminate; report the cycle at this path and stop here.
        if any(obj is ancestor for ancestor in ancestors):
            invalid[obj_path] = obj
            continue
        lineage = (*ancestors, obj)

        # We convert objects with as_dict to their dict values so we can find bad data inside it
        if hasattr(obj, "as_dict"):
            desc = obj.__class__.__name__
            if isinstance(obj, State):
                desc += f": {obj.entity_id}"
            elif isinstance(obj, Event):
                desc += f": {obj.event_type}"

            obj_path += f"({desc})"
            obj = obj.as_dict()

        if isinstance(obj, dict):
            for key, value in obj.items():
                try:
                    # Is key valid?
                    dump({key: None})
                except TypeError:
                    invalid[f"{obj_path}<key: {key}>"] = key
                else:
                    # Process value
                    to_process.append((value, f"{obj_path}.{key}", lineage))
        elif isinstance(obj, list):
            for idx, value in enumerate(obj):
                to_process.append((value, f"{obj_path}[{idx}]", lineage))
        else:
            invalid[obj_path] = obj

    return invalid