entity_address_utils.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. from __future__ import annotations
  2. import logging
  3. from urllib.parse import urlparse
  4. import re
  5. from tldextract import extract as tld_extract
  6. from tldextract.tldextract import ExtractResult as TldExtractResult
  7. from flask import request, current_app, has_request_context
  8. from flexmeasures.utils.time_utils import get_first_day_of_next_month
  9. """
  10. Functionality to support parsing and building Entity Addresses as defined by USEF [1].
  11. See our documentation for more details.
  12. [1] https://www.usef.energy/app/uploads/2020/01/USEF-Flex-Trading-Protocol-Specifications-1.01.pdf
  13. """
  14. ADDR_SCHEME = "ea1"
  15. FM1_ADDR_SCHEME = "fm1"
  16. FM0_ADDR_SCHEME = "fm0"
  17. class EntityAddressException(Exception):
  18. pass
  19. def get_host() -> str:
  20. """Get host from the context of the request.
  21. Strips off www. but keeps subdomains.
  22. Can be localhost, too.
  23. """
  24. if has_request_context():
  25. host = urlparse(request.url).netloc.lstrip("www.")
  26. if host[:9] != "127.0.0.1":
  27. return host
  28. # Assume localhost (for CLI/tests/simulations)
  29. return "localhost"
  30. def build_entity_address(
  31. entity_info: dict,
  32. entity_type: str,
  33. host: str | None = None,
  34. fm_scheme: str = FM1_ADDR_SCHEME,
  35. ) -> str:
  36. """
  37. Build an entity address.
  38. fm1 type entity address should use entity_info["sensor_id"]
  39. todo: implement entity addresses for actuators with entity_info["actuator_id"] (first ensuring globally unique ids across sensors and actuators)
  40. If the host is not given, it is attempted to be taken from the request.
  41. entity_info is expected to contain the required fields for the custom string.
  42. Returns the address as string.
  43. """
  44. if host is None:
  45. host = get_host()
  46. def build_field(field: str, required: bool = True):
  47. if required and field not in entity_info:
  48. raise EntityAddressException(
  49. f"Cannot find required field '{field}' to build entity address."
  50. )
  51. if field not in entity_info:
  52. return ""
  53. return f"{entity_info[field]}:"
  54. if fm_scheme == FM1_ADDR_SCHEME: # and entity_type == "sensor":
  55. locally_unique_str = f"{build_field('sensor_id')}"
  56. # elif fm_scheme == FM1_ADDR_SCHEME and entity_type == "actuator":
  57. # locally_unique_str = f"{build_field('actuator_id')}"
  58. elif fm_scheme != FM0_ADDR_SCHEME:
  59. raise EntityAddressException(
  60. f"Unrecognized FlexMeasures scheme for entity addresses: {fm_scheme}"
  61. )
  62. elif entity_type == "connection":
  63. locally_unique_str = (
  64. f"{build_field('owner_id', required=False)}{build_field('asset_id')}"
  65. )
  66. elif entity_type == "weather_sensor":
  67. locally_unique_str = f"{build_field('weather_sensor_type_name')}{build_field('latitude')}{build_field('longitude')}"
  68. elif entity_type == "market":
  69. locally_unique_str = f"{build_field('market_name')}"
  70. elif entity_type == "event":
  71. locally_unique_str = f"{build_field('owner_id', required=False)}{build_field('asset_id')}{build_field('event_id')}{build_field('event_type')}"
  72. else:
  73. raise EntityAddressException(f"Unrecognized entity type: {entity_type}")
  74. return (
  75. build_ea_scheme_and_naming_authority(host)
  76. + ":"
  77. + fm_scheme
  78. + "."
  79. + locally_unique_str.rstrip(":")
  80. )
  81. def parse_entity_address( # noqa: C901
  82. entity_address: str,
  83. entity_type: str,
  84. fm_scheme: str = FM1_ADDR_SCHEME,
  85. ) -> dict:
  86. """
  87. Parses an entity address into an info dict.
  88. Returns a dictionary with scheme, naming_authority and various other fields,
  89. depending on the entity type and FlexMeasures scheme (see examples above).
  90. Returns None if entity type is unknown or entity_address is not parse-able.
  91. We recommend to `return invalid_domain()` in that case.
  92. Examples for the fm1 scheme:
  93. sensor = ea1.2021-01.io.flexmeasures:fm1.42
  94. sensor = ea1.2021-01.io.flexmeasures:fm1.<sensor_id>
  95. connection = ea1.2021-01.io.flexmeasures:fm1.<sensor_id>
  96. market = ea1.2021-01.io.flexmeasures:fm1.<sensor_id>
  97. weather_station = ea1.2021-01.io.flexmeasures:fm1.<sensor_id>
  98. todo: UDI events are not yet modelled in the fm1 scheme, but will probably be ea1.2021-01.io.flexmeasures:fm1.<actuator_id>
  99. Examples for the fm0 scheme:
  100. connection = ea1.2021-01.localhost:fm0.40:30
  101. connection = ea1.2021-01.io.flexmeasures:fm0.<owner_id>:<asset_id>
  102. weather_sensor = ea1.2021-01.io.flexmeasures:fm0.temperature:52:73.0
  103. weather_sensor = ea1.2021-01.io.flexmeasures:fm0.<sensor_type>:<latitude>:<longitude>
  104. market = ea1.2021-01.io.flexmeasures:fm0.epex_da
  105. market = ea1.2021-01.io.flexmeasures:fm0.<market_name>
  106. event = ea1.2021-01.io.flexmeasures:fm0.40:30:302:soc
  107. event = ea1.2021-01.io.flexmeasures:fm0.<owner_id>:<asset_id>:<event_id>:<event_type>
  108. For the fm0 scheme, the 'fm0.' part is optional, for backwards compatibility.
  109. """
  110. # Check the scheme and naming authority date
  111. if not entity_address.startswith(ADDR_SCHEME):
  112. raise EntityAddressException(
  113. f"A valid type 1 USEF entity address starts with '{ADDR_SCHEME}', please review {entity_address}"
  114. )
  115. date_regex = r"([0-9]{4})-(0[1-9]|1[012])"
  116. if not re.search(rf"^{date_regex}$", entity_address[4:11]):
  117. raise EntityAddressException(
  118. f"After '{ADDR_SCHEME}.', a date specification of the format {date_regex} is expected."
  119. )
  120. # Check the entity type
  121. if entity_type not in ("sensor", "connection", "weather_sensor", "market", "event"):
  122. raise EntityAddressException(f"Unrecognized entity type: {entity_type}")
  123. def validate_ea_for_fm_scheme(ea: dict, fm_scheme: str):
  124. if "fm_scheme" not in ea:
  125. # Backwards compatibility: assume fm0 if fm_scheme is not specified
  126. ea["fm_scheme"] = FM0_ADDR_SCHEME
  127. scheme = ea["scheme"]
  128. naming_authority = ea["naming_authority"]
  129. if ea["fm_scheme"] != fm_scheme:
  130. raise EntityAddressException(
  131. f"A valid type {fm_scheme[2:]} FlexMeasures entity address starts with '{scheme}.{naming_authority}:{fm_scheme}', please review {entity_address}"
  132. )
  133. if fm_scheme == FM1_ADDR_SCHEME:
  134. # Check the FlexMeasures scheme
  135. if entity_address.split(":")[1][: len(fm_scheme) + 1] != FM1_ADDR_SCHEME + ".":
  136. raise EntityAddressException(
  137. f"A valid type {fm_scheme[2:]} FlexMeasures entity address starts with '{build_ea_scheme_and_naming_authority(get_host())}:{fm_scheme}.', please review {entity_address}"
  138. )
  139. match = re.search(
  140. r"^"
  141. r"(?P<scheme>.+)\."
  142. rf"(?P<naming_authority>{date_regex}\.[^:]+)" # everything until the colon (no port)
  143. r":"
  144. r"((?P<fm_scheme>.+)\.)"
  145. r"(?P<sensor_id>\d+)"
  146. r"$",
  147. entity_address,
  148. )
  149. if match is None:
  150. raise EntityAddressException(
  151. f"Could not parse {entity_type} {entity_address}."
  152. )
  153. value_types = {
  154. "scheme": str,
  155. "naming_authority": str,
  156. "fm_scheme": str,
  157. "sensor_id": int,
  158. }
  159. elif fm_scheme != FM0_ADDR_SCHEME:
  160. raise EntityAddressException(
  161. f"Unrecognized FlexMeasures scheme for entity addresses: {fm_scheme}"
  162. )
  163. elif entity_type == "connection":
  164. match = re.search(
  165. r"^"
  166. r"(?P<scheme>.+)\."
  167. rf"(?P<naming_authority>{date_regex}\.[^:]+)" # everything until the colon (no port)
  168. r":"
  169. r"((?P<fm_scheme>.+)\.)*" # for backwards compatibility, missing fm_scheme is interpreted as fm0
  170. r"((?P<owner_id>\d+):)*" # owner id is optional
  171. r"(?P<asset_id>\d+)"
  172. r"$",
  173. entity_address,
  174. )
  175. if match is None:
  176. raise EntityAddressException(
  177. f"Could not parse {entity_type} {entity_address}."
  178. )
  179. value_types = {
  180. "scheme": str,
  181. "naming_authority": str,
  182. "owner_id": int,
  183. "asset_id": int,
  184. }
  185. elif entity_type == "weather_sensor":
  186. match = re.search(
  187. r"^"
  188. r"(?P<scheme>.+)"
  189. r"\."
  190. rf"(?P<naming_authority>{date_regex}\.[^:]+)"
  191. r":"
  192. r"((?P<fm_scheme>.+)\.)*" # for backwards compatibility, missing fm_scheme is interpreted as fm0
  193. r"(?=[a-zA-Z])(?P<weather_sensor_type_name>[\w\s]+)" # should start with at least one letter
  194. r":"
  195. r"(?P<latitude>\-?\d+(\.\d+)?)"
  196. r":"
  197. r"(?P<longitude>\-?\d+(\.\d+)?)"
  198. r"$",
  199. entity_address,
  200. )
  201. if match is None:
  202. raise EntityAddressException(
  203. f"Could not parse {entity_type} {entity_address}."
  204. )
  205. value_types = {
  206. "scheme": str,
  207. "naming_authority": str,
  208. "weather_sensor_type_name": str,
  209. "latitude": float,
  210. "longitude": float,
  211. }
  212. elif entity_type == "market":
  213. match = re.search(
  214. r"^"
  215. r"(?P<scheme>.+)"
  216. r"\."
  217. rf"(?P<naming_authority>{date_regex}\.[^:]+)"
  218. r":"
  219. r"((?P<fm_scheme>.+)\.)*" # for backwards compatibility, missing fm_scheme is interpreted as fm0
  220. r"(?=[a-zA-Z])(?P<market_name>[\w]+)" # should start with at least one letter
  221. r"$",
  222. entity_address,
  223. )
  224. if match is None:
  225. raise EntityAddressException(
  226. f"Could not parse {entity_type} {entity_address}."
  227. )
  228. value_types = {"scheme": str, "naming_authority": str, "market_name": str}
  229. elif entity_type == "event":
  230. match = re.search(
  231. r"^"
  232. r"(?P<scheme>.+)"
  233. r"\."
  234. rf"(?P<naming_authority>{date_regex}\.[^:]+)"
  235. r":"
  236. r"((?P<fm_scheme>.+)\.)*" # for backwards compatibility, missing fm_scheme is interpreted as fm0
  237. r"((?P<owner_id>\d+):)*" # owner id is optional
  238. r"(?P<asset_id>\d+)"
  239. r":"
  240. r"(?P<event_id>\d+)"
  241. r":"
  242. r"(?P<event_type>.+)"
  243. r"$",
  244. entity_address,
  245. )
  246. if match is None:
  247. raise EntityAddressException(
  248. f"Could not parse {entity_type} {entity_address}."
  249. )
  250. value_types = {
  251. "scheme": str,
  252. "naming_authority": str,
  253. "owner_id": int,
  254. "asset_id": int,
  255. "event_id": int,
  256. "event_type": str,
  257. }
  258. else:
  259. # Finally, we simply raise without precise information what went wrong
  260. raise EntityAddressException(f"Could not parse {entity_address}.")
  261. ea = _typed_regex_results(match, value_types)
  262. validate_ea_for_fm_scheme(ea, fm_scheme)
  263. return ea
  264. def build_ea_scheme_and_naming_authority(
  265. host: str, host_auth_start_month: str | None = None
  266. ) -> str:
  267. """
  268. This function creates the host identification part of
  269. USEF's EA1 addressing scheme, so everything but the locally unique string.
  270. If not given nor configured, host_auth_start_month is the start of the next month for
  271. localhost.
  272. """
  273. domain_parts: TldExtractResult = get_domain_parts(host)
  274. if host_auth_start_month is None:
  275. config_var_domain_key = ".".join(
  276. filter(
  277. lambda x: x,
  278. [domain_parts.subdomain, domain_parts.domain, domain_parts.suffix],
  279. )
  280. )
  281. if domain_parts.domain in ("localhost", "127.0.0.1"):
  282. host_auth_start_month = get_first_day_of_next_month().strftime("%Y-%m")
  283. elif config_var_domain_key in current_app.config.get(
  284. "FLEXMEASURES_HOSTS_AND_AUTH_START", {}
  285. ):
  286. host_auth_start_month = current_app.config.get(
  287. "FLEXMEASURES_HOSTS_AND_AUTH_START", {}
  288. )[config_var_domain_key]
  289. else:
  290. raise EntityAddressException(
  291. f"Could not find out when authority for {config_var_domain_key} started. Is FLEXMEASURES_HOSTS_AND_AUTH_START configured for it?"
  292. )
  293. regex = r"^\d{4}-\d{2}$"
  294. if not re.search(regex, host_auth_start_month):
  295. raise ValueError(
  296. f"{host_auth_start_month} should adhere to the format {regex}."
  297. )
  298. if not int(host_auth_start_month[-2:]) in range(1, 13):
  299. raise ValueError(
  300. f"Month in {host_auth_start_month} should be in the range of 1 to 12."
  301. )
  302. reversed_domain_name = reverse_domain_name(domain_parts)
  303. if reversed_domain_name == "":
  304. raise Exception(f"Could not make domain name from {host}!")
  305. return f"{ADDR_SCHEME}.{host_auth_start_month}.{reversed_domain_name}"
  306. def reverse_domain_name(domain: str | TldExtractResult) -> str:
  307. """
  308. Returns the reverse notation of the domain.
  309. You can pass in a string domain or an extraction result from tldextract
  310. """
  311. if isinstance(domain, str):
  312. domain_parts: TldExtractResult = get_domain_parts(domain)
  313. else:
  314. domain_parts = domain
  315. suffix = domain_parts.suffix
  316. if suffix != "":
  317. if "." in suffix:
  318. suffix = ".".join(suffix.split(".")[::-1])
  319. suffix = f"{suffix}."
  320. domain = domain_parts.domain
  321. reversed_subdomain = ""
  322. if domain_parts.subdomain != "":
  323. sd_list = ".".join(domain_parts.subdomain.split(".")[::-1])
  324. reversed_subdomain = f".{sd_list}"
  325. return f"{suffix}{domain}{reversed_subdomain}"
  326. def get_domain_parts(domain: str) -> TldExtractResult:
  327. """wrapper for calling tldextract as it logs things about file locks we don't care about."""
  328. logger = logging.getLogger()
  329. level = logger.getEffectiveLevel()
  330. logger.setLevel(logging.ERROR)
  331. domain_parts: TldExtractResult = tld_extract(domain)
  332. logging.getLogger().setLevel(level)
  333. return domain_parts
  334. def _typed_regex_results(match, value_types) -> dict:
  335. return {
  336. k: v_type(v) if v is not None else v
  337. for k, v, v_type in _zip_dic(match.groupdict(), value_types)
  338. }
  339. def _zip_dic(*dicts):
  340. for i in set(dicts[0]).intersection(*dicts[1:]):
  341. yield (i,) + tuple(d[i] for d in dicts)