utils.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. from __future__ import annotations
  2. from datetime import datetime, timedelta
  3. from sqlalchemy import select
  4. from flexmeasures.data import db
  5. from flexmeasures.data.models.forecasting.exceptions import NotEnoughDataException
  6. from flexmeasures.data.models.time_series import Sensor
  7. from flexmeasures.utils.time_utils import as_server_time
  8. def check_data_availability(
  9. old_sensor_model,
  10. old_time_series_data_model,
  11. forecast_start: datetime,
  12. forecast_end: datetime,
  13. query_window: tuple[datetime, datetime],
  14. horizon: timedelta,
  15. ):
  16. """Check if enough data is available in the database in the first place,
  17. for training window and lagged variables. Otherwise, suggest new forecast period.
  18. TODO: we could also check regressor data, if we get regressor specs passed in here.
  19. """
  20. q = (
  21. select(old_time_series_data_model)
  22. .join(old_sensor_model.__class__)
  23. .filter(old_sensor_model.__class__.name == old_sensor_model.name)
  24. )
  25. first_value = db.session.scalars(
  26. q.order_by(old_time_series_data_model.event_start.asc()).limit(1)
  27. ).first()
  28. last_value = db.session.scalars(
  29. q.order_by(old_time_series_data_model.event_start.desc()).limit(1)
  30. ).first()
  31. if first_value is None:
  32. raise NotEnoughDataException(
  33. "No data available at all. Forecasting impossible."
  34. )
  35. first = as_server_time(first_value.event_start)
  36. last = as_server_time(last_value.event_start)
  37. if query_window[0] < first:
  38. suggested_start = forecast_start + (first - query_window[0])
  39. raise NotEnoughDataException(
  40. f"Not enough data to forecast {old_sensor_model.name} "
  41. f"for the forecast window {as_server_time(forecast_start)} to {as_server_time(forecast_end)}. "
  42. f"I needed to query from {as_server_time(query_window[0])}, "
  43. f"but the first value available is from {first} to {first + old_sensor_model.event_resolution}. "
  44. f"Consider setting the start date to {as_server_time(suggested_start)}."
  45. )
  46. if query_window[1] - horizon > last + old_sensor_model.event_resolution:
  47. suggested_end = forecast_end + (last - (query_window[1] - horizon))
  48. raise NotEnoughDataException(
  49. f"Not enough data to forecast {old_sensor_model.name} "
  50. f"for the forecast window {as_server_time(forecast_start)} to {as_server_time(forecast_end)}. "
  51. f"I needed to query until {as_server_time(query_window[1] - horizon)}, "
  52. f"but the last value available is from {last} to {last + old_sensor_model.event_resolution}. "
  53. f"Consider setting the end date to {as_server_time(suggested_end)}."
  54. )
  55. def create_lags(
  56. n_lags: int,
  57. sensor: Sensor,
  58. horizon: timedelta,
  59. resolution: timedelta,
  60. use_periodicity: bool,
  61. ) -> list[timedelta]:
  62. """List the lags for this asset type, using horizon and resolution information."""
  63. lags = []
  64. # Include a zero lag in case of backwards forecasting
  65. # Todo: we should always take into account the latest forecast, so always append the zero lag if that belief exists
  66. if horizon < timedelta(hours=0):
  67. lags.append(timedelta(hours=0))
  68. # Include latest measurements
  69. lag_period = resolution
  70. number_of_nan_lags = 1 + (horizon - resolution) // lag_period
  71. for L in range(n_lags):
  72. lags.append((L + number_of_nan_lags) * lag_period)
  73. # Include relevant measurements given the asset's periodicity
  74. if use_periodicity and sensor.get_attribute("daily_seasonality"):
  75. lag_period = timedelta(days=1)
  76. number_of_nan_lags = 1 + (horizon - resolution) // lag_period
  77. for L in range(n_lags):
  78. lags.append((L + number_of_nan_lags) * lag_period)
  79. # Remove possible double entries
  80. return list(set(lags))
  81. def get_query_window(
  82. training_start: datetime, forecast_end: datetime, lags: list[timedelta]
  83. ) -> tuple[datetime, datetime]:
  84. """Derive query window from start and end date, as well as lags (if any).
  85. This makes sure we have enough data for lagging and forecasting."""
  86. if not lags:
  87. query_start = training_start
  88. else:
  89. query_start = training_start - max(lags)
  90. query_end = forecast_end
  91. return query_start, query_end
  92. def set_training_and_testing_dates(
  93. forecast_start: datetime,
  94. training_and_testing_period: timedelta | tuple[datetime, datetime],
  95. ) -> tuple[datetime, datetime]:
  96. """If needed (if training_and_testing_period is a timedelta),
  97. derive training_start and testing_end from forecasting_start,
  98. otherwise simply return training_and_testing_period.
  99. |------forecast_horizon/belief_horizon------|
  100. | |-------resolution-------|
  101. belief_time event_start event_end
  102. |--resolution--|--resolution--|--resolution--|--resolution--|--resolution--|--resolution--|
  103. |---------forecast_horizon--------| | | | | |
  104. belief_time event_start | | | | | |
  105. |---------forecast_horizon--------| | | | |
  106. belief_time event_start | | | | |
  107. | |---------forecast_horizon--------| | | |
  108. | belief_time event_start | | | |
  109. |--------max_lag-------|--------training_and_testing_period---------|---------------forecast_period--------------|
  110. query_start training_start | | testing_end/forecast_start | forecast_end
  111. |------min_lag-----| | |---------forecast_horizon--------| | |
  112. | | belief_time event_start | | |
  113. | | | |---------forecast_horizon--------| |
  114. | | | belief_time event_start | |
  115. | | | | |---------forecast_horizon--------|
  116. | | | | belief_time event_start |
  117. |--------------------------------------------------query_window--------------------------------------------------|
  118. """
  119. if isinstance(training_and_testing_period, timedelta):
  120. return forecast_start - training_and_testing_period, forecast_start
  121. else:
  122. return training_and_testing_period