annotations.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. from __future__ import annotations
  2. from datetime import datetime
  3. from itertools import chain
  4. from textwrap import wrap
  5. import pandas as pd
  6. def stack_annotations(x: pd.DataFrame) -> pd.DataFrame:
  7. """Select earliest start, and include all annotations as a list.
  8. The list of strings results in a multi-line text encoding in the chart.
  9. """
  10. x = x.sort_values(["start", "belief_time"], ascending=True)
  11. x["content"].iloc[0] = list(chain(*(x["content"].tolist())))
  12. return x.head(1)
  13. def prepare_annotations_for_chart(
  14. df: pd.DataFrame,
  15. event_starts_after: datetime | None = None,
  16. event_ends_before: datetime | None = None,
  17. max_line_length: int = 60,
  18. ) -> pd.DataFrame:
  19. """Prepare a DataFrame with annotations for use in a chart.
  20. - Clips annotations outside the requested time window.
  21. - Wraps on whitespace with a given max line length
  22. - Stacks annotations for the same event
  23. """
  24. # Clip annotations outside the requested time window
  25. if event_starts_after is not None:
  26. df.loc[df["start"] < event_starts_after, "start"] = event_starts_after
  27. if event_ends_before is not None:
  28. df.loc[df["end"] > event_ends_before, "end"] = event_ends_before
  29. # Wrap on whitespace with some max line length
  30. df["content"] = df["content"].apply(wrap, args=[max_line_length])
  31. # Stack annotations for the same event
  32. if not df.empty:
  33. df = df.groupby("end", group_keys=False).apply(stack_annotations)
  34. return df