12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- from __future__ import annotations
- from datetime import datetime
- from itertools import chain
- from textwrap import wrap
- import pandas as pd
def stack_annotations(x: pd.DataFrame) -> pd.DataFrame:
    """Select earliest start, and include all annotations as a list.

    The list of strings results in a multi-line text encoding in the chart.

    :param x: annotations to merge; must have "start", "belief_time" and
              "content" columns, where each "content" cell is an iterable
              of strings (e.g. the lines of one wrapped annotation).
    :returns: a new single-row DataFrame holding the row with the earliest
              "start" (ties broken by earliest "belief_time"), whose
              "content" cell is the concatenation of all content lines in
              that same sort order. An empty input is returned empty.
              The input frame is not modified.
    """
    ordered = x.sort_values(["start", "belief_time"], ascending=True)
    if ordered.empty:
        # Nothing to stack (and no first row to write the merged content to)
        return ordered

    merged_content = list(chain.from_iterable(ordered["content"]))

    # Build the result row explicitly rather than through chained assignment
    # (x["content"].iloc[0] = ...), which is a silent no-op under pandas
    # Copy-on-Write and triggers SettingWithCopy warnings otherwise.
    stacked = ordered.head(1).copy()
    # Wrapping the list in an index-aligned object Series stores it as one cell
    stacked["content"] = pd.Series(
        [merged_content], index=stacked.index, dtype=object
    )
    return stacked
def prepare_annotations_for_chart(
    df: pd.DataFrame,
    event_starts_after: datetime | None = None,
    event_ends_before: datetime | None = None,
    max_line_length: int = 60,
) -> pd.DataFrame:
    """Prepare a DataFrame with annotations for use in a chart.

    - Clips annotations to the requested time window: a "start" before
      event_starts_after is moved to event_starts_after, and an "end" after
      event_ends_before is moved to event_ends_before (no rows are dropped).
    - Wraps on whitespace with a given max line length, turning each
      "content" string into a list of lines.
    - Stacks annotations for the same event, i.e. rows sharing the same
      (clipped) "end" are merged into one row by stack_annotations.

    :param df: annotations with "start", "end", "belief_time" and "content"
               columns; "content" holds strings.
    :param event_starts_after: lower bound of the time window, or None to
                               leave starts unclipped.
    :param event_ends_before: upper bound of the time window, or None to
                              leave ends unclipped.
    :param max_line_length: maximum width passed to textwrap.wrap.
    :returns: a new DataFrame; the DataFrame passed in is left unmodified.
    """
    # Work on a copy: the clipping and wrapping below would otherwise alter
    # the caller's DataFrame, and wrapping an already-wrapped frame fails.
    df = df.copy()

    # Clip annotations outside the requested time window
    if event_starts_after is not None:
        df.loc[df["start"] < event_starts_after, "start"] = event_starts_after
    if event_ends_before is not None:
        df.loc[df["end"] > event_ends_before, "end"] = event_ends_before

    # Wrap on whitespace with some max line length
    df["content"] = df["content"].apply(wrap, args=(max_line_length,))

    # Stack annotations for the same event
    # NOTE(review): newer pandas versions deprecate passing the grouping
    # column to the function given to groupby.apply - confirm "end" is still
    # present in the result on the pandas version in use.
    if not df.empty:
        df = df.groupby("end", group_keys=False).apply(stack_annotations)
    return df
|