Add hourly granularity support with period-based time column handling, window calculations using hour intervals, and adaptive date axis formatting for hourly vs daily data across drill-down plots, site views, heatmaps, and correlation analysis
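Every touched module applies the same idiom: normalize the free-form granularity string, derive an is_hourly flag from it, and pick the time column — "period_start" for hourly frames, "date_only" otherwise. A minimal sketch of that convention; the resolve_time_col helper and the sample frame are illustrative stand-ins, not code from this commit:

import pandas as pd


def resolve_time_col(df: pd.DataFrame, granularity: str) -> tuple[bool, str]:
    # Same normalization the diff repeats in each module: case/whitespace
    # insensitive, defaulting to daily for empty or unknown values.
    g = str(granularity or "Daily").strip().lower()
    is_hourly = g.startswith("hour") or g.startswith("h")
    # Hourly frames carry a "period_start" timestamp column; anything else
    # falls back to the pre-existing "date_only" column.
    time_col = (
        "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
    )
    return is_hourly, time_col


frame = pd.DataFrame(
    {
        "period_start": pd.date_range("2024-01-01", periods=48, freq="h"),
        "date_only": pd.date_range("2024-01-01", periods=48, freq="h").date,
    }
)
print(resolve_time_col(frame, "Hourly"))  # (True, 'period_start')
print(resolve_time_col(frame, "Daily"))   # (False, 'date_only')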
panel_app/kpi_health_check_drilldown_plots.py

@@ -20,6 +20,7 @@ def build_drilldown_plot(
     recent_days_n: int = 7,
     rel_threshold_pct: float = 10.0,
     normalization: str = "None",
+    granularity: str = "Daily",
 ) -> go.Figure | None:
     """
     Builds the drill-down trend plot with native Plotly annotations.
@@ -32,12 +33,16 @@ def build_drilldown_plot(
     if not valid_kpis:
         return None

-    plot_df = df.sort_values("date_only").copy()
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
+    )
+
+    plot_df = df.sort_values(time_col).copy()

     try:
-        plot_df["date_only"] = pd.to_datetime(plot_df["date_only"])
+        plot_df[time_col] = pd.to_datetime(plot_df[time_col])
     except Exception:
         pass

@@ -69,7 +74,7 @@ def build_drilldown_plot(

     for kpi in valid_kpis:
         # Data preparation
-        x_data = plot_df["date_only"]
+        x_data = plot_df[time_col]
         y_data = pd.to_numeric(plot_df[kpi], errors="coerce")
         if do_norm:
             if norm_mode == "Min-Max":
@@ -163,12 +168,17 @@ def build_drilldown_plot(
     except Exception:
         sla_eval = None

-    end_dt = pd.to_datetime(plot_df["date_only"]).max()
-    rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
-    be = rs - timedelta(days=1)
-    bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)
+    end_dt = pd.to_datetime(plot_df[time_col]).max()
+    if is_hourly:
+        rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
+        be = rs - timedelta(hours=1)
+        bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)
+    else:
+        rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
+        be = rs - timedelta(days=1)
+        bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)

-    dts = pd.to_datetime(plot_df["date_only"])
+    dts = pd.to_datetime(plot_df[time_col])
     baseline_mask = (dts >= bs) & (dts <= be)
     recent_mask = (dts >= rs) & (dts <= end_dt)
     baseline_val = pd.to_numeric(
@@ -211,7 +221,7 @@ def build_drilldown_plot(
             hover_txt.append(f"OUTSIDE WINDOW ({main_kpi})")
         fig.add_trace(
             go.Scatter(
-                x=plot_df["date_only"],
+                x=plot_df[time_col],
                 y=[0] * len(plot_df),
                 mode="markers",
                 marker=dict(symbol="square", size=10, color=colors),
@@ -249,7 +259,7 @@ def build_drilldown_plot(
         if idx_bad:
             fig.add_trace(
                 go.Scatter(
-                    x=[plot_df["date_only"].iloc[i] for i in idx_bad],
+                    x=[plot_df[time_col].iloc[i] for i in idx_bad],
                     y=[y_main.iloc[i] for i in idx_bad],
                     mode="markers",
                     marker=dict(size=10, color=bad_color, symbol="circle"),
@@ -267,7 +277,7 @@ def build_drilldown_plot(
     if not plot_df.empty and not highlight_bad_days:
         fig.add_trace(
             go.Scatter(
-                x=plot_df["date_only"],
+                x=plot_df[time_col],
                 y=[0] * len(plot_df),
                 mode="markers",
                 opacity=0,
@@ -289,10 +299,10 @@ def build_drilldown_plot(
     try:
         force_all_dates = False
         try:
-            x_min = pd.to_datetime(plot_df["date_only"]).min()
-            x_max = pd.to_datetime(plot_df["date_only"]).max()
+            x_min = pd.to_datetime(plot_df[time_col]).min()
+            x_max = pd.to_datetime(plot_df[time_col]).max()
             span_days = int((x_max - x_min).days) + 1
-            n_dates = int(pd.to_datetime(plot_df["date_only"]).nunique())
+            n_dates = int(pd.to_datetime(plot_df[time_col]).nunique())
             force_all_dates = (span_days <= 200) and (n_dates <= 200)
         except Exception:
             force_all_dates = False
@@ -310,7 +320,7 @@ def build_drilldown_plot(
         col=1,
     )

-    if force_all_dates:
+    if force_all_dates and not is_hourly:
         fig.update_xaxes(
             tickmode="linear",
             dtick=86400000,
@@ -332,7 +342,10 @@ def build_drilldown_plot(
         ticklen=6,
         showgrid=True,
         tickformatstops=[
-            {"dtickrange": [None, 86400000], "value": "%d-%b\n%Y"},
+            {
+                "dtickrange": [None, 86400000],
+                "value": "%d-%b\n%H:%M" if is_hourly else "%d-%b\n%Y",
+            },
            {"dtickrange": [86400000, 7 * 86400000], "value": "%d-%b"},
            {"dtickrange": [7 * 86400000, "M1"], "value": "%d-%b"},
            {"dtickrange": ["M1", "M12"], "value": "%b\n%Y"},
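The hourly branch above sizes both windows in hourly periods with inclusive bounds: a recent window of recent_days_n days covers 24 * n hourly stamps ending at end_dt, and the baseline window ends one hour before the recent window starts. A worked check of that arithmetic with hypothetical inputs (recent_days_n=1, baseline_days_n=2):

from datetime import timedelta

import pandas as pd

end_dt = pd.Timestamp("2024-01-10 23:00")
recent_days_n, baseline_days_n = 1, 2

# Same arithmetic as the is_hourly branch in build_drilldown_plot.
rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
be = rs - timedelta(hours=1)
bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)

print(rs)  # 2024-01-10 00:00:00 -> 24 inclusive hourly stamps up to end_dt
print(be)  # 2024-01-09 23:00:00 -> baseline ends one hour before rs
print(bs)  # 2024-01-08 00:00:00 -> 48 inclusive hourly stamps up to be
assert (end_dt - rs) // timedelta(hours=1) + 1 == 24
assert (be - bs) // timedelta(hours=1) + 1 == 48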
panel_app/kpi_health_check_panel.py

@@ -23,7 +23,9 @@ from process_kpi.kpi_health_check.engine import (
     evaluate_health_check,
     is_bad,
     max_consecutive_days,
+    max_consecutive_periods,
     window_bounds,
+    window_bounds_period,
 )
 from process_kpi.kpi_health_check.export import build_export_bytes
 from process_kpi.kpi_health_check.io import read_bytes_to_df
@@ -31,6 +33,7 @@ from process_kpi.kpi_health_check.kpi_groups import filter_kpis, get_kpis_by_group
 from process_kpi.kpi_health_check.multi_rat import compute_multirat_views
 from process_kpi.kpi_health_check.normalization import (
     build_daily_kpi,
+    build_period_kpi,
     infer_date_col,
     infer_id_col,
 )
@@ -195,6 +198,7 @@ def _drilldown_cache_key(site_code: int, rat: str, kpi: str) -> tuple:
         int(site_code),
         str(rat or ""),
         str(kpi or ""),
+        str(granularity_select.value or "Daily"),
         compare_kpis_key,
         norm_key,
         ar_key,
@@ -375,6 +379,9 @@ complaint_sites_file = pn.widgets.FileInput(
 only_complaint_sites = pn.widgets.Checkbox(name="Only complaint sites", value=False)

 analysis_range = pn.widgets.DateRangePicker(name="Analysis date range (optional)")
+granularity_select = pn.widgets.RadioButtonGroup(
+    name="Granularity", options=["Daily", "Hourly"], value="Daily"
+)
 baseline_days = pn.widgets.IntInput(name="Baseline window (days)", value=30)
 recent_days = pn.widgets.IntInput(name="Recent window (days)", value=7)
 rel_threshold_pct = pn.widgets.FloatInput(
@@ -1485,7 +1492,7 @@ def _validate_inputs() -> tuple[list[str], list[str]]:
     return errors, warnings


-def _refresh_validation_state(event=None) -> None:
+def _refresh_validation_state() -> None:
     errors, warnings = _validate_inputs()

     msgs: list[str] = []
@@ -1499,15 +1506,7 @@ def _refresh_validation_state(event=None) -> None:
     else:
         validation_pane.object = ""
         validation_pane.alert_type = "success"
-    validation_pane.visible =
-
-    has_data = bool(current_daily_by_rat)
-    has_results = (
-        isinstance(current_status_df, pd.DataFrame)
-        and not current_status_df.empty
-        and isinstance(current_summary_df, pd.DataFrame)
-        and not current_summary_df.empty
-    )
+    validation_pane.visible = bool(errors or warnings)

     try:
         run_button.disabled = bool(errors) or not has_data
@@ -1610,7 +1609,16 @@ def _update_kpi_options() -> None:
     kpis = [
         c
         for c in df.columns
-        if c not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+        if c
+        not in {
+            "site_code",
+            "date_only",
+            "period_start",
+            "Longitude",
+            "Latitude",
+            "City",
+            "RAT",
+        }
     ]
     kpis = sorted([str(c) for c in kpis])
@@ -1744,7 +1752,15 @@ def _update_site_view(event=None) -> None:
             c
             for c in d.columns
             if c
-            not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+            not in {
+                "site_code",
+                "date_only",
+                "period_start",
+                "Longitude",
+                "Latitude",
+                "City",
+                "RAT",
+            }
         ]
         candidate_kpis = sorted([str(c) for c in candidate_kpis])
         if not candidate_kpis:
@@ -1757,7 +1773,12 @@ def _update_site_view(event=None) -> None:
         new_kpi = candidate_kpis[0]
         _set_widget_value(kpi_select, new_kpi)
         kpi = new_kpi
-    s = d[d["site_code"] == int(code_int)].copy().sort_values("date_only")
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in d.columns) else "date_only"
+    )
+    s = d[d["site_code"] == int(code_int)].copy().sort_values(time_col)
     if s.empty:
        trend_plot_pane.object = None
        heatmap_plot_pane.object = None
@@ -1831,6 +1852,7 @@ def _update_site_view(event=None) -> None:
            recent_days_n=int(_coerce_int(recent_days.value) or 7),
            rel_threshold_pct=float(_coerce_float(rel_threshold_pct.value) or 10.0),
            normalization=str(kpi_compare_norm.value or "None"),
+           granularity=str(granularity_select.value or "Daily"),
        )
        trend_plot_pane.object = fig
        kpis_for_heatmap = []
@@ -1883,13 +1905,25 @@ def _update_site_view(event=None) -> None:
        w = str(corr_window_select.value or "")
        if w.startswith("Recent"):
            df_corr = d[
-               (d["date_only"] >= recent_start)
-               & (d["date_only"] <= recent_end)
+               (
+                   pd.to_datetime(d[time_col], errors="coerce")
+                   >= pd.to_datetime(recent_start)
+               )
+               & (
+                   pd.to_datetime(d[time_col], errors="coerce")
+                   <= pd.to_datetime(recent_end)
+               )
            ].copy()
        elif w.startswith("Baseline"):
            df_corr = d[
-               (d["date_only"] >= baseline_start)
-               & (d["date_only"] <= baseline_end)
+               (
+                   pd.to_datetime(d[time_col], errors="coerce")
+                   >= pd.to_datetime(baseline_start)
+               )
+               & (
+                   pd.to_datetime(d[time_col], errors="coerce")
+                   <= pd.to_datetime(baseline_end)
+               )
            ].copy()
    except Exception:  # noqa: BLE001
        df_corr = d
@@ -2071,18 +2105,52 @@ def _infer_rule_row(rules_df: pd.DataFrame, rat: str, kpi: str) -> dict:

 def _compute_site_windows(
     daily_filtered: pd.DataFrame,
-) -> tuple[date, date, date, date] | None:
+) -> (
+    tuple[
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+    ]
+    | None
+):
     if daily_filtered is None or daily_filtered.empty:
         return None
     rd = _coerce_int(recent_days.value)
     bd = _coerce_int(baseline_days.value)
     if rd is None or rd < 1 or bd is None or bd < 1:
         return None
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
+    if is_hourly and time_col == "period_start":
+        try:
+            end_dt = pd.to_datetime(daily_filtered[time_col], errors="coerce").max()
+            if pd.isna(end_dt):
+                return None
+            end_dt = pd.Timestamp(end_dt).floor("h")
+        except Exception:  # noqa: BLE001
+            return None
+
+        recent_periods = int(rd) * 24
+        baseline_periods = int(bd) * 24
+        step = timedelta(hours=1)
+
+        recent_start, recent_end = window_bounds_period(end_dt, recent_periods, step)
+        baseline_end = recent_start - step
+        baseline_start, _ = window_bounds_period(baseline_end, baseline_periods, step)
+        return baseline_start, baseline_end, recent_start, recent_end
+
     try:
-        end_date = (
-            pd.to_datetime(daily_filtered["date_only"], errors="coerce")
-            .max().date()
-        )
+        end_ts = pd.to_datetime(daily_filtered["date_only"], errors="coerce").max()
+        if pd.isna(end_ts):
+            return None
+        end_date = end_ts.date()
     except Exception:  # noqa: BLE001
         return None
     recent_start, recent_end = window_bounds(end_date, int(rd))
@@ -2105,16 +2173,25 @@ def _build_site_heatmap(
         return None
     baseline_start, baseline_end, recent_start, recent_end = windows

+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
     site_daily = daily_filtered[daily_filtered["site_code"] == int(site_code)].copy()
     if site_daily.empty:
         return None
-    site_daily = site_daily.sort_values("date_only")
+    site_daily = site_daily.sort_values(time_col)

     dates = []
     cur = recent_start
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
     while cur <= recent_end:
         dates.append(cur)
-        cur = cur + timedelta(days=1)
+        cur = cur + step

     z = []
     hover = []
@@ -2133,9 +2210,10 @@ def _build_site_heatmap(

         sla_eval = None if policy == "notify" else sla_val

-        s = site_daily[["date_only", kpi]].dropna(subset=[kpi])
-        baseline_mask = (s["date_only"] >= baseline_start) & (
-            s["date_only"] <= baseline_end
+        s = site_daily[[time_col, kpi]].dropna(subset=[kpi])
+        t = pd.to_datetime(s[time_col], errors="coerce")
+        baseline_mask = (t >= pd.to_datetime(baseline_start)) & (
+            t <= pd.to_datetime(baseline_end)
         )
         baseline = s.loc[baseline_mask, kpi].median() if baseline_mask.any() else np.nan
         baseline_val = float(baseline) if pd.notna(baseline) else None
@@ -2143,7 +2221,11 @@ def _build_site_heatmap(
         row_z = []
         row_h = []
         for d in dates:
-            v_series = site_daily.loc[site_daily["date_only"] == d, kpi]
+            v_series = site_daily.loc[
+                pd.to_datetime(site_daily[time_col], errors="coerce")
+                == pd.to_datetime(d),
+                kpi,
+            ]
             v = v_series.iloc[0] if not v_series.empty else np.nan
             if v is None or (isinstance(v, float) and np.isnan(v)):
                 row_z.append(None)
@@ -2186,7 +2268,7 @@ def _build_site_heatmap(
     fig.update_layout(
         template="plotly_white",
         title=f"{rat} - Site {int(site_code)} - Recent window heatmap",
-        xaxis_title="date",
+        xaxis_title="period",
         yaxis_title="KPI",
         height=420,
         margin=dict(l=40, r=20, t=60, b=40),
@@ -2212,15 +2294,26 @@ def _build_baseline_recent_hist(
         return None
     baseline_start, baseline_end, recent_start, recent_end = windows

+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
     site_daily = daily_filtered[daily_filtered["site_code"] == int(site_code)].copy()
     if site_daily.empty:
         return None

-    s = site_daily[["date_only", kpi]].dropna(subset=[kpi])
-    baseline_mask = (s["date_only"] >= baseline_start) & (
-        s["date_only"] <= baseline_end
+    s = site_daily[[time_col, kpi]].dropna(subset=[kpi])
+    t = pd.to_datetime(s[time_col], errors="coerce")
+    baseline_mask = (t >= pd.to_datetime(baseline_start)) & (
+        t <= pd.to_datetime(baseline_end)
+    )
+    recent_mask = (t >= pd.to_datetime(recent_start)) & (
+        t <= pd.to_datetime(recent_end)
     )
-    recent_mask = (s["date_only"] >= recent_start) & (s["date_only"] <= recent_end)

     baseline_vals = (
         pd.to_numeric(s.loc[baseline_mask, kpi], errors="coerce").dropna().astype(float)
@@ -2276,7 +2369,16 @@ def _build_corr_heatmap(
     cols = [
         c
         for c in cols
-        if c not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+        if c
+        not in {
+            "site_code",
+            "date_only",
+            "period_start",
+            "Longitude",
+            "Latitude",
+            "City",
+            "RAT",
+        }
     ]
     cols = list(dict.fromkeys(cols))
     if len(cols) < 2:
@@ -2600,6 +2702,8 @@ def _refresh_profiles(event=None) -> None:
 def _current_profile_config() -> dict:
     cfg: dict = {}

+    cfg["granularity"] = str(granularity_select.value or "Daily")
+
     cfg["analysis_range"] = (
         [
             (
@@ -2653,6 +2757,13 @@ def _apply_profile_config(cfg: dict) -> None:
     _applying_profile = True

     try:
+        try:
+            g = str(cfg.get("granularity", "") or "").strip()
+            if g and g in list(granularity_select.options):
+                granularity_select.value = g
+        except Exception:  # noqa: BLE001
+            pass
+
         try:
             ar = cfg.get("analysis_range", [None, None])
             if isinstance(ar, (list, tuple)) and len(ar) == 2 and ar[0] and ar[1]:
@@ -2979,10 +3090,22 @@ def load_datasets(event=None) -> None:
         except Exception:  # noqa: BLE001
             id_col = None

-        daily, kpi_cols = build_daily_kpi(df_raw, rat)
+        daily, kpi_cols = build_period_kpi(df_raw, rat, granularity_select.value)
         current_daily_by_rat[rat] = daily

         d = _filtered_daily(daily)
+        periods_n = None
+        try:
+            if (
+                isinstance(d, pd.DataFrame)
+                and not d.empty
+                and "period_start" in d.columns
+            ):
+                periods_n = int(
+                    pd.to_datetime(d["period_start"], errors="coerce").nunique()
+                )
+        except Exception:  # noqa: BLE001
+            periods_n = None
         rows.append(
             {
                 "RAT": rat,
@@ -2992,6 +3115,11 @@ def load_datasets(event=None) -> None:
                 "id_col": id_col,
                 "sites": int(d["site_code"].nunique()),
                 "days": int(d["date_only"].nunique()),
+                "periods": (
+                    int(periods_n)
+                    if periods_n is not None
+                    else int(d["date_only"].nunique())
+                ),
                 "kpis": int(len(kpi_cols)),
             }
         )
@@ -3110,6 +3238,7 @@ def run_health_check(event=None) -> None:
             int(rd),
             float(thr),
             int(mcd),
+            granularity=str(granularity_select.value or "Daily"),
         )
         if not status_df.empty:
             all_status.append(status_df)
@@ -3230,6 +3359,7 @@ def _build_export_bytes() -> bytes:
         daily_by_rat=(
             current_daily_by_rat if isinstance(current_daily_by_rat, dict) else None
         ),
+        granularity=str(granularity_select.value or "Daily"),
         multirat_summary_df=(
             current_multirat_df
             if isinstance(current_multirat_df, pd.DataFrame)
@@ -3273,6 +3403,7 @@ def _export_callback() -> io.BytesIO:

 def _build_alert_pack_bytes() -> bytes:
     params = {
+        "granularity": str(granularity_select.value or "Daily"),
         "baseline_days": baseline_days.value,
         "recent_days": recent_days.value,
         "rel_threshold_pct": rel_threshold_pct.value,
@@ -3478,6 +3609,27 @@ def _on_drilldown_params_change(event=None) -> None:
     _refresh_validation_state()


+def _on_granularity_change(event=None) -> None:
+    if _applying_profile or _loading_datasets:
+        return
+    _invalidate_drilldown_cache(data_changed=True, healthcheck_changed=True)
+    _refresh_validation_state()
+    try:
+        has_any = bool(
+            (file_2g and file_2g.value)
+            or (file_3g and file_3g.value)
+            or (file_lte and file_lte.value)
+            or (file_twamp and file_twamp.value)
+        )
+    except Exception:  # noqa: BLE001
+        has_any = False
+    if has_any:
+        try:
+            load_datasets()
+        except Exception:  # noqa: BLE001
+            pass
+
+
 rat_select.param.watch(_on_rat_change, "value")
 kpi_group_select.param.watch(
     _on_rat_change, "value"
@@ -3498,6 +3650,7 @@ map_status_filter.param.watch(lambda e: _refresh_map_view(), "value")
 map_auto_fit.param.watch(lambda e: _refresh_map_view(), "value")

 analysis_range.param.watch(_on_drilldown_params_change, "value")
+granularity_select.param.watch(_on_granularity_change, "value")
 baseline_days.param.watch(_on_drilldown_params_change, "value")
 recent_days.param.watch(_on_drilldown_params_change, "value")
 rel_threshold_pct.param.watch(_on_drilldown_params_change, "value")
@@ -3586,7 +3739,12 @@ def _build_drilldown_export_bytes() -> bytes:
     if d is None or d.empty:
         return b""

-    s = d[d["site_code"] == int(code_int)].copy().sort_values("date_only")
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in d.columns) else "date_only"
+    )
+    s = d[d["site_code"] == int(code_int)].copy().sort_values(time_col)
     if s.empty:
         return b""
@@ -3598,9 +3756,9 @@ def _build_drilldown_export_bytes() -> bytes:
     selected_kpis = [str(kpi_select.value)] + selected_kpis

     selected_kpis = [k for k in selected_kpis if k in d.columns]
-    base_cols = ["date_only"]
+    base_cols = [time_col]
     daily_cols = base_cols + selected_kpis
-    daily_out = s[daily_cols].copy() if selected_kpis else s[["date_only"]].copy()
+    daily_out = s[daily_cols].copy() if selected_kpis else s[[time_col]].copy()

     rules_df = (
         rules_table.value
@@ -3623,15 +3781,23 @@ def _build_drilldown_export_bytes() -> bytes:
         except Exception:  # noqa: BLE001
             sla_val = None

-        sk = s[["date_only", k]].copy()
+        sk = s[[time_col, k]].copy()
         sk[k] = pd.to_numeric(sk[k], errors="coerce")
         sk = sk.dropna(subset=[k])

-        baseline_mask = (
-            (sk["date_only"] >= baseline_start) & (sk["date_only"] <= baseline_end)
+        baseline_mask = (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            >= pd.to_datetime(baseline_start)
+        ) & (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            <= pd.to_datetime(baseline_end)
         )
-        recent_mask = (
-            (sk["date_only"] >= recent_start) & (sk["date_only"] <= recent_end)
+        recent_mask = (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            >= pd.to_datetime(recent_start)
+        ) & (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            <= pd.to_datetime(recent_end)
         )

         baseline_med = (
@@ -3644,8 +3810,8 @@ def _build_drilldown_export_bytes() -> bytes:
         )

         bad_flags = []
-        recent_vals = sk.loc[recent_mask, ["date_only", k]].sort_values("date_only")
-        bad_dates: list[date] = []
+        recent_vals = sk.loc[recent_mask, [time_col, k]].sort_values(time_col)
+        bad_dates = []
         for _, r in recent_vals.iterrows():
             v = r.get(k)
             is_bad_day = bool(
@@ -3660,13 +3826,9 @@ def _build_drilldown_export_bytes() -> bytes:
             bad_flags.append(is_bad_day)
             if is_bad_day:
                 try:
-                    d0 = r.get("date_only")
+                    d0 = r.get(time_col)
                     if d0 is not None:
-                        bad_dates.append(
-                            d0
-                            if isinstance(d0, date)
-                            else pd.to_datetime(d0).date()
-                        )
+                        bad_dates.append(pd.to_datetime(d0, errors="coerce"))
                 except Exception:  # noqa: BLE001
                     pass
@@ -3680,7 +3842,16 @@ def _build_drilldown_export_bytes() -> bytes:
                 "baseline_median": baseline_med,
                 "recent_median": recent_med,
                 "bad_days_recent": int(sum(bad_flags)),
-                "max_streak_recent": int(max_consecutive_days(bad_dates)),
+                "max_streak_recent": int(
+                    max_consecutive_periods(
+                        bad_dates,
+                        step=(
+                            timedelta(hours=1) if is_hourly else timedelta(days=1)
+                        ),
+                    )
+                    if bad_dates
+                    else 0
+                ),
             }
         )
         summary_out = pd.DataFrame(rows)
@@ -3692,14 +3863,16 @@ def _build_drilldown_export_bytes() -> bytes:
     )
     if not status_df.empty:
         status_out = status_df[
-            (status_df["site_code"] == int(code_int))
+            (status_df["site_code"] == int(code_int)) & (status_df["RAT"] == str(rat))
         ].copy()
     else:
         status_out = pd.DataFrame()

+    g2 = str(granularity_select.value or "Daily").strip().lower()
+    data_sheet = "Hourly" if (g2.startswith("hour") or g2.startswith("h")) else "Daily"
     return write_dfs_to_excel(
         [summary_out, daily_out, status_out],
-        ["Summary", "Daily", "KPI_Status"],
+        ["Summary", data_sheet, "KPI_Status"],
         index=False,
     )
@@ -3732,6 +3905,7 @@ sidebar = pn.Column(
     sidebar,
     pn.Card(
         analysis_range,
+        granularity_select,
         pn.Row(baseline_days, recent_days, sizing_mode="stretch_width"),
         rel_threshold_pct,
         min_consecutive_days,
@@ -3757,7 +3931,6 @@ sidebar = pn.Column(
         min_criticality,
         min_anomaly_score,
         city_filter,
-        only_complaint_sites,
         top_rat_filter,
         top_status_filter,
         title="Filters",
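The new control is wired through Panel's parameter watching, exactly like the existing widgets above. A self-contained sketch of that widget-plus-watcher pattern; the print body is a stand-in for the real callback, which invalidates caches and reloads any uploaded datasets:

import panel as pn

granularity_select = pn.widgets.RadioButtonGroup(
    name="Granularity", options=["Daily", "Hourly"], value="Daily"
)


def _on_granularity_change(event=None) -> None:
    # Panel passes an event whose .old/.new carry the previous and
    # newly selected options.
    print(f"granularity: {event.old!r} -> {event.new!r}")


granularity_select.param.watch(_on_granularity_change, "value")
granularity_select.value = "Hourly"  # fires the watcher: 'Daily' -> 'Hourly'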
process_kpi/kpi_health_check/engine.py

@@ -1,9 +1,34 @@
-from datetime import date, timedelta
+from datetime import date, datetime, timedelta

 import numpy as np
 import pandas as pd


+def _to_timestamp(value) -> pd.Timestamp | None:
+    if value is None:
+        return None
+    if isinstance(value, pd.Timestamp):
+        return value
+    if isinstance(value, datetime):
+        return pd.Timestamp(value)
+    if isinstance(value, date):
+        return pd.Timestamp(value)
+    try:
+        v = pd.to_datetime(value, errors="coerce")
+        return v if pd.notna(v) else None
+    except Exception:  # noqa: BLE001
+        return None
+
+
+def window_bounds_period(
+    end_dt: pd.Timestamp,
+    periods: int,
+    step: timedelta,
+) -> tuple[pd.Timestamp, pd.Timestamp]:
+    start = end_dt - step * (int(periods) - 1)
+    return start, end_dt
+
+
 def window_bounds(end_date: date, days: int) -> tuple[date, date]:
     start = end_date - timedelta(days=days - 1)
     return start, end_date
@@ -34,14 +59,18 @@ def is_bad(
     return bad or (value > baseline + abs(baseline) * thr)


-def max_consecutive_days(dates: list[date]) -> int:
-    if not dates:
+def max_consecutive_periods(values: list, step: timedelta) -> int:
+    if not values:
         return 0
-    ds = sorted(set(dates))
+    ts = [_to_timestamp(v) for v in values]
+    ts2 = [t for t in ts if t is not None]
+    if not ts2:
+        return 0
+    ts_sorted = sorted(set(ts2))
     streak = 1
     best = 1
-    for prev, cur in zip(ds, ds[1:]):
-        if cur == prev + timedelta(days=1):
+    for prev, cur in zip(ts_sorted, ts_sorted[1:]):
+        if cur == prev + step:
             streak += 1
         else:
             streak = 1
@@ -50,6 +79,10 @@ def max_consecutive_days(dates: list[date]) -> int:
     return best


+def max_consecutive_days(dates: list[date]) -> int:
+    return max_consecutive_periods(dates, step=timedelta(days=1))
+
+
 def evaluate_health_check(
     daily: pd.DataFrame,
     rat: str,
@@ -58,14 +91,33 @@ def evaluate_health_check(
     recent_days_n: int,
     rel_threshold_pct: float,
     min_consecutive_days: int,
+    granularity: str = "Daily",
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
     if daily.empty:
         return pd.DataFrame(), pd.DataFrame()

-    end_date = daily["date_only"].max()
-    recent_start, recent_end = window_bounds(end_date, int(recent_days_n))
-    baseline_end = recent_start - timedelta(days=1)
-    baseline_start, _ = window_bounds(baseline_end, int(baseline_days_n))
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily.columns)
+        else "date_only"
+    )
+
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
+    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
+    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
+    min_periods = (
+        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
+    )
+
+    end_dt = _to_timestamp(daily[time_col].max())
+    if end_dt is None:
+        return pd.DataFrame(), pd.DataFrame()
+
+    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
+    baseline_end_dt = recent_start_dt - step
+    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)

     rat_rules = rules_df[rules_df["RAT"] == rat].copy()
     kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
@@ -78,7 +130,7 @@ def evaluate_health_check(
             if ("City" in g_site.columns and g_site["City"].notna().any())
             else None
         )
-        g_site = g_site.sort_values("date_only")
+        g_site = g_site.sort_values(time_col)

         for kpi in kpis:
             rule = rat_rules[rat_rules["KPI"] == kpi].iloc[0]
@@ -92,7 +144,7 @@ def evaluate_health_check(

             sla_eval = None if policy == "notify" else sla_val

-            s = g_site[["date_only", kpi]].dropna(subset=[kpi])
+            s = g_site[[time_col, kpi]].dropna(subset=[kpi])
             if s.empty:
                 rows.append(
                     {
@@ -105,23 +157,20 @@ def evaluate_health_check(
                 )
                 continue

-            baseline_mask = (s["date_only"] >= baseline_start) & (
-                s["date_only"] <= baseline_end
-            )
-            recent_mask = (s["date_only"] >= recent_start) & (
-                s["date_only"] <= recent_end
-            )
+            t = pd.to_datetime(s[time_col], errors="coerce")
+            baseline_mask = (t >= baseline_start_dt) & (t <= baseline_end_dt)
+            recent_mask = (t >= recent_start_dt) & (t <= recent_end_dt)

             baseline = (
                 s.loc[baseline_mask, kpi].median() if baseline_mask.any() else np.nan
             )
             recent = s.loc[recent_mask, kpi].median() if recent_mask.any() else np.nan

-            daily_recent = s.loc[recent_mask, ["date_only", kpi]].copy()
+            daily_recent = s.loc[recent_mask, [time_col, kpi]].copy()
             bad_dates = []
             if not daily_recent.empty:
                 for d, v in zip(
-                    daily_recent["date_only"].tolist(), daily_recent[kpi].tolist()
+                    daily_recent[time_col].tolist(), daily_recent[kpi].tolist()
                 ):
                     if is_bad(
                         float(v) if pd.notna(v) else None,
@@ -132,8 +181,8 @@ def evaluate_health_check(
                     ):
                         bad_dates.append(d)

-            max_streak = max_consecutive_days(bad_dates)
-            persistent = max_streak >= int(min_consecutive_days)
+            max_streak = max_consecutive_periods(bad_dates, step=step)
+            persistent = max_streak >= int(min_periods)

             is_bad_recent = is_bad(
                 float(recent) if pd.notna(recent) else None,
@@ -145,7 +194,7 @@ def evaluate_health_check(

             is_bad_current = is_bad_recent
             if not daily_recent.empty:
-                last_row = daily_recent.sort_values("date_only").iloc[-1]
+                last_row = daily_recent.sort_values(time_col).iloc[-1]
                 last_val = last_row[kpi]
                 is_bad_current = is_bad(
                     float(last_val) if pd.notna(last_val) else None,
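A usage sketch for the two new helpers, assuming they are importable from process_kpi.kpi_health_check.engine as in the panel's import block:

from datetime import timedelta

import pandas as pd

from process_kpi.kpi_health_check.engine import (
    max_consecutive_periods,
    window_bounds_period,
)

step = timedelta(hours=1)
end_dt = pd.Timestamp("2024-01-10 23:00")

# Inclusive 48-period (two-day) hourly window ending at end_dt.
start, end = window_bounds_period(end_dt, periods=48, step=step)
print(start, end)  # 2024-01-09 00:00:00 2024-01-10 23:00:00

# Three bad hourly stamps, two of them adjacent: longest streak is 2.
bad = [
    pd.Timestamp("2024-01-10 05:00"),
    pd.Timestamp("2024-01-10 06:00"),
    pd.Timestamp("2024-01-10 09:00"),
]
print(max_consecutive_periods(bad, step=step))  # 2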
process_kpi/kpi_health_check/export.py

@@ -9,6 +9,7 @@ def build_export_bytes(
     summary_df: pd.DataFrame | None,
     status_df: pd.DataFrame | None,
     daily_by_rat: dict[str, pd.DataFrame] | None = None,
+    granularity: str = "Daily",
     multirat_summary_df: pd.DataFrame | None = None,
     top_anomalies_df: pd.DataFrame | None = None,
     complaint_multirat_df: pd.DataFrame | None = None,
@@ -32,10 +33,12 @@ def build_export_bytes(

     max_data_rows = 1048575
     if daily_by_rat and isinstance(daily_by_rat, dict):
+        g = str(granularity or "Daily").strip().lower()
+        prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
         for rat, df in daily_by_rat.items():
             if not isinstance(df, pd.DataFrame):
                 continue
-            base = f"Daily_All_{str(rat)}"
+            base = f"{prefix}_All_{str(rat)}"
             if len(df) <= max_data_rows:
                 dfs.append(df)
                 sheet_names.append(base[:31])
process_kpi/kpi_health_check/normalization.py

@@ -228,20 +228,29 @@ def load_physical_db() -> pd.DataFrame:
     return physical_db[keep].drop_duplicates("code")


-def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
+def build_period_kpi(
+    df_raw: pd.DataFrame,
+    rat: str,
+    granularity: str = "Daily",
+) -> tuple[pd.DataFrame, list[str]]:
     df = df_raw.copy()
     date_col = infer_date_col(df)
     id_col = infer_id_col(df, rat)

     df["date"] = parse_datetime(df[date_col])
     df = df.dropna(subset=["date"])
-    df["date_only"] = df["date"].dt.date
+
+    g = str(granularity or "Daily").strip().lower()
+    if g.startswith("hour") or g.startswith("h"):
+        df["period_start"] = df["date"].dt.floor("H")
+    else:
+        df["period_start"] = df["date"].dt.floor("D")

     df["site_code"] = df[id_col].apply(extract_site_code)
     df = df.dropna(subset=["site_code"])
     df["site_code"] = df["site_code"].astype(int)

-    meta = {date_col, id_col, "date", "date_only", "site_code"}
+    meta = {date_col, id_col, "date", "site_code", "period_start"}
     meta = meta.union(non_kpi_identifier_cols(df, rat))
     candidate_cols = [c for c in df.columns if c not in meta]
@@ -259,22 +268,25 @@ def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:

     base = pd.concat(
         [
-            df[["site_code", "date_only"]].reset_index(drop=True),
+            df[["site_code", "period_start"]].reset_index(drop=True),
             numeric_df[kpi_cols].reset_index(drop=True),
         ],
         axis=1,
     )

     agg_dict = {k: infer_agg(k) for k in kpi_cols}
-    daily = base.groupby(["site_code", "date_only"], as_index=False).agg(agg_dict)
+    out = base.groupby(["site_code", "period_start"], as_index=False).agg(agg_dict)
+    out["date_only"] = pd.to_datetime(out["period_start"]).dt.date

     physical = load_physical_db()
     if not physical.empty:
-        daily = pd.merge(
-            daily, physical, left_on="site_code", right_on="code", how="left"
-        )
-        daily = daily.drop(columns=[c for c in ["code"] if c in daily.columns])
+        out = pd.merge(out, physical, left_on="site_code", right_on="code", how="left")
+        out = out.drop(columns=[c for c in ["code"] if c in out.columns])

-    daily["RAT"] = rat
+    out["RAT"] = rat

-    return daily, kpi_cols
+    return out, kpi_cols
+
+
+def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
+    return build_period_kpi(df_raw, rat, granularity="Daily")