DavMelchi committed
Commit e2ef38a · 1 Parent(s): 55d1d4c

Add hourly granularity support with period-based time column handling, window calculations using hour intervals, and adaptive date axis formatting for hourly vs daily data across drill-down plots, site views, heatmaps, and correlation analysis

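Every touched function applies the same pattern: normalize the granularity string, detect hourly mode, and pick the matching time column. As a minimal standalone sketch of that recurring logic (the helper name resolve_time_axis is hypothetical; the commit inlines this check at each call site):

def resolve_time_axis(granularity: str, columns) -> tuple[bool, str]:
    # "Hourly" / "hour" / "h" select hourly mode; anything else falls back to daily.
    g = str(granularity or "Daily").strip().lower()
    is_hourly = g.startswith("hour") or g.startswith("h")
    # Hourly aggregates carry a "period_start" column; daily ones keep "date_only".
    time_col = "period_start" if (is_hourly and "period_start" in columns) else "date_only"
    return is_hourly, time_col

is_hourly, time_col = resolve_time_axis("Hourly", ["site_code", "period_start"])  # (True, "period_start")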
panel_app/kpi_health_check_drilldown_plots.py CHANGED
@@ -20,6 +20,7 @@ def build_drilldown_plot(
     recent_days_n: int = 7,
     rel_threshold_pct: float = 10.0,
     normalization: str = "None",
+    granularity: str = "Daily",
 ) -> go.Figure | None:
     """
     Builds the drill-down trend plot with native Plotly annotations.
@@ -32,12 +33,16 @@ def build_drilldown_plot(
     if not valid_kpis:
         return None
 
-    # Sort by date
-    # Sort by date
-    plot_df = df.sort_values("date_only").copy()
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
+    )
+
+    plot_df = df.sort_values(time_col).copy()
 
     try:
-        plot_df["date_only"] = pd.to_datetime(plot_df["date_only"])
+        plot_df[time_col] = pd.to_datetime(plot_df[time_col])
     except Exception:
         pass
 
@@ -69,7 +74,7 @@ def build_drilldown_plot(
 
     for kpi in valid_kpis:
         # Data preparation
-        x_data = plot_df["date_only"]
+        x_data = plot_df[time_col]
         y_data = pd.to_numeric(plot_df[kpi], errors="coerce")
         if do_norm:
             if norm_mode == "Min-Max":
@@ -163,12 +168,17 @@ def build_drilldown_plot(
     except Exception:
         sla_eval = None
 
-    end_dt = pd.to_datetime(plot_df["date_only"]).max()
-    rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
-    be = rs - timedelta(days=1)
-    bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)
+    end_dt = pd.to_datetime(plot_df[time_col]).max()
+    if is_hourly:
+        rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
+        be = rs - timedelta(hours=1)
+        bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)
+    else:
+        rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
+        be = rs - timedelta(days=1)
+        bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)
 
-    dts = pd.to_datetime(plot_df["date_only"])
+    dts = pd.to_datetime(plot_df[time_col])
     baseline_mask = (dts >= bs) & (dts <= be)
     recent_mask = (dts >= rs) & (dts <= end_dt)
     baseline_val = pd.to_numeric(
@@ -211,7 +221,7 @@ def build_drilldown_plot(
             hover_txt.append(f"OUTSIDE WINDOW ({main_kpi})")
         fig.add_trace(
             go.Scatter(
-                x=plot_df["date_only"],
+                x=plot_df[time_col],
                 y=[0] * len(plot_df),
                 mode="markers",
                 marker=dict(symbol="square", size=10, color=colors),
@@ -249,7 +259,7 @@ def build_drilldown_plot(
         if idx_bad:
             fig.add_trace(
                 go.Scatter(
-                    x=[plot_df["date_only"].iloc[i] for i in idx_bad],
+                    x=[plot_df[time_col].iloc[i] for i in idx_bad],
                     y=[y_main.iloc[i] for i in idx_bad],
                     mode="markers",
                     marker=dict(size=10, color=bad_color, symbol="circle"),
@@ -267,7 +277,7 @@ def build_drilldown_plot(
     if not plot_df.empty and not highlight_bad_days:
         fig.add_trace(
             go.Scatter(
-                x=plot_df["date_only"],
+                x=plot_df[time_col],
                 y=[0] * len(plot_df),
                 mode="markers",
                 opacity=0,
@@ -289,10 +299,10 @@ def build_drilldown_plot(
     try:
         force_all_dates = False
         try:
-            x_min = pd.to_datetime(plot_df["date_only"]).min()
-            x_max = pd.to_datetime(plot_df["date_only"]).max()
+            x_min = pd.to_datetime(plot_df[time_col]).min()
+            x_max = pd.to_datetime(plot_df[time_col]).max()
             span_days = int((x_max - x_min).days) + 1
-            n_dates = int(pd.to_datetime(plot_df["date_only"]).nunique())
+            n_dates = int(pd.to_datetime(plot_df[time_col]).nunique())
             force_all_dates = (span_days <= 200) and (n_dates <= 200)
         except Exception:
             force_all_dates = False
@@ -310,7 +320,7 @@ def build_drilldown_plot(
             col=1,
         )
 
-        if force_all_dates:
+        if force_all_dates and not is_hourly:
             fig.update_xaxes(
                 tickmode="linear",
                 dtick=86400000,
@@ -332,7 +342,10 @@ def build_drilldown_plot(
                 ticklen=6,
                 showgrid=True,
                 tickformatstops=[
-                    {"dtickrange": [None, 86400000], "value": "%d-%b\n%Y"},
+                    {
+                        "dtickrange": [None, 86400000],
+                        "value": "%d-%b\n%H:%M" if is_hourly else "%d-%b\n%Y",
+                    },
                     {"dtickrange": [86400000, 7 * 86400000], "value": "%d-%b"},
                     {"dtickrange": [7 * 86400000, "M1"], "value": "%d-%b"},
                     {"dtickrange": ["M1", "M12"], "value": "%b\n%Y"},
panel_app/kpi_health_check_panel.py CHANGED
@@ -23,7 +23,9 @@ from process_kpi.kpi_health_check.engine import (
     evaluate_health_check,
     is_bad,
     max_consecutive_days,
+    max_consecutive_periods,
     window_bounds,
+    window_bounds_period,
 )
 from process_kpi.kpi_health_check.export import build_export_bytes
 from process_kpi.kpi_health_check.io import read_bytes_to_df
@@ -31,6 +33,7 @@ from process_kpi.kpi_health_check.kpi_groups import filter_kpis, get_kpis_by_gro
 from process_kpi.kpi_health_check.multi_rat import compute_multirat_views
 from process_kpi.kpi_health_check.normalization import (
     build_daily_kpi,
+    build_period_kpi,
     infer_date_col,
     infer_id_col,
 )
@@ -195,6 +198,7 @@ def _drilldown_cache_key(site_code: int, rat: str, kpi: str) -> tuple:
         int(site_code),
         str(rat or ""),
         str(kpi or ""),
+        str(granularity_select.value or "Daily"),
         compare_kpis_key,
         norm_key,
         ar_key,
@@ -375,6 +379,9 @@ complaint_sites_file = pn.widgets.FileInput(
 only_complaint_sites = pn.widgets.Checkbox(name="Only complaint sites", value=False)
 
 analysis_range = pn.widgets.DateRangePicker(name="Analysis date range (optional)")
+granularity_select = pn.widgets.RadioButtonGroup(
+    name="Granularity", options=["Daily", "Hourly"], value="Daily"
+)
 baseline_days = pn.widgets.IntInput(name="Baseline window (days)", value=30)
 recent_days = pn.widgets.IntInput(name="Recent window (days)", value=7)
 rel_threshold_pct = pn.widgets.FloatInput(
@@ -1485,7 +1492,7 @@ def _validate_inputs() -> tuple[list[str], list[str]]:
     return errors, warnings
 
 
-def _refresh_validation_state(event=None) -> None:
+def _refresh_validation_state() -> None:
     errors, warnings = _validate_inputs()
 
     msgs: list[str] = []
@@ -1499,15 +1506,7 @@ def _refresh_validation_state(event=None) -> None:
     else:
         validation_pane.object = ""
         validation_pane.alert_type = "success"
-        validation_pane.visible = False
-
-    has_data = bool(current_daily_by_rat)
-    has_results = (
-        isinstance(current_status_df, pd.DataFrame)
-        and not current_status_df.empty
-        and isinstance(current_summary_df, pd.DataFrame)
-        and not current_summary_df.empty
-    )
+        validation_pane.visible = bool(errors or warnings)
 
     try:
         run_button.disabled = bool(errors) or not has_data
@@ -1610,7 +1609,16 @@ def _update_kpi_options() -> None:
     kpis = [
         c
         for c in df.columns
-        if c not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+        if c
+        not in {
+            "site_code",
+            "date_only",
+            "period_start",
+            "Longitude",
+            "Latitude",
+            "City",
+            "RAT",
+        }
     ]
     kpis = sorted([str(c) for c in kpis])
 
@@ -1744,7 +1752,15 @@ def _update_site_view(event=None) -> None:
         c
         for c in d.columns
         if c
-        not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+        not in {
+            "site_code",
+            "date_only",
+            "period_start",
+            "Longitude",
+            "Latitude",
+            "City",
+            "RAT",
+        }
     ]
     candidate_kpis = sorted([str(c) for c in candidate_kpis])
     if not candidate_kpis:
@@ -1757,7 +1773,12 @@
         new_kpi = candidate_kpis[0]
         _set_widget_value(kpi_select, new_kpi)
         kpi = new_kpi
-    s = d[d["site_code"] == int(code_int)].copy().sort_values("date_only")
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in d.columns) else "date_only"
+    )
+    s = d[d["site_code"] == int(code_int)].copy().sort_values(time_col)
     if s.empty:
         trend_plot_pane.object = None
         heatmap_plot_pane.object = None
@@ -1831,6 +1852,7 @@
         recent_days_n=int(_coerce_int(recent_days.value) or 7),
         rel_threshold_pct=float(_coerce_float(rel_threshold_pct.value) or 10.0),
         normalization=str(kpi_compare_norm.value or "None"),
+        granularity=str(granularity_select.value or "Daily"),
     )
     trend_plot_pane.object = fig
     kpis_for_heatmap = []
@@ -1883,13 +1905,25 @@
         w = str(corr_window_select.value or "")
         if w.startswith("Recent"):
             df_corr = d[
-                (d["date_only"] >= recent_start)
-                & (d["date_only"] <= recent_end)
+                (
+                    pd.to_datetime(d[time_col], errors="coerce")
+                    >= pd.to_datetime(recent_start)
+                )
+                & (
+                    pd.to_datetime(d[time_col], errors="coerce")
+                    <= pd.to_datetime(recent_end)
+                )
             ].copy()
         elif w.startswith("Baseline"):
             df_corr = d[
-                (d["date_only"] >= baseline_start)
-                & (d["date_only"] <= baseline_end)
+                (
+                    pd.to_datetime(d[time_col], errors="coerce")
+                    >= pd.to_datetime(baseline_start)
+                )
+                & (
+                    pd.to_datetime(d[time_col], errors="coerce")
+                    <= pd.to_datetime(baseline_end)
+                )
             ].copy()
     except Exception:  # noqa: BLE001
         df_corr = d
@@ -2071,18 +2105,52 @@ def _infer_rule_row(rules_df: pd.DataFrame, rat: str, kpi: str) -> dict:
 
 def _compute_site_windows(
     daily_filtered: pd.DataFrame,
-) -> tuple[date, date, date, date] | None:
+) -> (
+    tuple[
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+        date | pd.Timestamp,
+    ]
+    | None
+):
     if daily_filtered is None or daily_filtered.empty:
         return None
     rd = _coerce_int(recent_days.value)
     bd = _coerce_int(baseline_days.value)
     if rd is None or rd < 1 or bd is None or bd < 1:
         return None
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
+    if is_hourly and time_col == "period_start":
+        try:
+            end_dt = pd.to_datetime(daily_filtered[time_col], errors="coerce").max()
+            if pd.isna(end_dt):
+                return None
+            end_dt = pd.Timestamp(end_dt).floor("h")
+        except Exception:  # noqa: BLE001
+            return None
+
+        recent_periods = int(rd) * 24
+        baseline_periods = int(bd) * 24
+        step = timedelta(hours=1)
+
+        recent_start, recent_end = window_bounds_period(end_dt, recent_periods, step)
+        baseline_end = recent_start - step
+        baseline_start, _ = window_bounds_period(baseline_end, baseline_periods, step)
+        return baseline_start, baseline_end, recent_start, recent_end
+
     try:
-        end_raw = max(daily_filtered["date_only"])
-        end_date = (
-            end_raw if isinstance(end_raw, date) else pd.to_datetime(end_raw).date()
-        )
+        end_ts = pd.to_datetime(daily_filtered["date_only"], errors="coerce").max()
+        if pd.isna(end_ts):
+            return None
+        end_date = end_ts.date()
     except Exception:  # noqa: BLE001
         return None
     recent_start, recent_end = window_bounds(end_date, int(rd))
@@ -2105,16 +2173,25 @@ def _build_site_heatmap(
         return None
     baseline_start, baseline_end, recent_start, recent_end = windows
 
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
     site_daily = daily_filtered[daily_filtered["site_code"] == int(site_code)].copy()
     if site_daily.empty:
         return None
-    site_daily = site_daily.sort_values("date_only")
+    site_daily = site_daily.sort_values(time_col)
 
     dates = []
     cur = recent_start
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
     while cur <= recent_end:
         dates.append(cur)
-        cur = cur + timedelta(days=1)
+        cur = cur + step
 
     z = []
     hover = []
@@ -2133,9 +2210,10 @@
 
         sla_eval = None if policy == "notify" else sla_val
 
-        s = site_daily[["date_only", kpi]].dropna(subset=[kpi])
-        baseline_mask = (s["date_only"] >= baseline_start) & (
-            s["date_only"] <= baseline_end
+        s = site_daily[[time_col, kpi]].dropna(subset=[kpi])
+        t = pd.to_datetime(s[time_col], errors="coerce")
+        baseline_mask = (t >= pd.to_datetime(baseline_start)) & (
+            t <= pd.to_datetime(baseline_end)
         )
         baseline = s.loc[baseline_mask, kpi].median() if baseline_mask.any() else np.nan
         baseline_val = float(baseline) if pd.notna(baseline) else None
@@ -2143,7 +2221,11 @@
         row_z = []
         row_h = []
         for d in dates:
-            v_series = site_daily.loc[site_daily["date_only"] == d, kpi]
+            v_series = site_daily.loc[
+                pd.to_datetime(site_daily[time_col], errors="coerce")
+                == pd.to_datetime(d),
+                kpi,
+            ]
             v = v_series.iloc[0] if not v_series.empty else np.nan
             if v is None or (isinstance(v, float) and np.isnan(v)):
                 row_z.append(None)
@@ -2186,7 +2268,7 @@
     fig.update_layout(
         template="plotly_white",
         title=f"{rat} - Site {int(site_code)} - Recent window heatmap",
-        xaxis_title="date",
+        xaxis_title="period",
         yaxis_title="KPI",
         height=420,
         margin=dict(l=40, r=20, t=60, b=40),
@@ -2212,15 +2294,26 @@ def _build_baseline_recent_hist(
         return None
     baseline_start, baseline_end, recent_start, recent_end = windows
 
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily_filtered.columns)
+        else "date_only"
+    )
+
     site_daily = daily_filtered[daily_filtered["site_code"] == int(site_code)].copy()
     if site_daily.empty:
         return None
 
-    s = site_daily[["date_only", kpi]].dropna(subset=[kpi])
-    baseline_mask = (s["date_only"] >= baseline_start) & (
-        s["date_only"] <= baseline_end
+    s = site_daily[[time_col, kpi]].dropna(subset=[kpi])
+    t = pd.to_datetime(s[time_col], errors="coerce")
+    baseline_mask = (t >= pd.to_datetime(baseline_start)) & (
+        t <= pd.to_datetime(baseline_end)
     )
-    recent_mask = (s["date_only"] >= recent_start) & (s["date_only"] <= recent_end)
+    recent_mask = (t >= pd.to_datetime(recent_start)) & (
+        t <= pd.to_datetime(recent_end)
+    )
 
     baseline_vals = (
         pd.to_numeric(s.loc[baseline_mask, kpi], errors="coerce").dropna().astype(float)
@@ -2276,7 +2369,16 @@ def _build_corr_heatmap(
     cols = [
         c
         for c in cols
-        if c not in {"site_code", "date_only", "Longitude", "Latitude", "City", "RAT"}
+        if c
+        not in {
+            "site_code",
+            "date_only",
+            "period_start",
+            "Longitude",
+            "Latitude",
+            "City",
+            "RAT",
+        }
     ]
     cols = list(dict.fromkeys(cols))
     if len(cols) < 2:
@@ -2600,6 +2702,8 @@ def _refresh_profiles(event=None) -> None:
 def _current_profile_config() -> dict:
     cfg: dict = {}
 
+    cfg["granularity"] = str(granularity_select.value or "Daily")
+
     cfg["analysis_range"] = (
         [
             (
@@ -2653,6 +2757,13 @@ def _apply_profile_config(cfg: dict) -> None:
     _applying_profile = True
 
     try:
+        try:
+            g = str(cfg.get("granularity", "") or "").strip()
+            if g and g in list(granularity_select.options):
+                granularity_select.value = g
+        except Exception:  # noqa: BLE001
+            pass
+
         try:
             ar = cfg.get("analysis_range", [None, None])
             if isinstance(ar, (list, tuple)) and len(ar) == 2 and ar[0] and ar[1]:
@@ -2979,10 +3090,22 @@ def load_datasets(event=None) -> None:
         except Exception:  # noqa: BLE001
             id_col = None
 
-        daily, kpi_cols = build_daily_kpi(df_raw, rat)
+        daily, kpi_cols = build_period_kpi(df_raw, rat, granularity_select.value)
         current_daily_by_rat[rat] = daily
 
         d = _filtered_daily(daily)
+        periods_n = None
+        try:
+            if (
+                isinstance(d, pd.DataFrame)
+                and not d.empty
+                and "period_start" in d.columns
+            ):
+                periods_n = int(
+                    pd.to_datetime(d["period_start"], errors="coerce").nunique()
+                )
+        except Exception:  # noqa: BLE001
+            periods_n = None
         rows.append(
             {
                 "RAT": rat,
@@ -2992,6 +3115,11 @@
                 "id_col": id_col,
                 "sites": int(d["site_code"].nunique()),
                 "days": int(d["date_only"].nunique()),
+                "periods": (
+                    int(periods_n)
+                    if periods_n is not None
+                    else int(d["date_only"].nunique())
+                ),
                 "kpis": int(len(kpi_cols)),
             }
         )
@@ -3110,6 +3238,7 @@ def run_health_check(event=None) -> None:
                 int(rd),
                 float(thr),
                 int(mcd),
+                granularity=str(granularity_select.value or "Daily"),
             )
             if not status_df.empty:
                 all_status.append(status_df)
@@ -3230,6 +3359,7 @@ def _build_export_bytes() -> bytes:
         daily_by_rat=(
             current_daily_by_rat if isinstance(current_daily_by_rat, dict) else None
         ),
+        granularity=str(granularity_select.value or "Daily"),
         multirat_summary_df=(
             current_multirat_df
             if isinstance(current_multirat_df, pd.DataFrame)
@@ -3273,6 +3403,7 @@ def _export_callback() -> io.BytesIO:
 
 def _build_alert_pack_bytes() -> bytes:
     params = {
+        "granularity": str(granularity_select.value or "Daily"),
        "baseline_days": baseline_days.value,
        "recent_days": recent_days.value,
        "rel_threshold_pct": rel_threshold_pct.value,
@@ -3478,6 +3609,27 @@ def _on_drilldown_params_change(event=None) -> None:
     _refresh_validation_state()
 
 
+def _on_granularity_change(event=None) -> None:
+    if _applying_profile or _loading_datasets:
+        return
+    _invalidate_drilldown_cache(data_changed=True, healthcheck_changed=True)
+    _refresh_validation_state()
+    try:
+        has_any = bool(
+            (file_2g and file_2g.value)
+            or (file_3g and file_3g.value)
+            or (file_lte and file_lte.value)
+            or (file_twamp and file_twamp.value)
+        )
+    except Exception:  # noqa: BLE001
+        has_any = False
+    if has_any:
+        try:
+            load_datasets()
+        except Exception:  # noqa: BLE001
+            pass
+
+
 rat_select.param.watch(_on_rat_change, "value")
 kpi_group_select.param.watch(
     _on_rat_change, "value"
@@ -3498,6 +3650,7 @@ map_status_filter.param.watch(lambda e: _refresh_map_view(), "value")
 map_auto_fit.param.watch(lambda e: _refresh_map_view(), "value")
 
 analysis_range.param.watch(_on_drilldown_params_change, "value")
+granularity_select.param.watch(_on_granularity_change, "value")
 baseline_days.param.watch(_on_drilldown_params_change, "value")
 recent_days.param.watch(_on_drilldown_params_change, "value")
 rel_threshold_pct.param.watch(_on_drilldown_params_change, "value")
@@ -3586,7 +3739,12 @@ def _build_drilldown_export_bytes() -> bytes:
     if d is None or d.empty:
         return b""
 
-    s = d[d["site_code"] == int(code_int)].copy().sort_values("date_only")
+    g = str(granularity_select.value or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start" if (is_hourly and "period_start" in d.columns) else "date_only"
+    )
+    s = d[d["site_code"] == int(code_int)].copy().sort_values(time_col)
     if s.empty:
         return b""
 
@@ -3598,9 +3756,9 @@
         selected_kpis = [str(kpi_select.value)] + selected_kpis
 
     selected_kpis = [k for k in selected_kpis if k in d.columns]
-    base_cols = ["date_only"]
+    base_cols = [time_col]
     daily_cols = base_cols + selected_kpis
-    daily_out = s[daily_cols].copy() if selected_kpis else s[["date_only"]].copy()
+    daily_out = s[daily_cols].copy() if selected_kpis else s[[time_col]].copy()
 
     rules_df = (
         rules_table.value
@@ -3623,15 +3781,23 @@
         except Exception:  # noqa: BLE001
             sla_val = None
 
-        sk = s[["date_only", k]].copy()
+        sk = s[[time_col, k]].copy()
         sk[k] = pd.to_numeric(sk[k], errors="coerce")
         sk = sk.dropna(subset=[k])
 
-        baseline_mask = (sk["date_only"] >= baseline_start) & (
-            sk["date_only"] <= baseline_end
+        baseline_mask = (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            >= pd.to_datetime(baseline_start)
+        ) & (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            <= pd.to_datetime(baseline_end)
         )
-        recent_mask = (sk["date_only"] >= recent_start) & (
-            sk["date_only"] <= recent_end
+        recent_mask = (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            >= pd.to_datetime(recent_start)
+        ) & (
+            pd.to_datetime(sk[time_col], errors="coerce")
+            <= pd.to_datetime(recent_end)
         )
 
         baseline_med = (
@@ -3644,8 +3810,8 @@
         )
 
         bad_flags = []
-        recent_vals = sk.loc[recent_mask, ["date_only", k]].sort_values("date_only")
-        bad_dates: list[date] = []
+        recent_vals = sk.loc[recent_mask, [time_col, k]].sort_values(time_col)
+        bad_dates = []
         for _, r in recent_vals.iterrows():
             v = r.get(k)
             is_bad_day = bool(
@@ -3660,13 +3826,9 @@
             bad_flags.append(is_bad_day)
             if is_bad_day:
                 try:
-                    d0 = r.get("date_only")
+                    d0 = r.get(time_col)
                     if d0 is not None:
-                        bad_dates.append(
-                            d0
-                            if isinstance(d0, date)
-                            else pd.to_datetime(d0).date()
-                        )
+                        bad_dates.append(pd.to_datetime(d0, errors="coerce"))
                 except Exception:  # noqa: BLE001
                     pass
 
@@ -3680,7 +3842,16 @@
                 "baseline_median": baseline_med,
                 "recent_median": recent_med,
                 "bad_days_recent": int(sum(bad_flags)),
-                "max_streak_recent": int(max_consecutive_days(bad_dates)),
+                "max_streak_recent": int(
+                    max_consecutive_periods(
+                        bad_dates,
+                        step=(
+                            timedelta(hours=1) if is_hourly else timedelta(days=1)
+                        ),
+                    )
+                    if bad_dates
+                    else 0
+                ),
             }
         )
     summary_out = pd.DataFrame(rows)
@@ -3692,14 +3863,16 @@
     )
     if not status_df.empty:
         status_out = status_df[
-            (status_df["site_code"] == int(code)) & (status_df["RAT"] == str(rat))
+            (status_df["site_code"] == int(code_int)) & (status_df["RAT"] == str(rat))
         ].copy()
     else:
        status_out = pd.DataFrame()
 
+    g2 = str(granularity_select.value or "Daily").strip().lower()
+    data_sheet = "Hourly" if (g2.startswith("hour") or g2.startswith("h")) else "Daily"
     return write_dfs_to_excel(
         [summary_out, daily_out, status_out],
-        ["Summary", "Daily", "KPI_Status"],
+        ["Summary", data_sheet, "KPI_Status"],
         index=False,
     )
@@ -3732,6 +3905,7 @@ sidebar = pn.Column(
     sidebar,
     pn.Card(
         analysis_range,
+        granularity_select,
         pn.Row(baseline_days, recent_days, sizing_mode="stretch_width"),
         rel_threshold_pct,
         min_consecutive_days,
@@ -3757,7 +3931,6 @@
         min_criticality,
         min_anomaly_score,
         city_filter,
-        only_complaint_sites,
         top_rat_filter,
         top_status_filter,
         title="Filters",
process_kpi/kpi_health_check/engine.py CHANGED
@@ -1,9 +1,34 @@
-from datetime import date, timedelta
+from datetime import date, datetime, timedelta
 
 import numpy as np
 import pandas as pd
 
 
+def _to_timestamp(value) -> pd.Timestamp | None:
+    if value is None:
+        return None
+    if isinstance(value, pd.Timestamp):
+        return value
+    if isinstance(value, datetime):
+        return pd.Timestamp(value)
+    if isinstance(value, date):
+        return pd.Timestamp(value)
+    try:
+        v = pd.to_datetime(value, errors="coerce")
+        return v if pd.notna(v) else None
+    except Exception:  # noqa: BLE001
+        return None
+
+
+def window_bounds_period(
+    end_dt: pd.Timestamp,
+    periods: int,
+    step: timedelta,
+) -> tuple[pd.Timestamp, pd.Timestamp]:
+    start = end_dt - step * (int(periods) - 1)
+    return start, end_dt
+
+
 def window_bounds(end_date: date, days: int) -> tuple[date, date]:
     start = end_date - timedelta(days=days - 1)
     return start, end_date
@@ -34,14 +59,18 @@ def is_bad(
     return bad or (value > baseline + abs(baseline) * thr)
 
 
-def max_consecutive_days(dates: list[date]) -> int:
-    if not dates:
+def max_consecutive_periods(values: list, step: timedelta) -> int:
+    if not values:
         return 0
-    dates_sorted = sorted(set(dates))
+    ts = [_to_timestamp(v) for v in values]
+    ts2 = [t for t in ts if t is not None]
+    if not ts2:
+        return 0
+    ts_sorted = sorted(set(ts2))
     streak = 1
     best = 1
-    for prev, cur in zip(dates_sorted, dates_sorted[1:]):
-        if cur == prev + timedelta(days=1):
+    for prev, cur in zip(ts_sorted, ts_sorted[1:]):
+        if cur == prev + step:
             streak += 1
         else:
             streak = 1
@@ -50,6 +79,10 @@ def max_consecutive_days(dates: list[date]) -> int:
     return best
 
 
+def max_consecutive_days(dates: list[date]) -> int:
+    return max_consecutive_periods(dates, step=timedelta(days=1))
+
+
 def evaluate_health_check(
     daily: pd.DataFrame,
     rat: str,
@@ -58,14 +91,33 @@
     recent_days_n: int,
     rel_threshold_pct: float,
     min_consecutive_days: int,
+    granularity: str = "Daily",
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
     if daily.empty:
         return pd.DataFrame(), pd.DataFrame()
 
-    end_date = max(daily["date_only"])
-    recent_start, recent_end = window_bounds(end_date, int(recent_days_n))
-    baseline_end = recent_start - timedelta(days=1)
-    baseline_start = baseline_end - timedelta(days=int(baseline_days_n) - 1)
+    g = str(granularity or "Daily").strip().lower()
+    is_hourly = g.startswith("hour") or g.startswith("h")
+    time_col = (
+        "period_start"
+        if (is_hourly and "period_start" in daily.columns)
+        else "date_only"
+    )
+
+    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
+    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
+    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
+    min_periods = (
+        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
+    )
+
+    end_dt = _to_timestamp(daily[time_col].max())
+    if end_dt is None:
+        return pd.DataFrame(), pd.DataFrame()
+
+    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
+    baseline_end_dt = recent_start_dt - step
+    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
 
     rat_rules = rules_df[rules_df["RAT"] == rat].copy()
     kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
@@ -78,7 +130,7 @@
             if ("City" in g_site.columns and g_site["City"].notna().any())
             else None
         )
-        g_site = g_site.sort_values("date_only")
+        g_site = g_site.sort_values(time_col)
 
         for kpi in kpis:
             rule = rat_rules[rat_rules["KPI"] == kpi].iloc[0]
@@ -92,7 +144,7 @@
 
             sla_eval = None if policy == "notify" else sla_val
 
-            s = g_site[["date_only", kpi]].dropna(subset=[kpi])
+            s = g_site[[time_col, kpi]].dropna(subset=[kpi])
             if s.empty:
                 rows.append(
                     {
@@ -105,23 +157,20 @@
                 )
                 continue
 
-            baseline_mask = (s["date_only"] >= baseline_start) & (
-                s["date_only"] <= baseline_end
-            )
-            recent_mask = (s["date_only"] >= recent_start) & (
-                s["date_only"] <= recent_end
-            )
+            t = pd.to_datetime(s[time_col], errors="coerce")
+            baseline_mask = (t >= baseline_start_dt) & (t <= baseline_end_dt)
+            recent_mask = (t >= recent_start_dt) & (t <= recent_end_dt)
 
             baseline = (
                 s.loc[baseline_mask, kpi].median() if baseline_mask.any() else np.nan
             )
             recent = s.loc[recent_mask, kpi].median() if recent_mask.any() else np.nan
 
-            daily_recent = s.loc[recent_mask, ["date_only", kpi]].copy()
+            daily_recent = s.loc[recent_mask, [time_col, kpi]].copy()
             bad_dates = []
             if not daily_recent.empty:
                 for d, v in zip(
-                    daily_recent["date_only"].tolist(), daily_recent[kpi].tolist()
+                    daily_recent[time_col].tolist(), daily_recent[kpi].tolist()
                 ):
                     if is_bad(
                         float(v) if pd.notna(v) else None,
@@ -132,8 +181,8 @@
                     ):
                         bad_dates.append(d)
 
-            max_streak = max_consecutive_days(bad_dates)
-            persistent = max_streak >= int(min_consecutive_days)
+            max_streak = max_consecutive_periods(bad_dates, step=step)
+            persistent = max_streak >= int(min_periods)
 
             is_bad_recent = is_bad(
                 float(recent) if pd.notna(recent) else None,
@@ -145,7 +194,7 @@
 
             is_bad_current = is_bad_recent
             if not daily_recent.empty:
-                last_row = daily_recent.sort_values("date_only").iloc[-1]
+                last_row = daily_recent.sort_values(time_col).iloc[-1]
                 last_val = last_row[kpi]
                 is_bad_current = is_bad(
                     float(last_val) if pd.notna(last_val) else None,
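A quick check of the generalized streak helper (assuming engine.py is importable; the values and expected results are illustrative):

from datetime import timedelta
import pandas as pd

from process_kpi.kpi_health_check.engine import max_consecutive_days, max_consecutive_periods

# Three consecutive bad hours, then a gap: the hourly streak is 3.
bad = [
    pd.Timestamp("2024-03-10 01:00"),
    pd.Timestamp("2024-03-10 02:00"),
    pd.Timestamp("2024-03-10 03:00"),
    pd.Timestamp("2024-03-10 07:00"),
]
assert max_consecutive_periods(bad, step=timedelta(hours=1)) == 3
# The daily wrapper dedupes to calendar days, so the same points count as one day.
assert max_consecutive_days([t.date() for t in bad]) == 1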
process_kpi/kpi_health_check/export.py CHANGED
@@ -9,6 +9,7 @@ def build_export_bytes(
     summary_df: pd.DataFrame | None,
     status_df: pd.DataFrame | None,
     daily_by_rat: dict[str, pd.DataFrame] | None = None,
+    granularity: str = "Daily",
     multirat_summary_df: pd.DataFrame | None = None,
     top_anomalies_df: pd.DataFrame | None = None,
     complaint_multirat_df: pd.DataFrame | None = None,
@@ -32,10 +33,12 @@ def build_export_bytes(
 
     max_data_rows = 1048575
     if daily_by_rat and isinstance(daily_by_rat, dict):
+        g = str(granularity or "Daily").strip().lower()
+        prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
         for rat, df in daily_by_rat.items():
             if not isinstance(df, pd.DataFrame):
                 continue
-            base = f"Daily_All_{str(rat)}"
+            base = f"{prefix}_All_{str(rat)}"
             if len(df) <= max_data_rows:
                 dfs.append(df)
                 sheet_names.append(base[:31])
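The base[:31] truncation that the new prefix feeds into exists because Excel caps worksheet names at 31 characters. The naming logic in isolation (the helper name data_sheet_name is hypothetical):

def data_sheet_name(granularity: str, rat: str) -> str:
    g = str(granularity or "Daily").strip().lower()
    prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
    return f"{prefix}_All_{rat}"[:31]  # Excel sheet names max out at 31 chars

print(data_sheet_name("Hourly", "LTE"))  # Hourly_All_LTE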
process_kpi/kpi_health_check/normalization.py CHANGED
@@ -228,20 +228,29 @@ def load_physical_db() -> pd.DataFrame:
     return physical_db[keep].drop_duplicates("code")
 
 
-def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
+def build_period_kpi(
+    df_raw: pd.DataFrame,
+    rat: str,
+    granularity: str = "Daily",
+) -> tuple[pd.DataFrame, list[str]]:
     df = df_raw.copy()
     date_col = infer_date_col(df)
     id_col = infer_id_col(df, rat)
 
     df["date"] = parse_datetime(df[date_col])
     df = df.dropna(subset=["date"])
-    df["date_only"] = df["date"].dt.date
+
+    g = str(granularity or "Daily").strip().lower()
+    if g.startswith("hour") or g.startswith("h"):
+        df["period_start"] = df["date"].dt.floor("H")
+    else:
+        df["period_start"] = df["date"].dt.floor("D")
 
     df["site_code"] = df[id_col].apply(extract_site_code)
     df = df.dropna(subset=["site_code"])
     df["site_code"] = df["site_code"].astype(int)
 
-    meta = {date_col, id_col, "date", "date_only", "site_code"}
+    meta = {date_col, id_col, "date", "site_code", "period_start"}
     meta = meta.union(non_kpi_identifier_cols(df, rat))
     candidate_cols = [c for c in df.columns if c not in meta]
 
@@ -259,22 +268,25 @@ def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[
 
     base = pd.concat(
         [
-            df[["site_code", "date_only"]].reset_index(drop=True),
+            df[["site_code", "period_start"]].reset_index(drop=True),
             numeric_df[kpi_cols].reset_index(drop=True),
         ],
         axis=1,
     )
 
     agg_dict = {k: infer_agg(k) for k in kpi_cols}
-    daily = base.groupby(["site_code", "date_only"], as_index=False).agg(agg_dict)
+    out = base.groupby(["site_code", "period_start"], as_index=False).agg(agg_dict)
+    out["date_only"] = pd.to_datetime(out["period_start"]).dt.date
 
     physical = load_physical_db()
     if not physical.empty:
-        daily = pd.merge(
-            daily, physical, left_on="site_code", right_on="code", how="left"
-        )
-        daily = daily.drop(columns=[c for c in ["code"] if c in daily.columns])
+        out = pd.merge(out, physical, left_on="site_code", right_on="code", how="left")
+        out = out.drop(columns=[c for c in ["code"] if c in out.columns])
 
-    daily["RAT"] = rat
+    out["RAT"] = rat
 
-    return daily, kpi_cols
+    return out, kpi_cols
+
+
+def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
+    return build_period_kpi(df_raw, rat, granularity="Daily")