adding f4nb parser
- Changelog.md +4 -0
- app.py +2 -1
- apps/fnb_parser.py +324 -0
- requirements.txt +0 -0
Changelog.md
CHANGED

@@ -1,6 +1,10 @@
 
 # CHANGELOGS
 
+## [0.2.11] - 2025-07-04
+
+- Add FNB parser App
+
 ## [0.2.10] - 2025-07-01
 
 - Add KPI analysis App
app.py
CHANGED

@@ -108,7 +108,7 @@ if check_password():
         layout="wide",
         initial_sidebar_state="expanded",
         menu_items={
-            "About": "**📡 NPO DB Query v0.2.10**",
+            "About": "**📡 NPO DB Query v0.2.11**",
         },
     )
 
@@ -133,6 +133,7 @@ if check_password():
             "apps/clustering.py",
             title="📡 Automatic Site Clustering",
         ),
+        st.Page("apps/fnb_parser.py", title="📄 F4NB Extractor"),
         st.Page(
             "apps/import_physical_db.py", title="🌏Physical Database Verification"
         ),
apps/fnb_parser.py
ADDED
@@ -0,0 +1,324 @@
"""
Streamlit application for extracting site and sector information from .docx design files.

The logic is adapted from `Sector Stacked.py` but provides an interactive UI where users can
upload one or many Word documents and instantly visualise / download the results.
"""

import io
import os
import re
from typing import List

import pandas as pd
import plotly.express as px
import streamlit as st
from docx import Document

###############################################################################
# --------------------------- Core extract logic -------------------------- #
###############################################################################


def extract_info_from_docx_separated_sectors(
    docx_bytes: bytes, filename: str
) -> List[dict]:
    """Extract the site-level and sector-level information from a Word design file.

    Parameters
    ----------
    docx_bytes : bytes
        Raw bytes of the `.docx` file – read directly from the Streamlit uploader.
    filename : str
        Original filename. Used only for reference in the output.

    Returns
    -------
    list[dict]
        A list containing up to three dictionaries – one for each sector.
    """
    # python-docx can open a file-like object, so we wrap the bytes in BytesIO
    doc = Document(io.BytesIO(docx_bytes))

    # Shared site information
    site_shared = {
        "File": filename,
        "Code": None,
        "Site Name": None,
        "Localité": None,
        "Adresse": None,
        "X": None,
        "Y": None,
        "Z": None,
        "UTM_Zone": None,
    }

    # Per-sector placeholders (we assume max 3 sectors)
    sector_data = {
        "Azimuth": [None] * 3,
        "Height": [None] * 3,
        "MechTilt": [None] * 3,
        "ElecTilt": [None] * 3,
    }

    # Iterate tables / rows / cells once, filling the data structures
    for table in doc.tables:
        for row in table.rows:
            # Drop empty cells and surrounding whitespace
            cells = [cell.text.strip() for cell in row.cells if cell.text.strip()]
            if not cells:
                continue

            row_text_lower = " | ".join(cells).lower()

            # Code (assumes pattern "T00" / "N01" typical of site codes)
            if site_shared["Code"] is None and any("code" in c.lower() for c in cells):
                for val in cells:
                    if ("t00" in val.lower()) or ("n01" in val.lower()):
                        site_shared["Code"] = val.replace(" ", "").strip()
                        break

            # Site Name – same heuristic as the original script
            if site_shared["Site Name"] is None and any(
                "nom" in c.lower() for c in cells
            ):
                for val in cells:
                    if ("t00" in val.lower()) or ("n01" in val.lower()):
                        site_shared["Site Name"] = val.strip()
                        break

            # UTM Zone
            if site_shared["UTM_Zone"] is None:
                utm_match = re.search(r"utm\s*(\d+)", row_text_lower)
                if utm_match:
                    site_shared["UTM_Zone"] = f"UTM{utm_match.group(1)}"

            # Localité and Adresse
            if site_shared["Localité"] is None and any(
                "localité" in c.lower() for c in cells
            ):
                for val in cells:
                    if val.lower() != "localité:":
                        site_shared["Localité"] = val.strip()
                        break
            if site_shared["Adresse"] is None and any(
                "adresse" in c.lower() for c in cells
            ):
                for val in cells:
                    if val.lower() != "adresse:":
                        site_shared["Adresse"] = val.strip()
                        break

            # Coordinates (X, Y, Z)
            if {"X", "Y", "Z"}.intersection(cells):
                for i, cell_text in enumerate(cells):
                    text = cell_text.strip()
                    # X coordinate
                    if text == "X" and i + 1 < len(cells):
                        site_shared["X"] = cells[i + 1].strip()
                    # Y coordinate – could be in the same cell, e.g. "Y 123", or split
                    elif re.search(r"Y\s*[0-9]", text):
                        match = re.search(r"Y\s*([0-9°'\.\sWE]+)", text)
                        if match:
                            site_shared["Y"] = match.group(1).strip()
                    elif text == "Y" and i + 1 < len(cells):
                        site_shared["Y"] = cells[i + 1].strip()
                    # Z / Elevation
                    elif re.search(r"Z\s*[0-9]", text):
                        match = re.search(r"Z\s*([0-9]+)", text)
                        if match:
                            site_shared["Z"] = match.group(1).strip()
                    elif text == "Z" and i + 1 < len(cells):
                        z_val = re.search(r"([0-9]+)", cells[i + 1])
                        if z_val:
                            site_shared["Z"] = z_val.group(1).strip()

            # Sector-specific lines
            first_cell = cells[0].lower()
            if first_cell == "azimut":
                for i in range(min(3, len(cells) - 1)):
                    sector_data["Azimuth"][i] = cells[i + 1]
            elif "hauteur des aériens" in first_cell:
                for i in range(min(3, len(cells) - 1)):
                    sector_data["Height"][i] = cells[i + 1]
            elif "tilt mécanique" in first_cell:
                for i in range(min(3, len(cells) - 1)):
                    sector_data["MechTilt"][i] = cells[i + 1]
            elif "tilt électrique" in first_cell:
                for i in range(min(3, len(cells) - 1)):
                    sector_data["ElecTilt"][i] = cells[i + 1]

    # Convert to per-sector rows
    rows: List[dict] = []
    for sector_id in range(3):
        if sector_data["Azimuth"][sector_id]:
            rows.append(
                {
                    **site_shared,
                    "Sector ID": sector_id + 1,
                    "Azimuth": sector_data["Azimuth"][sector_id],
                    "Height": sector_data["Height"][sector_id],
                    "MechTilt": sector_data["MechTilt"][sector_id],
                    "ElecTilt": sector_data["ElecTilt"][sector_id],
                }
            )
    return rows

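# Illustrative shape of one extracted row (all values below are hypothetical –
# the real content depends entirely on the uploaded document):
# {"File": "design.docx", "Code": "T00123", "Site Name": "T00123 Example",
#  "Localité": "...", "Adresse": "...", "X": "...", "Y": "...", "Z": "...",
#  "UTM_Zone": "UTM30", "Sector ID": 1, "Azimuth": "120", "Height": "30",
#  "MechTilt": "2", "ElecTilt": "4"}
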
def convert_coord_to_decimal(coord: str, default_direction: str | None = None):
    """Convert coordinate strings containing degrees/minutes/seconds to decimal degrees.

    Handles various formats, e.g. "3° 33' 12.4\" W", "3 33 12.4 O", "-3.5534", "3.5534E".
    West (W/O) or South (S) are returned as negative values.
    Returns None if conversion fails.
    """

    if coord is None or (isinstance(coord, float) and pd.isna(coord)):
        return None

    # Normalise the string – unify the decimal separator and strip spaces
    text = str(coord).replace(",", ".").strip()
    if not text:
        return None

    # Detect hemisphere / direction letters
    direction = None
    match_dir = re.search(r"([NSEWnsewOo])", text)
    if match_dir:
        direction = match_dir.group(1).upper()
        text = text.replace(match_dir.group(1), "")  # remove letter for numeric parsing
    else:
        # No explicit letter – use the supplied default if provided
        if default_direction is not None:
            direction = default_direction.upper()

    # Grab all numeric components
    nums = re.findall(r"[-+]?(?:\d+\.?\d*)", text)
    if not nums:
        return None

    # Convert strings to float
    nums_f = [float(n) for n in nums]

    # Determine the decimal value depending on how many components we have
    if len(nums_f) >= 3:
        deg, minute, sec = nums_f[0], nums_f[1], nums_f[2]
        dec = deg + minute / 60 + sec / 3600
    elif len(nums_f) == 2:
        deg, minute = nums_f[0], nums_f[1]
        dec = deg + minute / 60
    else:  # Already decimal degrees
        dec = nums_f[0]

    # Apply the sign for West/Ouest/South
    if direction in {"W", "O", "S"}:  # West/Ouest or South => negative
        dec = -abs(dec)

    return dec

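# Worked example of the conversion above (illustrative): "3° 33' 12.4\" W" yields
# direction "W" and numeric components [3, 33, 12.4], so
# dec = 3 + 33/60 + 12.4/3600 ≈ 3.5534, negated for West to give ≈ -3.5534.
# A plain "3.5534E" keeps its sign, since East is positive.
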
def process_files_to_dataframe(uploaded_files) -> pd.DataFrame:
    """Run extraction on the uploaded files and return a concatenated dataframe."""
    all_rows: List[dict] = []
    for uploaded in uploaded_files:
        rows = extract_info_from_docx_separated_sectors(uploaded.read(), uploaded.name)
        all_rows.extend(rows)
    df = pd.DataFrame(all_rows)

    # Add decimal conversion for X and Y
    if not df.empty and {"X", "Y"}.issubset(df.columns):
        df["X_decimal"] = df["X"].apply(
            lambda c: convert_coord_to_decimal(c, default_direction="N")
        )
        df["Y_decimal"] = df["Y"].apply(
            lambda c: convert_coord_to_decimal(c, default_direction="W")
        )

    return df

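# Resulting columns when extraction succeeds: File, Code, Site Name, Localité,
# Adresse, X, Y, Z, UTM_Zone, Sector ID, Azimuth, Height, MechTilt, ElecTilt,
# plus the derived X_decimal / Y_decimal.
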
###############################################################################
# ----------------------------- Streamlit UI ------------------------------ #
###############################################################################


def main() -> None:
    st.set_page_config(
        page_title="F4NB Extractor to Excel", page_icon="📄", layout="wide"
    )

    st.title("📄 F4NB Extractor to Excel")
    st.markdown(
        "Convert F4NB Word documents into a tidy Excel / DataFrame containing site & sector information.\n"
        "Upload one or many F4NB `.docx` files and hit **Process**."
    )

    st.subheader("Upload Files")
    uploaded_files = st.file_uploader(
        "Select one or more F4NB `.docx` files",
        type=["docx"],
        accept_multiple_files=True,
    )
    process_btn = st.button("Process", type="primary", disabled=not uploaded_files)

    if process_btn and uploaded_files:
        with st.spinner("Extracting information…"):
            df = process_files_to_dataframe(uploaded_files)

        if df.empty:
            st.warning(
                "No data extracted. Check that the files conform to the expected format."
            )
            return

        st.success(
            f"Processed {len(uploaded_files)} file(s) – extracted {len(df)} sector rows."
        )
        st.dataframe(df, use_container_width=True)

        # Interactive map of extracted coordinates using Plotly
        if {"Y_decimal", "X_decimal"}.issubset(df.columns):
            geo_cols = ["Y_decimal", "X_decimal", "Site Name", "Code"]
            if "Height" in df.columns:
                geo_cols.append("Height")  # drives the marker size below
            geo_df = (
                df[geo_cols]
                .dropna(subset=["Y_decimal", "X_decimal"])  # keep rows with coordinates
                .rename(columns={"Y_decimal": "Longitude", "X_decimal": "Latitude"})
                .assign(
                    Size=lambda d: (
                        pd.to_numeric(d["Height"], errors="coerce").fillna(10)
                        if "Height" in d.columns
                        else 10
                    )
                )
            )
            if not geo_df.empty:
                st.subheader("🗺️ Site Locations")
                fig = px.scatter_map(
                    geo_df,
                    lat="Latitude",
                    lon="Longitude",
                    hover_name="Site Name",
                    hover_data={"Code": True},
                    size="Size",
                    size_max=10,
                    zoom=6,
                    height=500,
                )
                fig.update_layout(
                    # px.scatter_map draws on the MapLibre-based "map" subplot,
                    # so the style key is map_style (mapbox_style only affects
                    # the older scatter_mapbox traces).
                    map_style="open-street-map",
                    margin={"r": 0, "t": 0, "l": 0, "b": 0},
                )
                st.plotly_chart(fig, use_container_width=True)

        # Offer download as Excel
        buffer = io.BytesIO()
        with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
            df.to_excel(writer, index=False, sheet_name="Extract")
        st.download_button(
            label="💾 Download Excel",
            data=buffer.getvalue(),
            file_name="extracted_fnb.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )


if __name__ == "__main__":  # pragma: no cover
    main()
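For a quick sanity check outside Streamlit, the extraction pipeline can also be driven headlessly. This is only a sketch (hypothetical input filename; it assumes the repo root is on PYTHONPATH and that python-docx, pandas and an Excel engine such as xlsxwriter are installed):

    import pandas as pd

    from apps.fnb_parser import (
        convert_coord_to_decimal,
        extract_info_from_docx_separated_sectors,
    )

    # Hypothetical file name – substitute a real F4NB design document.
    with open("F4NB_example.docx", "rb") as fh:
        rows = extract_info_from_docx_separated_sectors(fh.read(), "F4NB_example.docx")

    df = pd.DataFrame(rows)
    if not df.empty:
        df["X_decimal"] = df["X"].apply(lambda c: convert_coord_to_decimal(c, "N"))
        df["Y_decimal"] = df["Y"].apply(lambda c: convert_coord_to_decimal(c, "W"))
        df.to_excel("extracted_fnb.xlsx", index=False, sheet_name="Extract")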
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ