Spaces:

triflix
/

pravah

Sleeping

App Files Community

triflix committed 14 days ago

Commit

5a52b4f

verified ·

1 Parent(s): ef45502

Upload 17 files

Browse files

Files changed (2) hide show

app/ai_engine.py +7 -1
app/knowledge.py +18 -0

app/ai_engine.py CHANGED Viewed

@@ -107,11 +107,17 @@ STRICT OUTPUT JSON ONLY:
 SQL RULES:
 - Read-only.
-- Always LIMIT 100.
 - Use `DISTINCT` for lookup questions to avoid duplicates.
 - Dates: parse lake_level_reading_time using `TO_TIMESTAMP(..., 'DD/MM/YYYY HH:MI AM')`.
 - If you use TRIM/REPLACE/regex, do it on `col::text`.
 - Safe numeric: `NULLIF(REPLACE(TRIM(col::text), ',', ''), '')::numeric`.
 """
     def _system_prompt(self, schema_context: str, rag_context: str) -> str:

 SQL RULES:
 - Read-only.
+- Use LIMIT 100 for lookups/inventory only. For complete time-bounded daily/monthly trend queries, do NOT use LIMIT.
 - Use `DISTINCT` for lookup questions to avoid duplicates.
 - Dates: parse lake_level_reading_time using `TO_TIMESTAMP(..., 'DD/MM/YYYY HH:MI AM')`.
 - If you use TRIM/REPLACE/regex, do it on `col::text`.
 - Safe numeric: `NULLIF(REPLACE(TRIM(col::text), ',', ''), '')::numeric`.
+PRODUCTION RULES (WRD/audit-grade):
+- Storage/level/"usage" (live storage) are state values. If user asks a daily trend ("each day", "daily", a month like "May 2023"), do NOT use AVG.
+- Instead select the latest reading per day using `ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) = 1`.
+- Avoid repeated `TO_TIMESTAMP` calls: parse once in a CTE (`parsed`).
+- Prefer half-open ranges for month windows: `ts >= 'YYYY-MM-01' AND ts < 'YYYY-MM-01' + INTERVAL '1 month'`.
 """
     def _system_prompt(self, schema_context: str, rag_context: str) -> str:

app/knowledge.py CHANGED Viewed

@@ -43,6 +43,18 @@ KB_CONFIG: list[dict[str, Any]] = [
         "context_note": "For trends, select the date column (casted) and the metric. Order by date ASC.",
         "sql_template": "NOTE_ONLY",
     },
     {
         "category": "intent_inventory",
         "keywords": ["inventory", "details", "show all", "table", "gated", "non-gated"],
@@ -61,6 +73,12 @@ KB_CONFIG: list[dict[str, Any]] = [
         "context_note": "To fetch the latest reading for a dam, order by parsed timestamp DESC and LIMIT 1.",
         "sql_template": "SELECT * FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%' ORDER BY TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') DESC LIMIT 1;",
     },
     {
         "category": "playbook_districtwise_stock",
         "keywords": ["district-wise", "useful water stock", "projected", "designed", "current", "last year"],

         "context_note": "For trends, select the date column (casted) and the metric. Order by date ASC.",
         "sql_template": "NOTE_ONLY",
     },
+    {
+        "category": "rules_state_timeseries",
+        "keywords": ["trend", "daily", "each day", "time series", "live storage", "lake level"],
+        "context_note": "Storage/level are state values. For daily trends, use the latest reading per day (ROW_NUMBER() OVER (PARTITION BY date ORDER BY ts DESC)=1). Do NOT use AVG for daily values.",
+        "sql_template": "NOTE_ONLY",
+    },
+    {
+        "category": "rules_limit_safety",
+        "keywords": ["may", "month", "between", "date range", "from", "to"],
+        "context_note": "Do not use LIMIT for strictly time-bounded daily/monthly trend queries (it can silently truncate days). LIMIT is fine for lookups/inventory but not for complete date windows.",
+        "sql_template": "NOTE_ONLY",
+    },
     {
         "category": "intent_inventory",
         "keywords": ["inventory", "details", "show all", "table", "gated", "non-gated"],
         "context_note": "To fetch the latest reading for a dam, order by parsed timestamp DESC and LIMIT 1.",
         "sql_template": "SELECT * FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%' ORDER BY TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') DESC LIMIT 1;",
     },
+    {
+        "category": "playbook_daily_state_trend",
+        "keywords": ["trend", "daily", "may", "month", "each day", "live storage", "tmc", "mcum", "lake level"],
+        "context_note": "Canonical daily trend for state metrics: parse timestamp once, compute report_date, rank rows per day by ts DESC and pick rn=1.",
+        "sql_template": "WITH parsed AS (SELECT TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') AS ts, TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM')::date AS report_date, NULLIF(REPLACE(TRIM(live_storage_tmc::text), ',', ''), '')::numeric AS value FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%'), ranked AS (SELECT report_date, value, ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) AS rn FROM parsed WHERE ts >= TO_DATE('{start}','YYYY-MM-DD') AND ts < TO_DATE('{end}','YYYY-MM-DD') AND value IS NOT NULL) SELECT report_date AS date, value FROM ranked WHERE rn = 1 ORDER BY report_date;",
+    },
     {
         "category": "playbook_districtwise_stock",
         "keywords": ["district-wise", "useful water stock", "projected", "designed", "current", "last year"],