Upload 17 files

Files changed:
- app/ai_engine.py (+7 -1)
- app/knowledge.py (+18 -0)

app/ai_engine.py (CHANGED)
@@ -107,11 +107,17 @@ STRICT OUTPUT JSON ONLY:

 SQL RULES:
 - Read-only.
--
+- Use LIMIT 100 for lookups/inventory only. For complete time-bounded daily/monthly trend queries, do NOT use LIMIT.
 - Use `DISTINCT` for lookup questions to avoid duplicates.
 - Dates: parse lake_level_reading_time using `TO_TIMESTAMP(..., 'DD/MM/YYYY HH:MI AM')`.
 - If you use TRIM/REPLACE/regex, do it on `col::text`.
 - Safe numeric: `NULLIF(REPLACE(TRIM(col::text), ',', ''), '')::numeric`.
+
+PRODUCTION RULES (WRD/audit-grade):
+- Storage/level/"usage" (live storage) are state values. If user asks a daily trend ("each day", "daily", a month like "May 2023"), do NOT use AVG.
+- Instead select the latest reading per day using `ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) = 1`.
+- Avoid repeated `TO_TIMESTAMP` calls: parse once in a CTE (`parsed`).
+- Prefer half-open ranges for month windows: `ts >= 'YYYY-MM-01' AND ts < 'YYYY-MM-01' + INTERVAL '1 month'`.
 """

 def _system_prompt(self, schema_context: str, rag_context: str) -> str:
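The four PRODUCTION RULES all describe one query shape. The sketch below spells that shape out as runnable PostgreSQL, using the reservoir_reports table and the column names that appear in the knowledge.py templates in this same commit; the dam name ('Koyna') and the May 2023 window are hypothetical illustration values, not part of the change.

-- Sketch only: the query shape the new PRODUCTION RULES ask for (assumed dam/month values).
-- Wrong for a daily trend: SELECT report_date, AVG(value) ... GROUP BY report_date
-- Right: latest reading per day, timestamp parsed once, over a half-open month window.
WITH parsed AS (
    SELECT
        TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') AS ts,
        TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM')::date AS report_date,
        NULLIF(REPLACE(TRIM(live_storage_tmc::text), ',', ''), '')::numeric AS value
    FROM reservoir_reports
    WHERE name_of_dam ILIKE '%Koyna%'                         -- hypothetical dam name
),
ranked AS (
    SELECT report_date, value,
           ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) AS rn
    FROM parsed
    WHERE ts >= DATE '2023-05-01'
      AND ts <  DATE '2023-05-01' + INTERVAL '1 month'        -- half-open month window
      AND value IS NOT NULL
)
SELECT report_date AS date, value
FROM ranked
WHERE rn = 1                                                  -- latest reading per day, not AVG
ORDER BY report_date;                                         -- no LIMIT: the window bounds the result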
app/knowledge.py (CHANGED)
@@ -43,6 +43,18 @@ KB_CONFIG: list[dict[str, Any]] = [
         "context_note": "For trends, select the date column (casted) and the metric. Order by date ASC.",
         "sql_template": "NOTE_ONLY",
     },
+    {
+        "category": "rules_state_timeseries",
+        "keywords": ["trend", "daily", "each day", "time series", "live storage", "lake level"],
+        "context_note": "Storage/level are state values. For daily trends, use the latest reading per day (ROW_NUMBER() OVER (PARTITION BY date ORDER BY ts DESC)=1). Do NOT use AVG for daily values.",
+        "sql_template": "NOTE_ONLY",
+    },
+    {
+        "category": "rules_limit_safety",
+        "keywords": ["may", "month", "between", "date range", "from", "to"],
+        "context_note": "Do not use LIMIT for strictly time-bounded daily/monthly trend queries (it can silently truncate days). LIMIT is fine for lookups/inventory but not for complete date windows.",
+        "sql_template": "NOTE_ONLY",
+    },
     {
         "category": "intent_inventory",
         "keywords": ["inventory", "details", "show all", "table", "gated", "non-gated"],
@@ -61,6 +73,12 @@ KB_CONFIG: list[dict[str, Any]] = [
         "context_note": "To fetch the latest reading for a dam, order by parsed timestamp DESC and LIMIT 1.",
         "sql_template": "SELECT * FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%' ORDER BY TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') DESC LIMIT 1;",
     },
+    {
+        "category": "playbook_daily_state_trend",
+        "keywords": ["trend", "daily", "may", "month", "each day", "live storage", "tmc", "mcum", "lake level"],
+        "context_note": "Canonical daily trend for state metrics: parse timestamp once, compute report_date, rank rows per day by ts DESC and pick rn=1.",
+        "sql_template": "WITH parsed AS (SELECT TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') AS ts, TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM')::date AS report_date, NULLIF(REPLACE(TRIM(live_storage_tmc::text), ',', ''), '')::numeric AS value FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%'), ranked AS (SELECT report_date, value, ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) AS rn FROM parsed WHERE ts >= TO_DATE('{start}','YYYY-MM-DD') AND ts < TO_DATE('{end}','YYYY-MM-DD') AND value IS NOT NULL) SELECT report_date AS date, value FROM ranked WHERE rn = 1 ORDER BY report_date;",
+    },
     {
         "category": "playbook_districtwise_stock",
         "keywords": ["district-wise", "useful water stock", "projected", "designed", "current", "last year"],
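For readability, here is the playbook_daily_state_trend sql_template from the hunk above, reflowed as multi-line SQL; the logic and the {dam_name}, {start}, {end} placeholders are unchanged. Note the window is half-open, matching the rule added to ai_engine.py: {start} is inclusive and {end} is exclusive, so a "May 2023" request would be filled with start 2023-05-01 and end 2023-06-01 (how the placeholders get filled is up to the caller; those dates are just an example).

-- playbook_daily_state_trend sql_template, reflowed only; placeholders left as-is.
WITH parsed AS (
    SELECT
        TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') AS ts,
        TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM')::date AS report_date,
        NULLIF(REPLACE(TRIM(live_storage_tmc::text), ',', ''), '')::numeric AS value
    FROM reservoir_reports
    WHERE name_of_dam ILIKE '%{dam_name}%'
),
ranked AS (
    SELECT report_date, value,
           ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) AS rn
    FROM parsed
    WHERE ts >= TO_DATE('{start}', 'YYYY-MM-DD')
      AND ts <  TO_DATE('{end}', 'YYYY-MM-DD')   -- {end} is exclusive (half-open window)
      AND value IS NOT NULL
)
SELECT report_date AS date, value
FROM ranked
WHERE rn = 1
ORDER BY report_date;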