triflix committed on
Commit
5a52b4f
·
verified ·
1 Parent(s): ef45502

Upload 17 files

Browse files
Files changed (2) hide show
  1. app/ai_engine.py +7 -1
  2. app/knowledge.py +18 -0
app/ai_engine.py CHANGED
@@ -107,11 +107,17 @@ STRICT OUTPUT JSON ONLY:
107
 
108
  SQL RULES:
109
  - Read-only.
110
- - Always LIMIT 100.
111
  - Use `DISTINCT` for lookup questions to avoid duplicates.
112
  - Dates: parse lake_level_reading_time using `TO_TIMESTAMP(..., 'DD/MM/YYYY HH:MI AM')`.
113
  - If you use TRIM/REPLACE/regex, do it on `col::text`.
114
  - Safe numeric: `NULLIF(REPLACE(TRIM(col::text), ',', ''), '')::numeric`.
 
 
 
 
 
 
115
  """
116
 
117
  def _system_prompt(self, schema_context: str, rag_context: str) -> str:
 
107
 
108
  SQL RULES:
109
  - Read-only.
110
+ - Use LIMIT 100 for lookups/inventory only. For complete time-bounded daily/monthly trend queries, do NOT use LIMIT.
111
  - Use `DISTINCT` for lookup questions to avoid duplicates.
112
  - Dates: parse lake_level_reading_time using `TO_TIMESTAMP(..., 'DD/MM/YYYY HH:MI AM')`.
113
  - If you use TRIM/REPLACE/regex, do it on `col::text`.
114
  - Safe numeric: `NULLIF(REPLACE(TRIM(col::text), ',', ''), '')::numeric`.
115
+
116
+ PRODUCTION RULES (WRD/audit-grade):
117
+ - Storage/level/"usage" (live storage) are state values. If user asks a daily trend ("each day", "daily", a month like "May 2023"), do NOT use AVG.
118
+ - Instead select the latest reading per day using `ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) = 1`.
119
+ - Avoid repeated `TO_TIMESTAMP` calls: parse once in a CTE (`parsed`).
120
+ - Prefer half-open ranges for month windows: `ts >= 'YYYY-MM-01' AND ts < 'YYYY-MM-01' + INTERVAL '1 month'`.
121
  """
122
 
123
  def _system_prompt(self, schema_context: str, rag_context: str) -> str:
app/knowledge.py CHANGED
@@ -43,6 +43,18 @@ KB_CONFIG: list[dict[str, Any]] = [
43
  "context_note": "For trends, select the date column (casted) and the metric. Order by date ASC.",
44
  "sql_template": "NOTE_ONLY",
45
  },
 
 
 
 
 
 
 
 
 
 
 
 
46
  {
47
  "category": "intent_inventory",
48
  "keywords": ["inventory", "details", "show all", "table", "gated", "non-gated"],
@@ -61,6 +73,12 @@ KB_CONFIG: list[dict[str, Any]] = [
61
  "context_note": "To fetch the latest reading for a dam, order by parsed timestamp DESC and LIMIT 1.",
62
  "sql_template": "SELECT * FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%' ORDER BY TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') DESC LIMIT 1;",
63
  },
 
 
 
 
 
 
64
  {
65
  "category": "playbook_districtwise_stock",
66
  "keywords": ["district-wise", "useful water stock", "projected", "designed", "current", "last year"],
 
43
  "context_note": "For trends, select the date column (casted) and the metric. Order by date ASC.",
44
  "sql_template": "NOTE_ONLY",
45
  },
46
+ {
47
+ "category": "rules_state_timeseries",
48
+ "keywords": ["trend", "daily", "each day", "time series", "live storage", "lake level"],
49
+ "context_note": "Storage/level are state values. For daily trends, use the latest reading per day (ROW_NUMBER() OVER (PARTITION BY date ORDER BY ts DESC)=1). Do NOT use AVG for daily values.",
50
+ "sql_template": "NOTE_ONLY",
51
+ },
52
+ {
53
+ "category": "rules_limit_safety",
54
+ "keywords": ["may", "month", "between", "date range", "from", "to"],
55
+ "context_note": "Do not use LIMIT for strictly time-bounded daily/monthly trend queries (it can silently truncate days). LIMIT is fine for lookups/inventory but not for complete date windows.",
56
+ "sql_template": "NOTE_ONLY",
57
+ },
58
  {
59
  "category": "intent_inventory",
60
  "keywords": ["inventory", "details", "show all", "table", "gated", "non-gated"],
 
73
  "context_note": "To fetch the latest reading for a dam, order by parsed timestamp DESC and LIMIT 1.",
74
  "sql_template": "SELECT * FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%' ORDER BY TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') DESC LIMIT 1;",
75
  },
76
+ {
77
+ "category": "playbook_daily_state_trend",
78
+ "keywords": ["trend", "daily", "may", "month", "each day", "live storage", "tmc", "mcum", "lake level"],
79
+ "context_note": "Canonical daily trend for state metrics: parse timestamp once, compute report_date, rank rows per day by ts DESC and pick rn=1.",
80
+ "sql_template": "WITH parsed AS (SELECT TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM') AS ts, TO_TIMESTAMP(lake_level_reading_time, 'DD/MM/YYYY HH:MI AM')::date AS report_date, NULLIF(REPLACE(TRIM(live_storage_tmc::text), ',', ''), '')::numeric AS value FROM reservoir_reports WHERE name_of_dam ILIKE '%{dam_name}%'), ranked AS (SELECT report_date, value, ROW_NUMBER() OVER (PARTITION BY report_date ORDER BY ts DESC) AS rn FROM parsed WHERE ts >= TO_DATE('{start}','YYYY-MM-DD') AND ts < TO_DATE('{end}','YYYY-MM-DD') AND value IS NOT NULL) SELECT report_date AS date, value FROM ranked WHERE rn = 1 ORDER BY report_date;",
81
+ },
82
  {
83
  "category": "playbook_districtwise_stock",
84
  "keywords": ["district-wise", "useful water stock", "projected", "designed", "current", "last year"],