Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -157,8 +157,19 @@ def search():
|
|
| 157 |
return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False})
|
| 158 |
|
| 159 |
try:
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
categorized_articles = {}
|
| 164 |
for article in enriched_articles:
|
|
@@ -174,6 +185,7 @@ def search():
|
|
| 174 |
logger.error(f"Semantic search error: {e}", exc_info=True)
|
| 175 |
return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False}), 500
|
| 176 |
|
|
|
|
| 177 |
@app.route('/get_all_articles/<category>')
|
| 178 |
def get_all_articles(category):
|
| 179 |
try:
|
|
|
|
| 157 |
return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False})
|
| 158 |
|
| 159 |
try:
|
| 160 |
+
# 1. Use similarity_search_with_score to get the raw distance score.
|
| 161 |
+
# This returns a list of (Document, float) tuples.
|
| 162 |
+
results_with_scores = vector_db.similarity_search_with_score(query, k=50)
|
| 163 |
+
|
| 164 |
+
# 2. Filter based on the raw L2 distance score. Lower is better.
|
| 165 |
+
# A threshold of 1.0 is a good starting point. You can make it smaller (e.g., 0.8)
|
| 166 |
+
# for stricter matches, or larger for looser matches.
|
| 167 |
+
score_threshold = 1.0
|
| 168 |
+
filtered_results = [(doc, score) for doc, score in results_with_scores if score < score_threshold]
|
| 169 |
+
|
| 170 |
+
# 3. Pass the correctly filtered list to the formatting function.
|
| 171 |
+
# This function is already set up to handle this data structure.
|
| 172 |
+
enriched_articles = format_articles_from_db_results(filtered_results)
|
| 173 |
|
| 174 |
categorized_articles = {}
|
| 175 |
for article in enriched_articles:
|
|
|
|
| 185 |
logger.error(f"Semantic search error: {e}", exc_info=True)
|
| 186 |
return jsonify({"categorized_articles": {}, "has_articles": False, "loading": False}), 500
|
| 187 |
|
| 188 |
+
|
| 189 |
@app.route('/get_all_articles/<category>')
|
| 190 |
def get_all_articles(category):
|
| 191 |
try:
|