Update pipeline.py
Browse files- pipeline.py +106 -116
pipeline.py
CHANGED
|
@@ -1109,143 +1109,133 @@ class UltraRobustCallAnalytics:
|
|
| 1109 |
torch.cuda.empty_cache()
|
| 1110 |
|
| 1111 |
def _map_emotion_to_sentiment(self, emotion):
|
| 1112 |
-
|
| 1113 |
-
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
|
| 1117 |
-
|
| 1118 |
-
}
|
| 1119 |
-
|
| 1120 |
-
negative_emotions = {
|
| 1121 |
-
'sad': -0.6, 'angry': -0.9, 'frustrated': -0.8,
|
| 1122 |
-
'annoyed': -0.7, 'disappointed': -0.65, 'upset': -0.75
|
| 1123 |
-
}
|
| 1124 |
-
|
| 1125 |
-
if emotion_lower in positive_emotions:
|
| 1126 |
-
return {
|
| 1127 |
-
"sentiment": "positive",
|
| 1128 |
-
"polarity_score": positive_emotions[emotion_lower],
|
| 1129 |
-
"confidence": "high"
|
| 1130 |
}
|
| 1131 |
-
|
| 1132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1133 |
return {
|
| 1134 |
-
"sentiment": "
|
| 1135 |
-
"polarity_score":
|
| 1136 |
-
"confidence": "
|
| 1137 |
}
|
| 1138 |
-
|
| 1139 |
-
return {
|
| 1140 |
-
"sentiment": "neutral",
|
| 1141 |
-
"polarity_score": 0.0,
|
| 1142 |
-
"confidence": "medium"
|
| 1143 |
-
}
|
| 1144 |
|
| 1145 |
def _calculate_speech_rate(self, text, duration_seconds):
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
"speech_pace": pace
|
| 1163 |
-
}
|
| 1164 |
|
| 1165 |
def _extract_keywords(self, text, top_n=5):
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
try:
|
| 1171 |
-
keywords = self.keyword_model.extract_keywords(
|
| 1172 |
-
text,
|
| 1173 |
-
keyphrase_ngram_range=(1, 2),
|
| 1174 |
-
stop_words='english',
|
| 1175 |
-
top_n=top_n,
|
| 1176 |
-
use_maxsum=True,
|
| 1177 |
-
nr_candidates=20
|
| 1178 |
-
)
|
| 1179 |
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
|
| 1187 |
|
| 1188 |
def _classify_topic(self, text):
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
| 1196 |
-
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
except:
|
| 1205 |
-
return {"topic": "unknown", "confidence": 0.0}
|
| 1206 |
|
| 1207 |
|
| 1208 |
def _aggregate_call_insights(self, results):
|
| 1209 |
-
|
| 1210 |
-
|
| 1211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1212 |
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
| 1217 |
-
for kw in seg['keywords']:
|
| 1218 |
-
keyword = kw['keyword']
|
| 1219 |
-
score = kw['relevance']
|
| 1220 |
-
all_keywords[keyword] = max(all_keywords.get(keyword, 0), score)
|
| 1221 |
-
|
| 1222 |
-
top_keywords = [
|
| 1223 |
-
{"keyword": k, "relevance": round(v, 3)}
|
| 1224 |
-
for k, v in sorted(all_keywords.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 1225 |
-
]
|
| 1226 |
|
| 1227 |
# Aggregate topics
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
|
| 1238 |
# Calculate stats
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
|
| 1250 |
|
| 1251 |
if __name__ == "__main__":
|
|
|
|
| 1109 |
torch.cuda.empty_cache()
|
| 1110 |
|
| 1111 |
def _map_emotion_to_sentiment(self, emotion):
|
| 1112 |
+
"""Map emotion labels to sentiment with polarity score"""
|
| 1113 |
+
emotion_lower = emotion.lower()
|
| 1114 |
+
|
| 1115 |
+
positive_emotions = {
|
| 1116 |
+
'happy': 0.8, 'joy': 0.9, 'excited': 0.85,
|
| 1117 |
+
'pleased': 0.7, 'satisfied': 0.75, 'content': 0.6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1118 |
}
|
| 1119 |
+
negative_emotions = {
|
| 1120 |
+
'sad': -0.6, 'angry': -0.9, 'frustrated': -0.8,
|
| 1121 |
+
'annoyed': -0.7, 'disappointed': -0.65, 'upset': -0.75
|
| 1122 |
+
}
|
| 1123 |
+
if emotion_lower in positive_emotions:
|
| 1124 |
+
return {
|
| 1125 |
+
"sentiment": "positive",
|
| 1126 |
+
"polarity_score": positive_emotions[emotion_lower],
|
| 1127 |
+
"confidence": "high"
|
| 1128 |
+
}
|
| 1129 |
+
|
| 1130 |
+
if emotion_lower in negative_emotions:
|
| 1131 |
+
return {
|
| 1132 |
+
"sentiment": "negative",
|
| 1133 |
+
"polarity_score": negative_emotions[emotion_lower],
|
| 1134 |
+
"confidence": "high"
|
| 1135 |
+
}
|
| 1136 |
return {
|
| 1137 |
+
"sentiment": "neutral",
|
| 1138 |
+
"polarity_score": 0.0,
|
| 1139 |
+
"confidence": "medium"
|
| 1140 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
|
| 1142 |
def _calculate_speech_rate(self, text, duration_seconds):
|
| 1143 |
+
"""Calculate words per minute (WPM) and classify pace"""
|
| 1144 |
+
if duration_seconds < 0.1:
|
| 1145 |
+
return {"wpm": 0, "word_count": 0, "speech_pace": "unknown"}
|
| 1146 |
+
words = text.split()
|
| 1147 |
+
word_count = len(words)
|
| 1148 |
+
wpm = (word_count / (duration_seconds / 60.0)) if duration_seconds > 0 else 0
|
| 1149 |
+
if wpm < 100: pace = "slow"
|
| 1150 |
+
elif wpm < 140: pace = "normal"
|
| 1151 |
+
elif wpm < 180: pace = "fast"
|
| 1152 |
+
else: pace = "very_fast"
|
| 1153 |
+
|
| 1154 |
+
return {
|
| 1155 |
+
"wpm": round(wpm, 1),
|
| 1156 |
+
"word_count": word_count,
|
| 1157 |
+
"speech_pace": pace
|
| 1158 |
+
}
|
|
|
|
|
|
|
| 1159 |
|
| 1160 |
def _extract_keywords(self, text, top_n=5):
|
| 1161 |
+
"""Extract keywords/keyphrases using KeyBERT"""
|
| 1162 |
+
if self.keyword_model is None or len(text.split()) < 3:
|
| 1163 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1164 |
|
| 1165 |
+
try:
|
| 1166 |
+
keywords = self.keyword_model.extract_keywords(
|
| 1167 |
+
text,
|
| 1168 |
+
keyphrase_ngram_range=(1, 2),
|
| 1169 |
+
stop_words='english',
|
| 1170 |
+
top_n=top_n,
|
| 1171 |
+
use_maxsum=True,
|
| 1172 |
+
nr_candidates=20
|
| 1173 |
+
)
|
| 1174 |
+
return [
|
| 1175 |
+
{"keyword": kw[0], "relevance": round(float(kw[1]), 3)}
|
| 1176 |
+
for kw in keywords
|
| 1177 |
+
]
|
| 1178 |
+
except:
|
| 1179 |
+
return []
|
| 1180 |
|
| 1181 |
|
| 1182 |
def _classify_topic(self, text):
|
| 1183 |
+
"""Classify text into call center topics"""
|
| 1184 |
+
if self.topic_classifier is None or len(text.split()) < 5:
|
| 1185 |
+
return {"topic": "unknown", "confidence": 0.0}
|
| 1186 |
+
try:
|
| 1187 |
+
result = self.topic_classifier(text, self.topic_labels, multi_label=False)
|
| 1188 |
+
return {
|
| 1189 |
+
"topic": result['labels'][0],
|
| 1190 |
+
"confidence": round(float(result['scores'][0]), 3),
|
| 1191 |
+
"top_3_topics": [
|
| 1192 |
+
{"topic": label, "score": round(float(score), 3)}
|
| 1193 |
+
for label, score in zip(result['labels'][:3], result['scores'][:3])
|
| 1194 |
+
]
|
| 1195 |
+
}
|
| 1196 |
+
except:
|
| 1197 |
+
return {"topic": "unknown", "confidence": 0.0}
|
|
|
|
|
|
|
| 1198 |
|
| 1199 |
|
| 1200 |
def _aggregate_call_insights(self, results):
|
| 1201 |
+
"""Aggregate keywords and topics at call level"""
|
| 1202 |
+
if not results:
|
| 1203 |
+
return {"top_keywords": [], "primary_topic": {"topic": "unknown"}}
|
| 1204 |
+
all_keywords = {}
|
| 1205 |
+
for seg in results:
|
| 1206 |
+
if 'keywords' in seg:
|
| 1207 |
+
for kw in seg['keywords']:
|
| 1208 |
+
keyword = kw['keyword']
|
| 1209 |
+
score = kw['relevance']
|
| 1210 |
+
all_keywords[keyword] = max(all_keywords.get(keyword, 0), score)
|
| 1211 |
|
| 1212 |
+
top_keywords = [
|
| 1213 |
+
{"keyword": k, "relevance": round(v, 3)}
|
| 1214 |
+
for k, v in sorted(all_keywords.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 1215 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
|
| 1217 |
# Aggregate topics
|
| 1218 |
+
topic_votes = defaultdict(float)
|
| 1219 |
+
for seg in results:
|
| 1220 |
+
if 'topic' in seg and seg['topic']['confidence'] > 0.5:
|
| 1221 |
+
topic_votes[seg['topic']['topic']] += seg['topic']['confidence']
|
| 1222 |
|
| 1223 |
+
primary_topic = {
|
| 1224 |
+
"topic": max(topic_votes, key=topic_votes.get) if topic_votes else "unknown",
|
| 1225 |
+
"confidence": round(topic_votes[max(topic_votes, key=topic_votes.get)] / len(results), 3) if topic_votes else 0.0
|
| 1226 |
+
}
|
| 1227 |
|
| 1228 |
# Calculate stats
|
| 1229 |
+
total_words = sum(seg.get('speech_rate', {}).get('word_count', 0) for seg in results)
|
| 1230 |
+
wpm_values = [seg.get('speech_rate', {}).get('wpm', 0) for seg in results if seg.get('speech_rate', {}).get('wpm', 0) > 0]
|
| 1231 |
+
average_wpm = round(np.mean(wpm_values), 1) if wpm_values else 0
|
| 1232 |
|
| 1233 |
+
return {
|
| 1234 |
+
"top_keywords": top_keywords,
|
| 1235 |
+
"primary_topic": primary_topic,
|
| 1236 |
+
"total_words": total_words,
|
| 1237 |
+
"average_wpm": average_wpm
|
| 1238 |
+
}
|
| 1239 |
|
| 1240 |
|
| 1241 |
if __name__ == "__main__":
|