akpande2 committed
Commit 08c2b3f · verified · 1 Parent(s): 5d88606

Update pipeline.py

Files changed (1)
  1. pipeline.py +106 -116
pipeline.py CHANGED
@@ -1109,143 +1109,133 @@ class UltraRobustCallAnalytics:
         torch.cuda.empty_cache()
 
     def _map_emotion_to_sentiment(self, emotion):
-        """Map emotion labels to sentiment with polarity score"""
-        emotion_lower = emotion.lower()
-
-        positive_emotions = {
-            'happy': 0.8, 'joy': 0.9, 'excited': 0.85,
-            'pleased': 0.7, 'satisfied': 0.75, 'content': 0.6
-        }
-
-        negative_emotions = {
-            'sad': -0.6, 'angry': -0.9, 'frustrated': -0.8,
-            'annoyed': -0.7, 'disappointed': -0.65, 'upset': -0.75
-        }
-
-        if emotion_lower in positive_emotions:
-            return {
-                "sentiment": "positive",
-                "polarity_score": positive_emotions[emotion_lower],
-                "confidence": "high"
             }
-
-        if emotion_lower in negative_emotions:
             return {
-                "sentiment": "negative",
-                "polarity_score": negative_emotions[emotion_lower],
-                "confidence": "high"
             }
-
-        return {
-            "sentiment": "neutral",
-            "polarity_score": 0.0,
-            "confidence": "medium"
-        }
 
     def _calculate_speech_rate(self, text, duration_seconds):
-        """Calculate words per minute (WPM) and classify pace"""
-        if duration_seconds < 0.1:
-            return {"wpm": 0, "word_count": 0, "speech_pace": "unknown"}
-
-        words = text.split()
-        word_count = len(words)
-        wpm = (word_count / (duration_seconds / 60.0)) if duration_seconds > 0 else 0
-
-        if wpm < 100: pace = "slow"
-        elif wpm < 140: pace = "normal"
-        elif wpm < 180: pace = "fast"
-        else: pace = "very_fast"
-
-        return {
-            "wpm": round(wpm, 1),
-            "word_count": word_count,
-            "speech_pace": pace
-        }
 
     def _extract_keywords(self, text, top_n=5):
-        """Extract keywords/keyphrases using KeyBERT"""
-        if self.keyword_model is None or len(text.split()) < 3:
-            return []
-
-        try:
-            keywords = self.keyword_model.extract_keywords(
-                text,
-                keyphrase_ngram_range=(1, 2),
-                stop_words='english',
-                top_n=top_n,
-                use_maxsum=True,
-                nr_candidates=20
-            )
-
-            return [
-                {"keyword": kw[0], "relevance": round(float(kw[1]), 3)}
-                for kw in keywords
-            ]
-        except:
-            return []
 
 
     def _classify_topic(self, text):
-        """Classify text into call center topics"""
-        if self.topic_classifier is None or len(text.split()) < 5:
-            return {"topic": "unknown", "confidence": 0.0}
-
-        try:
-            result = self.topic_classifier(text, self.topic_labels, multi_label=False)
-
-            return {
-                "topic": result['labels'][0],
-                "confidence": round(float(result['scores'][0]), 3),
-                "top_3_topics": [
-                    {"topic": label, "score": round(float(score), 3)}
-                    for label, score in zip(result['labels'][:3], result['scores'][:3])
-                ]
-            }
-        except:
-            return {"topic": "unknown", "confidence": 0.0}
 
 
     def _aggregate_call_insights(self, results):
-        """Aggregate keywords and topics at call level"""
-        if not results:
-            return {"top_keywords": [], "primary_topic": {"topic": "unknown"}}
 
-        # Aggregate keywords
-        all_keywords = {}
-        for seg in results:
-            if 'keywords' in seg:
-                for kw in seg['keywords']:
-                    keyword = kw['keyword']
-                    score = kw['relevance']
-                    all_keywords[keyword] = max(all_keywords.get(keyword, 0), score)
-
-        top_keywords = [
-            {"keyword": k, "relevance": round(v, 3)}
-            for k, v in sorted(all_keywords.items(), key=lambda x: x[1], reverse=True)[:10]
-        ]
 
         # Aggregate topics
-        topic_votes = defaultdict(float)
-        for seg in results:
-            if 'topic' in seg and seg['topic']['confidence'] > 0.5:
-                topic_votes[seg['topic']['topic']] += seg['topic']['confidence']
 
-        primary_topic = {
-            "topic": max(topic_votes, key=topic_votes.get) if topic_votes else "unknown",
-            "confidence": round(topic_votes[max(topic_votes, key=topic_votes.get)] / len(results), 3) if topic_votes else 0.0
-        }
 
         # Calculate stats
-        total_words = sum(seg.get('speech_rate', {}).get('word_count', 0) for seg in results)
-        wpm_values = [seg.get('speech_rate', {}).get('wpm', 0) for seg in results if seg.get('speech_rate', {}).get('wpm', 0) > 0]
-        average_wpm = round(np.mean(wpm_values), 1) if wpm_values else 0
 
-        return {
-            "top_keywords": top_keywords,
-            "primary_topic": primary_topic,
-            "total_words": total_words,
-            "average_wpm": average_wpm
-        }
 
 
 if __name__ == "__main__":
 
         torch.cuda.empty_cache()
 
     def _map_emotion_to_sentiment(self, emotion):
+        """Map emotion labels to sentiment with polarity score"""
+        emotion_lower = emotion.lower()
+
+        positive_emotions = {
+            'happy': 0.8, 'joy': 0.9, 'excited': 0.85,
+            'pleased': 0.7, 'satisfied': 0.75, 'content': 0.6
         }
+        negative_emotions = {
+            'sad': -0.6, 'angry': -0.9, 'frustrated': -0.8,
+            'annoyed': -0.7, 'disappointed': -0.65, 'upset': -0.75
+        }
+        if emotion_lower in positive_emotions:
+            return {
+                "sentiment": "positive",
+                "polarity_score": positive_emotions[emotion_lower],
+                "confidence": "high"
+            }
+
+        if emotion_lower in negative_emotions:
+            return {
+                "sentiment": "negative",
+                "polarity_score": negative_emotions[emotion_lower],
+                "confidence": "high"
+            }
         return {
+            "sentiment": "neutral",
+            "polarity_score": 0.0,
+            "confidence": "medium"
         }
 
     def _calculate_speech_rate(self, text, duration_seconds):
+        """Calculate words per minute (WPM) and classify pace"""
+        if duration_seconds < 0.1:
+            return {"wpm": 0, "word_count": 0, "speech_pace": "unknown"}
+        words = text.split()
+        word_count = len(words)
+        wpm = (word_count / (duration_seconds / 60.0)) if duration_seconds > 0 else 0
+        if wpm < 100: pace = "slow"
+        elif wpm < 140: pace = "normal"
+        elif wpm < 180: pace = "fast"
+        else: pace = "very_fast"
+
+        return {
+            "wpm": round(wpm, 1),
+            "word_count": word_count,
+            "speech_pace": pace
+        }
 
     def _extract_keywords(self, text, top_n=5):
+        """Extract keywords/keyphrases using KeyBERT"""
+        if self.keyword_model is None or len(text.split()) < 3:
+            return []
 
+        try:
+            keywords = self.keyword_model.extract_keywords(
+                text,
+                keyphrase_ngram_range=(1, 2),
+                stop_words='english',
+                top_n=top_n,
+                use_maxsum=True,
+                nr_candidates=20
+            )
+            return [
+                {"keyword": kw[0], "relevance": round(float(kw[1]), 3)}
+                for kw in keywords
+            ]
+        except:
+            return []
 
 
     def _classify_topic(self, text):
+        """Classify text into call center topics"""
+        if self.topic_classifier is None or len(text.split()) < 5:
+            return {"topic": "unknown", "confidence": 0.0}
+        try:
+            result = self.topic_classifier(text, self.topic_labels, multi_label=False)
+            return {
+                "topic": result['labels'][0],
+                "confidence": round(float(result['scores'][0]), 3),
+                "top_3_topics": [
+                    {"topic": label, "score": round(float(score), 3)}
+                    for label, score in zip(result['labels'][:3], result['scores'][:3])
+                ]
+            }
+        except:
+            return {"topic": "unknown", "confidence": 0.0}
 
 
     def _aggregate_call_insights(self, results):
+        """Aggregate keywords and topics at call level"""
+        if not results:
+            return {"top_keywords": [], "primary_topic": {"topic": "unknown"}}
+        all_keywords = {}
+        for seg in results:
+            if 'keywords' in seg:
+                for kw in seg['keywords']:
+                    keyword = kw['keyword']
+                    score = kw['relevance']
+                    all_keywords[keyword] = max(all_keywords.get(keyword, 0), score)
 
+        top_keywords = [
+            {"keyword": k, "relevance": round(v, 3)}
+            for k, v in sorted(all_keywords.items(), key=lambda x: x[1], reverse=True)[:10]
+        ]
 
         # Aggregate topics
+        topic_votes = defaultdict(float)
+        for seg in results:
+            if 'topic' in seg and seg['topic']['confidence'] > 0.5:
+                topic_votes[seg['topic']['topic']] += seg['topic']['confidence']
 
+        primary_topic = {
+            "topic": max(topic_votes, key=topic_votes.get) if topic_votes else "unknown",
+            "confidence": round(topic_votes[max(topic_votes, key=topic_votes.get)] / len(results), 3) if topic_votes else 0.0
+        }
 
         # Calculate stats
+        total_words = sum(seg.get('speech_rate', {}).get('word_count', 0) for seg in results)
+        wpm_values = [seg.get('speech_rate', {}).get('wpm', 0) for seg in results if seg.get('speech_rate', {}).get('wpm', 0) > 0]
+        average_wpm = round(np.mean(wpm_values), 1) if wpm_values else 0
 
+        return {
+            "top_keywords": top_keywords,
+            "primary_topic": primary_topic,
+            "total_words": total_words,
+            "average_wpm": average_wpm
+        }
 
 
 if __name__ == "__main__":
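
A minimal sketch of how the reworked helpers could be exercised, assuming pipeline.py and its dependencies (torch, KeyBERT, transformers) are importable. Both _map_emotion_to_sentiment and _calculate_speech_rate never touch self, so they can be called unbound for a quick check; the expected outputs below follow directly from the mappings and thresholds in the diff.

# Hypothetical sanity check, not part of the commit; assumes `pipeline` imports cleanly.
from pipeline import UltraRobustCallAnalytics

# "Angry" lower-cases to a key in negative_emotions, so polarity is -0.9.
print(UltraRobustCallAnalytics._map_emotion_to_sentiment(None, "Angry"))
# {'sentiment': 'negative', 'polarity_score': -0.9, 'confidence': 'high'}

# 9 words over 4 seconds -> 135 WPM, which falls in the "normal" band (< 140).
print(UltraRobustCallAnalytics._calculate_speech_rate(None, "thanks for calling how can I help you today", 4.0))
# {'wpm': 135.0, 'word_count': 9, 'speech_pace': 'normal'}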