Update app.py
app.py
CHANGED
@@ -42,8 +42,10 @@ import os
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 
+
 DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
-
+DATA_DIRECTORY = "data"  # Separate directory for storing data files
+DATA_FILENAME = "history_7B_chat.csv"  # Default filename
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
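One small inconsistency this hunk leaves behind: DATA_FILE still hard-codes the "data" literal even though the new DATA_DIRECTORY constant exists for exactly that purpose. A minimal sketch of the more consistent spelling (same resulting path, so this is a style point rather than a fix):

import os

DATA_DIRECTORY = "data"
DATA_FILENAME = "history_7B_chat.csv"

# Same path as before ("data/history_7B_chat.csv"), but derived from the
# constant instead of repeating the "data" literal.
DATA_FILE = os.path.join(DATA_DIRECTORY, DATA_FILENAME)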
@@ -51,7 +53,7 @@ print("is none?", HF_TOKEN is None)
 print("hfh", huggingface_hub.__version__)
 
 repo = Repository(
-    local_dir=
+    local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL
 )
 
 MAX_MAX_NEW_TOKENS = 2048
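Worth noting: HF_TOKEN is read from the environment just below this hunk, but the Repository(...) call never receives it. If the dataset repo requires authentication to push, the token can be handed over at clone time. A minimal sketch, assuming the Space needs authenticated pushes (which this diff does not confirm):

import os
from huggingface_hub import Repository

DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
DATA_DIRECTORY = "data"
HF_TOKEN = os.environ.get("HF_TOKEN")  # assumed to be set as a Space secret

# Hypothetical variant: pass the token so later repo.push_to_hub() calls
# can authenticate against the dataset repo.
repo = Repository(
    local_dir=DATA_DIRECTORY,
    clone_from=DATASET_REPO_URL,
    token=HF_TOKEN,  # called use_auth_token in older huggingface_hub releases
)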
@@ -244,6 +246,7 @@ def generate(
     if session_index is not None:
         print("Session index:", session_index)
     else:
+        session_index = 'no_session_id'
         print("Session index not found or has no value.")
 
     # Fetch personalized data
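A side effect of this fallback: every request without a session index ends up writing to the same no_session_id_history_7B_chat.csv file. If per-visitor files are wanted even without a session id, a hypothetical alternative is a random fallback id (the anon_ naming below is illustrative, not from the Space):

import uuid

session_index = None  # stand-in for the value resolved earlier in generate()

if session_index is None:
    # Hypothetical: give each sessionless visitor a unique fallback id
    # instead of funneling them all into one shared history file.
    session_index = f"anon_{uuid.uuid4().hex[:8]}"
    print("Session index not found; generated fallback:", session_index)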
@@ -269,10 +272,42 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
 
-
-
-
-    print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    #pd.DataFrame(conversation).to_csv(DATA_FILE, index=False)
+    #print("updating conversation")
+    #repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
+    #print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    # Generate filename with bot id and session id
+    filename = f"{session_index}_{DATA_FILENAME}"
+    data_file = os.path.join(DATA_DIRECTORY, filename)
+
+    # Generate timestamp
+    timestamp = datetime.datetime.now()
+
+    # Check if the file already exists
+    if os.path.exists(data_file):
+        # If file exists, load existing data
+        existing_data = pd.read_csv(data_file)
+
+        # Add timestamp column
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+
+        # Append new conversation to existing data
+        updated_data = pd.concat([existing_data, conversation_df], ignore_index=True)
+        updated_data.to_csv(data_file, index=False)
+    else:
+        # If file doesn't exist, create new file with conversation data
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+        conversation_df.to_csv(data_file, index=False)
+
+    print("Updating .csv")
+    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}")
+
 
     # Set up the TextIteratorStreamer
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
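Two observations about the new save path. First, every save re-reads the whole CSV and rewrites it via pd.concat, so the cost grows with file size. pandas can append directly instead, writing the header only when the file is first created. A minimal sketch with the same column layout (save_history is a hypothetical helper name, not from the Space):

import os
import datetime
import pandas as pd

def save_history(conversation, data_file):
    # Sketch: append rows instead of read-concat-rewrite.
    conversation_df = pd.DataFrame(conversation)
    conversation_df["timestamp"] = datetime.datetime.now()
    # Write the header only when the file does not exist yet;
    # afterwards just append rows.
    conversation_df.to_csv(
        data_file,
        mode="a",
        header=not os.path.exists(data_file),
        index=False,
    )

Second, if conversation carries the full chat history on every generate() call, both the hunk above and this sketch will re-store earlier turns with each new message; deduplicating on write is left out of scope here.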