Update app.py
app.py
CHANGED
@@ -42,8 +42,10 @@ import os
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 
+
 DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
-
+DATA_DIRECTORY = "data"  # Separate directory for storing data files
+DATA_FILENAME = "history_7B_chat.csv"  # Default filename
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
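One small inconsistency this hunk leaves behind: DATA_FILE still hard-codes the "data" literal even though the new DATA_DIRECTORY constant exists for exactly that purpose. A minimal sketch of the more consistent spelling (same resulting path, so this is a style point rather than a fix):

import os

DATA_DIRECTORY = "data"
DATA_FILENAME = "history_7B_chat.csv"

# Same path as before ("data/history_7B_chat.csv"), but derived from the
# constant instead of repeating the "data" literal.
DATA_FILE = os.path.join(DATA_DIRECTORY, DATA_FILENAME)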
@@ -51,7 +53,7 @@ print("is none?", HF_TOKEN is None)
 print("hfh", huggingface_hub.__version__)
 
 repo = Repository(
-    local_dir=
+    local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL
 )
 
 MAX_MAX_NEW_TOKENS = 2048
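Worth noting: HF_TOKEN is read from the environment just below this hunk, but the Repository(...) call never receives it. If the dataset repo requires authentication to push, the token can be handed over at clone time. A minimal sketch, assuming the Space needs authenticated pushes (which this diff does not confirm):

import os
from huggingface_hub import Repository

DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
DATA_DIRECTORY = "data"
HF_TOKEN = os.environ.get("HF_TOKEN")  # assumed to be set as a Space secret

# Hypothetical variant: pass the token so later repo.push_to_hub() calls
# can authenticate against the dataset repo.
repo = Repository(
    local_dir=DATA_DIRECTORY,
    clone_from=DATASET_REPO_URL,
    token=HF_TOKEN,  # called use_auth_token in older huggingface_hub releases
)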
@@ -244,6 +246,7 @@ def generate(
     if session_index is not None:
         print("Session index:", session_index)
     else:
+        session_index = 'no_session_id'
         print("Session index not found or has no value.")
 
     # Fetch personalized data
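A side effect of this fallback: every request without a session index ends up writing to the same no_session_id_history_7B_chat.csv file. If per-visitor files are wanted even without a session id, a hypothetical alternative is a random fallback id (the anon_ naming below is illustrative, not from the Space):

import uuid

session_index = None  # stand-in for the value resolved earlier in generate()

if session_index is None:
    # Hypothetical: give each sessionless visitor a unique fallback id
    # instead of funneling them all into one shared history file.
    session_index = f"anon_{uuid.uuid4().hex[:8]}"
    print("Session index not found; generated fallback:", session_index)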
@@ -269,10 +272,42 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
 
-
-
-
-    print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    #pd.DataFrame(conversation).to_csv(DATA_FILE, index=False)
+    #print("updating conversation")
+    #repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
+    #print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    # Generate filename with bot id and session id
+    filename = f"{session_index}_{DATA_FILENAME}"
+    data_file = os.path.join(DATA_DIRECTORY, filename)
+
+    # Generate timestamp
+    timestamp = datetime.datetime.now()
+
+    # Check if the file already exists
+    if os.path.exists(data_file):
+        # If file exists, load existing data
+        existing_data = pd.read_csv(data_file)
+
+        # Add timestamp column
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+
+        # Append new conversation to existing data
+        updated_data = pd.concat([existing_data, conversation_df], ignore_index=True)
+        updated_data.to_csv(data_file, index=False)
+    else:
+        # If file doesn't exist, create new file with conversation data
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+        conversation_df.to_csv(data_file, index=False)
+
+    print("Updating .csv")
+    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}")
+
 
     # Set up the TextIteratorStreamer
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
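Two observations about the new save path. First, every save re-reads the whole CSV and rewrites it via pd.concat, so the cost grows with file size. pandas can append directly instead, writing the header only when the file is first created. A minimal sketch with the same column layout (save_history is a hypothetical helper name, not from the Space):

import os
import datetime
import pandas as pd

def save_history(conversation, data_file):
    # Sketch: append rows instead of read-concat-rewrite.
    conversation_df = pd.DataFrame(conversation)
    conversation_df["timestamp"] = datetime.datetime.now()
    # Write the header only when the file does not exist yet;
    # afterwards just append rows.
    conversation_df.to_csv(
        data_file,
        mode="a",
        header=not os.path.exists(data_file),
        index=False,
    )

Second, if conversation carries the full chat history on every generate() call, both the hunk above and this sketch will re-store earlier turns with each new message; deduplicating on write is left out of scope here.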