botsi committed
Commit 5496e8b (verified) · Parent: cb9be99

Update app.py

Files changed (1):
  app.py (+41, -6)
app.py CHANGED
@@ -42,8 +42,10 @@ import os
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 
+
 DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
-DATA_FILENAME = "data.csv"
+DATA_DIRECTORY = "data" # Separate directory for storing data files
+DATA_FILENAME = "history_7B_chat.csv" # Default filename
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -51,7 +53,7 @@ print("is none?", HF_TOKEN is None)
 print("hfh", huggingface_hub.__version__)
 
 repo = Repository(
-    local_dir="data", clone_from=DATASET_REPO_URL
+    local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL
 )
 
 MAX_MAX_NEW_TOKENS = 2048
@@ -244,6 +246,7 @@ def generate(
     if session_index is not None:
         print("Session index:", session_index)
     else:
+        session_index = 'no_session_id'
         print("Session index not found or has no value.")
 
     # Fetch personalized data
@@ -269,10 +272,42 @@ def generate(
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
 
-    pd.DataFrame(conversation).to_csv(DATA_FILE, index=False)
-    print("updating conversation")
-    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
-    print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    #pd.DataFrame(conversation).to_csv(DATA_FILE, index=False)
+    #print("updating conversation")
+    #repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
+    #print(conversation)
+
+    # Save chat history to .csv file on HuggingFace Hub
+    # Generate filename with bot id and session id
+    filename = f"{session_index}_{DATA_FILENAME}"
+    data_file = os.path.join(DATA_DIRECTORY, filename)
+
+    # Generate timestamp
+    timestamp = datetime.datetime.now()
+
+    # Check if the file already exists
+    if os.path.exists(data_file):
+        # If file exists, load existing data
+        existing_data = pd.read_csv(data_file)
+
+        # Add timestamp column
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+
+        # Append new conversation to existing data
+        updated_data = pd.concat([existing_data, conversation_df], ignore_index=True)
+        updated_data.to_csv(data_file, index=False)
+    else:
+        # If file doesn't exist, create new file with conversation data
+        conversation_df = pd.DataFrame(conversation)
+        conversation_df['timestamp'] = timestamp
+        conversation_df.to_csv(data_file, index=False)
+
+    print("Updating .csv")
+    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}")
+
 
     # Set up the TextIteratorStreamer
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
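For reference, here is a minimal standalone sketch of the persistence logic this commit adds, lifted out of generate() so it can be run in isolation. The save_history helper and the __main__ harness are hypothetical names introduced for illustration only; the repo.push_to_hub step is left as a comment since it requires a cloned dataset repo and an HF_TOKEN.

# Hypothetical standalone sketch of the commit's save logic; save_history
# and the demo harness below are illustrative names, not part of app.py.
import datetime
import os

import pandas as pd

DATA_DIRECTORY = "data"
DATA_FILENAME = "history_7B_chat.csv"

def save_history(conversation, session_index="no_session_id"):
    """Append a conversation (list of role/content dicts) to a per-session CSV."""
    os.makedirs(DATA_DIRECTORY, exist_ok=True)
    data_file = os.path.join(DATA_DIRECTORY, f"{session_index}_{DATA_FILENAME}")

    # Stamp every row of this conversation with a single write time
    conversation_df = pd.DataFrame(conversation)
    conversation_df["timestamp"] = datetime.datetime.now()

    # Append to the session's existing history if the file is already there
    if os.path.exists(data_file):
        existing_data = pd.read_csv(data_file)
        conversation_df = pd.concat([existing_data, conversation_df], ignore_index=True)
    conversation_df.to_csv(data_file, index=False)
    # app.py then calls repo.push_to_hub(blocking=False, ...) to sync the CSV
    return data_file

if __name__ == "__main__":
    path = save_history([{"role": "user", "content": "hi"}], session_index="demo")
    print("wrote", path)

One caveat worth noting: each call re-reads and rewrites the whole CSV, so two concurrent writes for the same session_index could race; the per-session filenames introduced in this commit reduce, but do not eliminate, that risk.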