kwabs22 committed on
Commit
0ebff8f
1 Parent(s): 9835647

time variables edited

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -17,8 +17,10 @@ def generate_response(user_message): #generate_response_token_by_token
17
 
18
 
19
  start_time = time.time()
 
20
  alltokens = ""
21
  token_buffer = ''
 
22
  try:
23
  while True:
24
  # Read one character at a time
@@ -30,20 +32,22 @@ def generate_response(user_message): #generate_response_token_by_token
30
  if char == ' ' or char == '\n': # Token delimiters
31
  elapsed_time = time.time() - start_time # Calculate elapsed time
32
  alltokens += token_buffer
33
- yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"
 
34
  token_buffer = '' # Reset token buffer
35
  # Log resource usage every minute
36
- if time.time() - start_time > 60:
37
  cpu_usage = process_monitor.cpu_percent()
38
  memory_usage = process_monitor.memory_info().rss # in bytes
39
  print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
40
- start_time = time.time() # Reset the timer
41
 
42
  # Yield the last token if there is any
43
  if token_buffer:
44
  elapsed_time = time.time() - start_time # Calculate elapsed time
45
  alltokens += token_buffer
46
- yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"
 
47
  finally:
48
  try:
49
  # Wait for the process to complete, with a timeout
 
17
 
18
 
19
  start_time = time.time()
20
+ monitor_start_time = time.time()
21
  alltokens = ""
22
  token_buffer = ''
23
+ tokencount = 0
24
  try:
25
  while True:
26
  # Read one character at a time
 
32
  if char == ' ' or char == '\n': # Token delimiters
33
  elapsed_time = time.time() - start_time # Calculate elapsed time
34
  alltokens += token_buffer
35
+ tokencount += 1
36
+ yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
37
  token_buffer = '' # Reset token buffer
38
  # Log resource usage every minute
39
+ if time.time() - monitor_start_time > 60:
40
  cpu_usage = process_monitor.cpu_percent()
41
  memory_usage = process_monitor.memory_info().rss # in bytes
42
  print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
43
+ monitor_start_time = time.time() # Reset the timer
44
 
45
  # Yield the last token if there is any
46
  if token_buffer:
47
  elapsed_time = time.time() - start_time # Calculate elapsed time
48
  alltokens += token_buffer
49
+ yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { tokencount / elapsed_time}]"
50
+
51
  finally:
52
  try:
53
  # Wait for the process to complete, with a timeout