kwabs22
committed on
Commit
•
0ebff8f
1
Parent(s):
9835647
time variables edited
Browse files
app.py
CHANGED
@@ -17,8 +17,10 @@ def generate_response(user_message): #generate_response_token_by_token
|
|
17 |
|
18 |
|
19 |
start_time = time.time()
|
|
|
20 |
alltokens = ""
|
21 |
token_buffer = ''
|
|
|
22 |
try:
|
23 |
while True:
|
24 |
# Read one character at a time
|
@@ -30,20 +32,22 @@ def generate_response(user_message): #generate_response_token_by_token
|
|
30 |
if char == ' ' or char == '\n': # Token delimiters
|
31 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
32 |
alltokens += token_buffer
|
33 |
-
|
|
|
34 |
token_buffer = '' # Reset token buffer
|
35 |
# Log resource usage every minute
|
36 |
-
if time.time() -
|
37 |
cpu_usage = process_monitor.cpu_percent()
|
38 |
memory_usage = process_monitor.memory_info().rss # in bytes
|
39 |
print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
|
40 |
-
|
41 |
|
42 |
# Yield the last token if there is any
|
43 |
if token_buffer:
|
44 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
45 |
alltokens += token_buffer
|
46 |
-
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds]"
|
|
|
47 |
finally:
|
48 |
try:
|
49 |
# Wait for the process to complete, with a timeout
|
|
|
17 |
|
18 |
|
19 |
start_time = time.time()
|
20 |
+
monitor_start_time = time.time()
|
21 |
alltokens = ""
|
22 |
token_buffer = ''
|
23 |
+
tokencount = 0
|
24 |
try:
|
25 |
while True:
|
26 |
# Read one character at a time
|
|
|
32 |
if char == ' ' or char == '\n': # Token delimiters
|
33 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
34 |
alltokens += token_buffer
|
35 |
+
tokencount += 1
|
36 |
+
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Tokens: { tokencount }]"
|
37 |
token_buffer = '' # Reset token buffer
|
38 |
# Log resource usage every minute
|
39 |
+
if time.time() - monitor_start_time > 60:
|
40 |
cpu_usage = process_monitor.cpu_percent()
|
41 |
memory_usage = process_monitor.memory_info().rss # in bytes
|
42 |
print(f"Subprocess CPU Usage: {cpu_usage}%, Memory Usage: {memory_usage / 1024 ** 2} MB")
|
43 |
+
monitor_start_time = time.time() # Reset the timer
|
44 |
|
45 |
# Yield the last token if there is any
|
46 |
if token_buffer:
|
47 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
48 |
alltokens += token_buffer
|
49 |
+
yield f"{alltokens} \n\n [Inference time: {elapsed_time:.2f} seconds | Average Tokens per second: { tokencount / elapsed_time}]"
|
50 |
+
|
51 |
finally:
|
52 |
try:
|
53 |
# Wait for the process to complete, with a timeout
|