# Utilities for fetching pytest test-duration artifacts from GitHub Actions
# runs and summarizing them as Markdown.
import heapq
import tempfile
import zipfile

import requests
def group_tests_by_duration(file_path: str) -> dict:
    """Bucket test entries from a pytest duration report by runtime.

    Each useful line is expected to look like
    ``"<duration>s <phase> <test name...>"``.  Lines that do not fit that
    shape are silently skipped.

    Returns a dict mapping bucket labels ("0-5s", ..., ">20s") to lists of
    ``(duration, test_name)`` tuples.
    """
    bucket_names = ["0-5s", "5-10s", "10-15s", "15-20s", ">20s"]
    buckets = [(0, 5), (5, 10), (10, 15), (15, 20), (20, float('inf'))]
    grouped = {label: [] for label in bucket_names}

    with open(file_path, 'r') as handle:
        for raw_line in handle:
            tokens = raw_line.split()
            # Guard: need at least "<duration> <phase> <name>" and an
            # 's'-suffixed duration token to treat this as a test entry.
            if len(tokens) < 3 or 's' not in tokens[0]:
                continue
            try:
                seconds = float(tokens[0].rstrip('s'))
            except ValueError:
                continue  # duration field was not a number — skip the line
            name = ' '.join(tokens[2:])
            # Drop the test into the first (and only) matching bucket.
            for label, (low, high) in zip(bucket_names, buckets):
                if low <= seconds < high:
                    grouped[label].append((seconds, name))
                    break
    return grouped
def extract_top_n_tests(file_path, n=10):
    """Return the *n* slowest 'call'-phase tests from a pytest duration report.

    Parameters
    ----------
    file_path : str
        Path to a text file whose relevant lines look like
        ``"<duration>s call <test name...>"``.
    n : int, optional
        Number of slowest tests to return (default 10).

    Returns
    -------
    dict
        Maps test name -> duration string (e.g. ``"12.3s"``), ordered
        slowest first.
    """
    test_durations = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            # Only 'call'-phase lines carry the actual test run time
            # (setup/teardown lines are ignored).
            if len(parts) >= 3 and parts[1] == 'call':
                try:
                    duration = float(parts[0].rstrip('s'))
                except ValueError:
                    continue  # malformed duration field — skip the line
                test_durations.append((duration, ' '.join(parts[2:])))
    # nlargest is documented as equivalent to sorted(..., reverse=True)[:n]
    # but avoids fully sorting the whole list.
    slowest = heapq.nlargest(n, test_durations, key=lambda item: item[0])
    return {name: f"{duration}s" for duration, name in slowest}
def fetch_test_duration_artifact(repo_id, token, run_id, artifact_name):
    """Download a GitHub Actions artifact and extract its duration report.

    Parameters
    ----------
    repo_id : str
        ``"owner/repo"`` identifier.
    token : str
        GitHub API token used for authentication.
    run_id : str or int
        Workflow run whose artifacts are searched.
    artifact_name : str
        Name of the artifact to download.

    Returns
    -------
    str
        Path of the extracted member whose name contains ``"duration"``.

    Raises
    ------
    ValueError
        If the artifact is missing from the run, or the downloaded archive
        contains no member with "duration" in its name.
    requests.HTTPError
        If a GitHub API request fails.
    """
    owner_repo = repo_id.split("/")
    artifacts_url = f'https://api.github.com/repos/{owner_repo[0]}/{owner_repo[1]}/actions/runs/{run_id}/artifacts'
    headers = {'Authorization': f'token {token}'}

    # List the run's artifacts and find the one we were asked for.
    response = requests.get(artifacts_url, headers=headers)
    response.raise_for_status()  # raise on HTTP error responses
    download_url = None
    for artifact in response.json().get('artifacts', []):
        if artifact['name'] == artifact_name:
            download_url = artifact['archive_download_url']
            break
    if download_url is None:
        raise ValueError(f"Artifact '{artifact_name}' not found in run {run_id}")

    # Stream the zip archive to disk in chunks.
    download_response = requests.get(download_url, headers=headers, stream=True)
    download_response.raise_for_status()
    zip_file_path = f'{artifact_name}.zip'
    with open(zip_file_path, 'wb') as zip_out:
        for chunk in download_response.iter_content(chunk_size=128):
            zip_out.write(chunk)

    # Extract and return the first member whose name mentions "duration".
    # (The original fell through and returned whatever name the loop variable
    # last held — or the closed file handle for an empty archive.)
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for member in zip_ref.namelist():
            if "duration" in member:
                zip_ref.extract(member, ".")
                return member
    raise ValueError(
        f"No file containing 'duration' found in artifact '{artifact_name}'"
    )
def format_to_markdown_str(test_bucket_map, top_n_slow_tests, repo_id, run_id, artifact_name):
    """Render the test-duration summary as a Markdown string.

    Parameters
    ----------
    test_bucket_map : dict
        Maps bucket label (e.g. "0-5s", ">20s") -> number of tests.
    top_n_slow_tests : dict
        Maps test name -> duration string (e.g. "12.3s").
    repo_id : str
        ``"owner/repo"`` identifier, used to build the run URL.
    run_id : str or int
        Workflow run id, used to build the run URL.
    artifact_name : str
        Shown in the report heading.

    Returns
    -------
    str
        Markdown report with the slowest tests, bucket counts, and run link.
    """
    run_url = f"https://github.com/{repo_id}/actions/runs/{run_id}/"
    markdown_str = f"\n## Top {len(top_n_slow_tests)} slow test for {artifact_name}\n\n"
    for test, duration in top_n_slow_tests.items():
        # Show only the final path component of the test id.
        markdown_str += f"* {test.split('/')[-1]}: {duration}\n"
    markdown_str += "\n## Bucketed durations of the tests\n\n"
    for bucket, num_tests in test_bucket_map.items():
        if ">" in bucket:
            # Backslash-escape '>' so Markdown does not render a blockquote.
            # (Was f"\{bucket}" — an invalid escape sequence that only worked
            # by accident and warns on modern Python.)
            bucket = f"\\{bucket}"
        markdown_str += f"* {bucket}: {num_tests} tests\n"
    markdown_str += f"\nRun URL: [{run_url}]({run_url})."
    return markdown_str
def analyze_tests(repo_id, token, run_id, artifact_name, top_n):
    """Fetch a run's duration artifact and summarize it as Markdown.

    Downloads the named artifact from the GitHub Actions run, counts tests
    per duration bucket, extracts the ``top_n`` slowest tests, and renders
    both views via ``format_to_markdown_str``.
    """
    duration_file = fetch_test_duration_artifact(
        repo_id=repo_id, token=token, run_id=run_id, artifact_name=artifact_name
    )
    # Collapse the bucketed test lists to per-bucket counts.
    bucket_counts = {
        label: len(members)
        for label, members in group_tests_by_duration(duration_file).items()
    }
    print(bucket_counts)
    slowest = extract_top_n_tests(duration_file, n=top_n)
    print(slowest)
    return format_to_markdown_str(bucket_counts, slowest, repo_id, run_id, artifact_name)