# File size: 5,265 Bytes
# bc69905
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# (line-number gutter residue from a copied file view removed)
import requests
import zipfile 
import tempfile

def group_tests_by_duration(file_path: str) -> dict:
    """Bucket test durations parsed from a pytest ``--durations`` report file.

    Each parseable line is expected to look like
    ``"<seconds>s <phase> <test name...>"`` (e.g.
    ``"2.31s call tests/test_x.py::test_y"``). Lines that do not match are
    skipped silently.

    Args:
        file_path: Path to the plain-text durations report.

    Returns:
        Mapping of bucket label ("0-5s", "5-10s", "10-15s", "15-20s", ">20s")
        to a list of ``(duration_seconds, test_name)`` tuples.
    """
    # Half-open duration intervals and their display labels, index-aligned.
    buckets = [(0, 5), (5, 10), (10, 15), (15, 20), (20, float('inf'))]
    bucket_names = ["0-5s", "5-10s", "10-15s", "15-20s", ">20s"]
    test_groups = {name: [] for name in bucket_names}

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            # Need at least "<duration>s <phase> <name>"; the duration token
            # must carry the 's' unit suffix. (endswith is stricter than the
            # old substring check but accepts exactly the same parseable
            # lines, since any non-numeric remainder fails float() below.)
            if len(parts) < 3 or not parts[0].endswith('s'):
                continue
            # Keep the try body minimal: only the conversion can raise.
            try:
                duration = float(parts[0].rstrip('s'))
            except ValueError:
                continue  # e.g. a word that merely ends in 's'
            test_name = ' '.join(parts[2:])
            # NOTE(review): unlike extract_top_n_tests this does not filter on
            # parts[1] == 'call', so setup/teardown lines are bucketed too —
            # confirm that is intentional.
            for (start, end), bucket_name in zip(buckets, bucket_names):
                if start <= duration < end:
                    test_groups[bucket_name].append((duration, test_name))
                    break

    return test_groups


def extract_top_n_tests(file_path, n=10):
    """Return the *n* slowest 'call'-phase tests from a durations report.

    Only lines whose second token is ``call`` are considered, so setup and
    teardown durations are ignored.

    Args:
        file_path: Path to the pytest ``--durations`` report file.
        n: Number of slowest tests to return (default 10).

    Returns:
        Dict mapping test name to a duration string such as ``"12.3s"``,
        ordered from slowest to fastest.
    """
    test_durations = []

    with open(file_path, 'r') as file:
        for line in file:
            parts = line.split()
            # Guard clause: need "<duration>s call <name...>".
            if len(parts) < 3 or parts[1] != 'call':
                continue
            try:
                duration = float(parts[0].rstrip('s'))  # drop the 's' suffix
            except ValueError:
                continue  # malformed duration token — skip the line
            test_durations.append((duration, ' '.join(parts[2:])))

    # Slowest first; sort is stable so ties keep file order.
    test_durations.sort(key=lambda item: item[0], reverse=True)

    # Dict preserves insertion order, so the result is slowest-to-fastest.
    # (The original used enumerate() here with an unused index.)
    return {name: f"{duration}s" for duration, name in test_durations[:n]}


def fetch_test_duration_artifact(repo_id, token, run_id, artifact_name):
    """Download a GitHub Actions artifact and extract its duration file.

    Looks up ``artifact_name`` among the artifacts of workflow run ``run_id``,
    downloads its zip archive to the current directory, and extracts the first
    zip member whose name contains ``"duration"``.

    Args:
        repo_id: Repository in ``"owner/repo"`` form.
        token: GitHub API token used for authentication.
        run_id: Workflow run identifier.
        artifact_name: Exact name of the artifact to fetch.

    Returns:
        Path (zip member name) of the extracted duration file.

    Raises:
        ValueError: If the artifact or a duration file inside it is missing.
        requests.HTTPError: On failed API responses.
    """
    owner, repo = repo_id.split("/", 1)
    artifacts_url = (f'https://api.github.com/repos/{owner}/{repo}'
                     f'/actions/runs/{run_id}/artifacts')
    headers = {'Authorization': f'token {token}'}

    # List the run's artifacts and locate the one we want.
    response = requests.get(artifacts_url, headers=headers)
    response.raise_for_status()

    download_url = None
    for artifact in response.json().get('artifacts', []):
        if artifact['name'] == artifact_name:
            download_url = artifact['archive_download_url']
            break

    if download_url is None:
        raise ValueError(
            f"Artifact '{artifact_name}' not found in run {run_id}")

    # Stream the artifact zip to disk.
    download_response = requests.get(download_url, headers=headers, stream=True)
    download_response.raise_for_status()

    zip_file_path = f'{artifact_name}.zip'
    with open(zip_file_path, 'wb') as zip_file:
        for chunk in download_response.iter_content(chunk_size=128):
            zip_file.write(chunk)

    # Extract the first member that looks like a duration report.
    # (The original reused the name `file` for both the open() handle and the
    # loop variable, and returned it even when no match was found — returning
    # a closed file object or an arbitrary member name instead of failing.)
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for member in zip_ref.namelist():
            if "duration" in member:
                zip_ref.extract(member, ".")
                return member

    raise ValueError(
        f"No duration file found inside artifact '{artifact_name}'")
    
def format_to_markdown_str(test_bucket_map, top_n_slow_tests, repo_id, run_id, artifact_name):
    """Render the test-duration analysis as a Markdown report string.

    Args:
        test_bucket_map: Mapping of bucket label to number of tests in it.
        top_n_slow_tests: Mapping of test name to duration string, slowest
            first (as produced by ``extract_top_n_tests``).
        repo_id: Repository in ``"owner/repo"`` form, used for the run URL.
        run_id: Workflow run identifier, used for the run URL.
        artifact_name: Artifact name shown in the report heading.

    Returns:
        The assembled Markdown report.
    """
    run_url = f"https://github.com/{repo_id}/actions/runs/{run_id}/"
    markdown_str = f"""
## Top {len(top_n_slow_tests)} slow test for {artifact_name}\n
"""
    for test, duration in top_n_slow_tests.items():
        # Show only the final path component of the test id.
        markdown_str += f"* {test.split('/')[-1]}: {duration}\n"

    markdown_str += """
## Bucketed durations of the tests\n
"""
    for bucket, num_tests in test_bucket_map.items():
        if ">" in bucket:
            # Escape the leading '>' so Markdown does not render a blockquote.
            # (Was f"\{bucket}" — an invalid escape sequence that emits a
            # SyntaxWarning on Python 3.12+; the output string is unchanged.)
            bucket = f"\\{bucket}"
        markdown_str += f"* {bucket}: {num_tests} tests\n"

    markdown_str += f"\nRun URL: [{run_url}]({run_url})."

    return markdown_str
    

def analyze_tests(repo_id, token, run_id, artifact_name, top_n):
    """Fetch a run's duration artifact and summarise it as a Markdown report."""
    duration_file = fetch_test_duration_artifact(
        repo_id=repo_id, token=token, run_id=run_id, artifact_name=artifact_name
    )

    # Count how many tests landed in each duration bucket.
    bucket_counts = {
        name: len(members)
        for name, members in group_tests_by_duration(duration_file).items()
    }
    print(bucket_counts)

    slowest = extract_top_n_tests(duration_file, n=top_n)
    print(slowest)

    return format_to_markdown_str(
        bucket_counts, slowest, repo_id, run_id, artifact_name
    )