Spaces:

nataliaElv
/

github_issues_dashboard

Sleeping

nataliaElv

Links first

842bf87 9 months ago

7.86 kB

	import streamlit as st
	import pandas as pd
	from github import Github
	from wordcloud import WordCloud
	import matplotlib.pyplot as plt
	import re
	import datetime

	g = Github(st.secrets["ACCESS_TOKEN"])
	repos = st.secrets["REPO_NAME"].split(",")
	repos = [g.get_repo(repo) for repo in repos]

	@st.cache_data
	def fetch_data():

	issues_data = []

	for repo in repos:
	issues = repo.get_issues(state="all")

	for issue in issues:
	issues_data.append(
	{
	'Issue': f"{issue.number} - {issue.title}",
	'State': issue.state,
	'Created at': issue.created_at,
	'Closed at': issue.closed_at,
	'Last update': issue.updated_at,
	'Labels': [label.name for label in issue.labels],
	'Reactions': issue.reactions['total_count'],
	'Comments': issue.comments,
	'URL': issue.html_url,
	'Repository': repo.name,
	}
	)
	return pd.DataFrame(issues_data)

	# def save_data(df):
	# df.to_json("issues.json", orient="records", indent=4, index=False)

	# @st.cache_data
	# def load_data():
	# try:
	# df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
	# except:
	# df = fetch_data()
	# save_data(df)
	# return df


	st.title(f"GitHub Issues Dashboard")
	with st.status(label="Loading data...", state="running") as status:
	df = fetch_data()
	status.update(label="Data loaded!", state="complete")
	today = datetime.date.today()

	# Section 1: Issue activity metrics
	st.header("Issue activity metrics")

	col1, col2, col3 = st.columns(3)

	state_counts = df['State'].value_counts()
	open_issues = df.loc[df['State'] == 'open']
	closed_issues = df.loc[df['State'] == 'closed']
	closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

	with col1:
	st.metric(label="Open issues", value=state_counts['open'])

	with col2:
	st.metric(label="Closed issues", value=state_counts['closed'])

	with col3:
	average_time_to_close = closed_issues['Time to Close'].mean().days
	st.metric(label="Avg. days to close", value=average_time_to_close)


	# TODO Plot: number of open vs closed issues by date


	# st.subheader("Latest bugs 🐞")
	# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
	# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
	# st.dataframe(
	# bug_issues.sort_values(by="Created at", ascending=False),
	# hide_index=True,
	# column_config={
	# "Issue": st.column_config.TextColumn("Issue", width=400),
	# "Labels": st.column_config.TextColumn("Labels"),
	# "Created at": st.column_config.DatetimeColumn("Created at"),
	# "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
	# }
	# )

	st.subheader("Latest updates 📝")
	col1, col2 = st.columns(2)
	with col1:
	last_update_date = st.date_input("Last updated after:", value=today - datetime.timedelta(days=7), format="DD-MM-YYYY")
	last_update_date = datetime.datetime.combine(last_update_date, datetime.datetime.min.time())
	with col2:
	updated_issues = open_issues[pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None) > pd.to_datetime(last_update_date)]
	st.metric("Results:", updated_issues.shape[0])

	st.dataframe(
	updated_issues[["URL","Issue","Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=False),
	hide_index=True,
	# use_container_width=True,
	column_config={
	"Issue": st.column_config.TextColumn("Issue", width="large"),
	"Labels": st.column_config.ListColumn("Labels", width="large"),
	"Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
	"URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
	}
	)

	st.subheader("Stale issues? 🕸️")
	col1, col2 = st.columns(2)
	with col1:
	not_updated_since = st.date_input("Not updated since:", value=today - datetime.timedelta(days=90), format="DD-MM-YYYY")
	not_updated_since = datetime.datetime.combine(not_updated_since, datetime.datetime.min.time())
	with col2:
	stale_issues = open_issues[pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None) < pd.to_datetime(not_updated_since)]
	st.metric("Results:", stale_issues.shape[0])
	st.dataframe(
	stale_issues[["URL","Issue","Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=True),
	hide_index=True,
	# use_container_width=True,
	column_config={
	"Issue": st.column_config.TextColumn("Issue", width="large"),
	"Labels": st.column_config.ListColumn("Labels", width="large"),
	"Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
	"URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
	}
	)

	# Section 2: Issue classification
	st.header("Issue classification")
	col1, col2 = st.columns(2)

	## Dataframe: Number of open issues by label.
	with col1:
	st.subheader("Top ten labels 🔖")
	label_counts = open_issues.groupby("Repository").apply(lambda x: x.explode("Labels").value_counts("Labels").to_frame().reset_index()).reset_index()

	def generate_labels_link(labels,repos):
	links = []
	for label,repo in zip(labels,repos):
	label = label.replace(" ", "+")
	links.append(f"https://github.com/argilla-io/{repo}/issues?q=is:open+is:issue+label:%22{label}%22")
	return links

	label_counts['Link'] = generate_labels_link(label_counts['Labels'],label_counts['Repository'])

	st.dataframe(
	label_counts[["Link","Labels","Repository", "count",]].head(10),
	hide_index=True,
	column_config={
	"Labels": st.column_config.TextColumn("Labels"),
	"count": st.column_config.NumberColumn("Count"),
	"Link": st.column_config.LinkColumn("🔗", display_text="🔗")
	}
	)

	## Cloud of words: Issue titles
	with col2:
	st.subheader("Cloud of words ☁️")
	titles = " ".join(open_issues["Issue"])
	titles = re.sub(r'\[.*?\]', '', titles)
	wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
	plt.figure(figsize=(10, 5))
	plt.imshow(wordcloud, interpolation="bilinear")
	plt.axis("off")
	st.pyplot(plt, use_container_width=True)

	# # Community engagement
	st.header("Community engagement")
	# ## Dataframe: Latest issues open by the community
	# ## Dataframe: issues sorted by number of comments
	st.subheader("Top engaging issues 💬")
	engagement_df = open_issues[["URL","Issue","Repository","Created at", "Reactions","Comments"]].sort_values(by=["Reactions", "Comments"], ascending=False).head(10)
	st.dataframe(
	engagement_df,
	hide_index=True,
	# use_container_width=True,
	column_config={
	"Issue": st.column_config.TextColumn("Issue", width="large"),
	"Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍", width="small"),
	"Comments": st.column_config.NumberColumn("Comments", format="%d 💬", width="small"),
	"URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
	}
	)

	# ## Cloud of words: Comments??
	# ## Dataframe: Contributor leaderboard.

	# # Issue dependencies
	# st.header("Issue dependencies")
	# ## Map: dependencies between issues. Network of issue mentions.x

	# status.update(label="Checking for updated data...", state="running")
	# updated_data = fetch_data()
	# if df.equals(updated_data):
	# status.update(label="Data is up to date!", state="complete")
	# else:
	# save_data(updated_data)
	# status.update(label="Refresh for updated data!", state="complete")