cfahlgren1's picture
cfahlgren1 HF staff
cleanup
633e5e5
raw
history blame
6.67 kB
"use client";
import { Inter } from "next/font/google";
import ActivityCalendar from "react-activity-calendar";
import * as duckdb from "@duckdb/duckdb-wasm"
import { useState, useEffect } from "react";
import { Tooltip as MuiTooltip } from '@mui/material';
// Inter font restricted to the "latin" subset; its generated className is applied to <main> in Home.
const inter = Inter({ subsets: ["latin"] });
/**
 * One calendar cell: the activity of a single provider on a single day.
 */
interface CustomActivity {
  /** Day in `YYYY-MM-DD` form. */
  date: string;
  /** Sum of the `count` values of all rows recorded for that day. */
  count: number;
  /** Intensity bucket; generateCalendarData assigns values from 0 (no activity) to 4. */
  level: number;
  /** The rows that were summed into `count`. */
  details: { provider: string; count: number }[];
}
/**
 * Identifiers of the tracked providers. They are the keys of PROVIDERS_MAP and
 * are compared against the part of a model id before the first "/" in the SQL query.
 */
type ProviderKey =
  | "mistralai"
  | "meta-llama"
  | "openai"
  | "anthropic"
  | "google";
export default function Home() {
// Per-provider daily activity series. Starts as an empty object and is filled in
// by initDB once the data has been generated.
const [calendarData, setCalendarData] = useState(
  {} as Record<ProviderKey, CustomActivity[]>,
);
// Stays true until initDB has stored the calendar data; the render shows "Loading..." meanwhile.
const [isLoading, setIsLoading] = useState<boolean>(true);
// Display name and calendar colour for every tracked provider.
// The keys are also interpolated into the SQL filter in getModelData.
const PROVIDERS_MAP: Record<ProviderKey, { name: string; color: string }> = {
  mistralai: { name: "Mistral AI", color: "#ff7000" },
  "meta-llama": { name: "Meta", color: "#0668E1" },
  openai: { name: "OpenAI", color: "#10A37F" },
  anthropic: { name: "Anthropic", color: "#cc785c" },
  google: { name: "Google", color: "#4285F4" },
};
/**
 * Queries the `models` view for the number of models created per day and per
 * provider since the start of the current year.
 *
 * Ids whose first path segment is `facebook` are reported under `meta-llama`.
 *
 * @param conn Open DuckDB connection on which the `models` view exists.
 * @returns One `{ date, provider, count }` object per result row, with `count`
 *          converted through `Number()`.
 */
const getModelData = async (conn: duckdb.AsyncDuckDBConnection) => {
  // Quoted, comma-separated provider keys for the IN (...) filter.
  const quotedProviders = Object.keys(PROVIDERS_MAP)
    .map((key) => `'${key}'`)
    .join(', ');

  // The empty first and last entries keep the leading and trailing newline of the statement.
  const sql = [
    "",
    "SELECT",
    "STRFTIME(DATE_TRUNC('day', CAST(createdAt AS DATE)), '%Y-%m-%d') AS date,",
    "CASE",
    "WHEN SPLIT_PART(id, '/', 1) IN ('meta-llama', 'facebook') THEN 'meta-llama'",
    "ELSE SPLIT_PART(id, '/', 1)",
    "END AS provider,",
    "COUNT(*) AS count",
    "FROM models",
    "WHERE CAST(createdAt AS DATE) >= DATE_TRUNC('year', CURRENT_DATE)",
    "AND (",
    `SPLIT_PART(id, '/', 1) IN (${quotedProviders})`,
    "OR SPLIT_PART(id, '/', 1) = 'facebook'",
    ")",
    "GROUP BY DATE_TRUNC('day', CAST(createdAt AS DATE)),",
    "CASE",
    "WHEN SPLIT_PART(id, '/', 1) IN ('meta-llama', 'facebook') THEN 'meta-llama'",
    "ELSE SPLIT_PART(id, '/', 1)",
    "END",
    "ORDER BY date",
    "",
  ].join("\n");

  const table = await conn.query(sql);

  const rows = [];
  for (const record of table.toArray() as any[]) {
    rows.push({
      date: record.date,
      provider: record.provider,
      // Normalise the aggregate to a plain number.
      count: Number(record.count),
    });
  }
  return rows;
}
/**
 * Expands sparse per-day rows into one dense activity series per provider.
 *
 * Every provider in PROVIDERS_MAP gets one entry for each day from January 1st
 * of the current year up to and including today. Days without rows get a
 * count of 0 and an empty `details` array.
 *
 * Days are walked and formatted in UTC. `toISOString()` always renders UTC, so
 * stepping through local-time days would shift every key by one day in time
 * zones ahead of UTC (the series would start on Dec 31 of the previous year).
 *
 * @param modelData Rows shaped like `{ date: "YYYY-MM-DD", provider, count }`.
 * @returns For every provider key, its chronologically ordered daily
 *          activities with `level` set relative to that provider's own
 *          daily average (0 = no activity, 4 = more than 1.5x the average).
 */
const generateCalendarData = (modelData: any[]) => {
  const providers = Object.keys(PROVIDERS_MAP) as ProviderKey[];

  // Index the rows once by provider, then by date, so that each calendar cell
  // is a lookup instead of a scan over the whole input.
  const rowsByProvider = new Map<unknown, Map<unknown, any[]>>();
  for (const row of modelData) {
    let rowsByDate = rowsByProvider.get(row.provider);
    if (!rowsByDate) {
      rowsByDate = new Map<unknown, any[]>();
      rowsByProvider.set(row.provider, rowsByDate);
    }
    const bucket = rowsByDate.get(row.date);
    if (bucket) {
      bucket.push(row);
    } else {
      rowsByDate.set(row.date, [row]);
    }
  }

  const data = {} as Record<ProviderKey, CustomActivity[]>;
  for (const provider of providers) {
    data[provider] = [];
  }

  const today = new Date();
  // Midnight UTC on January 1st, advanced one UTC day per iteration.
  const cursor = new Date(Date.UTC(today.getUTCFullYear(), 0, 1));
  while (cursor.getTime() <= today.getTime()) {
    const dateString = cursor.toISOString().slice(0, 10);
    for (const provider of providers) {
      const rowsByDate = rowsByProvider.get(provider);
      const dayData = (rowsByDate && rowsByDate.get(dateString)) || [];
      data[provider].push({
        date: dateString,
        count: dayData.reduce((sum, item) => sum + item.count, 0),
        level: 0, // assigned below, once the provider's average is known
        details: dayData,
      });
    }
    cursor.setUTCDate(cursor.getUTCDate() + 1);
  }

  for (const provider of providers) {
    const days = data[provider];
    // Average over every day of the series, including days with no activity.
    // `|| 0` turns the NaN of an empty series into 0.
    const avgCount = days.reduce((sum, day) => sum + day.count, 0) / days.length || 0;
    for (const day of days) {
      day.level = day.count === 0 ? 0 :
        day.count <= avgCount * 0.5 ? 1 :
        day.count <= avgCount ? 2 :
        day.count <= avgCount * 1.5 ? 3 : 4;
    }
  }

  return data;
}
/**
 * Starts DuckDB-WASM in a web worker, exposes the hub-stats models parquet
 * file as the `models` view, loads the per-day release counts and stores the
 * resulting calendar data in component state.
 *
 * Failures are logged to the console instead of being left as an unhandled
 * promise rejection; in that case the loading flag is not cleared. The
 * connection, the worker and the temporary blob URL are released on both the
 * success and the failure path.
 */
const initDB = async () => {
  // NOTE(review): "@next" is a moving tag, so the WASM build served by the CDN
  // may not match the installed @duckdb/duckdb-wasm package - consider pinning.
  const CDN_BASE = `https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@next`
  const JSDELIVR_BUNDLES = {
    mvp: {
      mainModule: `${CDN_BASE}/dist/duckdb-mvp.wasm`,
      mainWorker: `${CDN_BASE}/dist/duckdb-browser-mvp.worker.js`,
    },
    eh: {
      mainModule: `${CDN_BASE}/dist/duckdb-eh.wasm`,
      mainWorker: `${CDN_BASE}/dist/duckdb-browser-eh.worker.js`,
    },
  }
  const createViewSql = `\nCREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true');\n`

  try {
    const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES)

    // The worker is started from a same-origin blob whose only job is to
    // importScripts() the CDN-hosted worker file.
    const workerUrl = URL.createObjectURL(
      new Blob([`importScripts("${bundle.mainWorker}");`], {
        type: "text/javascript",
      })
    )

    let db: duckdb.AsyncDuckDB | undefined
    let connection: duckdb.AsyncDuckDBConnection | undefined
    try {
      const worker = new Worker(workerUrl)
      db = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker)
      await db.instantiate(bundle.mainModule)

      connection = await db.connect()
      await connection.query(createViewSql)

      const modelData = await getModelData(connection)
      setCalendarData(generateCalendarData(modelData))
      setIsLoading(false)
    } finally {
      // Nothing queries the database after this point, so release everything
      // regardless of whether loading succeeded.
      URL.revokeObjectURL(workerUrl)
      if (connection) {
        await connection.close()
      }
      if (db) {
        await db.terminate()
      }
    }
  } catch (error) {
    console.error("Failed to load model release data", error)
  }
}
// Start the data load once, after the first render (empty dependency list).
// The promise returned by initDB is intentionally not awaited.
const loadOnMount = () => {
  void initDB();
};
useEffect(loadOnMount, []);
// Sum of a provider's daily counts; used to order the calendars.
const totalFor = (provider: ProviderKey) =>
  calendarData[provider].reduce((sum, day) => sum + day.count, 0);

// Provider entries ordered from the highest to the lowest total.
// Only called once loading has finished, i.e. when calendarData is populated.
const rankedProviders = () =>
  (Object.entries(PROVIDERS_MAP) as [ProviderKey, { name: string; color: string }][])
    .sort(([first], [second]) => totalFor(second) - totalFor(first));

return (
  <main className={`flex flex-col items-center justify-center min-h-screen mx-auto p-24 ${inter.className}`}>
    <h1 className="text-5xl font-bold text-center">Open Source Calendar</h1>
    <p className="text-center mt-2 text-sm">A calendar for open source model releases.</p>
    <div className="mt-16">
      {isLoading ? (
        <p>Loading...</p>
      ) : (
        <>
          {rankedProviders().map(([providerKey, provider]) => (
            <div key={providerKey} className="mb-8">
              <h2 className="text-2xl font-bold mb-2">{provider.name}</h2>
              <ActivityCalendar
                data={calendarData[providerKey]}
                theme={{
                  dark: ['#161b22', provider.color],
                }}
                colorScheme="dark"
                renderBlock={(block, activity) => (
                  <MuiTooltip title={`${activity.count} activities on ${activity.date}`}>
                    {block}
                  </MuiTooltip>
                )}
              />
            </div>
          ))}
        </>
      )}
    </div>
  </main>
);
}