|
'use client'; |
|
|
|
import { useState, useEffect } from 'react'; |
|
import * as duckdb from '@duckdb/duckdb-wasm'; |
|
import Table from './components/Table'; |
|
import Modal from './components/Modal'; |
|
|
|
/**
 * One row of `ancestor_children.parquet`: a base model together with the
 * models derived from it.
 */
type ModelData = {
  /** Identifier of the base model; the part before the first `/` is treated as its organization. */
  ancestor: string;

  /** Every model derived from `ancestor`, directly or through intermediate models. */
  all_children: string[];
  /** Number of entries reported for `all_children`. */
  all_children_count: number;

  /** Models that name `ancestor` as their `base_model`; `null` when none are recorded. */
  direct_children: null | string[];
  /** Number of direct children; `null` when none are recorded. */
  direct_children_count: null | number;
};
|
|
|
/**
 * Per-organization aggregate computed by the org query: rows are grouped by
 * the first `/`-separated segment of `ancestor`.
 */
type OrgData = {
  /** Organization name (first path segment of the ancestor identifier). */
  org: string;
  /** Sum of `all_children_count` over the organization's ancestor models. */
  family_all_children_count: number;
  /** Sum of `direct_children_count` over the organization's ancestor models. */
  family_direct_children_count: number;
  /** Number of distinct ancestor models belonging to the organization. */
  family_model_count: number;
};
|
|
|
/**
 * Minimal shape of an indexable, non-array container: a `length` plus a
 * `get(index)` accessor. Used to read list columns that do not arrive as
 * plain JavaScript arrays.
 */
interface VectorLike<T> {
  /** Number of elements available through `get`. */
  length: number;

  /** Returns the element stored at `index`. */
  get(index: number): T;
}
|
|
|
/** Sort key of the models table: which descendant count rows are ranked by. */
type ModelOrderBy = 'all_children' | 'direct_children';

/** Name under which the parquet file is registered inside DuckDB. */
const PARQUET_FILE = 'ancestor_children.parquet';

/** Selects every ancestor row, casting the counts to plain integers. */
const MODELS_QUERY = `
  SELECT
    ancestor,
    direct_children,
    all_children,
    CAST(all_children_count AS INTEGER) AS all_children_count,
    CAST(direct_children_count AS INTEGER) AS direct_children_count
  FROM '${PARQUET_FILE}'
`;

/** Aggregates the ancestor rows per organization (first `/` segment of `ancestor`). */
const ORGS_QUERY = `
  SELECT
    SPLIT_PART(ancestor, '/', 1) AS org,
    CAST(COUNT(DISTINCT ancestor) AS INTEGER) AS family_model_count,
    CAST(SUM(direct_children_count) AS INTEGER) AS family_direct_children_count,
    CAST(SUM(all_children_count) AS INTEGER) AS family_all_children_count
  FROM '${PARQUET_FILE}'
  GROUP BY org
  ORDER BY family_all_children_count DESC
`;

/** Parses a `page` query parameter; returns `null` unless it is an integer >= 1. */
function parsePageParam(raw: string | null): number | null {
  if (raw === null) {
    return null;
  }
  const page = Number.parseInt(raw, 10);
  return Number.isInteger(page) && page >= 1 ? page : null;
}

/**
 * Maps a sort key reported by the models table to a `ModelOrderBy`.
 * Accepts both the short form (`all_children`) and the column-key form
 * (`all_children_count`); returns `null` for anything else.
 */
function toModelOrderBy(key: unknown): ModelOrderBy | null {
  switch (key) {
    case 'all_children':
    case 'all_children_count':
      return 'all_children';
    case 'direct_children':
    case 'direct_children_count':
      return 'direct_children';
    default:
      return null;
  }
}

/**
 * Normalizes a children column value to a plain string array.
 * Plain arrays are returned as-is, `VectorLike` containers are copied element
 * by element, `null`/`undefined` become `[]`, and any other shape is logged
 * and treated as empty.
 */
function toStringArray(children: VectorLike<string> | string[] | null | undefined): string[] {
  if (children == null) {
    return [];
  }
  if (Array.isArray(children)) {
    return children;
  }
  if (typeof children === 'object' && 'length' in children && typeof children.get === 'function') {
    return Array.from({ length: children.length }, (_, index) => children.get(index));
  }
  console.error('Unexpected children data structure:', children);
  return [];
}

/**
 * Landing page of the Hugging Face Model Derivatives Explorer.
 *
 * Loads `ancestor_children.parquet` in the browser through DuckDB-Wasm, then
 * shows two tabs: models ranked by their number of derivatives, and the same
 * data aggregated per organization. Tab, page, sort order and filters are
 * mirrored into the URL query string; clicking a count opens a modal listing
 * the corresponding models.
 */
export default function Home() {
  // Loaded data and load status.
  const [allModels, setAllModels] = useState<ModelData[]>([]);
  const [orgData, setOrgData] = useState<OrgData[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [loadError, setLoadError] = useState<string | null>(null);

  const [activeTab, setActiveTab] = useState<'models' | 'orgs'>('models');

  // Models tab.
  const [currentPage, setCurrentPage] = useState(1);
  const [pageSize] = useState(100);
  const [filterText, setFilterText] = useState('');
  const [orderBy, setOrderBy] = useState<ModelOrderBy>('all_children');

  // Organizations tab.
  const [orgCurrentPage, setOrgCurrentPage] = useState(1);
  const [orgPageSize] = useState(100);
  const [orgOrderBy, setOrgOrderBy] = useState<keyof OrgData>('family_all_children_count');
  const [orgFilterText, setOrgFilterText] = useState('');

  // Modals.
  const [selectedModel, setSelectedModel] = useState<ModelData | null>(null);
  const [selectedModelChildren, setSelectedModelChildren] = useState<string[]>([]);
  const [selectedModelChildrenType, setSelectedModelChildrenType] = useState<'direct' | 'all'>('all');
  const [modelChildrenPage, setModelChildrenPage] = useState(1);
  const [selectedOrg, setSelectedOrg] = useState<string | null>(null);
  const [selectedOrgModels, setSelectedOrgModels] = useState<ModelData[]>([]);
  const [orgModelsPage, setOrgModelsPage] = useState(1);
  const modalPageSize = 10;

  // On mount, restore the view described by the query string.
  useEffect(() => {
    const urlParams = new URLSearchParams(window.location.search);

    if (urlParams.get('tab') === 'orgs') {
      setActiveTab('orgs');
    }

    // Ignore missing or malformed page numbers instead of storing NaN.
    const page = parsePageParam(urlParams.get('page'));
    if (page !== null) {
      setCurrentPage(page);
    }

    // Any other value keeps the default order ('all_children').
    if (urlParams.get('order') === 'direct_children') {
      setOrderBy('direct_children');
    }

    const filter = urlParams.get('filter');
    if (filter) {
      setFilterText(filter);
    }

    const orgFilter = urlParams.get('orgFilter');
    if (orgFilter) {
      setOrgFilterText(orgFilter);
    }
  }, []);

  // Mirror the current view into the query string; defaults are omitted.
  useEffect(() => {
    const urlParams = new URLSearchParams();
    if (activeTab === 'orgs') {
      urlParams.set('tab', 'orgs');
    }
    if (currentPage > 1) {
      urlParams.set('page', currentPage.toString());
    }
    if (orderBy === 'direct_children') {
      urlParams.set('order', 'direct_children');
    }
    if (filterText) {
      urlParams.set('filter', filterText);
    }
    if (orgFilterText) {
      urlParams.set('orgFilter', orgFilterText);
    }

    // Avoid a dangling "?" when every value is at its default.
    const queryString = urlParams.toString();
    const newUrl = queryString
      ? `${window.location.pathname}?${queryString}`
      : window.location.pathname;
    window.history.replaceState(null, '', newUrl);
  }, [activeTab, currentPage, orderBy, filterText, orgFilterText]);

  // Load the dataset once through an in-browser DuckDB instance.
  useEffect(() => {
    // Set by the cleanup so a late result does not update an unmounted component.
    let cancelled = false;

    async function fetchData(): Promise<void> {
      let workerUrl: string | null = null;
      try {
        const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());
        if (!bundle.mainWorker) {
          throw new Error('The selected DuckDB bundle does not provide a worker script.');
        }

        // The worker script is cross-origin, so it is loaded through a same-origin blob shim.
        workerUrl = URL.createObjectURL(
          new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
        );
        const worker = new Worker(workerUrl);
        const db = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);

        let models: ModelData[];
        let orgs: OrgData[];
        try {
          await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
          await db.registerFileURL(
            PARQUET_FILE,
            `${window.location.origin}/${PARQUET_FILE}`,
            duckdb.DuckDBDataProtocol.HTTP,
            false
          );

          const conn = await db.connect();
          try {
            models = (await conn.query(MODELS_QUERY)).toArray();
            orgs = (await conn.query(ORGS_QUERY)).toArray();
          } finally {
            await conn.close();
          }
        } finally {
          // Release the database even when instantiation or a query fails.
          await db.terminate();
        }

        if (!cancelled) {
          setAllModels(models);
          setOrgData(orgs);
        }
      } catch (error: unknown) {
        console.error('Failed to load model data:', error);
        if (!cancelled) {
          setLoadError(error instanceof Error ? error.message : String(error));
        }
      } finally {
        if (workerUrl !== null) {
          URL.revokeObjectURL(workerUrl);
        }
        if (!cancelled) {
          setIsLoading(false);
        }
      }
    }

    void fetchData();

    return () => {
      cancelled = true;
    };
  }, []);

  // `filter` returns a fresh array, so the in-place `sort` never touches state.
  const modelNeedle = filterText.toLowerCase();
  const sortedModels = allModels
    .filter((model) => model.ancestor.toLowerCase().includes(modelNeedle))
    .sort((a, b) =>
      orderBy === 'all_children'
        ? b.all_children_count - a.all_children_count
        : (b.direct_children_count ?? 0) - (a.direct_children_count ?? 0)
    );

  // Organizations sort alphabetically by name, or descending by the chosen count.
  const orgNeedle = orgFilterText.toLowerCase();
  const sortedOrgData = orgData
    .filter((org) => org.org.toLowerCase().includes(orgNeedle))
    .sort((a, b) => {
      if (orgOrderBy === 'org') {
        return a.org.localeCompare(b.org);
      }
      return b[orgOrderBy] - a[orgOrderBy];
    });

  /** Switches tabs and resets paging, ordering and filters to their defaults. */
  const handleTabChange = (tab: 'models' | 'orgs') => {
    setActiveTab(tab);
    setCurrentPage(1);
    setOrgCurrentPage(1);
    setOrderBy('all_children');
    setFilterText('');
    setOrgFilterText('');
  };

  /** Stores the requested page for the given tab's table. */
  const handlePageChange = (page: number, tab: 'models' | 'orgs') => {
    if (tab === 'models') {
      setCurrentPage(page);
    } else {
      setOrgCurrentPage(page);
    }
  };

  /** Changes the models sort order and returns to the first page. */
  const handleOrderByClick = (column: ModelOrderBy) => {
    setOrderBy(column);
    setCurrentPage(1);
  };

  /** Opens the children modal for `model`, listing its direct or all children. */
  const handleModelChildrenClick = (model: ModelData, type: 'direct' | 'all') => {
    setSelectedModel(model);
    setSelectedModelChildrenType(type);
    setSelectedModelChildren(
      toStringArray(type === 'direct' ? model.direct_children : model.all_children)
    );
    setModelChildrenPage(1);
  };

  /** Opens the modal listing every ancestor model that belongs to `org`. */
  const handleOrgModelsClick = (org: string) => {
    setSelectedOrg(org);
    setSelectedOrgModels(allModels.filter((model) => model.ancestor.split('/')[0] === org));
    setOrgModelsPage(1);
  };

  const handleModelChildrenPageChange = (page: number) => {
    setModelChildrenPage(page);
  };

  const handleOrgModelsPageChange = (page: number) => {
    setOrgModelsPage(page);
  };

  const tabClassName = (tab: 'models' | 'orgs') =>
    `px-4 py-2 rounded-md ${
      activeTab === tab
        ? 'bg-blue-500 dark:bg-blue-600 text-white'
        : 'bg-gray-200 dark:bg-gray-700 text-gray-700 dark:text-gray-200'
    }`;

  const filterInputClassName =
    'px-4 py-2 border border-gray-300 dark:border-gray-700 rounded-md bg-white dark:bg-gray-800 text-gray-900 dark:text-white';

  // Shown instead of a table while loading or after a failed load; null once data is ready.
  const dataStatus = isLoading ? (
    <p>Loading data...</p>
  ) : loadError !== null ? (
    <p role="alert" className="text-red-600 dark:text-red-400">
      Failed to load data: {loadError}
    </p>
  ) : null;

  return (
    <main className="container mx-auto py-8 bg-white dark:bg-gray-900 text-gray-900 dark:text-white">
      <h1 className="text-4xl font-bold mb-4">Hugging Face Model Derivatives Explorer</h1>
      <p className="mb-8">
        This tool allows you to explore the popularity of Hugging Face models based on their derivatives. It pulls model data from publicly exposed files and calculates the number of direct and indirect children for each model using the <code>base_model</code> tag, up to 10 iterations for all children.
      </p>
      <div className="mb-8">
        <h2 className="text-2xl font-bold mb-2">Definitions</h2>
        <ul className="list-disc list-inside">
          <li>
            <strong>Direct Children:</strong> Models that are directly derived from the selected model, i.e., they have the selected model set as their <code>base_model</code>.
          </li>
          <li>
            <strong>All Children:</strong> All models that are derived from the selected model, either directly or indirectly, up to 10 iterations deep. This includes models that have the selected model as their <code>base_model</code>, as well as models derived from those models, and so on.
          </li>
        </ul>
      </div>
      <div className="mb-4 flex space-x-4">
        <a
          href="?tab=models"
          onClick={(e) => {
            e.preventDefault();
            handleTabChange('models');
          }}
          className={tabClassName('models')}
        >
          Models
        </a>
        <a
          href="?tab=orgs"
          onClick={(e) => {
            e.preventDefault();
            handleTabChange('orgs');
          }}
          className={tabClassName('orgs')}
        >
          Organizations
        </a>
      </div>
      {activeTab === 'models' ? (
        <>
          <div className="mb-4">
            <input
              type="text"
              placeholder="Filter by model name"
              value={filterText}
              onChange={(e) => {
                setFilterText(e.target.value);
                // A narrower result set may not have the current page any more.
                setCurrentPage(1);
              }}
              className={filterInputClassName}
            />
          </div>
          {dataStatus ?? (
            <Table
              data={sortedModels}
              columns={[
                {
                  key: 'ancestor',
                  label: 'Model',
                },
                {
                  key: 'direct_children_count',
                  label: 'Direct Children',
                  render: (value, row) => (
                    <button
                      className="text-right text-blue-500 hover:underline"
                      onClick={() => handleModelChildrenClick(row, 'direct')}
                    >
                      {value ?? 0}
                    </button>
                  ),
                },
                {
                  key: 'all_children_count',
                  label: 'All Children',
                  render: (value, row) => (
                    <button
                      className="text-right text-blue-500 hover:underline"
                      onClick={() => handleModelChildrenClick(row, 'all')}
                    >
                      {value}
                    </button>
                  ),
                },
              ]}
              // NOTE(review): the column keys above end in `_count` while this value does
              // not; confirm which form Table expects for its sort indicator.
              orderBy={orderBy}
              onOrderByChange={(key) => {
                // Accept both the short key and the `*_count` column key.
                const nextOrder = toModelOrderBy(key);
                if (nextOrder !== null) {
                  handleOrderByClick(nextOrder);
                }
              }}
              pageSize={pageSize}
              currentPage={currentPage}
              onPageChange={(page) => handlePageChange(page, 'models')}
            />
          )}
        </>
      ) : (
        <>
          <div className="mb-4">
            <input
              type="text"
              placeholder="Filter by organization name"
              value={orgFilterText}
              onChange={(e) => {
                setOrgFilterText(e.target.value);
                setOrgCurrentPage(1);
              }}
              className={filterInputClassName}
            />
          </div>
          {dataStatus ?? (
            <Table
              // The full sorted list is passed, as for every other Table here, so that
              // `pageSize`/`currentPage` are applied once rather than on a pre-sliced page.
              // NOTE(review): assumes Table paginates `data` itself - confirm.
              data={sortedOrgData}
              columns={[
                {
                  key: 'org',
                  label: 'Organization',
                },
                {
                  key: 'family_model_count',
                  label: 'Model Count',
                  render: (value, row) => (
                    <button
                      className="text-right text-blue-500 hover:underline"
                      onClick={() => handleOrgModelsClick(row.org)}
                    >
                      {value}
                    </button>
                  ),
                },
                {
                  key: 'family_direct_children_count',
                  label: 'Direct Children',
                  render: (value) => <span className="text-right">{value}</span>,
                },
                {
                  key: 'family_all_children_count',
                  label: 'All Children',
                  render: (value) => <span className="text-right">{value}</span>,
                },
              ]}
              orderBy={orgOrderBy}
              onOrderByChange={(key) => {
                setOrgOrderBy(key);
                setOrgCurrentPage(1);
              }}
              pageSize={orgPageSize}
              currentPage={orgCurrentPage}
              onPageChange={(page) => handlePageChange(page, 'orgs')}
            />
          )}
        </>
      )}
      {selectedModel && (
        <Modal
          onClose={() => {
            setSelectedModel(null);
            setModelChildrenPage(1);
          }}
        >
          <h2 className="text-2xl font-bold mb-4">
            {selectedModelChildrenType === 'direct' ? 'Direct Children' : 'All Children'} of {selectedModel.ancestor}
          </h2>
          {selectedModelChildren.length > 0 ? (
            <Table
              data={selectedModelChildren.map((child, index) => ({ id: index, model: child }))}
              columns={[{ key: 'model', label: 'Model' }]}
              pageSize={modalPageSize}
              currentPage={modelChildrenPage}
              onPageChange={handleModelChildrenPageChange}
            />
          ) : (
            <p>No children found for this model.</p>
          )}
        </Modal>
      )}
      {selectedOrg !== null && (
        <Modal
          onClose={() => {
            setSelectedOrg(null);
            setOrgModelsPage(1);
          }}
        >
          <h2 className="text-2xl font-bold mb-4">Models under {selectedOrg}</h2>
          {selectedOrgModels.length > 0 ? (
            <Table
              data={selectedOrgModels}
              columns={[
                {
                  key: 'ancestor',
                  label: 'Model',
                },
                {
                  key: 'direct_children_count',
                  label: 'Direct Children',
                  render: (value) => <span className="text-right">{value ?? 0}</span>,
                },
                {
                  key: 'all_children_count',
                  label: 'All Children',
                  render: (value) => <span className="text-right">{value}</span>,
                },
              ]}
              pageSize={modalPageSize}
              currentPage={orgModelsPage}
              onPageChange={handleOrgModelsPageChange}
            />
          ) : (
            <p>No models found for this organization.</p>
          )}
        </Modal>
      )}
    </main>
  );
}
|
|