Usage
Installation
git clone https://github.com/panuthept/OKEAN.git
cd OKEAN
conda create -n okean python==3.11.4
conda activate okean
# Select the PyTorch build that matches your CUDA version and run ONLY ONE of the three install commands below
# CUDA 11.8
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=11.8 -c pytorch -c nvidia
# CUDA 12.1
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=12.1 -c pytorch -c nvidia
# CPU Only
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 cpuonly -c pytorch
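# Finally, install OKEAN itself in editable mode (required regardless of which PyTorch build you chose)
pip install -e .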
Entity Linking
from okean.modules.entity_linking.elq import ELQ
model = ELQ.from_pretrained(
model_name_or_path="panuthept/okean-elq-wikipedia",
)
texts = [
"Barack Obama is the former president of the United States.",
"The Eiffel Tower is located in Paris.",
]
response = model(texts=texts, return_metadata=["id"])
print(response.passages)
>> [
Passage(
text='Barack Obama is the former president of the United States.',
spans=[
Span(start=0, end=12, surface_form='Barack Obama', confident=0.7972, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
Span(start=27, end=57, surface_form='president of the United States', confident=0.5499, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
]
),
Passage(
text='The Eiffel Tower is located in Paris.',
spans=[
Span(start=4, end=16, surface_form='Eiffel Tower', confident=0.5214, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
Span(start=31, end=36, surface_form='Paris', confident=0.6658, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
]
)
]
Entity Disambiguation
from okean.modules.entity_linking.elq import ELQ
from okean.data_types.basic_types import Passage, Span
model = ELQ.from_pretrained(
model_name_or_path="panuthept/okean-elq-wikipedia",
)
passages = [
Passage(
text="Barack Obama is the former president of the United States.",
spans=[
Span(start=0, end=12, surface_form="Barack Obama"),
Span(start=27, end=57, surface_form="president of the United States"),
]
),
Passage(
text="The Eiffel Tower is located in Paris.",
spans=[
Span(start=4, end=16, surface_form="Eiffel Tower"),
Span(start=31, end=36, surface_form="Paris"),
]
),
]
response = model(passages=passages, return_metadata=["id"])
print(response.passages)
>> [
Passage(
text='Barack Obama is the former president of the United States.',
spans=[
Span(start=0, end=12, surface_form='Barack Obama', confident=1.0, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
Span(start=27, end=57, surface_form='president of the United States', confident=1.0, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
]
),
Passage(
text='The Eiffel Tower is located in Paris.',
spans=[
Span(start=4, end=16, surface_form='Eiffel Tower', confident=1.0, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
Span(start=31, end=36, surface_form='Paris', confident=1.0, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
]
)
]
- Downloads last month
- 10