# Source code for sylloge.med_bbk_loader
import pathlib
from typing import Any, Dict, Optional
from .base import BACKEND_LITERAL, BASE_DATASET_MODULE, ZipEADataset
# Handle for the "med_bbk" sub-module of the shared base dataset module.
# MED_BBK uses it to make sure the zip archive is present and to build
# the cache path.
MED_BBK_MODULE = BASE_DATASET_MODULE.module("med_bbk")
class MED_BBK(ZipEADataset):
    """Class containing the MED-BBK dataset.

    Published in Zhang, Z. et al. (2020) *An Industry Evaluation of
    Embedding-based Entity Alignment*, *COLING*.
    """

    #: The link to the zip file
    _ZIP_LINK: str = (
        "https://github.com/ZihengZZH/industry-eval-EA/raw/main/benchmark/industry.zip"
    )

    #: The hex digest for the zip file
    _SHA512: str = "da1ee2b025070fd6890fb7e77b07214af3767b5ae85bcdc1bb36958b4b8dd935bc636e3466b94169158940a960541f96284e3217d32976bfeefa56e29d4a9e0d"

    def __init__(
        self,
        backend: BACKEND_LITERAL = "pandas",
        npartitions: int = 1,
        use_cache: bool = True,
        cache_path: Optional[pathlib.Path] = None,
    ):
        """Initializes an MED-BBK dataset.

        :param backend: Whether to use "pandas" or "dask"
        :param npartitions: how many partitions to use for each frame, when using dask
        :param use_cache: whether to use cache or not
        :param cache_path: Path where cache will be stored/loaded
        """
        # ensure zip file is present (checked against the SHA-512 digest)
        zip_path = MED_BBK_MODULE.ensure(
            url=MED_BBK._ZIP_LINK,
            download_kwargs=dict(hexdigests=dict(sha512=MED_BBK._SHA512)),
        )

        # directory inside the archive that holds the dataset files
        inner_path = "industry"
        actual_cache_path = self.create_cache_path(
            MED_BBK_MODULE, inner_path, cache_path
        )
        super().__init__(
            cache_path=actual_cache_path,
            use_cache=use_cache,
            zip_path=zip_path,
            inner_path=pathlib.PurePosixPath(inner_path),
            backend=backend,
            npartitions=npartitions,
            dataset_names=("MED", "BBK"),
        )

    def initial_read(self, backend: BACKEND_LITERAL) -> Dict[str, Any]:
        """Read the raw frames and swap the left/right roles of the two graphs.

        The parent class reads the archive with BBK as KG1 and MED as KG2,
        while this dataset is exposed as ("MED", "BBK"). Relation triples,
        attribute triples and entity links are therefore all swapped.

        :param backend: Whether to use "pandas" or "dask"
        :return: dict with the keys ``rel_triples_left``, ``rel_triples_right``,
            ``attr_triples_left``, ``attr_triples_right`` and ``ent_links``
        """
        # MED is KG2 and BBK is KG1
        initial_dict = super().initial_read(backend=backend)

        ent_links = initial_dict["ent_links"]
        first_col, second_col = ent_links.columns[0], ent_links.columns[1]
        # Bring the second column's values to the front ...
        ent_links = ent_links[[second_col, first_col]]
        # ... and restore the original label order, so that the first label now
        # names the former second column's values. Re-assigning the switched
        # labels here (as was done before) is a no-op and leaves the links
        # inconsistent with the swapped triples below.
        ent_links.columns = [first_col, second_col]

        return dict(
            rel_triples_left=initial_dict["rel_triples_right"],
            rel_triples_right=initial_dict["rel_triples_left"],
            attr_triples_left=initial_dict["attr_triples_right"],
            attr_triples_right=initial_dict["attr_triples_left"],
            ent_links=ent_links,
        )

    @property
    def _canonical_name(self) -> str:
        """Return the name of this class."""
        return f"{self.__class__.__name__}"

    @property
    def _param_repr(self) -> str:
        """Return an empty string as the parameter representation."""
        return ""