MinerU / magic_pdf /libs /textbase.py
derful's picture
Upload folder using huggingface_hub
240e0a0 verified
raw
history blame contribute delete
732 Bytes
import math
def __inc_dict_val(mp, key, val_inc:int):
    """
    Add ``val_inc`` to the value stored under ``key`` in ``mp``, in place.

    When ``key`` is absent, or its current value is falsy (e.g. ``0``), the
    entry is simply set to ``val_inc``. Nothing is returned; ``mp`` is
    mutated.
    """
    current = mp.get(key)
    # A falsy current value (missing key or 0) is overwritten rather than added to.
    mp[key] = current + val_inc if current else val_inc
def get_text_block_base_info(block):
    """
    Return the dominant ``(color, size, font)`` of a text block.

    Every span found under ``block['lines'][i]['spans']`` votes for its
    ``(color, size, font)`` combination, weighted by the number of characters
    in the span's text. Font sizes are rounded to two decimals before
    grouping. The combination backing the most characters wins; on a tie the
    combination encountered first is returned.

    Args:
        block: mapping with a ``'lines'`` iterable; each line has a
            ``'spans'`` iterable; each span provides ``'color'``, ``'size'``,
            ``'font'`` and ``'text'``.

    Returns:
        A ``(color, size, font)`` tuple.

    Raises:
        ValueError: if the block contains no spans at all.
        KeyError: if one of the expected keys is missing.
    """
    char_count = {}
    for line in block['lines']:
        for span in line['spans']:
            style = (span['color'], round(span['size'], 2), span['font'])
            char_count[style] = char_count.get(style, 0) + len(span['text'])

    # max() on an empty dict raises an opaque ValueError; raise the same
    # exception type with a message that names the real problem.
    if not char_count:
        raise ValueError(
            "text block contains no spans; cannot determine its base style"
        )

    # Dicts iterate in insertion order, so ties go to the first style seen.
    return max(char_count, key=char_count.get)