Spaces:

derful
/

MinerU

Runtime error

MinerU / magic_pdf /libs /language.py

Upload folder using huggingface_hub

240e0a0 verified 8 months ago

692 Bytes

	import unicodedata
	from fast_langdetect import detect_language


	def detect_lang(text: str) -> str:
	    """Return the lower-cased language code detected for *text*.

	    Detection is delegated to ``detect_language``. If that call fails on
	    the raw text, it is retried once on a copy of the text with every
	    character whose Unicode general category starts with ``C`` (control,
	    format, surrogate, private-use, unassigned) removed.

	    Args:
	        text: The text to classify.

	    Returns:
	        The detector's result converted to lower case, or ``""`` when
	        *text* is empty/``None`` or the detector's result has no
	        ``lower()`` method (e.g. it returned ``None``).

	    Raises:
	        Exception: Whatever ``detect_language`` raises if the retry on the
	            sanitized text fails as well; that error is not swallowed.
	    """
	    if not text:
	        return ""
	    try:
	        lang_upper = detect_language(text)
	    except Exception:
	        # Catch Exception rather than using a bare ``except`` so that
	        # KeyboardInterrupt / SystemExit are never swallowed here.
	        # NOTE(review): presumably the detector rejects control characters
	        # such as newlines -- confirm against the fast_langdetect version in use.
	        text_no_ctrl_chars = ''.join(
	            ch for ch in text if unicodedata.category(ch)[0] != 'C'
	        )
	        lang_upper = detect_language(text_no_ctrl_chars)
	    try:
	        return lang_upper.lower()
	    except AttributeError:
	        # The detector returned something without ``lower()`` (e.g. None):
	        # report "unknown" as an empty string instead of crashing.
	        return ""


	if __name__ == '__main__':
	    # Manual smoke test: print the detected language for plain and
	    # HTML-wrapped samples in English and Chinese, one result per line.
	    for sample in (
	        "This is a test.",
	        "<html>This is a test</html>",
	        "这个是中文测试。",
	        "<html>这个是中文测试。</html>",
	    ):
	        print(detect_lang(sample))