--- license: apache-2.0 language: - zh - en pipeline_tag: text-generation inference: false tags: - chinese --- # Explanation - The base tokenizer is from [baichuan-7B](https://github.com/baichuan-inc/baichuan-7B); this model adds some math symbols: ``` "approx": 64000, "arccos": 64001, "arcsin": 64002, "arctan": 64003, "backsim": 64004, "begin{matrix}": 64005, "begin{vmatrix}": 64006, "beta": 64007, "cdot": 64008, "cdots": 64009, "cong": 64010, "delta": 64011, "dot": 64012, "downarrow": 64013, "end{matrix}": 64014, "end{vmatrix}": 64015, "exists": 64016, "forall": 64017, "gamma": 64018, "geq": 64019, "infty": 64020, "lambda": 64021, "left.": 64022, "left[": 64023, "left{": 64024, "leftrightarrow": 64025, "leq": 64026, "lg": 64027, "neq": 64028, "notin": 64029, "omega": 64030, "overline": 64031, "overrightarrow": 64032, "prime": 64033, "psi": 64034, "rho": 64035, "right.": 64036, "right}": 64037, "right]": 64038, "rightarrow": 64039, "sigma": 64040, "subset": 64041, "subseteq": 64042, "supset": 64043, "supseteq": 64044, "tan": 64045, "textcircled": 64046, "text{": 64047, "therefore": 64048, "theta": 64049, "varepsilon": 64050, "varphi": 64051, "widehat": 64052, "xrightarrow": 64053, "…": 64054, "℃": 64055, "①": 64056, "②": 64057, "③": 64058, "④": 64059, "⑤": 64060, "⑥": 64061, "⑦": 64062, "⑧": 64063, "⑨": 64064, "⑩": 64065, "%": 64066, "(": 64067, ")": 64068, "+": 64069, "-": 64070, ".": 64071, ";": 64072, "<": 64073, "=": 64074, ">": 64075 ```