webersni committed on
Commit
a21cb67
•
1 Parent(s): 0d1c244

Update tokenizer based on new language model

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +1 -1
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"planning": 50435, "innovative": 50491, "materials": 50357, "particular": 50390, "risks": 50265, "invested": 50439, "deliver": 50437, "Risk": 50319, "stakeholders": 50384, "plans": 50383, "emissions": 50266, "ensure": 50399, "companies": 50271, "infrastructure": 50310, "additional": 50367, "manufacturing": 50425, "Investment": 50369, "conditions": 50376, "affect": 50320, "costs": 50275, "technologies": 50352, "flooding": 50407, "“": 50373, "purchase": 50478, "electricity": 50291, "society": 50364, "•": 50276, "adverse": 50411, "failure": 50499, "funds": 50398, "annually": 50488, "taken": 50460, "launched": 50440, "significantly": 50458, "transition": 50273, "lending": 50368, "relating": 50461, "supporting": 50490, "’": 50267, "adversely": 50497, "governance": 50345, "statements": 50453, "contribute": 50405, "Agreement": 50344, "policies": 50353, "directly": 50427, "reduced": 50422, "profitability": 50454, "increased": 50293, "partners": 50482, "Finance": 50493, "therefore": 50456, "Operational": 50486, "developing": 50381, "reduce": 50284, "operational": 50302, "losses": 50346, "floods": 50449, "activities": 50283, "solutions": 50355, "exposure": 50343, "solar": 50340, "greater": 50444, "least": 50487, "reputation": 50327, "GHG": 50322, "developments": 50474, "facilities": 50336, "considered": 50479, "financing": 50294, "fossil": 50374, "transport": 50419, "Principles": 50494, "already": 50496, "2021": 50347, "Sustainable": 50341, "compared": 50467, "several": 50416, "sands": 50441, "reduction": 50306, "portfolios": 50363, "emerging": 50489, "2050": 50401, "Committee": 50329, "strategic": 50300, "returns": 50495, "practices": 50394, "indirect": 50445, "communities": 50389, "clients": 50289, "emission": 50388, "Eni": 50476, "BNP": 50370, "temperature": 50423, "customers": 50279, "disasters": 50432, "achieving": 50498, "decisions": 50412, "corporate": 50307, "construction": 50462, "provide": 50313, "initiative": 50424, "exposed": 50386, "fiscal": 50436, "areas": 
50318, "experience": 50448, "sectors": 50312, "Greenhouse": 50463, "basis": 50473, "industry": 50292, "standards": 50349, "2025": 50403, "towards": 50414, "severity": 50468, "longer": 50431, "footprint": 50338, "assessment": 50351, "Sustainability": 50362, "resulting": 50361, "strategy": 50286, "increases": 50446, "asset": 50325, "manage": 50326, "following": 50371, "‘": 50426, "cement": 50492, "loans": 50417, "possible": 50420, "transactions": 50451, "sustainability": 50285, "patterns": 50438, "buildings": 50404, "goals": 50348, "bond": 50418, "institutions": 50450, "potential": 50281, "operations": 50270, "Corporate": 50408, "vehicles": 50455, "businesses": 50309, "sustainable": 50288, "suppliers": 50402, "supply": 50297, "unable": 50464, "engagement": 50397, "provides": 50409, "portfolio": 50277, "efforts": 50324, "caused": 50410, "operate": 50415, "Carbon": 50358, "factors": 50301, "reputational": 50469, "metrics": 50442, "further": 50315, "processes": 50354, "plants": 50350, "reducing": 50359, "consumption": 50314, "criteria": 50477, "2030": 50339, "objectives": 50387, "IFC": 50475, "prices": 50484, "€": 50303, "others": 50472, "mortgage": 50459, "equivalent": 50457, "claims": 50470, "equity": 50434, "opportunities": 50278, "requirements": 50335, "achieve": 50379, "various": 50382, "—": 50430, "commitment": 50337, "assess": 50378, "investing": 50392, "legislation": 50485, "mainly": 50466, "environmental": 50268, "operating": 50372, "targets": 50305, "included": 50393, "Paribas": 50385, "scenario": 50290, "across": 50299, "investments": 50282, "decision": 50481, "challenges": 50400, "uncertainty": 50406, "annual": 50342, "commitments": 50465, "committed": 50334, "tonnes": 50447, "greenhouse": 50323, "–": 50317, "Governance": 50308, "investors": 50356, "strategies": 50391, "ESG": 50360, "waste": 50331, "impacts": 50272, "systems": 50333, "identify": 50377, "sources": 50380, "managing": 50421, "addition": 50287, "economy": 50304, "regarding": 50413, "continued": 
50443, "initiatives": 50316, "scenarios": 50330, "fuels": 50375, "mitigate": 50428, "countries": 50328, "renewable": 50274, "approach": 50311, "insurance": 50296, "BlackRock": 50480, "investment": 50269, "distribution": 50429, "increase": 50280, "bonds": 50452, "customer": 50395, "loan": 50396, "overall": 50366, "dioxide": 50471, "finance": 50298, "employees": 50332, "resilience": 50433, "regulations": 50321, "CO2": 50365, "methodology": 50483, "regulatory": 50295}
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilroberta-base", "tokenizer_class": "RobertaTokenizer"}
 
1
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "name_or_path": "task2_model/specificity/checkpoint-4", "special_tokens_map_file": "pre_model/30092021_roberta/special_tokens_map.json", "tokenizer_class": "RobertaTokenizer"}