---
license: mit
datasets:
- HuggingFaceFW/fineweb-edu
language:
- en
library_name: transformers
---

30,142,848 trainable parameters.

- Embedding parameters: 19,298,688
- Non-embedding parameters: 10,844,160
- Tokenizer: GPT-2
- Vocabulary size: 50,257
- Compute: single T4 GPU
- Total train time: 2 hours and 40 minutes
- Total train tokens: 136,000,000
- Epochs: 2
- Final train loss: 2.9811
- Final test loss: 2.7963

_________________________________________

Try the following script for inference. Install the dependencies first:

```
pip install huggingface_hub transformers torch
```

Then run:

```python
from transformers import GPT2Tokenizer, GPT2Config, GPT2LMHeadModel
from huggingface_hub import hf_hub_download
import torch

# Name
model_name = 'Mizule/Dense-30M'

# Authenticate
token = input("Enter your Hugging Face token: ")

# Download the raw state dict from the Hub
model_file = hf_hub_download(repo_id=model_name, filename="Dense-30M.pth", token=token)

# Custom config matching the trained architecture
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
config = GPT2Config(
    vocab_size=tokenizer.vocab_size,
    n_positions=512,
    n_ctx=512,
    n_embd=384,
    n_layer=6,
    n_head=8
)

# Load model
model = GPT2LMHeadModel(config)
model.load_state_dict(torch.load(model_file, map_location="cpu"))
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

# Inference settings
def generate_text(prompt, max_length=128, temperature=0.2, top_k=50, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # GPT-2 has no pad token; reuse EOS to avoid a generate() warning
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Interactive loop (it's an undertrained base model, don't expect it to chat)
# Type 'exit' to quit
while True:
    prompt = input("Prompt: ")
    if prompt.lower() == 'exit':
        break
    output = generate_text(prompt)
    print(f"Generated text: {output}")
```
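As a sanity check, the parameter counts above can be reproduced from the same config the script builds. A minimal sketch (the split assumes the convention used above: the tied token-embedding matrix counts as "embedding", and everything else, position embeddings included, as "non-embedding"):

```python
from transformers import GPT2Config, GPT2LMHeadModel

# Same architecture as in the inference script
config = GPT2Config(
    vocab_size=50257,
    n_positions=512,
    n_ctx=512,
    n_embd=384,
    n_layer=6,
    n_head=8,
)
model = GPT2LMHeadModel(config)

# parameters() deduplicates the tied lm_head/wte weight, so nothing is double-counted
total = sum(p.numel() for p in model.parameters())  # 30,142,848
embedding = model.transformer.wte.weight.numel()    # 50,257 * 384 = 19,298,688
non_embedding = total - embedding                   # 10,844,160 (includes the 512 * 384 position embeddings)

print(f"Total: {total:,} | Embedding: {embedding:,} | Non-embedding: {non_embedding:,}")
```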