"""Quick test to verify the LLM loads and generates real text."""
import os
os.environ["HF_HOME"] = "./hf_cache"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch
from transformers import pipeline

# Load the model once: announce it, choose the device, then build the pipeline.
print("Loading SmolLM-135M-Instruct...")
if torch.cuda.is_available():
    # Device index 0 is paired with half precision and reported as the GPU.
    device, device_name, model_dtype = 0, "GPU (CUDA)", torch.float16
else:
    # Device index -1 is paired with full precision and reported as the CPU.
    device, device_name, model_dtype = -1, "CPU", torch.float32
print(f"  Device: {device_name}")

# `gen` is the text-generation pipeline reused by the prompts further down.
gen = pipeline(
    "text-generation",
    model="HuggingFaceTB/SmolLM-135M-Instruct",
    device=device,
    torch_dtype=model_dtype,
)
print("[OK] Model loaded!")

# Caption check: a system + user chat prompt asking for a short Instagram caption.
messages = [
    dict(
        role="system",
        content="You are an expert social media marketing copywriter. Write engaging captions.",
    ),
    dict(
        role="user",
        content="Write a professional Instagram caption for Nike's Summer Sprint campaign in the fitness niche. Keep it 2-3 sentences.",
    ),
]
# Sampled generation capped at 150 new tokens.
caption_options = {
    "max_new_tokens": 150,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}
out = gen(messages, **caption_options)
print("\n=== Generated Caption ===")
# Print the content of the last message in the first result's generated_text
# (presumably the model's reply appended to the chat -- confirm against the
# pipeline documentation).
last_message = out[0]["generated_text"][-1]
print(last_message["content"])
print("=========================\n")

# Pitch check: a system + user chat prompt asking for an influencer pitch message.
messages2 = [
    dict(
        role="system",
        content="You are an expert at writing influencer pitch messages. Write compelling pitches.",
    ),
    dict(
        role="user",
        content="Write a pitch from Sarah (a fitness creator with 50K followers) applying to Nike's Summer Sprint campaign.",
    ),
]
# Sampled generation capped at 200 new tokens.
pitch_options = {
    "max_new_tokens": 200,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}
out2 = gen(messages2, **pitch_options)
print("=== Generated Pitch ===")
# Print the content of the last message in the first result's generated_text
# (presumably the model's reply appended to the chat -- confirm against the
# pipeline documentation).
last_message2 = out2[0]["generated_text"][-1]
print(last_message2["content"])
print("========================")