"""
CampaignHub AI Engine - FastAPI Microservice v2.0
7 ML-powered endpoints for campaign management.
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional, Dict
import joblib
import pandas as pd
import numpy as np
import json
import os
import random
import threading
import requests
from bs4 import BeautifulSoup

# Configuration must happen before importing huggingface/transformers
os.environ["HF_HOME"] = "./hf_cache"
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch
import transformers
import huggingface_hub
import sentence_transformers

# ASGI application object; the route decorators in this file register against it.
app = FastAPI(title="CampaignHub AI Engine", version="2.0")

# ========== Load ML Models ==========
# All artifacts are loaded once, at import time, into the module-level `models`
# dict. Route handlers look entries up by the short keys assigned below.
# NOTE(review): joblib.load / pd.read_pickle unpickle arbitrary objects, so the
# files under models/ must come from a trusted source.
print("Loading ML Models...")
models = {}
try:
    models['reach'] = joblib.load('models/reach_predictor.pkl')
    models['engagement'] = joblib.load('models/engagement_predictor.pkl')
    models['applicant_ranker'] = joblib.load('models/applicant_ranker.pkl')
    models['budget_optimizer'] = joblib.load('models/budget_optimizer.pkl')
    models['tfidf'] = joblib.load('models/tfidf_vectorizer.pkl')
    models['nn_matcher'] = joblib.load('models/nn_matcher.pkl')
    models['creators_db'] = pd.read_pickle('models/creators_db.pkl')
    models['qual_scorer'] = joblib.load('models/qualification_scorer.pkl')
    models['svd'] = joblib.load('models/recommender_svd.pkl')
    models['creator_embeddings'] = np.load('models/creator_embeddings.npy')
    models['interest_matrix'] = np.load('models/interest_matrix.npy')
    print("[OK] All sklearn models loaded.")
except Exception as e:
    # Best-effort startup: the first failing load skips every load after it,
    # but entries stored before the failure remain in `models` and the service
    # still starts. Handlers therefore have to check for the keys they need.
    print(f"[WARN] Some models failed: {e}")

# ========== Load Real LLM ==========
# Both handles start as None; handlers test them to choose between the model
# path and the template fallback.
text_generator = None
# NOTE(review): nothing in the visible part of this file assigns
# sentence_model, so it appears to stay None — confirm whether a loader exists
# elsewhere.
sentence_model = None

print("Loading LLM (TinyLlama-1.1B-Chat)...")
try:
    # Imported here (not at the top of the file) so that an import failure is
    # caught by this try block and only disables the LLM path.
    from transformers import pipeline as hf_pipeline
    # 0 selects the CUDA device when one is available, -1 selects the CPU.
    device = 0 if torch.cuda.is_available() else -1
    device_name = "GPU (CUDA)" if device == 0 else "CPU"
    print(f"  Device: {device_name}")
    text_generator = hf_pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        device=device,
        # Half precision on GPU, full precision on CPU.
        dtype=torch.float16 if device == 0 else torch.float32,
    )
    print("[OK] TinyLlama-1.1B-Chat loaded successfully!")
except Exception as e:
    # text_generator stays None, which switches the endpoints to their
    # template-based fallbacks.
    print(f"[WARN] LLM unavailable: {e}. Using template fallback.")

# Categories the caption endpoint accepts; any other value is replaced with
# 'lifestyle' there.
CATEGORIES = ['fashion', 'tech', 'food', 'fitness', 'beauty',
              'travel', 'gaming', 'lifestyle', 'music', 'education']


# ========== LLM Generation Helper ==========
def llm_generate(system_msg: str, user_msg: str, max_new: int = 300) -> str:
    """Run one chat-style completion through the loaded text-generation pipeline.

    Args:
        system_msg: Content of the "system" chat message.
        user_msg: Content of the "user" chat message.
        max_new: Upper bound on newly generated tokens.

    Returns:
        The stripped content of the last message in the generated chat, or an
        empty string when no pipeline is loaded or generation fails (the error
        is printed, never raised).
    """
    if not text_generator:
        return ""

    chat = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]
    sampling = {
        "max_new_tokens": max_new,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.15,
    }
    try:
        generations = text_generator(chat, **sampling)
        # The pipeline returns the whole conversation; the reply is its last entry.
        reply = generations[0]["generated_text"][-1]
        return reply["content"].strip()
    except Exception as e:
        print(f"LLM generation error: {e}")
        return ""

# ========== DTOs ==========
class PredictionRequest(BaseModel):
    # Request body for POST /predict. Every field has a default, so an empty
    # JSON object is accepted.
    campaignTitle: str = ''  # a title containing "test" flags the campaign as placeholder data
    campaignDescription: str = ''  # fewer than 15 characters also flags placeholder data
    category: str = 'general'  # 'fashion' / 'beauty' add a format recommendation
    budget: float = 2000  # also drives the synthetic follower count (budget * 50)
    platform: str = 'Instagram'  # selects the impressions multiplier
    targetAudience: str = 'All'  # longer than 10 chars and not 'all' boosts engagement

class CaptionRequest(BaseModel):
    # Request body for POST /generate/caption.
    brandName: str = ''  # empty values are replaced with a generic placeholder in the output
    campaignTitle: str = ''  # empty values are replaced with a generic placeholder in the output
    platform: str = 'Instagram'  # only used in the LLM prompt
    category: str = 'general'  # values outside CATEGORIES are replaced with 'lifestyle'
    tone: str = 'professional'  # keys of the tone tables used by the handler and NLPGenerator
    count: int = 3  # number of captions requested

class MatchRequest(BaseModel):
    # Request body for POST /match.
    campaign_title: str  # required; not read by the /match handler in this file
    campaign_desc: str  # required; text that creator bios are compared against
    category: str = 'general'  # compared case-insensitively with each creator's category
    platform: str = 'Instagram'  # compared case-insensitively with each creator's platform
    # Each creator dict is read with .get(); the handler looks at keys such as
    # 'bio', 'category', 'platform', 'id', 'name' and 'followers'.
    creators: List[Dict] = []  # Real creators passed from NestJS

class RankRequest(BaseModel):
    # Request body for POST /rank-applicants.
    # Each applicant dict is read with .get(); the handler looks at keys such
    # as 'category', 'platform', 'followers', 'engagement_rate',
    # 'content_quality', 'success_rate', 'response_time' and 'brand_safety'.
    applicants: List[Dict]
    campaign_category: str = 'general'  # compared case-insensitively with applicant category
    campaign_platform: str = 'Instagram'  # compared case-insensitively with applicant platform

class RecommendRequest(BaseModel):
    # Request body for POST /recommend.
    creator_category: str  # required; part of the creator text and of the category bonus
    creator_interests: List[str] = []  # joined with spaces into the creator text
    creator_followers: int = 10000  # not read by the /recommend handler in this file
    creator_platform: str = 'Instagram'  # compared case-insensitively with campaign platform
    # Each campaign dict is read with .get(); the handler looks at 'id',
    # 'title', 'description', 'category', 'platform' and 'budget'.
    campaigns: List[Dict] = []  # Real active campaigns passed from NestJS

class ContractRequest(BaseModel):
    # Request body for POST /generate/contract. The three name/title fields
    # are required; the rest fall back to the defaults below.
    brandName: str
    creatorName: str
    campaignTitle: str
    deliverables: str = ''  # empty -> placeholder wording in the contract text
    budget: float = 0  # rendered with thousands separators and two decimals
    deadline: str = ''  # free text; empty -> placeholder wording in the contract text
    platform: str = 'Instagram'
    usageRights: str = '30 days'  # inserted verbatim into the usage-rights clause

class PitchRequest(BaseModel):
    # Request body for POST /generate/pitch.
    campaignTitle: str  # required
    campaignDescription: str = ''  # only used in the LLM prompt
    creatorName: str  # required
    creatorCategory: str = ''
    creatorFollowers: str = ''  # free text (e.g. "10K"); only used by the template fallback
    targetAudience: str = ''  # only used in the LLM prompt
    keyPoints: str = ''  # only used in the LLM prompt
    tone: str = 'Professional'  # only used in the LLM system prompt

class TrackUrlRequest(BaseModel):
    # Request body for POST /analyze-url.
    url: str  # required; the handler strips it and infers the platform from the domain
    campaign_keywords: List[str] = []  # not read in the visible part of the handler

class DeepResearchRequest(BaseModel):
    # All fields are optional strings defaulting to ''.
    # NOTE(review): no handler in the visible part of this file consumes this
    # model — confirm where it is used.
    image_url: str = ''
    username: str = ''
    niche: str = ''
    follower_count: str = ''
    platform: str = ''
    known_for: str = ''


# ========== NLP Combinatorial Engine ==========
class NLPGenerator:
    """Template-based text generator used when the LLM is unavailable.

    Captions are assembled as ``hook + body + call-to-action`` and pitches as
    ``opener + middle + closer``; each part is drawn at random from the phrase
    tables built in ``__init__``.
    """

    def __init__(self):
        # Opening hooks, keyed by lowercase tone name.
        self.hooks = {
            "casual": [
                "Hey fam! 👋",
                "POV:",
                "Wait, you need to see this.",
                "Not sponsored by luck 🍀,",
                "Okay but can we talk about",
                "Hold up ✋",
                "PSA:",
                "Literally obsessed with",
            ],
            "professional": [
                "We are proud to announce",
                "Setting new standards with",
                "Strategic partnership alert:",
                "Quality over everything.",
                "Innovation meets excellence.",
                "Excited to share",
                "Elevating the industry standard:",
            ],
            "edgy": [
                "We didn't come to play. 🔥",
                "Everyone's sleeping on this.",
                "Hot take:",
                "Bold moves only.",
                "If you're not paying attention, you're behind.",
                "Disrupting the feed.",
                "No apologies needed.",
            ],
            "inspirational": [
                "Every great journey starts here.",
                "Dream big, create bigger.",
                "The future is being written right now.",
                "Believe in the power of innovation.",
                "When passion meets purpose, magic happens.",
                "Elevate your everyday.",
            ],
            "humorous": [
                "My bank account after seeing this: 'Please no.'",
                "Scientists say you can't buy happiness. They lied.",
                "Told myself I didn't need anything. Clearly a lie.",
                "Me pretending I discovered this before everyone else:",
                "Sorry wallet, we are not friends anymore.",
            ],
        }
        # Caption bodies; formatted with brand, campaign and category.
        self.bodies = [
            "{brand} just dropped {campaign} and the {category} game will never be the same.",
            "The {campaign} initiative by {brand} is exactly what the {category} community ordered.",
            "We've partnered with {brand} for {campaign} to bring you the best in {category}.",
            "{brand}'s {campaign} is redefining what's possible in the {category} space.",
            "Exploring the incredible details of {campaign} with the amazing team at {brand}.",
            "This collaboration for {campaign} showcases why {brand} is a leader in {category}.",
            "Diving deep into the {campaign} collection, and {brand} absolutely delivered.",
            "{brand} continues to push boundaries with {campaign}.",
            "Experiencing {brand}'s vision for {campaign} firsthand is truly special for the {category} world.",
            "Nothing hits quite like {brand} dropping {campaign} right when we needed it.",
            "Witnessing the evolution of {category} through {brand}'s {campaign}.",
        ]
        # Closing calls-to-action for captions.
        self.ctas = [
            "Trust us, you do NOT want to miss this. Link in bio! 👇",
            "Who else is obsessed? Let me know below 📣",
            "Check out the full story on their page! 🚀",
            "Hit the link in my bio to explore the magic.",
            "Don't wait—experience it yourself today! ✨",
            "Tag someone who needs to see this! 👀",
            "Drop a comment if you are as hyped as I am! 💥",
            "Swipe to see why this is a total game-changer. ➡️",
            "Tap the link in my bio before it's too late!",
        ]

        # Pitch openers; formatted with creator, category and followers.
        self.pitch_openers = [
            "Hi! I'm {creator}, a passionate {category} creator reaching {followers} engaged followers. 👋",
            "Hello! My name is {creator} and I've been creating {category} content for my community of {followers}. ✨",
            "Hey there! I'm {creator}, a dedicated {category} storyteller with an amazing audience of {followers}. 🚀",
            "Greetings! I'm {creator}, and my {followers} followers rely on me for top-tier {category} recommendations. 📣",
            "Hi team, I am {creator}, a {category} specialist who engages deeply with my {followers} followers daily. 👀",
        ]
        # Pitch middles; formatted with campaign.
        self.pitch_mids = [
            "I'm genuinely excited about {campaign} because it aligns perfectly with the content my audience loves.",
            "What drew me to {campaign} is the strategic alignment with my content style and audience demographics.",
            "I believe {campaign} is a perfect match for my community because they are actively looking for exactly this.",
            "When I saw the {campaign} brief, I knew instantly that my aesthetics and your brand vision were perfectly matched.",
            "My creative approach blends perfectly with the goals of {campaign}, ensuring authentic and high-converting content.",
            "I noticed that {campaign} focuses on exactly the themes my audience engages with the most.",
        ]
        # Pitch closers; used verbatim.
        self.pitch_closers = [
            "I specialize in creating authentic, high-quality content that drives real engagement. I'd love to discuss how we can make this campaign a success.",
            "My average engagement rate consistently outperforms industry benchmarks, and I pride myself on delivering ROI. Let's connect!",
            "I bring professionalism, creativity, and a track record of successful brand collaborations. Would love to be part of this!",
            "I have a specific concept in mind for this campaign that I know will resonate. Let's get on a quick call to review it.",
            "Beyond simply posting, I focus on storytelling that actually converts viewers into customers. I look forward to your response.",
        ]

    def get_caption(self, brand, campaign, category, tone):
        """Build one random caption of the form "<hook> <body> <cta>".

        Args:
            brand: Brand name; falsy values become "the brand".
            campaign: Campaign title; falsy values become "this campaign".
            category: Content category; falsy values become "lifestyle".
            tone: Key into ``self.hooks``. Matching ignores case and
                surrounding whitespace; unknown tones use the "casual" hooks.

        Returns:
            The assembled caption string.
        """
        brand = brand or "the brand"
        campaign = campaign or "this campaign"
        category = category or "lifestyle"
        # Normalise before the lookup so values such as "Professional" or
        # " edgy " select their own hook table instead of falling to "casual".
        tone_key = tone.strip().lower() if isinstance(tone, str) else tone
        if tone_key not in self.hooks:
            tone_key = "casual"
        h = random.choice(self.hooks[tone_key])
        b = random.choice(self.bodies).format(brand=brand, campaign=campaign, category=category)
        c = random.choice(self.ctas)
        return f"{h} {b} {c}"

    def get_pitch(self, creator, category, followers, campaign):
        """Build one random pitch of the form "<opener> <middle> <closer>".

        Args:
            creator: Creator name; falsy values become "a creator".
            category: Creator category; falsy values become "lifestyle".
            followers: Follower count as display text; falsy values become "10K".
            campaign: Campaign title; falsy values become "this campaign".

        Returns:
            The assembled pitch string.
        """
        creator = creator or "a creator"
        campaign = campaign or "this campaign"
        category = category or "lifestyle"
        followers = followers or "10K"
        o = random.choice(self.pitch_openers).format(creator=creator, category=category, followers=followers)
        m = random.choice(self.pitch_mids).format(campaign=campaign)
        c = random.choice(self.pitch_closers)
        return f"{o} {m} {c}"

# Shared module-level generator; the caption and pitch endpoints use it as the
# template fallback when the LLM yields too few results.
nlp = NLPGenerator()


# ========== ROUTES ==========

@app.get("/")
def health_check():
    """Report service status: version, loaded-model count and optional model availability."""
    llm_ready = text_generator is not None
    sentence_ready = sentence_model is not None
    loaded_count = len(models)
    return {
        "status": "AI Engine Online",
        "version": "2.0",
        "models_loaded": loaded_count,
        "llm_available": llm_ready,
        "sentence_transformer": sentence_ready,
    }


# ---------- 1. GENERAL TEXT GENERATION (FOR TASKS/POST ANALYSIS) ----------
class GenerateRequest(BaseModel):
    # Request body for POST /generate.
    prompt: str  # required; sent to the LLM as the user message
    max_tokens: int = 300  # forwarded to llm_generate as its max_new argument

@app.post("/generate")
def generate_text(req: GenerateRequest):
    """Answer a free-form prompt with the LLM, or with a canned fallback.

    When no LLM is loaded, a fixed fallback string is returned (a different
    one if the prompt mentions "analyze"). Otherwise the prompt is sent to
    ``llm_generate`` with a campaign-manager system message.

    Returns:
        ``{"response": <text>}``. On generation failure the text starts with
        "AI generation failed:".
    """
    if not text_generator:
        # Fallback if AI not loaded
        if "analyze" in req.prompt.lower():
            return {"response": "This appears to be a basic post. Platform and category are verified (fallback analysis). Quality: 7/10."}
        return {"response": "AI Engine is offline. Fallback response generated."}

    sys_msg = "You are a professional social media campaign manager and content analysis AI. Provide concise, clear, and actionable feedback."
    try:
        result = llm_generate(sys_msg, req.prompt, max_new=req.max_tokens)
    except Exception as e:
        return {"response": f"AI generation failed: {e}"}

    if not result:
        # llm_generate swallows its own errors and signals failure with "",
        # so an empty result is reported instead of being passed through.
        return {"response": "AI generation failed: the model returned no output."}
    return {"response": result}

# ---------- 2. SMART MATCH ----------
@app.post("/match")
def smart_match(req: MatchRequest):
    """Score the supplied creators against a campaign description.

    Each creator's bio is compared with ``req.campaign_desc`` using TF-IDF
    cosine similarity (blended 40/60 with sentence-embedding similarity when a
    sentence model is loaded). Fixed bonuses are added for a category match
    (+30) and a platform match (+15), and 10 points are removed for a bio
    shorter than 5 characters. The final score is clamped to 5..99.

    Returns:
        ``{"matches": [...]}`` sorted by ``match_score`` descending; an empty
        list when no creators were supplied.

    Raises:
        HTTPException: 500 when the TF-IDF vectorizer is not loaded.
    """
    if not req.creators:
        return {"matches": []}
    if 'tfidf' not in models:
        # Same failure contract as the other model-backed endpoints.
        raise HTTPException(500, "Models not loaded")

    from sklearn.metrics.pairwise import cosine_similarity

    # Missing or null bios are treated as empty text so the vectorizer never
    # receives None.
    bios = [str(c.get('bio') or '') for c in req.creators]

    # Compare against ONLY the provided creators, not the global KNN index.
    vectorizer = models['tfidf']
    query_vec = vectorizer.transform([req.campaign_desc])
    creator_vecs = vectorizer.transform(bios)
    tfidf_scores = cosine_similarity(query_vec, creator_vecs)[0]

    # If a sentence model is available, add semantic scoring; otherwise the
    # semantic score is simply the TF-IDF score.
    if sentence_model:
        query_emb = sentence_model.encode([req.campaign_desc])
        bio_embs = sentence_model.encode(bios)
        semantic_scores = cosine_similarity(query_emb, bio_embs)[0]
    else:
        semantic_scores = tfidf_scores

    wanted_category = req.category.lower()
    wanted_platform = req.platform.lower()

    results = []
    for i, creator in enumerate(req.creators):
        bio = bios[i]
        tfidf_score = tfidf_scores[i]
        sem_score = semantic_scores[i]
        blended = (tfidf_score * 0.4 + sem_score * 0.6) * 100

        # Null category/platform values count as "no match" instead of crashing.
        category_hit = str(creator.get('category') or '').lower() == wanted_category
        platform_hit = str(creator.get('platform') or '').lower() == wanted_platform

        match_reasons = []
        if category_hit:
            blended += 30
            match_reasons.append("Category Match")
        if platform_hit:
            blended += 15
            match_reasons.append("Platform Match")
        if sem_score > 0.4:
            match_reasons.append("Strong Content Synergy")

        # Slight penalty when the bio is practically empty.
        if len(bio) < 5:
            blended -= 10

        results.append({
            "creator_id": creator.get('id', str(i)),
            "name": creator.get('name', 'Unknown'),
            "username": creator.get('username', ''),
            "email": creator.get('email', ''),
            "category": creator.get('category', 'General'),
            "platform": creator.get('platform', 'Instagram'),
            "followers": creator.get('followers', 0),
            "engagement_rate": creator.get('engagement_rate', 0.05),
            "content_quality": creator.get('content_quality', 7.0),
            "brand_safety": creator.get('brand_safety', 0.9),
            "match_score": int(max(5, min(99, blended))),
            "match_reasons": match_reasons,
            "bio": (bio[:120] + "...") if len(bio) > 120 else bio,
            "location": creator.get('location', ''),
            "avatar": creator.get('avatar', '')
        })

    results.sort(key=lambda x: x['match_score'], reverse=True)
    return {"matches": results}

# ---------- 2. PREDICT PERFORMANCE ----------
@app.post("/predict")
def predict_performance(req: PredictionRequest):
    """Estimate reach, engagement and ROI figures for a campaign brief.

    A synthetic creator profile (follower count derived from the budget, fixed
    engagement/quality/success values, both match flags set) is fed to the
    ``reach`` and ``engagement`` models. The raw predictions are then adjusted
    by rules: placeholder-looking campaigns are penalised heavily, a specific
    target audience boosts engagement.

    Raises:
        HTTPException: 500 when the ``reach`` model is not loaded.
    """
    if 'reach' not in models:
        raise HTTPException(500, "Models not loaded")

    budget = req.budget
    avg_followers = budget * 50
    safe_budget = max(budget, 1)

    # Dict insertion order determines the DataFrame column order.
    creator_profile = {
        'budget': budget,
        'creator_followers': avg_followers,
        'creator_engagement_rate': 0.05,
        'creator_content_quality': 7.0,
    }
    match_flags = {'category_match': 1, 'platform_match': 1}
    reach_features = pd.DataFrame([{**creator_profile, **match_flags}])
    engagement_features = pd.DataFrame(
        [{**creator_profile, 'creator_success_rate': 0.75, **match_flags}]
    )

    reach = max(500, int(models['reach'].predict(reach_features)[0]))
    eng = max(50, int(models['engagement'].predict(engagement_features)[0]))

    # A title mentioning "test" or a very short description marks test data.
    is_test = len(req.campaignDescription) < 15 or 'test' in req.campaignTitle.lower()

    if is_test:
        reach = int(reach * 0.15)  # keep 15% of the predicted reach
        eng = int(eng * 0.05)      # keep 5% of the predicted engagement
    elif len(req.targetAudience) > 10 and req.targetAudience.lower() != 'all':
        # A specific target audience raises expected engagement.
        eng = int(eng * 1.3)

    roi = round((eng / safe_budget) * 100, 2)

    engagement_pct = (eng / max(reach, 1)) * 100
    eng_rate = f"{min(15.0, max(0.5, engagement_pct)):.1f}%"

    # Deterministic impressions multiplier based on platform.
    platform_multipliers = {'TikTok': 3.8, 'YouTube': 3.2, 'Instagram': 3.0, 'Twitter': 2.6}
    impressions = int(reach * platform_multipliers.get(req.platform, 3.0))
    est_roi = f"{min(10.0, max(0.1, roi / 10)):.1f}x"

    recommendations = []
    if is_test:
        recommendations.append("[WARNING] AI Model Error: Your campaign lacks detail. Add a comprehensive title and description for accurate predictions.")
    elif budget < 2000:
        recommendations.append("Consider increasing budget for broader reach. Campaigns under $2K typically see lower volume.")

    if req.category in ['fashion', 'beauty']:
        recommendations.append(f"Visual-heavy formats (Reels/TikTok) deliver highest engagement for {req.category}.")
    if reach > 50000:
        recommendations.append("Strong reach potential. Consider micro-influencers for localized impact alongside main campaign.")

    if is_test:
        # Fixed low confidence for test/placeholder campaigns.
        confidence = 22
    else:
        # Confidence drops as followers-per-dollar departs from the norm of 50.
        followers_per_dollar = avg_followers / safe_budget
        variance_penalty = abs(50 - followers_per_dollar) * 0.5
        confidence = max(45, min(94, 90 - variance_penalty))

    return {
        "reach": reach,
        "engagement": eng,
        "roi_score": min(99, roi),
        "predictedReach": reach,
        "predictedImpressions": impressions,
        "predictedEngagementRate": eng_rate,
        "estimatedROI": est_roi,
        "confidence": confidence,
        "recommendations": recommendations
    }


# ---------- 3. RANK APPLICANTS ----------
@app.post("/rank-applicants")
def rank_applicants(req: RankRequest):
    """Score and rank campaign applicants.

    For every applicant a one-row feature frame is built and passed to the
    ``qual_scorer`` model (qualification score) and the ``applicant_ranker``
    model (acceptance probability, reported as ``confidence``). The
    qualification score is reduced by 25 for a category mismatch and by 15 for
    a platform mismatch, then clamped to 5..99 for output.

    Returns:
        ``{"ranked_applicants": [...]}`` sorted by ``qualification_score``
        descending.

    Raises:
        HTTPException: 500 when either required model is not loaded.
    """
    # Both models are used below, so both must be present.
    if 'qual_scorer' not in models or 'applicant_ranker' not in models:
        raise HTTPException(500, "Models not loaded")

    def field(applicant, key, default):
        """Return applicant[key], or ``default`` when the key is missing or null."""
        value = applicant.get(key)
        return default if value is None else value

    scorer = models['qual_scorer']
    ranker = models['applicant_ranker']
    campaign_category = req.campaign_category.lower()
    campaign_platform = req.campaign_platform.lower()

    scored = []
    for applicant in req.applicants:
        cat_match = 1 if str(field(applicant, 'category', '')).lower() == campaign_category else 0
        plat_match = 1 if str(field(applicant, 'platform', '')).lower() == campaign_platform else 0

        X = pd.DataFrame([{
            'creator_followers': int(field(applicant, 'followers', 5000)),
            'creator_engagement_rate': float(field(applicant, 'engagement_rate', 0.03)),
            'creator_content_quality': float(field(applicant, 'content_quality', 5.0)),
            'creator_success_rate': float(field(applicant, 'success_rate', 0.5)),
            'creator_response_time': float(field(applicant, 'response_time', 24.0)),
            'creator_brand_safety': float(field(applicant, 'brand_safety', 0.8)),
            'category_match': cat_match,
            'platform_match': plat_match
        }])

        score = float(scorer.predict(X)[0])
        # Only the positive-class probability is reported; the hard class
        # prediction was never used, so it is not computed.
        accept_proba = float(ranker.predict_proba(X)[0][1])

        # The defaults here differ from the model features on purpose: a
        # missing value must not produce a strength or a concern.
        reasons = []
        if cat_match:
            reasons.append("Category aligned")
        if plat_match:
            reasons.append("Platform match")
        if float(field(applicant, 'engagement_rate', 0)) > 0.05:
            reasons.append("High engagement")
        if float(field(applicant, 'brand_safety', 0)) > 0.85:
            reasons.append("Brand safe")
        if float(field(applicant, 'success_rate', 0)) > 0.7:
            reasons.append("Proven track record")

        concerns = []
        if not cat_match:
            concerns.append("Category mismatch")
        if float(field(applicant, 'response_time', 99)) > 48:
            concerns.append("Slow response time")
        if float(field(applicant, 'brand_safety', 1)) < 0.7:
            concerns.append("Brand safety risk")

        # Confidence reflects the model's probability directly, clamped to 10..99.
        true_confidence = max(10.0, min(99.0, accept_proba * 100))

        # Penalise critical misses such as category or platform.
        final_qual = score
        if not cat_match:
            final_qual -= 25
        if not plat_match:
            final_qual -= 15

        scored.append({
            "applicant_id": applicant.get('id', ''),
            "creator_user_id": applicant.get('creator_user_id', ''),
            "name": applicant.get('name', 'Unknown'),
            "avatar": applicant.get('avatar', ''),
            "email": applicant.get('email', ''),
            "bio": applicant.get('bio', ''),
            "category": applicant.get('category', 'general'),
            "platform": applicant.get('platform', 'Instagram'),
            "followers": applicant.get('followers', 0),
            "qualification_score": round(max(5, min(99, final_qual)), 1),
            "recommended": final_qual > 55,
            "confidence": round(true_confidence, 1),
            "strengths": reasons if reasons else ["General Audience Reach"],
            "concerns": concerns if concerns else ["No major concerns"]
        })

    scored.sort(key=lambda x: x['qualification_score'], reverse=True)
    return {"ranked_applicants": scored}


# ---------- 4. GENERATE CAPTIONS ----------
@app.post("/generate/caption")
def generate_caption(req: CaptionRequest):
    """Generate up to ``req.count`` unique captions plus hashtags.

    The LLM is tried first (one call per requested caption); any shortfall is
    filled from the template generator. The template space is finite, so the
    fill loop gives up after a run of consecutive duplicates instead of
    spinning forever when ``count`` exceeds the number of distinct templates.
    In that case fewer than ``count`` captions are returned.

    Returns:
        Dict with ``captions``, ``hashtags``, ``category`` (capitalised),
        ``confidence`` (92 when any caption came from the LLM, else 78) and
        ``source`` ("llm" or "template").
    """
    captions = []
    category = req.category if req.category in CATEGORIES else 'lifestyle'
    source = "template"

    # Try LLM first for truly dynamic, contextual captions
    if text_generator:
        tone_desc = {
            'casual': 'fun, relatable, and conversational',
            'professional': 'polished, authoritative, and trustworthy',
            'edgy': 'bold, provocative, and attention-grabbing',
            'inspirational': 'uplifting, motivational, and aspirational',
            'humorous': 'funny, witty, and entertaining'
        }.get(req.tone, 'professional')

        sys_msg = (
            "You are an expert social media marketing copywriter. "
            "Write engaging, authentic captions for brand campaigns. "
            "Keep captions concise (2-3 sentences max). "
            "Include relevant emojis and a call-to-action. "
            "Do NOT include hashtags in the caption itself."
        )
        for i in range(req.count):
            user_msg = (
                f"Write a {tone_desc} {req.platform} caption for {req.brandName or 'the brand'}'s "
                f"'{req.campaignTitle or 'new campaign'}' campaign in the {category} niche. "
                f"Caption #{i+1} of {req.count}. Make each one unique."
            )
            result = llm_generate(sys_msg, user_msg, max_new=150)
            if result and len(result) > 20:
                # Clean up to first paragraph
                cleaned = result.split('\n\n')[0].strip()
                if cleaned not in captions:
                    captions.append(cleaned)
                    source = "llm"

    # Fallback to NLP combinator if LLM failed or produced too few.
    # Only a finite number of distinct template captions exist, so stop once
    # random draws keep repeating rather than looping without end.
    max_consecutive_duplicates = 50
    consecutive_duplicates = 0
    seen = set(captions)
    while len(captions) < req.count and consecutive_duplicates < max_consecutive_duplicates:
        caption = nlp.get_caption(req.brandName, req.campaignTitle, category, req.tone)
        if caption in seen:
            consecutive_duplicates += 1
            continue
        seen.add(caption)
        captions.append(caption)
        consecutive_duplicates = 0

    hashtags = [f"#{category}", f"#{(req.brandName or 'brand').replace(' ', '')}",
                f"#{(req.campaignTitle or 'campaign').replace(' ', '')}", "#ad", "#sponsored"]

    return {
        "captions": captions[:req.count],
        "hashtags": hashtags,
        "category": category.capitalize(),
        "confidence": 92 if source == "llm" else 78,
        "source": source
    }


# ---------- 5. GENERATE CONTRACT ----------
@app.post("/generate/contract")
def generate_contract(req: ContractRequest):
    """Produce an influencer-marketing contract for a brand/creator pair.

    The LLM draft is used when it is longer than 100 characters; otherwise a
    fixed eight-section template is filled in from the request fields.

    Returns:
        Dict with ``contract`` (text), ``source`` ("llm" or "template") and
        the static ``sections`` list.
    """
    fee = f"${req.budget:,.2f}"
    contract_text, source = "", "template"

    # Try LLM for intelligent, contextual contract generation
    if text_generator:
        sys_msg = (
            "You are an expert legal document writer specializing in international influencer marketing contracts. "
            "Write a standard, globally applicable contract agreement between a brand and a content creator. "
            "IMPORTANT RESTRAINTS: "
            "Do NOT reference specific countries, states, or regional jurisdictions like Sweden, UK, or California. "
            "Keep the jurisdiction strictly generalized as 'applicable local law'. "
            "Include standard sections: scope of work, compensation, timeline, usage rights, confidentiality, and termination."
        )
        prompt_deliverables = req.deliverables or 'To be agreed upon'
        prompt_deadline = req.deadline or 'To be agreed upon'
        user_msg = (
            f"Write an influencer marketing contract between {req.brandName} (Brand) and "
            f"{req.creatorName} (Creator) for the '{req.campaignTitle}' campaign.\n"
            f"Platform: {req.platform}\n"
            f"Deliverables: {prompt_deliverables}\n"
            f"Compensation: {fee}\n"
            f"Deadline: {prompt_deadline}\n"
            f"Usage Rights: {req.usageRights}\n"
            f"Generate a complete, professional contract."
        )
        draft = llm_generate(sys_msg, user_msg, max_new=500)
        if draft and len(draft) > 100:
            contract_text, source = draft, "llm"

    # Template fallback
    if not contract_text:
        period_end = req.deadline or 'completion of deliverables'
        scope = req.deliverables or '- Content as mutually agreed upon between Brand and Creator'
        publish_by = req.deadline or 'As mutually agreed'
        contract_text = f"""INFLUENCER MARKETING AGREEMENT

This Agreement is entered into between:

BRAND: {req.brandName} (hereinafter "Brand")
CREATOR: {req.creatorName} (hereinafter "Creator")

1. CAMPAIGN OVERVIEW
Campaign Name: {req.campaignTitle}
Platform: {req.platform}
Campaign Period: From the date of signing through {period_end}

2. SCOPE OF WORK
The Creator agrees to produce and publish the following content:
{scope}

3. COMPENSATION
Total Fee: {fee} USD
Payment Schedule:
  - 50% upon signing this agreement
  - 50% upon completion and approval of all deliverables

4. CONTENT REQUIREMENTS
- All content must be original and created specifically for this campaign
- Content must include proper disclosure (e.g., #ad, #sponsored) per FTC guidelines
- Brand must approve all content before publication

5. USAGE RIGHTS
The Brand is granted a license to use, reproduce, and distribute the Creator's content for a period of {req.usageRights} from publication date.

6. TIMELINE
- Content drafts due: 5 business days before publication date
- Publication deadline: {publish_by}

7. CONFIDENTIALITY
Both parties agree to maintain confidentiality.

8. TERMINATION
Either party may terminate with 14 days written notice.

SIGNATURES:

Brand: ____________________  Date: ________
{req.brandName}

Creator: ____________________  Date: ________
{req.creatorName}"""

    section_titles = [
        "Campaign Overview", "Scope of Work", "Compensation",
        "Content Requirements", "Usage Rights", "Timeline",
        "Confidentiality", "Termination",
    ]
    return {
        "contract": contract_text,
        "source": source,
        "sections": section_titles,
    }


# ---------- 6. RECOMMEND CAMPAIGNS ----------
@app.post("/recommend")
def recommend_campaigns(req: RecommendRequest):
    """Rank the supplied campaigns for a creator profile.

    The creator's category and interests are vectorised with TF-IDF and
    compared with each campaign's category/title/description text. The text
    similarity contributes up to 60 points; a category match adds 25, a
    platform match 10 and a budget above 500 adds 4. Scores are clamped to
    5..99.

    Returns:
        ``{"recommendations": [...]}`` holding the eight best-scoring
        campaigns, highest first; an empty list when no campaigns were given.

    Raises:
        HTTPException: 500 when the TF-IDF vectorizer is not loaded.
    """
    if not req.campaigns:
        return {"recommendations": []}
    if 'tfidf' not in models:
        # Same failure contract as the other model-backed endpoints.
        raise HTTPException(500, "Models not loaded")

    from sklearn.metrics.pairwise import cosine_similarity

    vectorizer = models['tfidf']

    # Build text for creator vector
    creator_text = f"{req.creator_category} {' '.join(req.creator_interests)}"
    creator_vec = vectorizer.transform([creator_text])

    # Vectorise all campaigns in one call instead of once per campaign; each
    # row is transformed independently, so the similarities are unchanged.
    camp_texts = [
        f"{camp.get('category') or ''} {camp.get('title') or ''} {camp.get('description') or ''}"
        for camp in req.campaigns
    ]
    sims = cosine_similarity(creator_vec, vectorizer.transform(camp_texts))[0]

    creator_category = req.creator_category.lower()
    creator_platform = req.creator_platform.lower()

    recommendations = []
    for camp, sim in zip(req.campaigns, sims):
        # Parse the budget once so scoring and display agree. A missing,
        # null or unparseable budget counts as "not provided".
        try:
            budget = float(camp['budget'])
        except (KeyError, TypeError, ValueError):
            budget = None

        # Textual match yields up to 60 points
        base_score = int(sim * 60)

        if str(camp.get('category') or '').lower() == creator_category:
            base_score += 25  # Category match is essential

        if str(camp.get('platform') or '').lower() == creator_platform:
            base_score += 10  # Platform alignment gives modest boost

        # Slight boost if the campaign has budget details explicitly laid out
        if budget is not None and budget > 500:
            base_score += 4

        match_score = min(99, max(5, base_score))

        reason = f"This campaign aligns with your {req.creator_category} profile"
        if sim > 0.4:
            reason += " and has strong synergy with your expressed interests."
        else:
            reason += "."

        # Campaigns without a budget are displayed with the 500 default.
        display_budget = 500 if budget is None else budget

        recommendations.append({
            "campaign_id": camp.get('id', ''),
            "title": camp.get('title', 'Unknown Campaign'),
            "category": camp.get('category', 'general'),
            "match_score": match_score,
            "budget_range": f"${display_budget:,.2f}",
            "platform": camp.get('platform', 'Instagram'),
            "why": reason
        })

    recommendations.sort(key=lambda x: x['match_score'], reverse=True)
    return {"recommendations": recommendations[:8]}


# ---------- 7. GENERATE PITCH ----------
@app.post("/generate/pitch")
def generate_pitch(req: PitchRequest):
    """Generate three creator-to-brand pitch messages and three writing tips.

    The LLM is asked for three pitches; results of 30 characters or fewer are
    discarded. Any shortfall is filled with unique template pitches.

    Returns:
        Dict with ``pitches`` (three strings), ``tips`` (three random tips)
        and ``source`` ("llm" when at least one LLM pitch was kept).
    """
    wanted = 3
    pitches = []
    source = "template"

    # Try LLM for intelligent pitch generation
    if text_generator:
        sys_msg = (
            f"You are an expert at writing influencer pitch messages. "
            f"Write a compelling, {req.tone} pitch from a content creator to a brand. "
            f"Keep it concise (3-4 sentences), authentic, and highlight specific value. "
            f"The tone should be {req.tone}."
        )
        creator_label = req.creatorName or 'a creator'
        category_label = req.creatorCategory or 'influencer'
        audience_label = req.targetAudience or 'General'
        key_points_label = req.keyPoints or 'Alignment with brand values'
        context_label = req.campaignDescription or 'Marketing collaboration'

        for number in range(1, wanted + 1):
            user_msg = (
                f"Write pitch #{number} from {creator_label} (category: {category_label}) "
                f"for the campaign: '{req.campaignTitle}'.\n"
                f"Target Audience: {audience_label}\n"
                f"Key Points to include: {key_points_label}\n"
                f"Campaign context: {context_label}.\n"
                f"Make each version unique."
            )
            result = llm_generate(sys_msg, user_msg, max_new=250)
            if not result or len(result) <= 30:
                continue
            pitches.append(result.strip())
            source = "llm"

    # Fallback to NLP combinator
    while len(pitches) < wanted:
        candidate = nlp.get_pitch(req.creatorName, req.creatorCategory, req.creatorFollowers, req.campaignTitle)
        if candidate in pitches:
            continue
        pitches.append(candidate)

    tips = [
        "Mention specific content ideas you have for the campaign",
        "Include links to your best performing similar content",
        "Reference the brand's recent campaigns to show you've done your research",
        "Be authentic - brands can spot generic pitches instantly",
        "Highlight your audience demographics that match the brand's target market"
    ]
    chosen_tips = random.sample(tips, 3)

    return {
        "pitches": pitches[:wanted],
        "tips": chosen_tips,
        "source": source
    }


# ---------- 8. LIVE BOT STATUS TRACKER ----------
import time as _time
import re as _re
import base64 as _b64
import uuid as _uuid

# In-memory registry of analysis jobs, keyed by job_id. Each value is the most
# recent status dict written for that job by the /analyze-url handler; it is
# returned as-is by GET /analyze-status/{job_id}.
# NOTE(review): nothing in this section removes entries, so the dict appears to
# grow for the lifetime of the process — confirm whether cleanup exists elsewhere.
bot_status: Dict[str, Dict] = {}

@app.get("/analyze-status/{job_id}")
def get_analyze_status(job_id: str):
    """Return the latest status dict recorded for ``job_id``.

    Clients poll this while a scrape is in progress. Unknown job ids yield a
    small ``{"job_id": ..., "status": "not_found"}`` payload.
    """
    try:
        return bot_status[job_id]
    except KeyError:
        return {"job_id": job_id, "status": "not_found"}



# Semaphore with a single permit: the /analyze-url worker acquires it before
# scraping so that only 1 Chrome scrape runs at a time (prevents RAM
# exhaustion / crashes).
# NOTE(review): this is a plain Semaphore, not a BoundedSemaphore, so every
# acquire() must be paired with exactly one release(); an extra release() is
# not detected and silently raises the number of permits.
_scrape_lock = threading.Semaphore(1)

@app.post("/analyze-url")
def analyze_url(req: TrackUrlRequest):
    """Scrape engagement metrics for a social-media post URL.

    The work runs in a daemon thread that records its progress in
    ``bot_status[job_id]`` (readable through ``/analyze-status/{job_id}``).
    The worker tries, in order:

    1. Instagram: a plain HTTP fetch of the post's ``/embed/`` page (likes,
       author, caption). This path always finishes the job unless the HTTP
       step raises.
    2. TikTok: a plain HTTP fetch of the ``/embed/v2/`` page (views, likes,
       comments, author, description). Used only when some metric was found.
    3. Fallback: ``scrape_worker.py`` run as a subprocess, whose last stdout
       line is parsed as JSON.

    The handler waits up to 60 s for the worker and returns:

    - the job's ``result`` dict (plus ``job_id``) when the job finished,
      whether it succeeded (``status`` "complete") or found no metrics
      (``status`` "failed", ``success`` False);
    - an error payload when the worker recorded ``status`` "error";
    - otherwise a "pending" placeholder so the client can keep polling.

    Only one scrape holds ``_scrape_lock`` at a time; a worker that cannot
    acquire it within 30 s records an error instead of scraping.
    """
    import re

    url = req.url.strip()
    job_id = str(_uuid.uuid4())[:8]

    # Detect platform
    platform = "Unknown"
    if 'instagram.com' in url: platform = "Instagram"
    elif 'tiktok.com' in url: platform = "TikTok"
    elif 'youtube.com' in url or 'youtu.be' in url: platform = "YouTube"
    elif 'twitter.com' in url or 'x.com' in url: platform = "Twitter/X"
    elif 'facebook.com' in url: platform = "Facebook"

    def update_status(step: str, detail: str, screenshot: str = "", progress: int = 0):
        """Replace the job's status entry with a fresh "running" snapshot."""
        bot_status[job_id] = {
            "job_id": job_id,
            "status": "running",
            "step": step,
            "detail": detail,
            "screenshot": screenshot,
            "progress": progress,
            "url": url,
            "platform": platform,
        }

    update_status("init", f"🚀 Launching bot browser for {platform}...", progress=5)

    def _do_scrape():
        """Run the actual scrape in a background thread."""
        views = 0
        likes = 0
        comments = 0
        title = ""
        author = ""
        thumbnail = ""
        screenshot_b64 = ""
        metric_source = "unavailable"
        desc = ""

        acquired = _scrape_lock.acquire(timeout=30)
        if not acquired:
            # The lock was never taken, so this path must not release it.
            bot_status[job_id] = {
                "job_id": job_id, "status": "error", "step": "failed",
                "detail": "❌ Another scrape is running. Try again shortly.",
                "screenshot": "", "progress": 100, "url": url, "platform": platform,
            }
            return

        # From here on the lock is held. It is released exactly once, by the
        # `finally` clause at the bottom — including on the early `return`s
        # inside the fast paths. Releasing it there as well would add a second
        # permit to the semaphore and let two scrapes run concurrently.
        try:
            update_status("launching", "🌐 Opening Chrome browser...", progress=15)

            # ──── PRE-EXTRACTION: Use simple HTTP to grab real metrics from embed ────
            # No browser is needed for this path.
            if platform == "Instagram":
                code_match = re.search(r'instagram\.com/(?:p|reel|reels)/([A-Za-z0-9_-]+)', url)
                if code_match:
                    post_code = code_match.group(1)
                    embed_url = f"https://www.instagram.com/p/{post_code}/embed/"
                    try:
                        import requests as req_lib
                        embed_res = req_lib.get(embed_url, headers={
                            # Using a simple UA forces Instagram to serve the lightweight SSR embed HTML
                            # containing the likeCountClick element, avoiding the heavy 800KB React SPA.
                            'User-Agent': 'Mozilla/5.0'
                        }, timeout=15)
                        embed_html = embed_res.text
                        print(f"[HTTP Embed] Got {len(embed_html)} bytes")

                        # 1. Exact extraction from the `likeCountClick` anchor tag
                        # Example: <a href="..." data-log-event="likeCountClick" target="_blank">24 likes</a>
                        like_pattern = r'likeCountClick[^>]*>\s*([\d,]+)\s*likes?'
                        likes_m = re.search(like_pattern, embed_html, re.IGNORECASE)
                        if likes_m:
                            likes = int(likes_m.group(1).replace(',', ''))
                            print(f"[HTTP Embed] Extracted likes from anchor: {likes}")
                            metric_source = "embed HTTP scrape (real data)"

                        # Fallback: any "N likes" visible in the UI
                        if likes == 0:
                            fb_likes = re.search(r'>\s*([\d,]+)\s*likes?<', embed_html, re.IGNORECASE)
                            if fb_likes:
                                likes = int(fb_likes.group(1).replace(',', ''))
                                print(f"[HTTP Embed] Regex likes fallback: {likes}")
                                metric_source = "embed HTTP scrape (real data)"

                        # Broader fallback: embedded JSON edge_media pattern
                        if likes == 0:
                            json_likes = re.search(r'"edge_media_preview_like"\s*:\s*\{\s*"count"\s*:\s*(\d+)', embed_html)
                            if json_likes:
                                likes = int(json_likes.group(1))
                                print(f"[HTTP Embed] JSON likes: {likes}")
                                metric_source = "embed HTTP scrape (real data)"

                        # Comment and view counts are intentionally not extracted
                        # on this path; they stay at 0.

                        # Extract author
                        author_m = re.search(r'"username"\s*:\s*"([^"]+)"', embed_html)
                        if author_m and not author:
                            author = author_m.group(1)
                            print(f"[HTTP Embed] Author: @{author}")

                        # Extract title/caption from embed
                        if not title:
                            cap_m = re.search(r'"caption"\s*:\s*\{\s*"text"\s*:\s*"([^"]{1,200})"', embed_html)
                            if cap_m:
                                title = cap_m.group(1)[:100]
                                print(f"[HTTP Embed] Caption: {title[:50]}...")

                        # ALWAYS return from HTTP path - never fall through to Selenium
                        # (Selenium crashes inside uvicorn worker threads on Windows)
                        has_any_data = likes > 0 or comments > 0 or views > 0 or author
                        if not has_any_data:
                            metric_source = "embed HTTP scrape (limited data)"

                        update_status("metrics_done", f"📊 Found: {views:,} views, {likes:,} likes, {comments:,} comments", screenshot="", progress=85)
                        update_status("analyzing", "🤖 Compiling report...", progress=95)

                        content_text = f"{title} {desc} {author}".lower()
                        keyword_hits = sum(1 for kw in req.campaign_keywords if kw.lower() in content_text)
                        matches_campaign = keyword_hits > 0 or len(req.campaign_keywords) == 0

                        brand_safety = 100
                        eng = f"{(likes + comments) / max(views, 1) * 100:.2f}%" if views > 0 else "N/A"

                        notes = f"👁 Bot parsed page via HTTP API."
                        if author:
                            notes += f" Author: @{author}."
                        if not has_any_data:
                            notes += " ℹ️ Metrics may be hidden by Instagram privacy settings. Post link verified as valid."

                        # Attach the result first and flip the status last, so
                        # a concurrent poller never sees "complete" without a
                        # "result" key.
                        job = bot_status[job_id]
                        job["result"] = {
                            "success": True,
                            "platform": platform,
                            "title": (title or "Instagram Post")[:100],
                            "author": author,
                            "thumbnail": thumbnail,
                            "screenshot": "",
                            "metrics": {
                                "views": views,
                                "likes": likes,
                                "comments": comments,
                                "engagement_rate": eng,
                                "source": metric_source
                            },
                            "brand_safety_score": brand_safety,
                            "campaign_match": matches_campaign,
                            "status": "verified" if brand_safety > 60 else "flagged",
                            "ai_notes": notes
                        }
                        job["step"] = "done"
                        job["detail"] = f"✅ Analysis complete - {likes:,} likes"
                        job["progress"] = 100
                        job["status"] = "complete"
                        # Exit the thread — no Selenium needed. The lock is
                        # released by the outer `finally`.
                        return

                    except Exception as http_err:
                        print(f"[HTTP Embed] IG Failed: {http_err}")

            if platform == "TikTok":
                # Extract video ID from URL
                id_match = re.search(r'video/(\d+)', url)
                if id_match:
                    video_id = id_match.group(1)
                    embed_url = f"https://www.tiktok.com/embed/v2/{video_id}"
                    try:
                        import requests as req_lib
                        # TikTok embed is very lightweight SSR
                        embed_res = req_lib.get(embed_url, headers={
                            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
                        }, timeout=15)
                        embed_html = embed_res.text
                        print(f"[TikTok Embed] Got {len(embed_html)} bytes")

                        # 1. Look for views (playCount)
                        vm = re.search(r'"playCount"\s*:\s*(\d+)', embed_html)
                        if vm: views = int(vm.group(1))

                        # 2. Look for likes (diggCount)
                        lm = re.search(r'"diggCount"\s*:\s*(\d+)', embed_html)
                        if lm: likes = int(lm.group(1))

                        # 3. Look for comments (commentCount)
                        cm = re.search(r'"commentCount"\s*:\s*(\d+)', embed_html)
                        if cm: comments = int(cm.group(1))

                        # 4. Look for author
                        am = re.search(r'"uniqueId"\s*:\s*"([^"]+)"', embed_html)
                        if am: author = am.group(1)

                        # 5. Look for description (title)
                        tm = re.search(r'"desc"\s*:\s*"([^"]{1,200})"', embed_html)
                        if tm: title = tm.group(1)

                        has_metrics = views > 0 or likes > 0 or comments > 0

                        # Without any metric, fall through to the subprocess
                        # scraper below instead of finishing here.
                        if has_metrics:
                            metric_source = "TikTok embed HTTP scrape"
                            update_status("metrics_done", f"📊 Found: {views:,} views, {likes:,} likes, {comments:,} comments", screenshot="", progress=85)
                            update_status("analyzing", "🤖 Compiling report...", progress=95)

                            eng = f"{(likes + comments) / max(views, 1) * 100:.2f}%" if views > 0 else "N/A"
                            ai_notes = f"✅ Analysis complete via TikTok Embed API. Author: @{author}."

                            # Result first, status last (see Instagram path).
                            job = bot_status[job_id]
                            job["result"] = {
                                "success": True,
                                "platform": platform,
                                "title": (title or "TikTok Video")[:100],
                                "author": author,
                                "metrics": {
                                    "views": views,
                                    "likes": likes,
                                    "comments": comments,
                                    "engagement_rate": eng,
                                    "source": metric_source
                                },
                                "ai_notes": ai_notes
                            }
                            job["step"] = "done"
                            job["detail"] = f"✅ Analysis complete - {likes:,} likes"
                            job["progress"] = 100
                            job["status"] = "complete"
                            # Lock is released by the outer `finally`.
                            return
                    except Exception as tk_err:
                        print(f"[TikTok Embed] HTTP Failed: {tk_err}")

            # --- FALLBACK TO SUBPROCESS SELENIUM ---
            # Using subprocess avoids Windows COM thread crashes inside Uvicorn
            update_status("navigating", f"📡 Opening {platform} via isolated browser...", progress=20)

            import subprocess
            import json
            # Imported locally so `sys.executable` below is always resolvable;
            # a missing name here would be swallowed by the broad `except`
            # and make every fallback scrape fail silently.
            import sys

            try:
                # Drop non-printable characters (newlines, control codes)
                # before handing the URL to the worker as an argv entry.
                safe_url = "".join(c for c in str(url) if c.isprintable()).strip()

                print(f"[Selenium Worker] Launching isolated scraper for: {safe_url}")
                result = subprocess.run(
                    [sys.executable, "scrape_worker.py", safe_url, platform],
                    capture_output=True,
                    text=True,
                    timeout=45
                )

                output = result.stdout.strip()
                if not output:
                    raise Exception(f"Worker crashed silently. Stderr: {result.stderr[:200]}")

                # Parse JSON output from the last line
                last_line = output.splitlines()[-1]
                data = json.loads(last_line)

                if "error" in data:
                    raise Exception(data["error"])

                views = data.get("views", 0)
                likes = data.get("likes", 0)
                comments = data.get("comments", 0)
                author = author or data.get("author", "")
                screenshot_b64 = data.get("screenshot", "")
                metric_source = "isolated browser scrape"

            except subprocess.TimeoutExpired:
                print(f"[Selenium Worker] Timeout. Stale process killed.")
                screenshot_b64 = ""
            except Exception as e:
                # Best effort: a failed worker leaves the metrics at 0 and the
                # job is reported as "failed" below.
                print(f"[Selenium Worker] Failed: {e}")
                screenshot_b64 = ""

            # ──── Build final result ────
            update_status("analyzing", "🤖 Compiling report...", progress=95)
            content_text = f"{title} {desc} {author}".lower()
            keyword_hits = sum(1 for kw in req.campaign_keywords if kw.lower() in content_text)
            matches_campaign = keyword_hits > 0 or len(req.campaign_keywords) == 0

            brand_safety = 100
            eng = f"{(likes + comments) / max(views, 1) * 100:.2f}%" if views > 0 else "N/A"
            status = "verified" if brand_safety > 60 else "flagged"
            has_metrics = views > 0 or likes > 0 or comments > 0

            if has_metrics:
                ai_notes = f"Analysis complete via isolated process. Author: @{author or 'unknown'}."
            else:
                ai_notes = f"⚠️ Analysis failed: {platform} required authentication or blocked extraction."

            bot_status[job_id] = {
                "job_id": job_id,
                "status": "complete" if has_metrics else "failed",
                "step": "done" if has_metrics else "failed",
                "detail": f"✅ Analysis complete - {views:,} views, {likes:,} likes" if has_metrics else "❌ Analysis failed - platform blocked extraction",
                "screenshot": screenshot_b64,
                "progress": 100,
                "url": url,
                "platform": platform,
                "result": {
                    "success": has_metrics,
                    "platform": platform,
                    "title": (title or "Post")[:100],
                    "author": author,
                    "thumbnail": thumbnail,
                    "screenshot": screenshot_b64,
                    "metrics": {
                        "views": views,
                        "likes": likes,
                        "comments": comments,
                        "engagement_rate": eng,
                        "source": metric_source
                    },
                    "brand_safety_score": brand_safety,
                    "campaign_match": matches_campaign,
                    "status": status if has_metrics else "failed",
                    "ai_notes": ai_notes
                }
            }

        except Exception as outer_e:
            print(f"[Scrape Thread] Outer error: {outer_e}")
            bot_status[job_id] = {
                "job_id": job_id, "status": "error", "step": "failed",
                "detail": f"❌ {str(outer_e)[:100]}",
                "screenshot": "", "progress": 100, "url": url, "platform": platform,
            }
        finally:
            # Single release point for the permit acquired above.
            _scrape_lock.release()

    thread = threading.Thread(target=_do_scrape, daemon=True)
    thread.start()

    # Wait up to 60s for the thread to complete (gives enough time for scrape)
    thread.join(timeout=60)

    # Return the final result if ready, otherwise return progress
    status_data = bot_status.get(job_id, {})
    job_state = status_data.get("status")
    if job_state in ("complete", "failed") and "result" in status_data:
        # "failed" is a terminal state too: its result already carries
        # success=False, so return it instead of telling the client the bot
        # is still scraping.
        result = status_data["result"]
        result["job_id"] = job_id
        return result
    elif job_state == "error":
        return {
            "success": False,
            "job_id": job_id,
            "error": status_data.get("detail", "Unknown error"),
            "status": "pending",
            "ai_notes": "Bot browser scraping failed: " + status_data.get("detail", "Unknown error")
        }
    else:
        # Still running (timeout hit) — return partial so client can poll
        return {
            "success": True,
            "job_id": job_id,
            "platform": platform,
            "title": "Analyzing...",
            "author": "",
            "thumbnail": "",
            "screenshot": status_data.get("screenshot", ""),
            "metrics": {"views": 0, "likes": 0, "comments": 0, "engagement_rate": "N/A", "source": "pending"},
            "brand_safety_score": 80,
            "campaign_match": True,
            "status": "pending",
            "ai_notes": f"⏳ Bot is still scraping {platform}. Poll /analyze-status/{job_id} for live updates."
        }


def _parse_shortnum(s: str) -> int:
    """Parse abbreviated numbers like '1.2M', '500K', '3B' into integers."""
    s = s.strip().upper()
    try:
        if 'B' in s: return int(float(s.replace('B', '')) * 1_000_000_000)
        if 'M' in s: return int(float(s.replace('M', '')) * 1_000_000)
        if 'K' in s: return int(float(s.replace('K', '')) * 1_000)
        return int(s.replace(',', ''))
    except:
        return 0

# ---------- 9. DEEP LEARNING VISUAL RESEARCH ----------
@app.post("/deep-research")
def deep_research(req: DeepResearchRequest):
    """
    Search DuckDuckGo for creator profiles matching the request and attach an
    estimated audience / brand-safety profile.

    Up to three ``site:``-restricted queries are built from the request's
    username, niche and "known for" text (a generic query is used when all are
    empty). At most 8 distinct result URLs are turned into creator entries.

    The demographic, safety and confidence values start as random
    placeholders. When the LLM pipeline (``text_generator``) is loaded, its
    JSON reply overrides them field by field; every field is validated and a
    malformed one is ignored rather than failing the request.

    Returns:
        dict with ``success``, the echoed ``image_url`` / ``username``, an
        ``analysis`` section (demographics, aesthetic tags, recommended
        platforms, brand safety), ``discovered_creators`` and
        ``ml_confidence``.
    """
    import json
    import re
    from urllib.parse import urlsplit

    discovered_creators = []

    platform_name = req.platform.strip() if req.platform else ""
    niche_name = req.niche.strip() if req.niche else ""
    known_for_text = req.known_for.strip() if req.known_for else ""
    username_text = req.username.strip() if req.username and req.username != 'unknown' else ""

    # Turn the platform label into a domain usable in a `site:` filter.
    # Labels such as "Twitter/X" are not valid host names when ".com" is
    # simply appended, so they are mapped explicitly; anything else is reduced
    # to host-name characters. Plain labels ("Instagram", "tiktok") resolve to
    # "<label>.com" as before.
    domain_overrides = {"twitter/x": "x.com", "x/twitter": "x.com"}
    platform_key = platform_name.lower()
    if platform_key in domain_overrides:
        platform_domain = domain_overrides[platform_key]
    else:
        domain_label = re.sub(r'[^a-z0-9.-]', '', platform_key).strip('.-')
        if not domain_label:
            platform_domain = "instagram.com"
        elif '.' in domain_label:
            platform_domain = domain_label  # already looks like a domain
        else:
            platform_domain = f"{domain_label}.com"

    queries_to_try = []
    if username_text:
        queries_to_try.append(f"site:{platform_domain} {username_text}")

    if niche_name:
        queries_to_try.append(f"site:{platform_domain} \"{niche_name}\" creator OR influencer")

    if known_for_text:
        queries_to_try.append(f"site:{platform_domain} \"{known_for_text}\"")

    if not queries_to_try:
        queries_to_try.append(f"site:{platform_domain} content creator")

    # Last path segments that are site pages rather than profile handles.
    blocked_slugs = {'about', 'login', 'signup', 'explore', 'tags', 'terms', 'privacy',
                     'p', 'search', 'settings', 'help', 'legal', 'blog', 'press',
                     'reel', 'reels', 'stories', 'hashtag', 'directory', 'trending'}

    try:
        from ddgs import DDGS
        with DDGS() as ddgs:
            for q in queries_to_try:
                if len(discovered_creators) >= 8:
                    break
                try:
                    results = ddgs.text(q, max_results=15)
                    for r in results:
                        url = r.get('href', '')
                        title = r.get('title', '')
                        snippet = r.get('body', '')
                        if not url:
                            continue

                        # Take the handle from the URL *path* only, so that
                        # query strings, fragments, trailing slashes and bare
                        # domains do not end up as bogus handles.
                        try:
                            url_path = urlsplit(url).path
                        except ValueError:
                            continue
                        # Drop a leading "@" (e.g. tiktok.com/@user) because
                        # one is prepended below.
                        handle = url_path.strip('/').split('/')[-1].lstrip('@')

                        if not handle or len(handle) > 40 or handle.lower() in blocked_slugs:
                            continue

                        if any(x in url.lower() for x in ['article', 'blog/', 'news/', '/list', 'wiki']):
                            continue

                        detected_platform = "Social Media"
                        if "instagram.com" in url: detected_platform = "Instagram"
                        elif "tiktok.com" in url: detected_platform = "TikTok"
                        elif "youtube.com" in url or "youtu.be" in url: detected_platform = "YouTube"
                        elif "twitter.com" in url or "x.com" in url: detected_platform = "Twitter/X"

                        display_name = title[:50]
                        if any(c['url'] == url for c in discovered_creators):
                            continue

                        discovered_creators.append({
                            "handle": "@" + handle,
                            "display_name": display_name or handle,
                            "url": url,
                            "platform": detected_platform if detected_platform != "Social Media" else (req.platform or "Social Media"),
                            # Random placeholder when the request carries no follower count.
                            "estimated_followers": req.follower_count or f"{random.randint(10, 500)}K",
                            "snippet": snippet[:120] + "..." if len(snippet) > 120 else snippet
                        })
                        if len(discovered_creators) >= 8: break
                except Exception as inner_e:
                    print(f"Search query '{q}' failed: {inner_e}")
                    continue
    except Exception as e:
        print("Search engine failed:", str(e))

    # Random placeholder profile; overridden below by the LLM when available.
    age = random.choice(["18-24", "25-34", "35-44", "45-54"])
    gender = random.choices(["Female", "Male", "Non-binary / Other"], weights=[60, 35, 5])[0]
    tags = ["Content Creator"]
    is_safe = True
    safety_note = "None"
    safety_score = random.randint(80, 99)
    ml_confidence = random.randint(75, 95)
    llm_tags_applied = False

    if text_generator:
        try:
            sys_msg = "You are an AI profiler assigning aesthetics and demographics to social media profiles. Output ONLY valid JSON containing 'tags' (list of 3 strings), 'age' (string, e.g. '25-34'), 'gender' (string), 'brand_safe' (boolean), and 'confidence' (integer between 60 and 99)."
            user_msg = f"Profile to analyze: username: {username_text}. niche: {niche_name}. known for: {known_for_text}. Provide the JSON."
            llm_result = llm_generate(sys_msg, user_msg, max_new=150) or ""

            json_match = re.search(r'\{.*\}', llm_result.replace('\n', ''))
            if json_match:
                parsed = json.loads(json_match.group(0))

                # The reply is model output, so every field is checked before
                # use; a bad field keeps its placeholder value.
                raw_tags = parsed.get('tags')
                if isinstance(raw_tags, str):
                    raw_tags = [raw_tags]
                if isinstance(raw_tags, (list, tuple)):
                    clean_tags = [str(t).strip() for t in raw_tags if isinstance(t, (str, int, float))]
                    clean_tags = [t for t in clean_tags if t]
                    if clean_tags:
                        tags = clean_tags
                        llm_tags_applied = True

                raw_age = parsed.get('age')
                if isinstance(raw_age, (str, int)) and str(raw_age).strip():
                    age = raw_age

                raw_gender = parsed.get('gender')
                if isinstance(raw_gender, str) and raw_gender.strip():
                    gender = raw_gender

                raw_safe = parsed.get('brand_safe')
                if isinstance(raw_safe, str):
                    # bool("false") is True, so textual booleans are mapped by hand.
                    is_safe = raw_safe.strip().lower() not in ('false', 'no', '0', '')
                elif raw_safe is not None:
                    is_safe = bool(raw_safe)

                try:
                    ml_confidence = max(0, min(100, int(float(parsed['confidence']))))
                except (KeyError, TypeError, ValueError, OverflowError):
                    pass  # keep the placeholder confidence

                # Always derived from the final is_safe so the score and note
                # cannot disagree with the flag.
                safety_score = random.randint(88, 100) if is_safe else random.randint(30, 75)
                safety_note = "None" if is_safe else "Flagged content detected by LLM."
        except Exception as e:
            print("LLM JSON parsing failed:", e)

    # Without usable LLM tags, fall back to tagging the requested niche.
    if not llm_tags_applied and niche_name:
        tags.append(niche_name)

    recommended_platforms = [req.platform] if req.platform else ["Instagram"]

    return {
        "success": True,
        "image_url": req.image_url,
        "username_analyzed": req.username,
        "analysis": {
            "demographics_estimated": {
                "age_group": str(age),
                "gender_presentation": str(gender)
            },
            # Order-preserving de-duplication.
            "aesthetic_tags": list(dict.fromkeys(tags)),
            "recommended_platforms": recommended_platforms,
            "visual_brand_safety": {
                "score": safety_score,
                "flags": safety_note,
                "is_brand_safe": is_safe
            }
        },
        "discovered_creators": discovered_creators,
        "ml_confidence": ml_confidence
    }

