"""
KALSHIBOT LEARNING ENGINE  —  analyzer.py
Reads trade_analytics.json and finds what conditions actually win.
After 100+ trades it writes auto_params.json which the bot loads on startup.

Run anytime:  python analyzer.py
"""
import json, os, math
from collections import defaultdict

# Input: trade log read by load_exits(); only records whose "event" is "exit" are analysed.
ANALYTICS_PATH   = "C:\\kalshibot\\trade_analytics.json"
# Output: tuning file written by generate_auto_params().
AUTO_PARAMS_PATH = "C:\\kalshibot\\auto_params.json"
MIN_TRADES_TO_TUNE = 100   # need this many exits before auto-tuning
MIN_BUCKET_SIZE    = 8     # ignore buckets with fewer than this many trades
# Strong pattern thresholds — must match paper_trade.py
# (win rates below are fractions in [0, 1], not percentages)
STRONG_WIN_RATE    = 0.60  # 60%+ to call a condition "good"
STRONG_AVOID_RATE  = 0.40  # 40%- to call a condition "avoid"
STRONG_MIN_TRADES  = 20    # bucket must have 20+ trades to act on

# Weekday abbreviations, Monday first.
# NOTE(review): not referenced by any function visible in this file — confirm before removing.
DAYS = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]

# ── Load data ────────────────────────────────────────────────────────────────

def load_exits():
    """Load the analytics log and return only its completed-trade records.

    Returns:
        A list of the records whose "event" field equals "exit", or an
        empty list (after printing a hint) when ANALYTICS_PATH does not exist.
    """
    if os.path.exists(ANALYTICS_PATH):
        with open(ANALYTICS_PATH) as handle:
            records = json.load(handle)
        completed = list(filter(lambda rec: rec.get("event") == "exit", records))
        print(f"  Loaded {len(completed)} completed trades from analytics log.\n")
        return completed

    print("  No trade_analytics.json yet. Run the bot for a while first.")
    return []

# ── Bucketing helpers ────────────────────────────────────────────────────────

def bucket_win_rate(trades, key_fn, label_fn, title):
    """Group trades into buckets and print one win-rate row per bucket.

    Args:
        trades:   iterable of trade dicts; "outcome" and "pnl" are read from each.
        key_fn:   maps a trade to its bucket key (keys must be hashable and
                  mutually sortable, since rows are printed in sorted key order).
        label_fn: maps a bucket key to the text shown in the first column.
        title:    heading printed above the rows.

    Returns:
        A list of (key, trade_count, win_rate_pct, avg_pnl, total_pnl) tuples
        in sorted key order. Buckets with fewer than MIN_BUCKET_SIZE trades
        are neither printed nor returned.
    """
    stats = {}
    for trade in trades:
        entry = stats.setdefault(key_fn(trade), {"wins": 0, "total": 0, "pnl": 0.0})
        entry["total"] += 1
        entry["pnl"] += trade.get("pnl", 0)
        entry["wins"] += 1 if trade.get("outcome") == "WIN" else 0

    print(f"  ── {title} ──")
    rows = []
    for key in sorted(stats):
        count = stats[key]["total"]
        if count >= MIN_BUCKET_SIZE:
            pnl_sum = stats[key]["pnl"]
            win_pct = stats[key]["wins"] / count * 100
            mean_pnl = pnl_sum / count
            rows.append((key, count, win_pct, mean_pnl, pnl_sum))

            label = label_fn(key)
            # One bar segment per 5 percentage points of win rate.
            bar = "█" * int(win_pct / 5)
            # Highlight clearly good (>= 55%) and clearly bad (< 45%) buckets.
            if win_pct >= 55:
                marker = "  ✅"
            elif win_pct < 45:
                marker = "  ⚠️"
            else:
                marker = ""
            print(f"    {label:<22}  {count:>4} trades  WR:{win_pct:>5.1f}%  {bar}{marker}  avg P&L:${mean_pnl:>+.3f}")

    if len(rows) == 0:
        print(f"    Not enough data yet (need {MIN_BUCKET_SIZE}+ per bucket).")
    print()
    return rows

# ── Analysis sections ────────────────────────────────────────────────────────

def analyze_time_of_day(exits):
    """Report win rate grouped by each trade's "hour" field (default 0).

    Hours are shown as 12-hour clock ranges, e.g. hour 13 -> "01:00-01:59 PM".
    Returns the rows produced by bucket_win_rate().
    """
    def to_clock_range(hour):
        suffix = "AM" if hour < 12 else "PM"
        if hour <= 12:
            clock = hour
        else:
            clock = hour - 12
        if clock == 0:
            clock = 12   # midnight is shown as 12 AM
        return f"{clock:02d}:00-{clock:02d}:59 {suffix}"

    def hour_of(trade):
        return trade.get("hour", 0)

    return bucket_win_rate(exits, hour_of, to_clock_range, "Win Rate by Hour of Day")

def analyze_confidence_score(exits):
    """Break win rate down by total confidence score bucket.

    Rows are printed in ascending score order. Bucket keys carry a numeric
    rank internally because the plain labels sort lexicographically (which
    would place "7-9" after "40+").

    Args:
        exits: completed-trade dicts; "confidence_score" is read (default 0).

    Returns:
        The rows from bucket_win_rate(), keyed by the plain bucket label and
        ordered by ascending score.
    """
    # (exclusive upper bound, label) in ascending order.
    bands = [
        (7,  "0-6   (below threshold)"),
        (10, "7-9   (MEDIUM low)"),
        (15, "10-14 (MEDIUM mid)"),
        (20, "15-19 (MEDIUM high)"),
        (28, "20-27 (near HIGH)"),
        (40, "28-39 (HIGH)"),
    ]
    top_label = "40+   (HIGH strong)"

    def bucket(t):
        s = t.get("confidence_score", 0)
        for rank, (upper, label) in enumerate(bands):
            if s < upper:
                return (rank, label)
        return (len(bands), top_label)

    rows = bucket_win_rate(exits,
        key_fn   = bucket,
        label_fn = lambda k: f"Score {k[1]}",
        title    = "Win Rate by Confidence Score")
    # Drop the rank so callers still receive the label string as the row key.
    return [(key[1], *rest) for key, *rest in rows]

def analyze_exp_score(exits):
    """Win rate by EXP (expiration value) component score.

    Rows are printed from weakest to strongest signal. Bucket keys carry a
    numeric rank internally because the plain labels sort lexicographically
    (which would place "EXP=8-14" after "EXP=25+").

    Args:
        exits: completed-trade dicts; "exp_score" is read (default 0).

    Returns:
        The rows from bucket_win_rate(), keyed by the plain bucket label and
        ordered by ascending score.
    """
    zero_label = "EXP=0  (no mispricing)"
    # (exclusive upper bound, label) for non-zero scores, ascending.
    bands = [
        (8,  "EXP=1-7  (weak)"),
        (15, "EXP=8-14 (moderate)"),
        (25, "EXP=15-24 (strong)"),
    ]
    top_label = "EXP=25+  (very strong)"

    def bucket(t):
        s = t.get("exp_score", 0)
        if s == 0:
            return (0, zero_label)
        for rank, (upper, label) in enumerate(bands, start=1):
            if s < upper:
                return (rank, label)
        return (len(bands) + 1, top_label)

    rows = bucket_win_rate(exits,
        key_fn   = bucket,
        label_fn = lambda k: k[1],
        title    = "Win Rate by EXP Score (Expiration Value Signal)")
    # Drop the rank so callers still receive the label string as the row key.
    return [(key[1], *rest) for key, *rest in rows]

def analyze_lag_score(exits):
    """Report win rate grouped by the LAG (Kalshi lag vs BTC) component score.

    Reads "lag_score" from each trade (default 0) and returns the rows
    produced by bucket_win_rate().
    """
    # (exclusive upper bound, label) for non-zero scores, ascending.
    nonzero_bands = (
        (10, "LAG=1-9  (small lag)"),
        (20, "LAG=10-19 (moderate)"),
    )

    def lag_band(trade):
        score = trade.get("lag_score", 0)
        if score == 0:
            return "LAG=0  (no lag detected)"
        for upper, label in nonzero_bands:
            if score < upper:
                return label
        return "LAG=20+  (strong lag)"

    def as_is(key):
        return key

    return bucket_win_rate(exits, lag_band, as_is,
                           "Win Rate by LAG Score (Kalshi Lag Detection)")

def analyze_signal_tier(exits):
    """Report win rate grouped by each trade's "confidence_tier" ("?" if absent)."""
    def tier_of(trade):
        return trade.get("confidence_tier", "?")

    def tier_label(tier):
        return f"Tier: {tier}"

    return bucket_win_rate(exits, tier_of, tier_label,
                           "Win Rate by Confidence Tier (HIGH vs MEDIUM)")

def analyze_score_combo(exits):
    """Report win rate by which of the EXP / LAG / MOM component scores were positive.

    A trade's bucket is the "+"-joined names of its positive components
    (e.g. "EXP+MOM"), or "none" when no component score is above zero.
    """
    components = (("EXP", "exp_score"), ("LAG", "lag_score"), ("MOM", "mom_score"))

    def fired_signals(trade):
        fired = [name for name, field in components if trade.get(field, 0) > 0]
        return "+".join(fired) or "none"

    def combo_label(combo):
        return f"Signals: {combo}"

    return bucket_win_rate(exits, fired_signals, combo_label,
                           "Win Rate by Signal Combination (which signals fired)")

def analyze_momentum_strength(exits):
    """Report win rate grouped by each trade's "momentum_strength" (default 0).

    Values below 1.5 (including anything under 1.0) share the first bucket.
    """
    # (exclusive upper bound, label), ascending.
    bands = (
        (1.5, "1.0x-1.5x"),
        (2.0, "1.5x-2.0x"),
        (3.0, "2.0x-3.0x"),
        (5.0, "3.0x-5.0x"),
    )

    def strength_band(trade):
        strength = trade.get("momentum_strength", 0)
        for upper, label in bands:
            if strength < upper:
                return label
        return "5.0x+"

    def as_is(key):
        return key

    return bucket_win_rate(exits, strength_band, as_is,
                           "Win Rate by Momentum Strength (x threshold)")

def analyze_market_price(exits):
    """Report win rate grouped by each trade's "entry_price" (default 0.5).

    Prices below 0.25 all land in the first bucket and prices of 0.65 or
    more in the last, whatever the bucket label suggests.
    """
    # (exclusive upper bound, label), ascending.
    bands = (
        (0.25, "0.15-0.25"),
        (0.35, "0.25-0.35"),
        (0.45, "0.35-0.45"),
        (0.55, "0.45-0.55"),
        (0.65, "0.55-0.65"),
    )

    def price_band(trade):
        price = trade.get("entry_price", 0.5)
        return next((label for upper, label in bands if price < upper), "0.65-0.75")

    def price_label(band):
        return f"Contract @ {band}"

    return bucket_win_rate(exits, price_band, price_label, "Win Rate by Entry Price")

def analyze_volatility(exits):
    """Win rate by each trade's "btc_volatility" value (default 0).

    Rows are printed from lowest to highest volatility. Bucket keys carry a
    numeric rank internally because the plain labels sort alphabetically
    (extreme, high, low, medium, very_low), which scrambles the table.

    Returns:
        The rows from bucket_win_rate(), keyed by the plain bucket label and
        ordered by ascending volatility.
    """
    # (exclusive upper bound, label), ascending.
    bands = [
        (0.02, "very_low  <0.02%"),
        (0.05, "low       0.02-0.05%"),
        (0.10, "medium    0.05-0.10%"),
        (0.20, "high      0.10-0.20%"),
    ]
    top_label = "extreme   >0.20%"

    def bucket(t):
        v = t.get("btc_volatility", 0)
        for rank, (upper, label) in enumerate(bands):
            if v < upper:
                return (rank, label)
        return (len(bands), top_label)

    rows = bucket_win_rate(exits,
        key_fn   = bucket,
        label_fn = lambda k: f"Volatility {k[1]}",
        title    = "Win Rate by BTC Volatility")
    # Drop the rank so callers still receive the label string as the row key.
    return [(key[1], *rest) for key, *rest in rows]

def analyze_time_left(exits):
    """Win rate by each trade's "secs_left" value (default 600).

    Rows are printed from least to most time remaining. Bucket keys carry a
    numeric rank internally because, sorted as plain strings, the "< 300s"
    label lands after "750-900s" instead of first.

    Returns:
        The rows from bucket_win_rate(), keyed by the plain bucket label and
        ordered by ascending time left.
    """
    # (exclusive lower bound, label), longest window first.
    bands = [
        (750, "750-900s (12-15m left)"),
        (600, "600-750s (10-12m left)"),
        (450, "450-600s (7-10m left)"),
        (300, "300-450s (5-7m left)"),
    ]
    shortest_label = "< 300s   (<5m left)"

    def bucket(t):
        s = t.get("secs_left", 600)
        for offset, (lower, label) in enumerate(bands):
            if s > lower:
                # Longest window gets the highest rank so sorting is ascending.
                return (len(bands) - offset, label)
        return (0, shortest_label)

    rows = bucket_win_rate(exits,
        key_fn   = bucket,
        label_fn = lambda k: k[1],
        title    = "Win Rate by Time Left in Window at Entry")
    # Drop the rank so callers still receive the label string as the row key.
    return [(key[1], *rest) for key, *rest in rows]

def analyze_spread(exits):
    """Win rate by each trade's "market_spread" value (default 0.10).

    Rows are printed from tightest to widest spread. Bucket keys carry a
    numeric rank internally because, sorted as plain strings, "spread 10-15c"
    comes before "spread 5-10c" and "spread <5c".

    Returns:
        The rows from bucket_win_rate(), keyed by the plain bucket label and
        ordered by ascending spread.
    """
    # (exclusive upper bound, label), ascending.
    bands = [
        (0.05, "spread <5c  (tight)"),
        (0.10, "spread 5-10c"),
        (0.15, "spread 10-15c"),
    ]
    top_label = "spread >15c (wide)"

    def bucket(t):
        sp = t.get("market_spread", 0.10)
        for rank, (upper, label) in enumerate(bands):
            if sp < upper:
                return (rank, label)
        return (len(bands), top_label)

    rows = bucket_win_rate(exits,
        key_fn   = bucket,
        label_fn = lambda k: k[1],
        title    = "Win Rate by Market Spread")
    # Drop the rank so callers still receive the label string as the row key.
    return [(key[1], *rest) for key, *rest in rows]

def analyze_exit_reasons(exits):
    """Print trade count, average P&L and total P&L per exit reason.

    The reason is the first whitespace-separated word of a trade's
    "exit_reason" (e.g. TARGET / STOP / TIME). A missing, None, empty or
    whitespace-only reason is reported under "?" instead of raising, and a
    missing or None "pnl" counts as 0.

    Args:
        exits: iterable of completed-trade dicts.

    Returns:
        None; the breakdown is printed to stdout.
    """
    reasons = defaultdict(lambda: {"count": 0, "pnl": 0.0})
    for t in exits:
        # str(...).split() yields [] for blank text, so guard before indexing.
        words = str(t.get("exit_reason") or "?").split()
        r = words[0] if words else "?"
        reasons[r]["count"] += 1
        reasons[r]["pnl"]   += t.get("pnl") or 0
    print("  ── Exit Reason Breakdown ──")
    for r, d in sorted(reasons.items()):
        # Every bucket was created by a trade, so count is always >= 1.
        avg = d["pnl"] / d["count"]
        print(f"    {r:<8}  {d['count']:>4} trades  avg P&L:${avg:>+.3f}  total:${d['pnl']:>+.2f}")
    print()

# ── Market snapshots & auto-tuning ───────────────────────────────────────────

def analyze_market_snapshots():
    """Print BTC change-by-hour and recent Kalshi odds from market_snapshots.json.

    Prints a short notice and returns early when the snapshot file is
    missing or holds fewer than 10 snapshots. Returns None in every case.
    """
    snap_path = "C:\\kalshibot\\market_snapshots.json"
    if not os.path.exists(snap_path):
        print("  ── BTC & Odds Trend Analysis ──")
        print("    No market_snapshots.json yet — starts building as bot runs.\n")
        return

    with open(snap_path) as handle:
        snaps = json.load(handle)
    total = len(snaps)
    if total < 10:
        print(f"  ── BTC & Odds Trend Analysis ──\n    Only {total} snapshots so far — need more data.\n")
        return
    print(f"  ── BTC & Odds Trend Analysis ({total} snapshots) ──")

    # Collect the 5-minute BTC change of every snapshot under its hour;
    # a missing or null change counts as 0.
    changes_by_hour = defaultdict(list)
    for snap in snaps:
        changes_by_hour[snap.get("hour", 0)].append(snap.get("btc_chg_5m", 0) or 0)

    print("  Average BTC 5-min change by hour:")
    for hour in sorted(changes_by_hour):
        samples = changes_by_hour[hour]
        if len(samples) >= 5:   # hours with fewer than 5 samples are not shown
            mean_chg = sum(samples) / len(samples)
            arrow = "▲" if mean_chg > 0 else "▼"
            bar = arrow * min(10, int(abs(mean_chg) * 200))   # capped at 10 arrows
            print(f"    {hour:02d}:xx  avg {mean_chg:+.4f}%  {bar}  ({len(samples)} samples)")

    # Odds summary over the most recent (up to 100) snapshots that have a YES ask.
    with_odds = [snap for snap in snaps if snap.get("yes_ask") is not None]
    if with_odds:
        recent = with_odds[-100:]
        avg_yes = sum(snap["yes_ask"] for snap in recent) / len(recent)
        bias = "UP" if avg_yes > 0.5 else "DOWN"
        print(f"\n  Last 100 Kalshi snapshots:")
        print(f"    Avg YES ask : {avg_yes:.3f}  (market bias {bias})")
        spreads = [snap["spread"] for snap in recent if snap.get("spread") is not None]
        if spreads:
            print(f"    Avg spread  : {sum(spreads)/len(spreads):.3f}")
    print()

def generate_auto_params(exits):
    """Summarise what the completed trades show and write it to AUTO_PARAMS_PATH.

    With fewer than MIN_TRADES_TO_TUNE trades this only prints a progress
    message. Otherwise it writes a JSON object containing:

      * "best_entry_prices": entry prices (rounded to 0.1) whose bucket has
        at least STRONG_MIN_TRADES trades and a win rate >= STRONG_WIN_RATE;
      * "good_hours" / "avoid_hours": always empty (hour blocking is
        disabled); strong hours are only mentioned in "notes";
      * "momentum_threshold": a fixed 0.0010 — it is not derived from data;
      * "notes": human-readable observations on score tiers and on the
        EXP / LAG signals.

    Args:
        exits: list of completed-trade dicts.

    Returns:
        None. Prints the notes and overwrites AUTO_PARAMS_PATH.
    """
    from datetime import datetime  # local import: only needed for the timestamp

    if len(exits) < MIN_TRADES_TO_TUNE:
        print(f"  Auto-tune needs {MIN_TRADES_TO_TUNE}+ completed trades. "
              f"Have {len(exits)} so far. Keep running!\n")
        return

    print(f"\n  ══ AUTO-TUNE  ({len(exits)} trades) ══")

    def win_rate(trades):
        """Fraction of `trades` with outcome "WIN"; callers pass non-empty lists."""
        return sum(1 for t in trades if t.get("outcome") == "WIN") / len(trades)

    def strong_buckets(key_fn):
        """Sorted bucket keys that meet both STRONG_* thresholds."""
        groups = defaultdict(list)
        for t in exits:
            groups[key_fn(t)].append(t)
        return sorted(k for k, members in groups.items()
                      if len(members) >= STRONG_MIN_TRADES
                      and win_rate(members) >= STRONG_WIN_RATE)

    # Best hours and entry prices — STRONG thresholds only.
    good_hours  = strong_buckets(lambda t: t.get("hour", 0))
    good_prices = strong_buckets(lambda t: round(t.get("entry_price", 0.5) * 10) / 10)

    params = {
        "generated_at"         : str(datetime.now()),
        "trades_analyzed"      : len(exits),
        "good_hours"           : [],   # disabled — no hour blocking, needs weeks of data
        "avoid_hours"          : [],   # disabled — no hour blocking, needs weeks of data
        "best_entry_prices"    : good_prices,
        "momentum_threshold"   : 0.0010,
        "notes"                : []
    }
    notes = params["notes"]

    if good_hours:
        notes.append(f"Hour info (NOT blocking — informational only): best {[f'{h:02d}:xx' for h in good_hours]}")
    if good_prices:
        notes.append(f"Best entry prices (60%+ WR): {good_prices}")

    # ── Score-based tuning notes ──────────────────────────────────────────────
    low_score  = [t for t in exits if 7 <= t.get("confidence_score", 0) < 15]
    high_score = [t for t in exits if t.get("confidence_score", 0) >= 28]
    if len(low_score) >= STRONG_MIN_TRADES:
        wr_low = win_rate(low_score)
        if wr_low < STRONG_AVOID_RATE:
            notes.append(
                f"Low-score trades (7-14) losing at {wr_low*100:.0f}% — "
                f"consider raising SCORE_MEDIUM to 15")
        elif wr_low >= STRONG_WIN_RATE:
            notes.append(
                f"Low-score trades (7-14) winning at {wr_low*100:.0f}% — "
                f"SCORE_MEDIUM=7 is working well")
    if len(high_score) >= STRONG_MIN_TRADES:
        notes.append(
            f"HIGH confidence trades (28+): {win_rate(high_score)*100:.0f}% WR over {len(high_score)} trades")

    # ── EXP vs LAG signal quality ─────────────────────────────────────────────
    exp_only = [t for t in exits if t.get("exp_score",0) > 0 and t.get("lag_score",0) == 0]
    lag_any  = [t for t in exits if t.get("lag_score",0) > 0]
    if len(exp_only) >= MIN_BUCKET_SIZE:
        notes.append(
            f"EXP-only trades: {win_rate(exp_only)*100:.0f}% WR over {len(exp_only)} trades")
    if len(lag_any) >= MIN_BUCKET_SIZE:
        notes.append(
            f"Trades with LAG signal: {win_rate(lag_any)*100:.0f}% WR over {len(lag_any)} trades")

    with open(AUTO_PARAMS_PATH, "w") as f:
        json.dump(params, f, indent=2)

    print(f"  ✅ Wrote auto_params.json")
    for note in notes:
        print(f"     {note}")
    print(f"\n  The bot will load these settings on next restart.\n")

# ── Main ─────────────────────────────────────────────────────────────────────

def run():
    """Script entry point: print the overall summary and every report.

    Loads completed trades via load_exits() and stops quietly when there are
    none. Otherwise prints headline statistics, runs each analysis in turn,
    and finishes with generate_auto_params().
    """
    banner = "=" * 62
    print("\n" + banner)
    print("  KALSHIBOT LEARNING ENGINE")
    print(banner + "\n")

    exits = load_exits()
    if not exits:
        return

    trade_count = len(exits)
    win_count = sum(1 for t in exits if t.get("outcome") == "WIN")
    total_pnl = sum(t.get("pnl", 0) for t in exits)
    wr = win_count / trade_count * 100

    print(f"  Total trades   : {trade_count}")
    print(f"  Win rate       : {wr:.1f}%  ({win_count}W / {trade_count-win_count}L)")
    print(f"  Total P&L      : ${total_pnl:+.2f}")
    print(f"  Avg per trade  : ${total_pnl/trade_count:+.3f}\n")

    # Reports that work from the completed-trade list, in display order.
    trade_reports = (
        analyze_confidence_score,
        analyze_signal_tier,
        analyze_score_combo,
        analyze_exp_score,
        analyze_lag_score,
        analyze_time_of_day,
        analyze_momentum_strength,
        analyze_market_price,
        analyze_volatility,
        analyze_time_left,
        analyze_spread,
        analyze_exit_reasons,
    )
    for report in trade_reports:
        report(exits)

    # These two read their own log files rather than the trade list.
    analyze_market_snapshots()
    analyze_price_patterns()
    generate_auto_params(exits)

    print(banner)
    print("  Tip: run this again after every few hours of trading.")
    print("  Auto-tunes ONLY on 60%+ or 40%- WR with 20+ trades.")
    print(banner + "\n")

def analyze_price_patterns(log_path=None, pat_path=None):
    """Find NO-price / time-left buckets whose outcome is reliably one-sided.

    Reads the contract price log, keeps resolved observations whose outcome
    is not "UNKNOWN", groups them by NO ask price range and seconds left,
    and records a pattern for every bucket with at least 8 observations in
    which one side won 65% or more of the time. The result is always written
    to the patterns file (per the original notes, the bot loads it for bonus
    scoring), even when no pattern qualifies.

    Observations lacking a numeric "no_ask" or "secs_left" are left out of
    the buckets instead of raising.

    Args:
        log_path: JSON price log to read; defaults to
            C:\\kalshibot\\contract_price_log.json.
        pat_path: JSON patterns file to write; defaults to
            C:\\kalshibot\\price_patterns.json.

    Returns:
        None. Prints a report; returns early, without writing, when the log
        is missing or has fewer than 20 resolved observations.
    """
    from datetime import datetime  # local import: only needed for the timestamp

    if log_path is None:
        log_path = "C:\\kalshibot\\contract_price_log.json"
    if pat_path is None:
        pat_path = "C:\\kalshibot\\price_patterns.json"

    if not os.path.exists(log_path):
        print("  ── Price Pattern Analysis ──")
        print("    No contract_price_log.json yet — starts building as bot runs.\n")
        return
    with open(log_path) as f:
        log = json.load(f)
    resolved = [r for r in log if r.get("resolved") and r.get("outcome") != "UNKNOWN"]
    print(f"  ── Price Pattern Analysis ({len(resolved)} resolved observations) ──")
    if len(resolved) < 20:
        print(f"    Need 20+ resolved windows. Have {len(resolved)}. Keep running!\n")
        return

    # ── Define bucketing dimensions (half-open ranges: lo <= x < hi) ──────────
    NO_RANGES  = [(0.05, 0.10), (0.10, 0.15), (0.15, 0.20), (0.20, 0.30),
                  (0.30, 0.45), (0.45, 0.55), (0.55, 0.70), (0.70, 0.85)]
    SECS_BINS  = [(0, 120), (120, 300), (300, 480), (480, 660), (660, 900)]
    MIN_PAT_SAMPLES = 8    # need at least 8 obs in a bucket to publish a pattern
    MIN_FREQUENCY   = 0.65 # one side must win at least this often

    def in_range(value, lo, hi):
        """True when value is numeric and lo <= value < hi."""
        return isinstance(value, (int, float)) and lo <= value < hi

    def avg_no_move(rows):
        """Mean "no_moved" over rows, counting missing/None as 0."""
        return sum(r.get("no_moved") or 0 for r in rows) / len(rows)

    patterns = []
    for (n_lo, n_hi) in NO_RANGES:
        # Filter by price once per range, then split by time left.
        in_price = [r for r in resolved if in_range(r.get("no_ask"), n_lo, n_hi)]
        for (s_lo, s_hi) in SECS_BINS:
            bucket = [r for r in in_price if in_range(r.get("secs_left"), s_lo, s_hi)]
            n = len(bucket)
            if n < MIN_PAT_SAMPLES:
                continue
            yes_freq = sum(1 for r in bucket if r.get("outcome") == "YES_WON") / n
            no_freq  = sum(1 for r in bucket if r.get("outcome") == "NO_WON") / n
            if yes_freq >= MIN_FREQUENCY:
                # YES wins reliably → direction "UP" (buy YES).
                direction, winner, freq = "UP", "YES", yes_freq
            elif no_freq >= MIN_FREQUENCY:
                # NO wins reliably → direction "DOWN" (buy NO).
                direction, winner, freq = "DOWN", "NO", no_freq
            else:
                continue
            patterns.append({
                "type"      : "NO_PRICE",
                "no_range"  : [n_lo, n_hi],
                "yes_range" : [0, 1],
                "secs_range": [s_lo, s_hi],
                "direction" : direction,
                "frequency" : round(freq, 3),
                "avg_no_moved": round(avg_no_move(bucket), 4),
                "samples"   : n,
                "label"     : f"NO@{n_lo:.2f}-{n_hi:.2f} | {s_lo//60}-{s_hi//60}min | {winner} wins {freq*100:.0f}%"
            })

    # Print found patterns, most reliable first
    if patterns:
        print(f"  Found {len(patterns)} reliable price patterns:\n")
        for p in sorted(patterns, key=lambda x: x["frequency"], reverse=True):
            conf = "🟢 STRONG" if p["frequency"] >= 0.80 else "🟡 GOOD" if p["frequency"] >= 0.72 else "🔵 FAIR"
            print(f"    {conf}  {p['label']}")
            print(f"           avg price move: {p['avg_no_moved']:+.4f}  |  {p['samples']} samples")
    else:
        print("    No strong patterns found yet — keep collecting data.")

    # Also show price trajectories for informational insight
    print(f"\n  ── Price Trajectory Insight ──")
    for (n_lo, n_hi) in [(0.05, 0.15), (0.15, 0.25), (0.25, 0.40)]:
        bucket = [r for r in resolved if in_range(r.get("no_ask"), n_lo, n_hi)]
        if len(bucket) < 5:
            continue
        avg_move = avg_no_move(bucket)
        direction = "fell" if avg_move < 0 else "rose"
        print(f"    NO priced {n_lo:.2f}-{n_hi:.2f}¢: avg moved {avg_move:+.4f} ({direction}) over {len(bucket)} observations")
    print()

    # Write patterns file
    output = {
        "generated_at"      : str(datetime.now()),
        "resolved_obs"      : len(resolved),
        "patterns_found"    : len(patterns),
        "patterns"          : patterns,
    }
    with open(pat_path, "w") as f:
        json.dump(output, f, indent=2)
    if patterns:
        print(f"  ✅ Wrote {len(patterns)} patterns to price_patterns.json")
        print(f"     Bot loads these automatically on next window change.\n")
    else:
        print(f"  price_patterns.json updated — no strong patterns yet.\n")


# The entry-point guard must come after every function run() calls: module
# code executes top to bottom, so a guard placed above analyze_price_patterns
# would call run() before that name exists.
if __name__ == "__main__":
    run()