# 2025-wrapped / app.py
# merve's picture
# merve HF Staff
# Upload 2 files
# 00e4b7d verified
# raw
# history blame
# 25.1 kB
import gradio as gr
from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional, Tuple
from collections import Counter
import json
import os
import html as html_lib
from huggingface_hub import HfApi, InferenceClient
def _created_year(obj):
    """Return the year a repo object was created, or ``None`` if unknown.

    The timestamp is read from the ``created_at`` attribute, or from the
    ``created_at`` / ``createdAt`` key when *obj* is a dict. It may be a
    ``datetime``/``date`` instance or an ISO-8601 string; anything else
    (missing, ``None``, unparsable text) yields ``None`` so callers can skip
    the item instead of crashing.
    """
    if isinstance(obj, dict):
        stamp = obj.get("created_at") or obj.get("createdAt")
    else:
        stamp = getattr(obj, "created_at", None)

    if stamp is None:
        return None

    if isinstance(stamp, str):
        text = stamp.strip()
        # `fromisoformat` only understands a trailing "Z" from Python 3.11 on.
        if text.endswith("Z"):
            text = text[:-1] + "+00:00"
        try:
            stamp = datetime.fromisoformat(text)
        except ValueError:
            return None

    return getattr(stamp, "year", None)
def _repo_id(obj: Any) -> str:
    """Return the repository identifier of *obj*, or ``"N/A"`` if none is set.

    Dicts are probed by key (``id``, ``modelId``, ``repoId``); any other
    object is probed by attribute (the same names plus ``repo_id``). The first
    truthy value wins.
    """
    if isinstance(obj, dict):
        for key in ("id", "modelId", "repoId"):
            found = obj.get(key)
            if found:
                return found
        return "N/A"

    for attr in ("id", "modelId", "repoId", "repo_id"):
        found = getattr(obj, attr, None)
        if found:
            return found
    return "N/A"
def _repo_likes(obj: Any) -> int:
    """Return the ``likes`` attribute of *obj* as an int (0 when missing or falsy)."""
    raw_likes = getattr(obj, "likes", 0)
    return int(raw_likes) if raw_likes else int(0)
def _repo_tags(obj: Any) -> List[str]:
    """Return the string entries of ``obj.tags``; non-string entries are dropped."""
    raw_tags = getattr(obj, "tags", None)
    if not raw_tags:
        return []
    return list(filter(lambda entry: isinstance(entry, str), raw_tags))
def _repo_pipeline_tag(obj: Any) -> Optional[str]:
    """Return ``obj.pipeline_tag`` unchanged, or ``None`` when the attribute is absent."""
    return getattr(obj, "pipeline_tag", None)
def _collect_2025_sorted_desc(items: Iterable[Any]) -> List[Any]:
    """
    Keep the items created in 2025 from an iterable sorted newest-first.

    Iteration stops at the first item whose creation year is before 2025, so
    a lazily paginated listing is not consumed past that point. Items whose
    year cannot be determined are skipped; items newer than 2025 are passed
    over without stopping.
    """

    def _dated_until_cutoff():
        # Yield (year, item) pairs until the first pre-2025 item shows up.
        for entry in items:
            year = _created_year(entry)
            if year is not None:
                if year < 2025:
                    return
                yield year, entry

    return [entry for year, entry in _dated_until_cutoff() if year == 2025]
def fetch_user_data_2025(username: str, token: Optional[str] = None) -> Dict[str, List[Any]]:
    """Return the models, datasets and spaces that *username* created in 2025.

    Each listing is requested with ``sort="createdAt"`` and ``direction=-1``
    and handed to ``_collect_2025_sorted_desc``, which stops reading once it
    reaches items older than 2025. The result has the keys ``"models"``,
    ``"datasets"`` and ``"spaces"``.
    """
    api = HfApi(token=token)
    # The three listings take the same query arguments.
    query = {"author": username, "full": True, "sort": "createdAt", "direction": -1}
    return {
        "models": _collect_2025_sorted_desc(api.list_models(**query)),
        "datasets": _collect_2025_sorted_desc(api.list_datasets(**query)),
        "spaces": _collect_2025_sorted_desc(api.list_spaces(**query)),
    }
def get_most_liked_item(items: List[Dict]) -> Optional[Dict]:
    """Return the dict with the highest ``"likes"`` value.

    Returns ``None`` for an empty (or ``None``) input instead of letting
    ``max()`` raise ``ValueError``. A missing or ``None`` like count is
    treated as 0. On ties the earliest item wins.
    """
    if not items:
        return None
    return max(items, key=lambda item: item.get("likes") or 0)
def _normalize_task_tag(tag: str) -> Optional[str]:
    """Return *tag* lower-cased with any known task prefix removed.

    The prefixes ``task_categories:``, ``task_ids:`` and ``pipeline_tag:`` are
    stripped from the front, each checked once in that order. Returns ``None``
    when nothing is left.
    """
    text = tag.strip() if tag else ""
    if text == "":
        return None

    for marker in ("task_categories:", "task_ids:", "pipeline_tag:"):
        if text[:len(marker)] == marker:
            text = text[len(marker):].strip()

    lowered = text.strip().lower()
    return lowered if lowered else None
def _suggested_nickname_for_task(task: Optional[str]) -> Optional[str]:
    """Return the canned nickname for a task name, or ``None`` if there is none.

    *task* is matched case-insensitively after trimming whitespace. ``None``
    or any other non-string value returns ``None`` rather than raising, since
    callers pass the inferred top task, which may be absent.
    """
    if not isinstance(task, str):
        return None
    mapping = {
        "text-generation": "LLM Whisperer πŸ—£οΈ",
        "image-text-to-text": "VLM Nerd πŸ€“",
        "text-to-speech": "Full‑time Yapper πŸ—£οΈ",
        "automatic-speech-recognition": "Subtitle Goblin 🎧",
        "text-to-image": "Diffusion Gremlin 🎨",
        "image-classification": "Pixel Judge πŸ‘οΈ",
        "token-classification": "NERd Lord πŸ€“",
        "text-classification": "Opinion Machine 🧠",
        "translation": "Language Juggler πŸ—ΊοΈ",
        "summarization": "TL;DR Dealer ✍️",
        "image-to-text": "Caption Connoisseur πŸ–ΌοΈ",
        "zero-shot-classification": "Label Wizard πŸͺ„",
    }
    return mapping.get(task.strip().lower())
def infer_task_and_modality(
    models: List[Any], datasets: List[Any], spaces: List[Any]
) -> Tuple[Optional[str], Counter]:
    """
    Infer the user's most common task.

    Returns: (most_common_task, task_counter)
    - If any model has a non-blank string `pipeline_tag`, the counter is built
      from those values only (trimmed and lower-cased).
    - Otherwise it is built from the normalized tags of all models, datasets
      and spaces.
    - `most_common_task` is None when the counter is empty.

    Despite the function name, no modality is returned.
    """
    model_tasks: List[str] = []
    for model in models:
        pipeline_tag = _repo_pipeline_tag(model)
        # Ignore non-string or blank values instead of crashing or counting "".
        if isinstance(pipeline_tag, str) and pipeline_tag.strip():
            model_tasks.append(pipeline_tag.strip().lower())

    if model_tasks:
        counts = Counter(model_tasks)
    else:
        # The tag fallback is only needed when no model declares a pipeline tag.
        counts = Counter()
        for group in (models, datasets, spaces):
            for obj in group:
                for tag in _repo_tags(obj):
                    normalized = _normalize_task_tag(tag)
                    if normalized:
                        counts[normalized] += 1

    top_task = counts.most_common(1)[0][0] if counts else None
    return top_task, counts
def _k2_model_candidates() -> List[str]:
    """
    Return the Kimi K2 repo IDs to try, in priority order and without duplicates.

    Kimi K2 repo IDs can vary, so the ``KIMI_K2_MODEL`` environment variable,
    when set to a non-blank value, is tried first; the built-in default
    ``moonshotai/Kimi-K2-Instruct`` always follows.
    """
    default_model = "moonshotai/Kimi-K2-Instruct"
    env_model = (os.getenv("KIMI_K2_MODEL") or "").strip()
    candidates = [env_model, default_model]
    # de-dupe while preserving order
    seen = set()
    out = []
    for c in candidates:
        if c and c not in seen:
            out.append(c)
            seen.add(c)
    return out
def _esc(value: Any) -> str:
    """HTML-escape ``str(value)``, quotes included; ``None`` becomes an empty string."""
    return "" if value is None else html_lib.escape(str(value), quote=True)
def _profile_username(profile: Any) -> Optional[str]:
    """Extract a username from an OAuth profile object of unknown shape.

    Lookup order: attributes on *profile*; keys of its ``data`` dict, then of
    the ``profile`` / ``user`` dicts nested inside it; finally keys of
    *profile* itself when it is a dict. The first non-blank string found is
    returned trimmed, with leading ``@`` characters removed. Returns ``None``
    when nothing matches.
    """

    def _clean(candidate: Any) -> Optional[str]:
        # Only non-blank strings qualify; the result may still be "" (e.g. "@").
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip().lstrip("@")
        return None

    if profile is None:
        return None

    for attr in ("username", "preferred_username", "name", "user", "handle"):
        name = _clean(getattr(profile, attr, None))
        if name is not None:
            return name

    payload = getattr(profile, "data", None)
    if isinstance(payload, dict):
        for key in ("username", "preferred_username", "name"):
            name = _clean(payload.get(key))
            if name is not None:
                return name
        for section in ("profile", "user"):
            nested = payload.get(section)
            if isinstance(nested, dict):
                name = _clean(
                    nested.get("username")
                    or nested.get("preferred_username")
                    or nested.get("name")
                )
                if name is not None:
                    return name

    if isinstance(profile, dict):
        name = _clean(
            profile.get("username")
            or profile.get("preferred_username")
            or profile.get("name")
        )
        if name is not None:
            return name

    return None
def _profile_token(profile: Any) -> Optional[str]:
    """
    Extract an access token from an OAuth profile object of unknown shape.

    Gradio's OAuth payload varies by version, so several common spellings are
    probed: attributes on *profile*, keys of its ``data`` dict, a nested
    ``oauth_info`` / ``oauth`` / ``oauthInfo`` dict inside ``data``, and
    finally keys of *profile* itself when it is a dict. The first non-blank
    string is returned trimmed; otherwise ``None``.
    """
    token_names = ("token", "access_token", "hf_token", "oauth_token", "oauth_access_token")

    def _clean(candidate: Any) -> Optional[str]:
        # Accept only strings with visible content.
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()
        return None

    if profile is None:
        return None

    for attr in token_names:
        found = _clean(getattr(profile, attr, None))
        if found is not None:
            return found

    payload = getattr(profile, "data", None)
    if isinstance(payload, dict):
        for key in token_names:
            found = _clean(payload.get(key))
            if found is not None:
                return found
        # Common nested objects
        nested = payload.get("oauth_info") or payload.get("oauth") or payload.get("oauthInfo") or {}
        if isinstance(nested, dict):
            found = _clean(nested.get("access_token") or nested.get("token"))
            if found is not None:
                return found

    if isinstance(profile, dict):
        found = _clean(profile.get("token") or profile.get("access_token"))
        if found is not None:
            return found

    return None
def _extract_json_object(content: str) -> Optional[dict]:
    """Parse the outermost ``{...}`` span of *content* as JSON; ``None`` if that fails."""
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        parsed = json.loads(content[start:end + 1])
    except ValueError:
        return None
    return parsed if isinstance(parsed, dict) else None


def generate_roast_and_nickname_with_k2(
    *,
    username: str,
    total_artifacts_2025: int,
    models_2025: int,
    datasets_2025: int,
    spaces_2025: int,
    top_task: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """
    Ask Kimi K2 for a playful nickname and roast based on the user's 2025 stats.

    Calls Kimi K2 via Hugging Face Inference Providers (via huggingface_hub
    InferenceClient), authenticated with the ``HF_TOKEN`` environment variable.

    Returns (nickname, roast). Either element is None when the model did not
    supply a usable string. Returns (None, None) when ``HF_TOKEN`` is unset,
    when the call fails for any reason, or when the reply holds no JSON object.
    """
    token = (os.getenv("HF_TOKEN") or "").strip()
    if not token:
        return None, None

    model_id = "moonshotai/Kimi-K2-Instruct"
    vibe = top_task or "mysterious vibes"
    above_below = "above" if total_artifacts_2025 > 20 else "below"
    # No known task means no canned suggestion; do not pass None to the helper.
    suggested = _suggested_nickname_for_task(top_task) if top_task else None
    system = (
        "You are a witty, playful roast-comedian. Keep it fun, not cruel. "
        "No slurs, no hate, no harassment. Avoid profanity. Keep it short."
    )
    user = f"""
Create TWO things about this Hugging Face user, based on their 2025 activity stats.
User: @{username}
Artifacts created in 2025: {total_artifacts_2025} (models={models_2025}, datasets={datasets_2025}, spaces={spaces_2025}) which is {above_below} 20.
Top task (pipeline_tag): {top_task or "unknown"}
Nickname guidance (examples you SHOULD follow when applicable):
- text-generation -> LLM Whisperer πŸ—£οΈ
- image-text-to-text -> VLM Nerd πŸ€“
- text-to-speech -> Full‑time Yapper πŸ—£οΈ
If top task is known and you have a strong matching idea, pick a nickname like the examples. {f'If unsure, you may use this suggested nickname: {suggested}' if suggested else ''}
Roast should reference the task and whether they are above/below 20 artifacts.
Most common vibe: {vibe}
Return ONLY valid JSON with exactly these keys:
{{
"nickname": "...", // short, funny, can include 1 emoji
"roast": "..." // 1-2 sentences max, playful, no bullying
}}
""".strip()

    try:
        client = InferenceClient(model=model_id, token=token)
        resp = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=180,
            temperature=0.8,
        )
        content = (resp.choices[0].message.content or "").strip()
    except Exception:
        # Deliberately broad: the roast is optional decoration, so any provider,
        # network or response-shape failure degrades to "no roast".
        return None, None

    # Models often wrap JSON in prose or code fences; take the object itself.
    payload = _extract_json_object(content)
    if payload is None:
        return None, None

    nickname = payload.get("nickname")
    roast = payload.get("roast")
    nickname_out = (nickname.strip() or None) if isinstance(nickname, str) else None
    roast_out = (roast.strip() or None) if isinstance(roast, str) else None
    return nickname_out, roast_out
def generate_wrapped_report(profile: gr.OAuthProfile) -> str:
    """Generate the HF Wrapped 2025 report as a standalone HTML document.

    Reads the username and token from *profile*, fetches the repos the user
    created in 2025, computes per-type counts, the most liked repo of each
    type, total likes and the most common task, then asks Kimi K2 for a
    nickname and roast.

    Every dynamic string placed in the markup (username, nickname, task,
    roast, repo ids) is HTML-escaped. A failing roast call is not fatal: the
    report falls back to its "couldn't generate a roast" message.
    """
    username = _profile_username(profile) or "unknown"
    token = _profile_token(profile)
    # Fetch 2025 data (API-side sort + early stop)
    user_data_2025 = fetch_user_data_2025(username, token)
    models_2025 = user_data_2025["models"]
    datasets_2025 = user_data_2025["datasets"]
    spaces_2025 = user_data_2025["spaces"]
    most_liked_model = max(models_2025, key=_repo_likes) if models_2025 else None
    most_liked_dataset = max(datasets_2025, key=_repo_likes) if datasets_2025 else None
    most_liked_space = max(spaces_2025, key=_repo_likes) if spaces_2025 else None
    total_likes = sum(_repo_likes(x) for x in (models_2025 + datasets_2025 + spaces_2025))
    top_task, _task_counts = infer_task_and_modality(models_2025, datasets_2025, spaces_2025)
    total_artifacts_2025 = len(models_2025) + len(datasets_2025) + len(spaces_2025)
    try:
        nickname, roast = generate_roast_and_nickname_with_k2(
            username=username,
            total_artifacts_2025=total_artifacts_2025,
            models_2025=len(models_2025),
            datasets_2025=len(datasets_2025),
            spaces_2025=len(spaces_2025),
            top_task=top_task,
        )
    except Exception:
        # The roast is optional; never let it take down the whole report.
        nickname, roast = None, None
    # Create HTML report
    html = f"""
<!DOCTYPE html>
<html>
<head>
<style>
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;600;700;800&display=swap');
body {{
font-family: 'Poppins', sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
margin: 0;
padding: 20px;
min-height: 100vh;
}}
.container {{
max-width: 800px;
margin: 0 auto;
background: rgba(255, 255, 255, 0.95);
border-radius: 30px;
padding: 40px;
box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
animation: fadeIn 0.8s ease-in;
}}
@keyframes fadeIn {{
from {{ opacity: 0; transform: translateY(20px); }}
to {{ opacity: 1; transform: translateY(0); }}
}}
.header {{
text-align: center;
margin-bottom: 40px;
}}
.header h1 {{
font-size: 3em;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 0;
font-weight: 800;
animation: slideDown 0.6s ease-out;
}}
@keyframes slideDown {{
from {{ transform: translateY(-30px); opacity: 0; }}
to {{ transform: translateY(0); opacity: 1; }}
}}
.username {{
font-size: 1.5em;
color: #764ba2;
margin-top: 10px;
font-weight: 600;
}}
.nickname {{
font-size: 1.1em;
color: #111 !important;
margin-top: 8px;
font-weight: 700;
background: #ffffff !important;
display: inline-block;
padding: 6px 12px;
border-radius: 999px;
border: 1px solid rgba(245, 87, 108, 0.25);
box-shadow: 0 8px 18px rgba(0, 0, 0, 0.08);
}}
.year {{
font-size: 4em;
font-weight: 800;
text-align: center;
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin: 30px 0;
animation: pulse 2s ease-in-out infinite;
}}
@keyframes pulse {{
0%, 100% {{ transform: scale(1); }}
50% {{ transform: scale(1.05); }}
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin: 30px 0;
}}
.stat-card {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 20px;
text-align: center;
box-shadow: 0 10px 25px rgba(102, 126, 234, 0.3);
transition: transform 0.3s ease, box-shadow 0.3s ease;
animation: popIn 0.5s ease-out backwards;
}}
.stat-card:nth-child(1) {{ animation-delay: 0.1s; }}
.stat-card:nth-child(2) {{ animation-delay: 0.2s; }}
.stat-card:nth-child(3) {{ animation-delay: 0.3s; }}
@keyframes popIn {{
from {{ transform: scale(0.8); opacity: 0; }}
to {{ transform: scale(1); opacity: 1; }}
}}
.stat-card:hover {{
transform: translateY(-5px) scale(1.05);
box-shadow: 0 15px 35px rgba(102, 126, 234, 0.4);
}}
.stat-number {{
font-size: 3em;
font-weight: 800;
margin: 10px 0;
}}
.stat-label {{
font-size: 1em;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 1px;
}}
.section {{
margin: 40px 0;
padding: 25px;
background: #ffffff !important;
border-radius: 20px;
animation: slideIn 0.6s ease-out;
color: #111 !important;
border: 1px solid rgba(17, 17, 17, 0.08);
box-shadow: 0 12px 30px rgba(0, 0, 0, 0.10);
border-top: 6px solid rgba(102, 126, 234, 0.85);
}}
@keyframes slideIn {{
from {{ transform: translateX(-30px); opacity: 0; }}
to {{ transform: translateX(0); opacity: 1; }}
}}
.section h2 {{
color: #1f1b5a !important;
font-size: 1.8em;
margin-top: 0;
font-weight: 700;
display: flex;
align-items: center;
gap: 10px;
}}
.trophy {{
font-size: 1.5em;
}}
.item {{
background: #ffffff !important;
padding: 20px;
margin: 15px 0;
border-radius: 15px;
box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
transition: transform 0.2s ease;
border: 1px solid rgba(17, 17, 17, 0.08);
}}
.item:hover {{
transform: translateX(10px);
}}
.item-name {{
font-weight: 600;
font-size: 1.2em;
color: #111 !important;
margin-bottom: 5px;
}}
.item-likes {{
color: #d92d20 !important;
font-weight: 600;
font-size: 1.1em;
}}
.item-sub {{
color: #1f2937 !important;
font-weight: 600;
font-size: 1.05em;
}}
.emoji {{
font-size: 1.5em;
margin-right: 10px;
}}
.total-likes {{
text-align: center;
margin: 40px 0;
padding: 30px;
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
border-radius: 20px;
color: white;
}}
.total-likes-number {{
font-size: 4em;
font-weight: 800;
margin: 10px 0;
}}
.total-likes-label {{
font-size: 1.3em;
font-weight: 600;
}}
.footer {{
text-align: center;
margin-top: 40px;
color: #111 !important;
font-weight: 600;
background: #ffffff !important;
border: 1px solid rgba(17, 17, 17, 0.08);
border-radius: 16px;
padding: 16px 18px;
box-shadow: 0 10px 24px rgba(0, 0, 0, 0.08);
}}
.footer p {{
margin: 8px 0;
color: #111 !important;
opacity: 1 !important;
font-size: 1.05em;
line-height: 1.35;
}}
.no-data {{
text-align: center;
color: #111 !important;
font-style: italic;
padding: 20px;
}}
.roast {{
font-size: 1.15em;
line-height: 1.5;
color: #111 !important;
background: #fff0f3 !important;
border-left: 6px solid #f5576c;
padding: 18px 18px;
border-radius: 14px;
margin-top: 10px;
border: 1px solid rgba(245, 87, 108, 0.25);
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>πŸŽ‰ HF WRAPPED πŸŽ‰</h1>
<div class="username">@{_esc(username)}</div>
{f'<div class="nickname">You are a {_esc(nickname)}</div>' if nickname else ''}
</div>
<div class="year">2025</div>
<div class="stats-grid">
<div class="stat-card">
<div class="stat-number">{len(models_2025)}</div>
<div class="stat-label">πŸ€– Models</div>
</div>
<div class="stat-card">
<div class="stat-number">{len(datasets_2025)}</div>
<div class="stat-label">πŸ“Š Datasets</div>
</div>
<div class="stat-card">
<div class="stat-number">{len(spaces_2025)}</div>
<div class="stat-label">πŸš€ Spaces</div>
</div>
</div>
<div class="section">
<h2><span class="trophy">🧠</span> Your Signature Vibe</h2>
{f'''
<div class="item">
<div class="item-name"><span class="emoji">🎯</span>Most common task: {_esc(top_task)}</div>
<div class="item-sub">Total 2025 artifacts: {total_artifacts_2025}</div>
</div>
''' if top_task else '<div class="no-data">Not enough metadata to infer your vibe (yet).</div>'}
</div>
<div class="total-likes">
<div class="total-likes-number">❀️ {total_likes}</div>
<div class="total-likes-label">Total Likes Received</div>
</div>
<div class="section">
<h2><span class="trophy">πŸ”₯</span> Roast (Kimi K2)</h2>
{f'<div class="roast">{_esc(roast)}</div>' if roast else '<div class="no-data">Couldn’t generate a roast (missing token or Kimi K2 not reachable).</div>'}
</div>
<div class="section">
<h2><span class="trophy">πŸ†</span> Most Liked Model</h2>
{f'''
<div class="item">
<div class="item-name"><span class="emoji">πŸ€–</span>{_esc(_repo_id(most_liked_model))}</div>
<div class="item-likes">❀️ {_repo_likes(most_liked_model)} likes</div>
</div>
''' if most_liked_model else '<div class="no-data">No models yet</div>'}
</div>
<div class="section">
<h2><span class="trophy">πŸ†</span> Most Liked Dataset</h2>
{f'''
<div class="item">
<div class="item-name"><span class="emoji">πŸ“Š</span>{_esc(_repo_id(most_liked_dataset))}</div>
<div class="item-likes">❀️ {_repo_likes(most_liked_dataset)} likes</div>
</div>
''' if most_liked_dataset else '<div class="no-data">No datasets yet</div>'}
</div>
<div class="section">
<h2><span class="trophy">πŸ†</span> Most Liked Space</h2>
{f'''
<div class="item">
<div class="item-name"><span class="emoji">πŸš€</span>{_esc(_repo_id(most_liked_space))}</div>
<div class="item-likes">❀️ {_repo_likes(most_liked_space)} likes</div>
</div>
''' if most_liked_space else '<div class="no-data">No spaces yet</div>'}
</div>
<div class="footer">
<p>🎊 Thank you for being part of the Hugging Face community! 🎊</p>
<p>Keep building amazing things in 2025! πŸš€</p>
<p>Made with Inference Providers with love πŸ’–</p>
</div>
</div>
</body>
</html>
"""
    return html
def show_login_message():
    """Return the static HTML placeholder asking the visitor to sign in."""
    login_prompt_html = """
<div style="text-align: center; padding: 50px; font-family: 'Poppins', sans-serif;">
<h1 style="color: #667eea; font-size: 3em;">πŸŽ‰ Welcome to HF Wrapped 2025! πŸŽ‰</h1>
<p style="font-size: 1.5em; color: #764ba2;">
Please log in with your Hugging Face account to see your personalized report!
</p>
<p style="font-size: 1.2em; color: #666;">
Click the "Sign in with Hugging Face" button above πŸ‘†
</p>
</div>
"""
    return login_prompt_html
# Create Gradio interface
# NOTE(review): the nesting below was reconstructed from flattened source;
# confirm whether `output` belongs inside the Row/Column or directly under Blocks.
with gr.Blocks(theme=gr.themes.Soft(), css="""
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
}
""") as demo:
    # Static banner shown above the login button.
    gr.HTML("""
<div style="text-align: center; padding: 20px; color: white;">
<h1 style="font-size: 3em; margin: 0;">πŸŽ‰ HF Wrapped 2025 πŸŽ‰</h1>
<p style="font-size: 1.2em;">Discover your Hugging Face journey this year!</p>
</div>
""")
    with gr.Row():
        with gr.Column():
            login_button = gr.LoginButton()
    # Single HTML slot: holds the login prompt first, then the generated report.
    output = gr.HTML(value=show_login_message())

    def _render(profile_obj: Optional[gr.OAuthProfile] = None):
        """Return the report HTML when a profile is supplied, else the login prompt."""
        # In Gradio versions that support OAuth, `profile_obj` is injected after login.
        return generate_wrapped_report(profile_obj) if profile_obj is not None else show_login_message()

    # On load show the login message (and in some Gradio versions, this also receives the injected profile)
    demo.load(fn=_render, inputs=None, outputs=output)
    # After login completes, clicking the login button will trigger a rerender.
    # Older Gradio treats LoginButton as a button (click event), not a value component (change event).
    if hasattr(login_button, "click"):
        login_button.click(fn=_render, inputs=None, outputs=output)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()