<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Rudrite Research</title>
    <link>https://research.rudrite.com/</link>
    <atom:link href="https://research.rudrite.com/feed.xml" rel="self" type="application/rss+xml" />
    <description>Interactive, animated, visual explainers of landmark AI &amp; ML papers — the frontier, made legible.</description>
    <language>en-us</language>
    <lastBuildDate>Tue, 09 Jun 2026 00:00:00 GMT</lastBuildDate>
    <item>
      <title>Scaling Laws for Neural Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/scaling-laws</link>
      <guid isPermaLink="true">https://research.rudrite.com/scaling-laws</guid>
      <description>Loss falls as a clean power law in size, data, and compute — and tells you how to spend the budget. A free, interactive, animated walkthrough of Scaling Laws…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Adam: A Method for Stochastic Optimization — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/adam</link>
      <guid isPermaLink="true">https://research.rudrite.com/adam</guid>
      <description>A per-parameter adaptive learning rate from two moving averages of the gradient. A free, interactive, animated walkthrough of Adam: A Method for Stochastic…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Deep Residual Learning for Image Recognition — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/resnet</link>
      <guid isPermaLink="true">https://research.rudrite.com/resnet</guid>
      <description>Add the input back — the identity skip that made 152-layer nets trainable. A free, interactive, animated walkthrough of Deep Residual Learning for Image…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Denoising Diffusion Probabilistic Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/ddpm</link>
      <guid isPermaLink="true">https://research.rudrite.com/ddpm</guid>
      <description>Add noise to an image, then learn the reverse — the recipe behind modern diffusion. A free, interactive, animated walkthrough of Denoising Diffusion…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/switch-transformers</link>
      <guid isPermaLink="true">https://research.rudrite.com/switch-transformers</guid>
      <description>Send each token to a single expert — and scale a model to a trillion parameters. A free, interactive, animated walkthrough of Switch Transformers: Scaling to…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>LoRA: Low-Rank Adaptation of Large Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/lora</link>
      <guid isPermaLink="true">https://research.rudrite.com/lora</guid>
      <description>Freeze the model, learn its change as two skinny matrices — 10,000× fewer trainable weights, zero added latency. A free, interactive, animated walkthrough of…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gpipe</link>
      <guid isPermaLink="true">https://research.rudrite.com/gpipe</guid>
      <description>Split a giant model across chips and pipeline micro-batches to keep them all busy. A free, interactive, animated walkthrough of GPipe: Efficient Training of…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GSPMD: General and Scalable Parallelization for ML Computation Graphs — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gspmd</link>
      <guid isPermaLink="true">https://research.rudrite.com/gspmd</guid>
      <description>Annotate a few tensors; the compiler shards the trillion-parameter rest. A free, interactive, animated walkthrough of GSPMD: General and Scalable…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Pathways: Asynchronous Distributed Dataflow for ML — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/pathways</link>
      <guid isPermaLink="true">https://research.rudrite.com/pathways</guid>
      <description>One controller, thousands of accelerators — parallel dispatch makes single-controller ML as fast as SPMD. A free, interactive, animated walkthrough of…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Ring Attention with Blockwise Transformers for Near-Infinite Context — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/ring-attention</link>
      <guid isPermaLink="true">https://research.rudrite.com/ring-attention</guid>
      <description>Shard one sequence across a ring of devices, rotate the KV blocks — context scales with device count. A free, interactive, animated walkthrough of Ring…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Efficiently Scaling Transformer Inference — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/scaling-inference</link>
      <guid isPermaLink="true">https://research.rudrite.com/scaling-inference</guid>
      <description>Chop a 540B model across a TPU pod: 29ms/token, 76% MFU, 32x longer context. A free, interactive, animated walkthrough of Efficiently Scaling Transformer…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Mooncake: A KVCache-centric Disaggregated Architecture for LLM Serving — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/mooncake</link>
      <guid isPermaLink="true">https://research.rudrite.com/mooncake</guid>
      <description>Schedule the KV cache, not the GPU: disaggregated prefill/decode serving that survives overload. A free, interactive, animated walkthrough of Mooncake: A…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Fast Inference from Transformers via Speculative Decoding — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/speculative-decoding</link>
      <guid isPermaLink="true">https://research.rudrite.com/speculative-decoding</guid>
      <description>A small model guesses ahead, the big one verifies in parallel — same output, 2–3× faster. A free, interactive, animated walkthrough of Fast Inference from…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Chain-of-Thought Prompting Elicits Reasoning in Large Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/chain-of-thought</link>
      <guid isPermaLink="true">https://research.rudrite.com/chain-of-thought</guid>
      <description>Add worked examples to the prompt — and reasoning emerges in big models, no training. A free, interactive, animated walkthrough of Chain-of-Thought Prompting…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Training language models to follow instructions with human feedback — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/instructgpt</link>
      <guid isPermaLink="true">https://research.rudrite.com/instructgpt</guid>
      <description>RLHF: align GPT-3 from human feedback — a 1.3B model beats the 175B on preference. A free, interactive, animated walkthrough of Training language models to…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Direct Preference Optimization: Your Language Model is Secretly a Reward Model — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/dpo</link>
      <guid isPermaLink="true">https://research.rudrite.com/dpo</guid>
      <description>Skip the reward model and the RL — one cross-entropy loss aligns the policy directly from preferences. A free, interactive, animated walkthrough of Direct…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/deepseekmath</link>
      <guid isPermaLink="true">https://research.rudrite.com/deepseekmath</guid>
      <description>A 7B open model hits 51.7% on MATH — by web-mining 120B math tokens and inventing GRPO. A free, interactive, animated walkthrough of DeepSeekMath: Pushing the…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Scaling LLM Test-Time Compute Optimally can be More Effective than Scaling Model Parameters — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/test-time-compute</link>
      <guid isPermaLink="true">https://research.rudrite.com/test-time-compute</guid>
      <description>Think longer on hard prompts — and let difficulty decide how to spend the compute. A free, interactive, animated walkthrough of Scaling LLM Test-Time Compute…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Constitutional AI: Harmlessness from AI Feedback — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/constitutional-ai</link>
      <guid isPermaLink="true">https://research.rudrite.com/constitutional-ai</guid>
      <description>Train a harmless, non-evasive assistant from a written constitution — zero human harm labels. A free, interactive, animated walkthrough of Constitutional AI:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DAPO: An Open-Source LLM Reinforcement Learning System at Scale — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/dapo</link>
      <guid isPermaLink="true">https://research.rudrite.com/dapo</guid>
      <description>Four named techniques turn DeepSeek-style RL into a reproducible run to AIME 50. A free, interactive, animated walkthrough of DAPO: An Open-Source LLM…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Tree of Thoughts: Deliberate Problem Solving with Large Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/tree-of-thoughts</link>
      <guid isPermaLink="true">https://research.rudrite.com/tree-of-thoughts</guid>
      <description>Wrap a frozen GPT-4 in tree search — branch, self-evaluate, prune. Game of 24: 4% to 74%. A free, interactive, animated walkthrough of Tree of Thoughts:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>ReAct: Synergizing Reasoning and Acting in Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/react</link>
      <guid isPermaLink="true">https://research.rudrite.com/react</guid>
      <description>A frozen LLM that thinks, acts, and reads results in one loop — the blueprint for every agent. A free, interactive, animated walkthrough of ReAct: Synergizing…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>FlashAttention-3: Fast and Accurate Attention with Asynchrony and Low-precision — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/flash-attention-3</link>
      <guid isPermaLink="true">https://research.rudrite.com/flash-attention-3</guid>
      <description>Rebuilds attention for Hopper — async warps + FP8 — for 740 TFLOPs/s, 1.5-2.0x over FA-2. A free, interactive, animated walkthrough of FlashAttention-3: Fast…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Transformers are SSMs: Generalized Models and Efficient Algorithms Through Structured State Space Duality — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/mamba-2</link>
      <guid isPermaLink="true">https://research.rudrite.com/mamba-2</guid>
      <description>Selective SSMs and masked attention are one structured matrix, computed two ways. A free, interactive, animated walkthrough of Transformers are SSMs:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/deepseek-v2</link>
      <guid isPermaLink="true">https://research.rudrite.com/deepseek-v2</guid>
      <description>236B MoE, 21B active per token — MLA folds the whole KV cache into one latent vector. A free, interactive, animated walkthrough of DeepSeek-V2: A Strong,…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>EAGLE: Speculative Sampling Requires Rethinking Feature Uncertainty — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/eagle</link>
      <guid isPermaLink="true">https://research.rudrite.com/eagle</guid>
      <description>Draft one layer down: autoregress on features, not tokens — 2.7–3.5× faster, losslessly. A free, interactive, animated walkthrough of EAGLE: Speculative…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/awq</link>
      <guid isPermaLink="true">https://research.rudrite.com/awq</guid>
      <description>Find the 1% of weights that matter by watching activations, then scale to protect them at INT4. A free, interactive, animated walkthrough of AWQ:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>RoFormer: Enhanced Transformer with Rotary Position Embedding — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/rope</link>
      <guid isPermaLink="true">https://research.rudrite.com/rope</guid>
      <description>Encode position by rotating Q and K, so attention sees only the relative offset m−n. A free, interactive, animated walkthrough of RoFormer: Enhanced…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/vision-transformer</link>
      <guid isPermaLink="true">https://research.rudrite.com/vision-transformer</guid>
      <description>Cut an image into 16×16 patches, call each a word, feed a plain Transformer. A free, interactive, animated walkthrough of An Image is Worth 16x16 Words:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Learning Transferable Visual Models From Natural Language Supervision — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/clip</link>
      <guid isPermaLink="true">https://research.rudrite.com/clip</guid>
      <description>Match captions to images, and you get a classifier for any concept you can name. A free, interactive, animated walkthrough of Learning Transferable Visual…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>High-Resolution Image Synthesis with Latent Diffusion Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/latent-diffusion</link>
      <guid isPermaLink="true">https://research.rudrite.com/latent-diffusion</guid>
      <description>Move diffusion into a compact latent space — cheaper, and the architecture behind Stable Diffusion. A free, interactive, animated walkthrough of…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Scalable Diffusion Models with Transformers — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/dit</link>
      <guid isPermaLink="true">https://research.rudrite.com/dit</guid>
      <description>Drop the U-Net: a plain transformer on latent patches whose quality scales with Gflops. A free, interactive, animated walkthrough of Scalable Diffusion Models…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Robust Speech Recognition via Large-Scale Weak Supervision — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/whisper</link>
      <guid isPermaLink="true">https://research.rudrite.com/whisper</guid>
      <description>680k hours of weak supervision → one Transformer that transcribes the real world, zero-shot. A free, interactive, animated walkthrough of Robust Speech…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/native-sparse-attention</link>
      <guid isPermaLink="true">https://research.rudrite.com/native-sparse-attention</guid>
      <description>Trainable, hardware-aligned sparse attention: 3 gated branches, 11.6x decode, beats dense. A free, interactive, animated walkthrough of Native Sparse…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Group Sequence Policy Optimization — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gspo</link>
      <guid isPermaLink="true">https://research.rudrite.com/gspo</guid>
      <description>Reward lands on the whole sequence — so the importance ratio should too, not per token. A free, interactive, animated walkthrough of Group Sequence Policy…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DistServe: Disaggregating Prefill and Decoding for Goodput-optimized Large Language Model Serving — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/distserve</link>
      <guid isPermaLink="true">https://research.rudrite.com/distserve</guid>
      <description>Split a request's timeline into prefill and decode GPU pools — 4.48x more requests under SLO. A free, interactive, animated walkthrough of DistServe:…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>CacheBlend: Fast Large Language Model Serving for RAG with Cached Knowledge Fusion — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/cacheblend</link>
      <guid isPermaLink="true">https://research.rudrite.com/cacheblend</guid>
      <description>Reuse every retrieved chunk's KV cache anywhere, then recompute the ~15% of tokens that stitch cross-attention back. A free, interactive, animated walkthrough…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gshard</link>
      <guid isPermaLink="true">https://research.rudrite.com/gshard</guid>
      <description>Top-2 experts per token + an SPMD compiler: a 600B model trained in 4 days. A free, interactive, animated walkthrough of GShard: Scaling Giant Models with…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gqa</link>
      <guid isPermaLink="true">https://research.rudrite.com/gqa</guid>
      <description>One dial from MQA to MHA — near-MHA quality at near-MQA decode speed, retrofitted cheaply. A free, interactive, animated walkthrough of GQA: Training…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>YaRN: Efficient Context Window Extension of Large Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/yarn</link>
      <guid isPermaLink="true">https://research.rudrite.com/yarn</guid>
      <description>Extend a RoPE model to 128k by reshaping frequencies per wavelength — for a tenth of the tuning. A free, interactive, animated walkthrough of YaRN: Efficient…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Efficient Streaming Language Models with Attention Sinks — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/streaming-llm</link>
      <guid isPermaLink="true">https://research.rudrite.com/streaming-llm</guid>
      <description>Pin 4 &quot;attention-sink&quot; tokens + a rolling window — stream 4M tokens, no fine-tuning. A free, interactive, animated walkthrough of Efficient Streaming Language…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Generative Adversarial Networks — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gan</link>
      <guid isPermaLink="true">https://research.rudrite.com/gan</guid>
      <description>Two networks duel — a forger and a detective — until the fakes pass for real. A free, interactive, animated walkthrough of Generative Adversarial Networks —…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Segment Anything — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/segment-anything</link>
      <guid isPermaLink="true">https://research.rudrite.com/segment-anything</guid>
      <description>Point at anything, get a clean mask back in milliseconds — segmentation as a foundation model. A free, interactive, animated walkthrough of Segment Anything —…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Visual Instruction Tuning — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/llava</link>
      <guid isPermaLink="true">https://research.rudrite.com/llava</guid>
      <description>A blind GPT-4 writes the lessons; one matrix turns sight into tokens — the open VLM template. A free, interactive, animated walkthrough of Visual Instruction…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>PPO vs DPO vs GRPO — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/ppo-vs-dpo-vs-grpo</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/ppo-vs-dpo-vs-grpo</guid>
      <description>Three ways to turn preferences into a better policy — a full RL loop, a single classification loss, or group-relative RL without a critic. A clear,…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>MHA vs GQA vs MLA — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/mha-vs-gqa-vs-mla</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/mha-vs-gqa-vs-mla</guid>
      <description>Three points on the attention-memory curve — how much of the KV cache you keep decides how long a context you can afford to serve. A clear, side-by-side…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GAN vs VAE vs Diffusion — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/gan-vs-vae-vs-diffusion</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/gan-vs-vae-vs-diffusion</guid>
      <description>Three ways to learn a distribution and sample from it — an adversarial game, a probabilistic autoencoder, and an iterative denoiser. A clear, side-by-side…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>FlashAttention vs FlashAttention-3 — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/flashattention-vs-flashattention-3</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/flashattention-vs-flashattention-3</guid>
      <description>The same exact-attention algorithm, rebuilt for a new generation of GPU — IO-aware tiling, then Hopper-era asynchrony and FP8. A clear, side-by-side…</description>
      <pubDate>Tue, 09 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Transformers vs Mamba — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/transformers-vs-mamba</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/transformers-vs-mamba</guid>
      <description>All-pairs attention versus a selective state-space recurrence — quadratic recall against linear-time throughput. A clear, side-by-side comparison with examples.</description>
      <pubDate>Mon, 08 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>FlashAttention vs PagedAttention — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/flashattention-vs-pagedattention</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/flashattention-vs-pagedattention</guid>
      <description>Two attention optimizations that solve different problems — and are used together, not instead of each other. A clear, side-by-side comparison with examples.</description>
      <pubDate>Mon, 08 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Dense vs Mixture-of-Experts — what's the difference? | Rudrite Research</title>
      <link>https://research.rudrite.com/compare/dense-vs-mixture-of-experts</link>
      <guid isPermaLink="true">https://research.rudrite.com/compare/dense-vs-mixture-of-experts</guid>
      <description>Activate every parameter for every token, or route each token to a few of many experts. A clear, side-by-side comparison with examples.</description>
      <pubDate>Mon, 08 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DeepSeek-V3 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/deepseek-v3</link>
      <guid isPermaLink="true">https://research.rudrite.com/deepseek-v3</guid>
      <description>A 671B mixture-of-experts that activates only 37B — via latent-KV attention and loss-free routing. A free, interactive, animated walkthrough of DeepSeek-V3 —…</description>
      <pubDate>Sun, 07 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Qwen3 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/qwen3</link>
      <guid isPermaLink="true">https://research.rudrite.com/qwen3</guid>
      <description>One family, dense and MoE — with a unified thinking / non-thinking switch. A free, interactive, animated walkthrough of Qwen3 — Qwen Team, 2025.</description>
      <pubDate>Sun, 07 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>OLMo 2 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/olmo-2</link>
      <guid isPermaLink="true">https://research.rudrite.com/olmo-2</guid>
      <description>A fully-open model, stabilized by moving the norms to the output and clamping QK. A free, interactive, animated walkthrough of OLMo 2 — Ai2, 2025.</description>
      <pubDate>Sun, 07 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>MiniMax-01 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/minimax-01</link>
      <guid isPermaLink="true">https://research.rudrite.com/minimax-01</guid>
      <description>Near-linear attention at 456B — lightning attention, with a softmax layer every eighth block. A free, interactive, animated walkthrough of MiniMax-01 —…</description>
      <pubDate>Sun, 07 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Gemma 4 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gemma-4</link>
      <guid isPermaLink="true">https://research.rudrite.com/gemma-4</guid>
      <description>Five sizes, one design — interleaved local/global sliding-window attention, now with MoE. A free, interactive, animated walkthrough of Gemma 4 — Google…</description>
      <pubDate>Sun, 07 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Attention Is All You Need — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/attention</link>
      <guid isPermaLink="true">https://research.rudrite.com/attention</guid>
      <description>The 2017 paper behind every LLM you use — watch attention decide what matters. A free, interactive, animated walkthrough of Attention Is All You Need —…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>FlashAttention — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/flash-attention</link>
      <guid isPermaLink="true">https://research.rudrite.com/flash-attention</guid>
      <description>Exact attention, made fast by never writing the big matrix to memory. A free, interactive, animated walkthrough of FlashAttention — Dao et al., NeurIPS 2022.</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>PagedAttention (vLLM) — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/paged-attention</link>
      <guid isPermaLink="true">https://research.rudrite.com/paged-attention</guid>
      <description>Serve far more requests by paging the KV cache like an operating system. A free, interactive, animated walkthrough of PagedAttention (vLLM) — Kwon et al.,…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Megatron-LM — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/megatron-lm</link>
      <guid isPermaLink="true">https://research.rudrite.com/megatron-lm</guid>
      <description>Split a model across GPUs along the matrix — and train billions of parameters. A free, interactive, animated walkthrough of Megatron-LM — Shoeybi et al.,…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>DeepSeek-R1 — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/deepseek-r1</link>
      <guid isPermaLink="true">https://research.rudrite.com/deepseek-r1</guid>
      <description>Reasoning that emerges from reinforcement learning, not imitation. A free, interactive, animated walkthrough of DeepSeek-R1 — DeepSeek-AI, 2025.</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>GPT-3: Language Models are Few-Shot Learners — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/gpt-3</link>
      <guid isPermaLink="true">https://research.rudrite.com/gpt-3</guid>
      <description>Scale a language model until it learns new tasks from a few examples. A free, interactive, animated walkthrough of GPT-3: Language Models are Few-Shot…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>ZeRO: Zero Redundancy Optimizer — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/zero</link>
      <guid isPermaLink="true">https://research.rudrite.com/zero</guid>
      <description>Partition a model across GPUs instead of replicating it — and train toward a trillion parameters. A free, interactive, animated walkthrough of ZeRO: Zero…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Mixtral of Experts — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/mixtral</link>
      <guid isPermaLink="true">https://research.rudrite.com/mixtral</guid>
      <description>Grow capacity without growing per-token cost — route each token to two of eight experts. A free, interactive, animated walkthrough of Mixtral of Experts —…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Training Compute-Optimal Large Language Models — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/chinchilla</link>
      <guid isPermaLink="true">https://research.rudrite.com/chinchilla</guid>
      <description>Given a fixed compute budget, double the model and double the data — in equal proportion. A free, interactive, animated walkthrough of Training…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Mamba: Linear-Time Sequence Modeling with Selective State Spaces — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/mamba</link>
      <guid isPermaLink="true">https://research.rudrite.com/mamba</guid>
      <description>Let a state-space model read what it's reading — and a recurrence outruns attention. A free, interactive, animated walkthrough of Mamba: Linear-Time Sequence…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
    <item>
      <title>BERT: Pre-training of Deep Bidirectional Transformers — interactive visual explainer | Rudrite Research</title>
      <link>https://research.rudrite.com/bert</link>
      <guid isPermaLink="true">https://research.rudrite.com/bert</guid>
      <description>Read the whole sentence at once — pre-train by filling in the blanks, then fine-tune anywhere. A free, interactive, animated walkthrough of BERT: Pre-training…</description>
      <pubDate>Fri, 05 Jun 2026 00:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
