<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Magenta</title>
    <description>A research project exploring the role of machine learning in the process of creating art and music.</description>
    <link>https://magenta.withgoogle.com/</link>
    <atom:link href="https://magenta.withgoogle.com/feed.xml" rel="self" type="application/rss+xml"/>
    <pubDate>Tue, 10 Mar 2026 09:01:42 -0700</pubDate>
    <lastBuildDate>Tue, 10 Mar 2026 09:01:42 -0700</lastBuildDate>
    <generator>Jekyll v4.4.1</generator>
    
      <item>
        <title>Open-sourcing The Infinite Crate DAW plugin</title>
        <description>
&lt;div align=&quot;center&quot; class=&quot;action-container&quot;&gt;
  &lt;a class=&quot;action grey&quot; href=&quot;https://github.com/magenta/the-infinite-crate&quot;&gt;
    &lt;span class=&quot;studio-icon&quot;&gt;&lt;svg viewBox=&quot;0 0 16 16&quot;&gt;&lt;path d=&quot;M7.999,0.431c-4.285,0-7.76,3.474-7.76,7.761 c0,3.428,2.223,6.337,5.307,7.363c0.388,0.071,0.53-0.168,0.53-0.374c0-0.184-0.007-0.672-0.01-1.32 c-2.159,0.469-2.614-1.04-2.614-1.04c-0.353-0.896-0.862-1.135-0.862-1.135c-0.705-0.481,0.053-0.472,0.053-0.472 c0.779,0.055,1.189,0.8,1.189,0.8c0.692,1.186,1.816,0.843,2.258,0.645c0.071-0.502,0.271-0.843,0.493-1.037 C4.86,11.425,3.049,10.76,3.049,7.786c0-0.847,0.302-1.54,0.799-2.082C3.768,5.507,3.501,4.718,3.924,3.65 c0,0,0.652-0.209,2.134,0.796C6.677,4.273,7.34,4.187,8,4.184c0.659,0.003,1.323,0.089,1.943,0.261 c1.482-1.004,2.132-0.796,2.132-0.796c0.423,1.068,0.157,1.857,0.077,2.054c0.497,0.542,0.798,1.235,0.798,2.082 c0,2.981-1.814,3.637-3.543,3.829c0.279,0.24,0.527,0.713,0.527,1.437c0,1.037-0.01,1.874-0.01,2.129 c0,0.208,0.14,0.449,0.534,0.373c3.081-1.028,5.302-3.935,5.302-7.362C15.76,3.906,12.285,0.431,7.999,0.431z&quot; /&gt;&lt;/svg&gt;
&lt;/span&gt;
    View on GitHub&lt;/a&gt;
  &lt;a class=&quot;action grey&quot; href=&quot;https://g.co/magenta/discord&quot;&gt;
    &lt;span class=&quot;studio-icon&quot;&gt;&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;&lt;svg id=&quot;Discord-Logo&quot; xmlns=&quot;http://www.w3.org/2000/svg&quot; viewBox=&quot;0 0 126.644 96&quot;&gt;&lt;defs&gt;&lt;style&gt;.cls-1{fill:#fff;}&lt;/style&gt;&lt;/defs&gt;&lt;path id=&quot;Discord-Symbol-White&quot; class=&quot;cls-1&quot; d=&quot;M81.15,0c-1.2376,2.1973-2.3489,4.4704-3.3591,6.794-9.5975-1.4396-19.3718-1.4396-28.9945,0-.985-2.3236-2.1216-4.5967-3.3591-6.794-9.0166,1.5407-17.8059,4.2431-26.1405,8.0568C2.779,32.5304-1.6914,56.3725.5312,79.8863c9.6732,7.1476,20.5083,12.603,32.0505,16.0884,2.6014-3.4854,4.8998-7.1981,6.8698-11.0623-3.738-1.3891-7.3497-3.1318-10.8098-5.1523.9092-.6567,1.7932-1.3386,2.6519-1.9953,20.281,9.547,43.7696,9.547,64.0758,0,.8587.7072,1.7427,1.3891,2.6519,1.9953-3.4601,2.0457-7.0718,3.7632-10.835,5.1776,1.97,3.8642,4.2683,7.5769,6.8698,11.0623,11.5419-3.4854,22.3769-8.9156,32.0509-16.0631,2.626-27.2771-4.496-50.9172-18.817-71.8548C98.9811,4.2684,90.1918,1.5659,81.1752.0505l-.0252-.0505ZM42.2802,65.4144c-6.2383,0-11.4159-5.6575-11.4159-12.6535s4.9755-12.6788,11.3907-12.6788,11.5169,5.708,11.4159,12.6788c-.101,6.9708-5.026,12.6535-11.3907,12.6535ZM84.3576,65.4144c-6.2637,0-11.3907-5.6575-11.3907-12.6535s4.9755-12.6788,11.3907-12.6788,11.4917,5.708,11.3906,12.6788c-.101,6.9708-5.026,12.6535-11.3906,12.6535Z&quot; /&gt;&lt;/svg&gt;&lt;/span&gt;
    Discuss on Discord&lt;/a&gt;
  &lt;a class=&quot;action grey&quot; href=&quot;https://magenta.withgoogle.com/infinite-crate&quot;&gt;
    &lt;span class=&quot;studio-icon&quot;&gt;
&lt;svg version=&quot;1.1&quot; id=&quot;Capa_1&quot; xmlns=&quot;http://www.w3.org/2000/svg&quot; xmlns:xlink=&quot;http://www.w3.org/1999/xlink&quot; x=&quot;0px&quot; y=&quot;0px&quot; viewBox=&quot;0 0 24.637 24.637&quot; style=&quot;enable-background:new 0 0 24.637 24.637;&quot; xml:space=&quot;preserve&quot;&gt;
&lt;g&gt;
     &lt;path d=&quot;M18.537,6.945H1.432C0.641,6.945,0,7.582,0,8.369v14.262c0,0.785,0.641,1.426,1.432,1.426h17.105
          c0.785,0,1.426-0.641,1.426-1.426V8.369C19.963,7.582,19.322,6.945,18.537,6.945z M6.817,8.016c0.395,0,0.712,0.318,0.712,0.713
          c0,0.393-0.317,0.713-0.712,0.713c-0.392,0-0.71-0.32-0.71-0.713C6.107,8.334,6.426,8.016,6.817,8.016z M4.639,8.016
          c0.391,0,0.71,0.318,0.71,0.713c0,0.393-0.319,0.713-0.71,0.713c-0.397,0-0.717-0.32-0.717-0.713
          C3.922,8.334,4.241,8.016,4.639,8.016z M2.494,8.016c0.396,0,0.715,0.318,0.715,0.713c0,0.393-0.318,0.713-0.715,0.713
          c-0.39,0-0.709-0.32-0.709-0.713C1.785,8.334,2.104,8.016,2.494,8.016z M18.537,22.631H1.432V10.527h17.105
          C18.537,10.527,18.537,22.631,18.537,22.631z M18.537,9.101H8.559V8.387h9.979L18.537,9.101L18.537,9.101z&quot; /&gt;
     &lt;path d=&quot;M23.209,0.58H6.102c-0.79,0-1.426,0.637-1.426,1.426v4.133h1.426V4.162h17.107v12.104h-2.483v1.426h2.483
          c0.786,0,1.428-0.641,1.428-1.426V2.006C24.637,1.217,23.995,0.58,23.209,0.58z M7.17,3.076c-0.394,0-0.711-0.316-0.711-0.711
          c0-0.398,0.317-0.713,0.711-0.713c0.393,0,0.713,0.314,0.713,0.713C7.883,2.76,7.562,3.076,7.17,3.076z M9.309,3.076
          c-0.396,0-0.713-0.316-0.713-0.711c0-0.398,0.316-0.713,0.713-0.713c0.394,0,0.714,0.314,0.714,0.713
          C10.022,2.76,9.702,3.076,9.309,3.076z M11.494,3.076c-0.396,0-0.717-0.316-0.717-0.711c0-0.398,0.32-0.713,0.717-0.713
          c0.389,0,0.707,0.314,0.707,0.713C12.201,2.76,11.883,3.076,11.494,3.076z M23.209,2.734h-9.98V2.019h9.98V2.734z&quot; /&gt;
&lt;/g&gt;
&lt;/svg&gt;
&lt;/span&gt;
    Get the plugin&lt;/a&gt;
&lt;/div&gt;
&lt;p&gt;&lt;br /&gt;&lt;/p&gt;

&lt;p&gt;Six months ago we released
&lt;a href=&quot;https://magenta.withgoogle.com/infinite-crate&quot;&gt;The Infinite Crate&lt;/a&gt;, a DAW
plugin that brings the
&lt;a href=&quot;https://magenta.withgoogle.com/lyria-realtime&quot;&gt;Lyria RealTime&lt;/a&gt; music model
into Digital Audio Workstations (DAWs) to improve the sampling workflow for
producers. Since its release it’s been used by some of our favorite artists —
including a wonderful showcase with
&lt;a href=&quot;https://daito.ws/en/&quot;&gt;Daito Manabe&lt;/a&gt; in Tokyo — and was featured as
&lt;a href=&quot;https://www.youtube.com/watch?v=BHY15gnMwtc&amp;amp;t=841s&quot;&gt;an exciting new music tool&lt;/a&gt;
at &lt;a href=&quot;https://www.namm.org/&quot;&gt;NAMM&lt;/a&gt; 2026.&lt;/p&gt;

&lt;p&gt;Today we’re fully open-sourcing the DAW plugin for developers to fork, modify,
and make their own under the permissive
&lt;a href=&quot;https://www.apache.org/licenses/LICENSE-2.0&quot;&gt;Apache 2.0 license&lt;/a&gt;.&lt;/p&gt;

&lt;p align=&quot;center&quot;&gt;
  &lt;img src=&quot;/assets/oss-infinite-crate/oss_infinite_crate.png&quot; alt=&quot;Plugin interface&quot; /&gt;
&lt;/p&gt;

&lt;p&gt;The VST was born out of discussions and studio collaborations with musicians
and producers from around the world. Many were intrigued by music models as
creative partners but needed deeper integration into the tools they know and
trust — Ableton, Logic, and other DAWs that support VST3/AU plugins. Bridging
this gap simplifies audio routing and MIDI-mapping for studio recording and
live performance, allowing musicians to focus on what matters: the music.&lt;/p&gt;

&lt;p&gt;We architected the plugin using React/TypeScript for the UI layer and JUCE/C++
for DAW connection, audio processing, and WebSocket audio streaming from the
Gemini/Lyria API. This allowed us to rapidly iterate on the frontend using
hot-reload (Shadcn/Tailwind), while ensuring latency-sensitive operations
(audio streaming and playback) happen in a compiled and unmanaged language with
a tight clock. State is synced between TypeScript and C++ using Zustand’s
state management and nlohmann::json.&lt;/p&gt;

&lt;p align=&quot;center&quot;&gt;
  &lt;img src=&quot;/assets/oss-infinite-crate/plugin_architecture.png&quot; alt=&quot;Plugin architecture&quot; /&gt;
&lt;/p&gt;
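
&lt;p&gt;As a rough illustration of the TypeScript-to-C++ split described above, here is a
minimal sketch of mirroring UI state from a Zustand store into the native layer. The
window.__juceBridge callback and the JSON message shape are hypothetical stand-ins for
the plugin’s actual bridge, so treat this as an illustration rather than the plugin’s
real code.&lt;/p&gt;

&lt;div class=&quot;language-typescript highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;// Minimal sketch: mirror UI state changes into the native (JUCE/C++) layer.
// window.__juceBridge is a hypothetical callback registered by the C++ side;
// the real plugin may use a different bridge and message schema.
import { create } from &quot;zustand&quot;;

interface PluginState {
  prompt: string;
  temperature: number;
  setPrompt: (prompt: string) =&amp;gt; void;
  setTemperature: (temperature: number) =&amp;gt; void;
}

declare global {
  interface Window {
    __juceBridge?: (json: string) =&amp;gt; void;
  }
}

export const usePluginStore = create&amp;lt;PluginState&amp;gt;((set) =&amp;gt; ({
  prompt: &quot;lo-fi hip hop&quot;,
  temperature: 1.0,
  setPrompt: (prompt) =&amp;gt; set({ prompt }),
  setTemperature: (temperature) =&amp;gt; set({ temperature }),
}));

// Serialize every state change to JSON and hand it to the native side,
// which can parse it with nlohmann::json and update the audio engine.
usePluginStore.subscribe((state) =&amp;gt; {
  window.__juceBridge?.(
    JSON.stringify({ prompt: state.prompt, temperature: state.temperature })
  );
});
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;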

&lt;p&gt;The plugin is a functional interface that exposes most of the controls
available on the Lyria RealTime API to the React frontend and feeds the
resulting audio stream into the DAW. Developers can fork the plugin and build
creative interfaces and visualizations for the API (like
&lt;a href=&quot;https://magenta.withgoogle.com/spacedj-announce&quot;&gt;Space DJ&lt;/a&gt;,
&lt;a href=&quot;https://mididj-dot-envisioning-studio.appspot.com/&quot;&gt;MIDI DJ&lt;/a&gt;, or
&lt;a href=&quot;https://x.com/poetengineer__/status/1944812105699356984?s=20&quot;&gt;creative controls&lt;/a&gt;)
directly in the DAW by spinning up the Vite server. Because
the frontend uses a standard set of web frameworks it’s easy to explore new
interfaces using AI-assisted coding tools like Gemini and
&lt;a href=&quot;https://antigravity.google/&quot;&gt;Antigravity&lt;/a&gt;.&lt;/p&gt;
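
&lt;p&gt;For example, a custom control can be as small as one component wired into the
plugin’s store. The component below is a hypothetical sketch that builds on the state
example above, not code from the actual plugin:&lt;/p&gt;

&lt;div class=&quot;language-typescript highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;// Hypothetical custom control: a single slider wired into the plugin store
// from the earlier state-sync sketch. The module path and field names are
// illustrative, not the plugin&apos;s real ones.
import React from &quot;react&quot;;
import { usePluginStore } from &quot;./pluginStore&quot;; // hypothetical module path

export function TemperatureKnob() {
  const temperature = usePluginStore((s) =&amp;gt; s.temperature);
  const setTemperature = usePluginStore((s) =&amp;gt; s.setTemperature);
  return (
    &amp;lt;input
      type=&quot;range&quot;
      min={0}
      max={2}
      step={0.05}
      value={temperature}
      onChange={(e) =&amp;gt; setTemperature(Number(e.target.value))}
    /&amp;gt;
  );
}
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;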

&lt;h2 id=&quot;looking-ahead&quot;&gt;Looking ahead&lt;/h2&gt;

&lt;p&gt;In the near term, we hope to update the plugin to support on-device inference
of the &lt;a href=&quot;https://magenta.withgoogle.com/magenta-realtime&quot;&gt;Magenta RealTime&lt;/a&gt;
open-weights model for offline use. In the long term, we hope to support
future music models with improved controls, such as audio and MIDI input.&lt;/p&gt;

&lt;p&gt;We hope this open-source plugin can support, and be built together with, the growing
community of music makers using machine learning as part of their creative
process.&lt;/p&gt;

&lt;p&gt;Join the discussion on our
&lt;a href=&quot;https://g.co/magenta/discord&quot;&gt;Discord&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id=&quot;acknowledgments&quot;&gt;Acknowledgments&lt;/h2&gt;
&lt;p&gt;We thank: Spencer Salazar for his talk on prototyping DAW plugins in web
technologies at ADC 2020, JUCE for implementing a C++ to Web/JS bridge in
JUCE 8,
Tommy Cappel for rigorous testing,
Alberto Lalama and Joyce Xie for their work on the API,
Nikhil Bhanu for his work on the Windows build,
and the DeepMind research team that contributed to Lyria RealTime.&lt;/p&gt;
</description>
        <pubDate>Mon, 09 Mar 2026 12:00:00 -0700</pubDate>
        <link>https://magenta.withgoogle.com/oss-infinite-crate</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/oss-infinite-crate</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Lyria Camera: Soundtrack your life</title>
        <description>&lt;p&gt;Today we’re launching &lt;strong&gt;Lyria Camera&lt;/strong&gt;, an app that uses
&lt;a href=&quot;https://deepmind.google/models/lyria/lyria-realtime/&quot;&gt;Lyria RealTime&lt;/a&gt; to make
music with your camera. By combining Gemini’s image understanding and the
&lt;a href=&quot;https://ai.google.dev/gemini-api/docs/music-generation&quot;&gt;Lyria RealTime API&lt;/a&gt;,
Lyria Camera generates a musical score that adapts to your environment on the
fly.&lt;/p&gt;

&lt;p&gt;It works by translating the visual scene into &lt;strong&gt;musical descriptors via
Gemini&lt;/strong&gt;, producing prompts like &lt;em&gt;Reflective piano, cityscape calm&lt;/em&gt;. The Lyria
RealTime API uses these descriptors as prompts to generate a continuous stream
of music. As you move about your world, the prompts and the
music they create will evolve over time.&lt;/p&gt;
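
&lt;p&gt;As a minimal sketch of that loop, the cycle of capturing a frame, describing it, and
steering the music looks roughly like the code below. Here describeFrameWithGemini and
steerLyria are hypothetical placeholders for the Gemini and Lyria RealTime API calls,
not the app’s actual functions.&lt;/p&gt;

&lt;div class=&quot;language-typescript highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;// Minimal sketch of the Lyria Camera loop: grab a frame, ask Gemini for
// musical descriptors, then steer the Lyria RealTime stream with them.
// describeFrameWithGemini and steerLyria are hypothetical placeholders;
// see the Gemini API docs for the actual interfaces.

async function describeFrameWithGemini(frameDataUrl: string): Promise&amp;lt;string[]&amp;gt; {
  // e.g. returns [&quot;reflective piano&quot;, &quot;cityscape calm&quot;]
  throw new Error(&quot;wire this to a Gemini image-understanding call&quot;);
}

async function steerLyria(prompts: string[]): Promise&amp;lt;void&amp;gt; {
  throw new Error(&quot;wire this to a Lyria RealTime weighted-prompt update&quot;);
}

function captureFrame(video: HTMLVideoElement): string {
  const canvas = document.createElement(&quot;canvas&quot;);
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  canvas.getContext(&quot;2d&quot;)?.drawImage(video, 0, 0);
  return canvas.toDataURL(&quot;image/jpeg&quot;); // base64-encoded frame for Gemini
}

export async function soundtrackLoop(video: HTMLVideoElement): Promise&amp;lt;void&amp;gt; {
  // Every few seconds: frame in, descriptors out, prompts updated.
  // The music keeps streaming continuously; only the steering changes.
  while (true) {
    const descriptors = await describeFrameWithGemini(captureFrame(video));
    await steerLyria(descriptors);
    await new Promise((resolve) =&amp;gt; setTimeout(resolve, 5000));
  }
}
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;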

&lt;p&gt;&lt;a href=&quot;https://aistudio.google.com/apps/bundled/lyria_camera?fullscreenApplet=true&amp;amp;showPreview=true&amp;amp;showAssistant=true&quot;&gt;Try Lyria Camera now&lt;/a&gt;
or remix it on
&lt;a href=&quot;https://aistudio.google.com/apps/bundled/lyria_camera?showPreview=true&amp;amp;showAssistant=true&quot;&gt;AI Studio&lt;/a&gt;.&lt;/p&gt;

&lt;style&gt;
  /* fallback */
  @font-face {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-style: normal;
    font-weight: 400;
    src: url(https://fonts.gstatic.com/s/materialsymbolsoutlined/v250/kJF1BvYX7BgnkSrUwT8OhrdQw4oELdPIeeII9v6oDMzByHX9rA6RzaxHMPdY43zj-jCxv3fzvRNU22ZXGJpEpjC_1v-p_4MrImHCIJIZrDCvHOejbd5zrDAt.woff2)
      format(&quot;woff2&quot;);
  }
  .material-symbols-outlined {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-weight: normal;
    font-style: normal;
    font-size: 36px;
    line-height: 1;
    letter-spacing: normal;
    text-transform: none;
    display: inline-block;
    white-space: nowrap;
    word-wrap: normal;
    direction: ltr;
    -webkit-font-feature-settings: &quot;liga&quot;;
    -webkit-font-smoothing: antialiased;
    color: black;
    border-radius: 100%;
    padding: 0.2em;
    background-color: rgb(241, 241, 241);
    transition: 0.1s;
  }
  .material-symbols-outlined:hover {
    background-color: rgb(160, 160, 160);
    color: white;
    transition: 0.1s;
    cursor: pointer;
  }
  .control-overlay {
    position: absolute;
    display: flex;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0);
    /* Align items to the end of the flex container (right) */
    justify-content: flex-end;
    /* Align items to the end of the cross axis (bottom) */
    align-items: flex-end;
    font-family: sans-serif;
    font-size: 1.2rem;
  }
  .video-container {
    position: relative;
    display: flex;
    justify-content: center;
  }
&lt;/style&gt;

&lt;div class=&quot;video-container&quot; id=&quot;video-container-lyria_camera_video&quot; style=&quot;width:100%;&quot;&gt;
  &lt;video height=&quot;100%&quot; src=&quot;/assets/lyria_camera/lyria_camera_video.mp4&quot; id=&quot;video-lyria_camera_video&quot; muted=&quot;&quot; loop=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
  &lt;div class=&quot;control-overlay&quot;&gt;
    &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;unmute-button-lyria_camera_video&quot;&gt;volume_off&lt;/span&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    let videoContainer = document.getElementById(
      &quot;video-container-lyria_camera_video&quot;
    );
    let unmuteButton = document.getElementById(
      &quot;unmute-button-lyria_camera_video&quot;
    );
    let video = document.getElementById(&quot;video-lyria_camera_video&quot;);

    unmuteButton.addEventListener(&quot;click&quot;, function () {
      video.muted = !video.muted;
      if (video.muted) {
        unmuteButton.textContent = &quot;volume_off&quot;;
      } else {
        unmuteButton.textContent = &quot;volume_up&quot;;
      }
    });

    if (&quot;IntersectionObserver&quot; in window) {
      const observer = new IntersectionObserver(
        (entries) =&gt; {
          entries.forEach((entry) =&gt; {
            if (entry.isIntersecting) {
              if (unmuteButton.textContent == &quot;volume_up&quot;) {
                video.muted = false;
              }
            } else {
              video.muted = true;
            }
          });
        },
        {
          root: null,
          rootMargin: &quot;0px&quot;,
          threshold: 0.1,
        }
      );

      observer.observe(videoContainer);
    }
  });
&lt;/script&gt;

&lt;h2 id=&quot;the-world-is-your-instrument&quot;&gt;The world is your instrument&lt;/h2&gt;

&lt;ul&gt;
  &lt;li&gt;
    &lt;p&gt;&lt;strong&gt;Reward your curiosity:&lt;/strong&gt; When you’re using Lyria Camera, every image is a
new instrument. You can find songs in your sketchbook, at the laundromat or
in your breakfast cereal. Film around and see what you can find.&lt;/p&gt;
  &lt;/li&gt;
  &lt;li&gt;
    &lt;p&gt;&lt;strong&gt;DJ your commute:&lt;/strong&gt; Point your camera out the train window or mount it on
your dashboard. Lyria Camera responds to the shifting scenery—the rhythm of
passing streetlights or the calm of an open road—creating a drive-time score
that matches your journey beat for beat.&lt;/p&gt;
  &lt;/li&gt;
  &lt;li&gt;
    &lt;p&gt;&lt;strong&gt;Score your screen:&lt;/strong&gt; On desktop, try the “Share Screen” feature to use a
browser tab instead of your camera. Actually, any app on your computer can
be used as a video feed. Try it while you’re working or gaming for a
tailor-made soundtrack.&lt;/p&gt;
  &lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id=&quot;how-it-works&quot;&gt;How it Works&lt;/h2&gt;

&lt;p&gt;Lyria Camera brings together several AI capabilities to create a seamless
audiovisual feedback loop.&lt;/p&gt;

&lt;ul&gt;
  &lt;li&gt;
    &lt;p&gt;&lt;strong&gt;Multimodal Prompting:&lt;/strong&gt; This is the bridge between sight and sound. We use
Gemini to analyze your camera feed, translating visual cues into rich
textual descriptions. These descriptions act as musical instructions,
telling Lyria exactly how to interpret and “play” what you’re seeing.&lt;/p&gt;
  &lt;/li&gt;
  &lt;li&gt;
    &lt;p&gt;&lt;strong&gt;Continuous &amp;amp; Steerable Generation:&lt;/strong&gt; The Lyria RealTime API is designed
for continuous music generation. Instead of generating a static song, it
creates an endless stream of audio that you can “steer” in different
directions. This allows the music to morph smoothly from one mood to another
without ever stopping or skipping a beat.&lt;/p&gt;
  &lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id=&quot;what-will-you-build&quot;&gt;What will you build?&lt;/h2&gt;

&lt;p&gt;Lyria Camera is a great companion for a walk or a drive, and it’s just one thing
you can do with the
&lt;a href=&quot;https://ai.google.dev/gemini-api/docs/music-generation&quot;&gt;Lyria RealTime API&lt;/a&gt;. We
built this app to demonstrate the possibilities of continuous, steerable music
generation, but the real potential lies in what comes next.&lt;/p&gt;

&lt;p&gt;You can
&lt;a href=&quot;https://aistudio.google.com/apps/bundled/lyria_camera?fullscreenApplet=true&amp;amp;showPreview=true&amp;amp;showAssistant=true&quot;&gt;try Lyria Camera&lt;/a&gt;
on your phone or desktop today. For developers ready to push the boundaries
further, the Lyria RealTime API can help you build the next generation of music
experiences.&lt;/p&gt;
</description>
        <pubDate>Wed, 03 Dec 2025 12:00:00 -0800</pubDate>
        <link>https://magenta.withgoogle.com/lyria-camera-announce</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/lyria-camera-announce</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Space DJ: Navigating a Musical Universe</title>
        <description>&lt;p&gt;Today, we’re excited to launch Space DJ, a web application from Magenta that
turns music exploration into an interactive journey through a constellation of
sounds. You pilot a spaceship through a galaxy where each star represents a
musical genre. As you navigate this universe, Space DJ uses the
&lt;a href=&quot;https://magenta.withgoogle.com/lyria-realtime&quot;&gt;Lyria RealTime API&lt;/a&gt; to generate
a continuous stream of music that reflects your position and selections in
real-time.&lt;/p&gt;

&lt;p&gt;We used the &lt;a href=&quot;https://cloud.google.com/blog/products/ai-machine-learning/ai-studio-to-cloud-run-and-cloud-run-mcp-server&quot;&gt;deploy app feature&lt;/a&gt; in AI Studio to make this available to everyone!&lt;br /&gt;
&lt;strong&gt;&lt;a href=&quot;https://spacedj-363947264390.us-west1.run.app&quot;&gt;Try Space DJ now&lt;/a&gt;&lt;/strong&gt;, or
&lt;strong&gt;view and fork the source code in &lt;a href=&quot;https://aistudio.google.com/apps/bundled/spacedj&quot;&gt;AI Studio&lt;/a&gt;&lt;/strong&gt;.&lt;/p&gt;

&lt;style&gt;
  /* fallback */
  @font-face {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-style: normal;
    font-weight: 400;
    src: url(https://fonts.gstatic.com/s/materialsymbolsoutlined/v250/kJF1BvYX7BgnkSrUwT8OhrdQw4oELdPIeeII9v6oDMzByHX9rA6RzaxHMPdY43zj-jCxv3fzvRNU22ZXGJpEpjC_1v-p_4MrImHCIJIZrDCvHOejbd5zrDAt.woff2)
      format(&quot;woff2&quot;);
  }
  .material-symbols-outlined {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-weight: normal;
    font-style: normal;
    font-size: 36px;
    line-height: 1;
    letter-spacing: normal;
    text-transform: none;
    display: inline-block;
    white-space: nowrap;
    word-wrap: normal;
    direction: ltr;
    -webkit-font-feature-settings: &quot;liga&quot;;
    -webkit-font-smoothing: antialiased;
    color: black;
    border-radius: 100%;
    padding: 0.2em;
    background-color: rgb(241, 241, 241);
    transition: 0.1s;
  }
  .material-symbols-outlined:hover {
    background-color: rgb(160, 160, 160);
    color: white;
    transition: 0.1s;
    cursor: pointer;
  }
  .control-overlay {
    position: absolute;
    display: flex;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0);
    /* Align items to the end of the flex container (right) */
    justify-content: flex-end;
    /* Align items to the end of the cross axis (bottom) */
    align-items: flex-end;
    font-family: sans-serif;
    font-size: 1.2rem;
  }
  .video-container {
    position: relative;
    display: flex;
    justify-content: center;
  }
&lt;/style&gt;

&lt;div class=&quot;video-container&quot; id=&quot;video-container-spacedj&quot; style=&quot;width:100%;&quot;&gt;
  &lt;video height=&quot;100%&quot; src=&quot;/assets/spacedj/spacedj-video.mp4&quot; id=&quot;video-spacedj&quot; muted=&quot;&quot; loop=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
  &lt;div class=&quot;control-overlay&quot;&gt;
    &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;unmute-button-spacedj&quot;&gt;volume_off&lt;/span&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    let videoContainer = document.getElementById(
      &quot;video-container-spacedj&quot;
    );
    let unmuteButton = document.getElementById(
      &quot;unmute-button-spacedj&quot;
    );
    let video = document.getElementById(&quot;video-spacedj&quot;);

    unmuteButton.addEventListener(&quot;click&quot;, function () {
      video.muted = !video.muted;
      if (video.muted) {
        unmuteButton.textContent = &quot;volume_off&quot;;
      } else {
        unmuteButton.textContent = &quot;volume_up&quot;;
      }
    });

    if (&quot;IntersectionObserver&quot; in window) {
      const observer = new IntersectionObserver(
        (entries) =&gt; {
          entries.forEach((entry) =&gt; {
            if (entry.isIntersecting) {
              if (unmuteButton.textContent == &quot;volume_up&quot;) {
                video.muted = false;
              }
            } else {
              video.muted = true;
            }
          });
        },
        {
          root: null,
          rootMargin: &quot;0px&quot;,
          threshold: 0.1,
        }
      );

      observer.observe(videoContainer);
    }
  });
&lt;/script&gt;

&lt;h2 id=&quot;fly-through-music&quot;&gt;Fly Through Music&lt;/h2&gt;

&lt;ul&gt;
  &lt;li&gt;&lt;strong&gt;Explore a Musical Universe:&lt;/strong&gt; Fly through a star constellation where each
star is labeled with a music genre. This galaxy is a 3D projection of genre
embeddings.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Generate Music in Real-Time:&lt;/strong&gt; As you fly, the stars close to the
spaceship light up and influence the music. Clicking on a star or a point in
space anchors your selection. The Lyria Realtime model blends the prompts of
nearby genres into a unique musical mashup that evolves dynamically as you
move.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Uncover Hidden Connections:&lt;/strong&gt; Similar genres appear close together in the
3D space. You can also enable “High-Dimensional Neighbors” to find genres
that are semantically similar in the original high-dimensional embedding
space, even if they aren’t visual neighbors in the projection.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Engage Auto-Pilot:&lt;/strong&gt; Randomly drift through space for an ever-changing,
generative soundscape.&lt;/li&gt;
&lt;/ul&gt;

&lt;h2 id=&quot;how-it-works&quot;&gt;How it Works&lt;/h2&gt;

&lt;p&gt;Space DJ combines several technologies to create an immersive experience:&lt;/p&gt;

&lt;ul&gt;
  &lt;li&gt;&lt;strong&gt;Genre Embeddings:&lt;/strong&gt; We start with text prompts for 300 musical genres
drawn from a dataset of 1,000 genres. Each prompt is converted into a rich numerical
representation (embedding) using the open-source
&lt;a href=&quot;https://colab.research.google.com/github/magenta/magenta-realtime/blob/main/notebooks/Magenta_RT_Demo.ipynb&quot;&gt;MagentaRT&lt;/a&gt;
model’s MusicCoCa embedder. These 768-dimensional embeddings are then
reduced to 128 dimensions using
&lt;a href=&quot;https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html&quot;&gt;Principal Component Analysis&lt;/a&gt;
for efficiency.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;3D Projection:&lt;/strong&gt; To render the embeddings in 3D, we use Uniform Manifold
Approximation and Projection
(&lt;a href=&quot;https://pair-code.github.io/understanding-umap/&quot;&gt;UMAP&lt;/a&gt;), an algorithm that
projects the data into 3D space while trying to preserve its
high-dimensional structure. You can tweak UMAP parameters in the settings
for different constellation shapes.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Interactive Rendering:&lt;/strong&gt; The 3D space, spaceship, and stars are rendered
in your browser using &lt;a href=&quot;https://threejs.org/&quot;&gt;three.js&lt;/a&gt;. You can select how many
stars to create and whether to randomize the selection.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Real-Time Audio Synthesis:&lt;/strong&gt; Your interactions within the 3D space are
translated into a set of weighted text prompts (e.g., Deep House: 0.7,
Ambient Techno: 0.3) based on proximity, as shown in the sketch after this
list. These prompts are sent to the
&lt;a href=&quot;https://ai.google.dev/gemini-api/docs/music-generation&quot;&gt;Lyria RealTime API&lt;/a&gt;,
which synthesizes the music you hear, responding instantly to the
spaceship’s position.&lt;/li&gt;
  &lt;li&gt;&lt;strong&gt;Development and Deployment:&lt;/strong&gt; We used AI Studio to develop the applet
through its interactive code editor. We leveraged AI Studio’s Cloud Run
integration to deploy the application. This approach simplifies the
deployment process and helps protect the Gemini API key by securely proxying
requests to the Lyria RealTime API.&lt;/li&gt;
&lt;/ul&gt;
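
&lt;p&gt;As a rough sketch of the proximity weighting described above, here is one way
distances to nearby stars could be turned into normalized prompt weights. It assumes
plain Euclidean distance in the projected 3D space, which may differ from the app’s
exact weighting scheme.&lt;/p&gt;

&lt;div class=&quot;language-typescript highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;// Minimal sketch: convert the ship&apos;s distance to nearby genre stars into
// normalized prompt weights for the Lyria RealTime API. Assumes simple
// Euclidean distance in the projected 3D space; the app&apos;s real weighting
// scheme may differ.
interface GenreStar {
  name: string;
  position: [number, number, number];
}

export function weightedPrompts(
  ship: [number, number, number],
  stars: GenreStar[],
  maxPrompts = 3
): { text: string; weight: number }[] {
  const scored = stars
    .map((star) =&amp;gt; {
      const d = Math.hypot(
        star.position[0] - ship[0],
        star.position[1] - ship[1],
        star.position[2] - ship[2]
      );
      return { text: star.name, weight: 1 / (1 + d) }; // closer = heavier
    })
    .sort((a, b) =&amp;gt; b.weight - a.weight)
    .slice(0, maxPrompts);

  const total = scored.reduce((sum, p) =&amp;gt; sum + p.weight, 0);
  return scored.map((p) =&amp;gt; ({ text: p.text, weight: p.weight / total }));
}

// e.g. weightedPrompts([0, 0, 0], stars) might yield something like
// [{ text: &quot;Deep House&quot;, weight: 0.7 }, { text: &quot;Ambient Techno&quot;, weight: 0.3 }]
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;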

&lt;h2 id=&quot;a-new-frontier-for-musical-interaction&quot;&gt;A New Frontier for Musical Interaction&lt;/h2&gt;

&lt;p&gt;Space DJ is an exploration into new ways of interacting with generative AI
models for music. We hope to inspire new forms of musical expression and
discovery.&lt;/p&gt;

&lt;p&gt;Ready to take flight?
&lt;strong&gt;&lt;a href=&quot;https://spacedj-363947264390.us-west1.run.app&quot;&gt;Try Space DJ Now!&lt;/a&gt;&lt;/strong&gt;&lt;/p&gt;
</description>
        <pubDate>Mon, 03 Nov 2025 12:00:00 -0800</pubDate>
        <link>https://magenta.withgoogle.com/spacedj-announce</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/spacedj-announce</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Lyria RealTime VST: The Infinite Crate</title>
        <description>&lt;style&gt;
  .crate-cover-container p {
    display: flex;
    justify-content: center;
    align-items: center;
  }
&lt;/style&gt;

&lt;table align=&quot;center&quot; class=&quot;overview&quot;&gt;
  &lt;tbody&gt;&lt;tr&gt;
    &lt;td&gt;🎵&lt;a href=&quot;https://magenta.withgoogle.com/infinite-crate&quot;&gt;Get the plugin&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;📖 &lt;a href=&quot;https://g.co/magenta/lyria-realtime&quot;&gt;Learn more&lt;/a&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;

&lt;p&gt;&lt;strong&gt;Live Generative Music in your DAW&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Today, we’re happy to share &lt;a href=&quot;https://magenta.withgoogle.com/infinite-crate&quot;&gt;&lt;em&gt;The Infinite Crate&lt;/em&gt;&lt;/a&gt;, a DAW plugin prototype that integrates the &lt;a href=&quot;https://g.co/magenta/lyria-realtime&quot;&gt;Lyria RealTime API&lt;/a&gt; directly into your favorite music software. Use text prompts to steer a continuously evolving stream of music and feed the audio directly into your DAW for sampling, live performance, or a backing track to jam with.&lt;/p&gt;

&lt;style&gt;
  /* fallback */
  @font-face {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-style: normal;
    font-weight: 400;
    src: url(https://fonts.gstatic.com/s/materialsymbolsoutlined/v250/kJF1BvYX7BgnkSrUwT8OhrdQw4oELdPIeeII9v6oDMzByHX9rA6RzaxHMPdY43zj-jCxv3fzvRNU22ZXGJpEpjC_1v-p_4MrImHCIJIZrDCvHOejbd5zrDAt.woff2)
      format(&quot;woff2&quot;);
  }
  .material-symbols-outlined {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-weight: normal;
    font-style: normal;
    font-size: 36px;
    line-height: 1;
    letter-spacing: normal;
    text-transform: none;
    display: inline-block;
    white-space: nowrap;
    word-wrap: normal;
    direction: ltr;
    -webkit-font-feature-settings: &quot;liga&quot;;
    -webkit-font-smoothing: antialiased;
    color: black;
    border-radius: 100%;
    padding: 0.2em;
    background-color: rgb(241, 241, 241);
    transition: 0.1s;
  }
  .material-symbols-outlined:hover {
    background-color: rgb(160, 160, 160);
    color: white;
    transition: 0.1s;
    cursor: pointer;
  }
  .control-overlay {
    position: absolute;
    display: flex;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0);
    /* Align items to the end of the flex container (right) */
    justify-content: flex-end;
    /* Align items to the end of the cross axis (bottom) */
    align-items: flex-end;
    font-family: sans-serif;
    font-size: 1.2rem;
  }
  .video-container {
    position: relative;
    display: flex;
    justify-content: center;
  }
&lt;/style&gt;

&lt;div class=&quot;video-container&quot; id=&quot;video-container-infinitecrate&quot; style=&quot;width:100%;&quot;&gt;
  &lt;video height=&quot;100%&quot; src=&quot;/assets/infinite-crate/hero_loop.mp4&quot; id=&quot;video-infinitecrate&quot; muted=&quot;&quot; loop=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
  &lt;div class=&quot;control-overlay&quot;&gt;
    &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;unmute-button-infinitecrate&quot;&gt;volume_off&lt;/span&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    let videoContainer = document.getElementById(
      &quot;video-container-infinitecrate&quot;
    );
    let unmuteButton = document.getElementById(
      &quot;unmute-button-infinitecrate&quot;
    );
    let video = document.getElementById(&quot;video-infinitecrate&quot;);

    unmuteButton.addEventListener(&quot;click&quot;, function () {
      video.muted = !video.muted;
      if (video.muted) {
        unmuteButton.textContent = &quot;volume_off&quot;;
      } else {
        unmuteButton.textContent = &quot;volume_up&quot;;
      }
    });

    if (&quot;IntersectionObserver&quot; in window) {
      const observer = new IntersectionObserver(
        (entries) =&gt; {
          entries.forEach((entry) =&gt; {
            if (entry.isIntersecting) {
              if (unmuteButton.textContent == &quot;volume_up&quot;) {
                video.muted = false;
              }
            } else {
              video.muted = true;
            }
          });
        },
        {
          root: null,
          rootMargin: &quot;0px&quot;,
          threshold: 0.1,
        }
      );

      observer.observe(videoContainer);
    }
  });
&lt;/script&gt;

&lt;p&gt;&lt;br /&gt;&lt;/p&gt;

&lt;p&gt;Integrating generative models with existing creative workflows has always been
an important part of Magenta’s mission, as it allows people more control and
agency in how they use these models in their own practice.
Our previous experiments with plugins, including
&lt;a href=&quot;https://g.co/magenta/studio&quot;&gt;Magenta Studio&lt;/a&gt; for manipulating MIDI clips and
&lt;a href=&quot;https://g.co/magenta/ddsp-vst&quot;&gt;DDSP VST&lt;/a&gt; for realtime audio-to-audio
transformations, have over a million downloads combined and have validated for
us the value of making these tools creatively accessible.&lt;/p&gt;

&lt;p&gt;We hope The Infinite Crate will be a welcome addition to this lineup.
We were inspired to create it through our collaborations with musicians such as
&lt;a href=&quot;https://www.youtube.com/watch?v=IUQW5LgBZvQ&quot;&gt;Jacob Collier&lt;/a&gt; and
&lt;a href=&quot;https://www.youtube.com/watch?v=thAhd82XnMc&quot;&gt;Toro y Moi&lt;/a&gt;, where we saw the
potential for integrating capabilities similar to
&lt;a href=&quot;https://labs.google/fx/tools/music-fx-dj&quot;&gt;MusicFX DJ&lt;/a&gt; more directly into studio and live
performance workflows.&lt;/p&gt;

&lt;p&gt;The Infinite Crate is cross-platform, available for both Mac and Windows, as a
VST3 plugin, an AU component, and a standalone app.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Looking ahead&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;Lyria RealTime cannot run locally on consumer hardware, so the plugin requires an &lt;a href=&quot;https://aistudio.google.com/apikey&quot;&gt;API key (free for Lyria RealTime)&lt;/a&gt; and internet access.
We’re excited to explore complementing this approach with more efficient variants that can run locally on consumer hardware such as our recently released open model &lt;a href=&quot;https://magenta.withgoogle.com/magenta-realtime&quot;&gt;Magenta RealTime&lt;/a&gt;, so stay tuned!&lt;/p&gt;
</description>
        <pubDate>Wed, 09 Jul 2025 07:00:01 -0700</pubDate>
        <link>https://magenta.withgoogle.com/infinite-crate-announce</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/infinite-crate-announce</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Magenta RealTime: An Open-Weights Live Music Model</title>
        <description>&lt;style&gt;
  table tr.wrap {
    display: flex;
    flex-direction: row;
    flex-wrap: wrap;
  }
  table tr.wrap &gt; td {
    display: block;
    flex: 1;
  }
  td {text-align: center !important}
  .from {background-color: #d3d3d3;}
  img.inline {
    vertical-align: middle;
    display: inline-block;
    max-height: 16px;
    width: auto !important;
    margin-right: 6px;
  }
  img.centered {
    max-width: 90%;
    margin: auto;
  }
  video {
    max-width: 100% !important;
  }
&lt;/style&gt;

&lt;h1 id=&quot;magenta-realtime&quot;&gt;Magenta RealTime&lt;/h1&gt;

&lt;p&gt;Today, we’re happy to share a research preview of Magenta RealTime (Magenta RT), an open-weights live music model that allows you to interactively create, control and perform music in the moment.&lt;/p&gt;

&lt;table align=&quot;center&quot; class=&quot;overview&quot;&gt;
  &lt;tbody&gt;&lt;tr&gt;
    &lt;td&gt;&lt;img alt=&quot;&quot; src=&quot;/assets/magenta_realtime/colab.jpg&quot; class=&quot;inline&quot; /&gt;&lt;a href=&quot;https://colab.research.google.com/github/magenta/magenta-realtime/blob/main/notebooks/Magenta_RT_Demo.ipynb&quot;&gt;Colab Demo&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;📝&lt;a href=&quot;https://arxiv.org/abs/2508.04651&quot;&gt;Paper&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;&lt;img alt=&quot;&quot; src=&quot;/assets/magenta_realtime/github.png&quot; class=&quot;inline&quot; /&gt;&lt;a href=&quot;https://github.com/magenta/magenta-realtime&quot;&gt;GitHub Code&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;&lt;img alt=&quot;&quot; src=&quot;/assets/magenta_realtime/hf-logo.png&quot; class=&quot;inline&quot; /&gt;&lt;a href=&quot;https://huggingface.co/google/magenta-realtime&quot;&gt;Model Card&lt;/a&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;

&lt;p&gt;Magenta RT is the latest in a series of models and applications developed as part of the Magenta Project. It is the open-weights cousin of &lt;a href=&quot;http://g.co/magenta/lyria-realtime&quot;&gt;Lyria RealTime&lt;/a&gt;, the real-time generative music model powering &lt;a href=&quot;https://labs.google/fx/tools/music-fx-dj&quot;&gt;Music FX DJ&lt;/a&gt; and the &lt;a href=&quot;https://magenta.withgoogle.com/lyria-realtime&quot;&gt;real-time music API&lt;/a&gt; in &lt;a href=&quot;https://aistudio.google.com/app/apps/bundled/promptdj-midi?showPreview=true%3Futm_source%3Ddeepmind.google&amp;amp;utm_medium=referral&amp;amp;utm_campaign=gdm&amp;amp;utm_content=&quot;&gt;Google AI Studio&lt;/a&gt;, developed by Google DeepMind. Real-time music generation models open up unique opportunities for live music exploration and performance, and we’re excited to see what new tools, experiences, and art you create with them.&lt;/p&gt;

&lt;p&gt;As an open-weights model, Magenta RT is targeted towards eventually running locally on consumer hardware (currently runs on free-tier Colab TPUs). It is an 800 million parameter autoregressive transformer model trained on ~190k hours of stock music from multiple sources, mostly instrumental. The model code is &lt;a href=&quot;https://github.com/magenta/magenta-realtime&quot;&gt;available on GitHub&lt;/a&gt; and the weights are available on Google Cloud Storage and &lt;a href=&quot;https://huggingface.co/google/magenta-realtime&quot;&gt;Hugging Face&lt;/a&gt; under permissive licenses with some additional bespoke terms. To see how to run inference with the model and try it yourself, check out our &lt;a href=&quot;https://colab.research.google.com/github/magenta/magenta-realtime/blob/main/notebooks/Magenta_RT_Demo.ipynb&quot;&gt;Colab Demo&lt;/a&gt;. You may also &lt;a href=&quot;https://colab.research.google.com/github/magenta/magenta-realtime/blob/main/notebooks/Magenta_RT_Finetune.ipynb&quot;&gt;customize MagentaRT on your own audio&lt;/a&gt; or explore &lt;a href=&quot;https://colab.research.google.com/github/magenta/magenta-realtime/blob/main/notebooks/Magenta_RT_Audio_Injection.ipynb&quot;&gt;live audio input&lt;/a&gt;. Options for local, on-device inference are coming soon.&lt;/p&gt;

&lt;figure&gt;
  &lt;iframe width=&quot;560&quot; height=&quot;315&quot; src=&quot;https://www.youtube.com/embed/Ae1Kz2zmh9M&quot; title=&quot;YouTube video player&quot; frameborder=&quot;0&quot; allow=&quot;accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share&quot; referrerpolicy=&quot;strict-origin-when-cross-origin&quot; allowfullscreen=&quot;&quot;&gt;&lt;/iframe&gt;
&lt;/figure&gt;

&lt;h2 id=&quot;how-it-works&quot;&gt;How it Works&lt;/h2&gt;

&lt;p&gt;Live generative music is particularly difficult because it requires
real-time generation (i.e. real-time factor &amp;gt; 1, generating X seconds of audio
in less than X seconds), causal streaming (i.e. online generation), and
low-latency controllability.&lt;/p&gt;

&lt;figure&gt;
  &lt;video width=&quot;100%&quot; src=&quot;/assets/lyria_realtime/lyria_realtime_diagram.mp4&quot; autoplay=&quot;&quot; muted=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
&lt;/figure&gt;

&lt;p&gt;Magenta RT overcomes these challenges by adapting the &lt;a href=&quot;https://research.google/pubs/musiclm-generating-music-from-text/&quot;&gt;MusicLM&lt;/a&gt; architecture to perform block autoregression. The model generates a continuous stream of music in sequential chunks, each conditioned on the previous audio output (10s of coarse audio tokens) and a style embedding to produce the next audio chunk (2s of fine audio tokens). By manipulating the style embedding (weighted average of &lt;a href=&quot;https://www.youtube.com/watch?v=Ae1Kz2zmh9M&quot;&gt;text&lt;/a&gt; or &lt;a href=&quot;https://www.youtube.com/watch?v=vHIf2UKXmp4&quot;&gt;audio&lt;/a&gt; prompt embeddings), players can shape and morph the music in real-time, mixing together different styles, instruments, and musical attributes.&lt;/p&gt;
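
&lt;p&gt;The style mixing itself is conceptually simple. As a minimal sketch (with plain
number arrays standing in for MusicCoCa embeddings, so the dimensions and any extra
normalization here are illustrative rather than the model’s exact recipe), blending
prompts is a weighted average of their embedding vectors:&lt;/p&gt;

&lt;div class=&quot;language-typescript highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;// Minimal sketch: the style conditioning is a weighted average of prompt
// embeddings (from text or audio). Plain number arrays stand in for
// MusicCoCa embeddings; the real dimension and normalization may differ.
export function blendStyles(
  prompts: { embedding: number[]; weight: number }[]
): number[] {
  const dim = prompts[0].embedding.length;
  const totalWeight = prompts.reduce((sum, p) =&amp;gt; sum + p.weight, 0);
  const blended = new Array&amp;lt;number&amp;gt;(dim).fill(0);
  for (const p of prompts) {
    for (let i = 0; i &amp;lt; dim; i++) {
      blended[i] += (p.weight / totalWeight) * p.embedding[i];
    }
  }
  return blended;
}

// e.g. 70% &quot;funk breakbeat&quot; and 30% &quot;ambient synth pads&quot;:
// blendStyles([
//   { embedding: funkEmbedding, weight: 0.7 },
//   { embedding: padsEmbedding, weight: 0.3 },
// ]);
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;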

&lt;p&gt;The latency of controls is set by the chunk size, which has a maximum output
size of two seconds but can be reduced to increase reactivity. On a Colab
free-tier TPU (v2-8), these two seconds of audio are generated in 1.25
seconds, giving a real-time factor of 1.6.&lt;/p&gt;

&lt;p&gt;Compared to the original MusicLM, we’ve upgraded our representations to SpectroStream for high-fidelity (48kHz stereo) audio, which is a successor to SoundStream (&lt;a href=&quot;https://arxiv.org/abs/2107.03312&quot;&gt;Zeghidour+ 21&lt;/a&gt;). We also trained a new joint music+text embedding model called MusicCoCa that is influenced by both MuLan (&lt;a href=&quot;https://arxiv.org/abs/2208.12415&quot;&gt;Huang+ 22&lt;/a&gt;) and the CoCa models (&lt;a href=&quot;https://arxiv.org/abs/2205.01917&quot;&gt;Yu+ 22&lt;/a&gt;). Additional details are provided in the &lt;a href=&quot;https://huggingface.co/google/magenta-realtime&quot;&gt;model card&lt;/a&gt; and deeper technical descriptions are available in our &lt;a href=&quot;https://arxiv.org/abs/2508.04651&quot;&gt;paper&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id=&quot;latent-space-exploration-in-real-time&quot;&gt;Latent Space Exploration… In Real Time&lt;/h2&gt;

&lt;p&gt;Magenta’s earlier work in latent music models for MIDI clips (&lt;a href=&quot;https://magenta.withgoogle.com/music-vae&quot;&gt;MusicVAE&lt;/a&gt;, &lt;a href=&quot;https://magenta.withgoogle.com/groovae&quot;&gt;GrooVAE&lt;/a&gt;) and instrumental timbre (&lt;a href=&quot;https://magenta.withgoogle.com/nsynth&quot;&gt;NSynth&lt;/a&gt;) &lt;a href=&quot;https://vibertthio.com/runn/&quot;&gt;offered&lt;/a&gt; &lt;a href=&quot;https://vibertthio.com/sornting/&quot;&gt;a&lt;/a&gt; &lt;a href=&quot;https://experiments.withgoogle.com/nsynth-super&quot;&gt;wide&lt;/a&gt; &lt;a href=&quot;https://experiments.withgoogle.com/ai/beat-blender/view/&quot;&gt;range&lt;/a&gt; &lt;a href=&quot;https://experiments.withgoogle.com/sound-maker&quot;&gt;of&lt;/a&gt; &lt;a href=&quot;https://teampieshop.github.io/latent-loops/&quot;&gt;possible&lt;/a&gt; &lt;a href=&quot;https://experiments.withgoogle.com/ai/melody-mixer/view/&quot;&gt;interfaces&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;With Magenta RT, it is now possible to traverse the space of multi-instrumental
audio: explore the never-before-heard music between genres, unusual instrument
combinations, or &lt;a href=&quot;https://www.youtube.com/watch?v=vHIf2UKXmp4&quot;&gt;your own audio samples&lt;/a&gt;.&lt;/p&gt;

&lt;figure&gt;
  &lt;iframe width=&quot;560&quot; height=&quot;315&quot; src=&quot;https://www.youtube.com/embed/vHIf2UKXmp4&quot; title=&quot;YouTube video player&quot; frameborder=&quot;0&quot; allow=&quot;accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share&quot; referrerpolicy=&quot;strict-origin-when-cross-origin&quot; allowfullscreen=&quot;&quot;&gt;&lt;/iframe&gt;
&lt;/figure&gt;

&lt;p&gt;The ability to adjust prompt mixtures in real-time allows you to efficiently
explore the sonic landscape and find novel textures and loops to use as part of
a larger piece of music.&lt;/p&gt;

&lt;p&gt;Real-time interactivity also provides the possibility of this latent exploration being its own type of musical performance, the interpolation through space combined with anchoring of the audio context producing a structure similar to a &lt;a href=&quot;https://www.youtube.com/watch?v=thAhd82XnMc&quot;&gt;DJ set&lt;/a&gt; or improvisation session. Beyond performance, it can also be used to provide interactive soundscapes for physical spaces like artist installations or virtual spaces like video games.&lt;/p&gt;

&lt;p&gt;This opens up a world of possibilities to build new tools and interfaces, and below you can see three example applications built on the &lt;a href=&quot;http://g.co/magenta/lyria-realtime&quot;&gt;Lyria RealTime API&lt;/a&gt; in AI Studio. Over time, Magenta RT will open up similar opportunities for on-device applications.&lt;/p&gt;

&lt;table align=&quot;center&quot;&gt; &lt;tr&gt;
&lt;td&gt; &lt;video src=&quot;/assets/lyria_realtime/image2.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot; class=&quot;centered&quot;&gt;&lt;/video&gt; &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj?showPreview=true&quot;&gt;&lt;b&gt;PromptDJ&lt;/b&gt;&lt;/a&gt; &lt;/td&gt;
&lt;td&gt; &lt;video src=&quot;/assets/lyria_realtime/image4.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot; class=&quot;centered&quot;&gt;&lt;/video&gt; &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj-midi?showPreview=true&quot;&gt;&lt;b&gt;PromptDJ MIDI&lt;/b&gt;&lt;/a&gt;&lt;/td&gt;
&lt;td&gt; &lt;video src=&quot;/assets/lyria_realtime/image1.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot; class=&quot;centered&quot;&gt;&lt;/video&gt;&lt;a href=&quot;https://aistudio.google.com/app/prompts?state=%7B%22ids%22:%5B%221_pYvZFu7gFns_0w21GsW55moNR0gNmbS%22%5D,%22action%22:%22open%22,%22userId%22:%22103620588905886731599%22,%22resourceKeys%22:%7B%7D%7D&amp;amp;usp=sharing&quot;&gt; &lt;b&gt;PromptDJ Pad&lt;/b&gt; &lt;/a&gt; &lt;/td&gt;
&lt;/tr&gt; &lt;/table&gt;

&lt;h2 id=&quot;why-magenta-realtime&quot;&gt;Why Magenta RealTime?&lt;/h2&gt;

&lt;p&gt;Enhancing human creativity (not replacing it) has always been at the core of Magenta’s mission. AI, however, can be a double-edged sword for creative agency. It offers new opportunities for accessibility and expression, but it can also create a deluge of more passive creation and consumption compared to traditional methods. With this in mind, we have always strived to build tools that help close the skill gap to make creation more accessible, while also valuing existing musical practices and encouraging people to dig deeper in their own creative journeys. In this regard, real-time interactive music models offer several important advantages that have motivated our research over the years (&lt;a href=&quot;https://magenta.withgoogle.com/pianogenie&quot;&gt;Piano Genie&lt;/a&gt;, &lt;a href=&quot;https://magenta.withgoogle.com/ddsp&quot;&gt;DDSP&lt;/a&gt;, &lt;a href=&quot;https://magenta.withgoogle.com/nsynth&quot;&gt;NSynth&lt;/a&gt;, &lt;a href=&quot;https://experiments.withgoogle.com/ai/ai-duet/view/&quot;&gt;AI Duet,&lt;/a&gt; and more).&lt;/p&gt;

&lt;p&gt;Live interaction demands more from the player but can offer more in return. The continuous perception-action loop between the human and the model provides access to a creative flow state, centering the experience on the joy of the process over the final product. The higher bandwidth channel of communication and control often results in outputs that are more unique and personal, as every action the player takes (or doesn’t) has an effect.&lt;/p&gt;

&lt;p&gt;Finally, live models naturally avoid creating a deluge of passive content,
because they intrinsically balance listening with generation in a 1:1 ratio.
They create a unique moment in time, shared by the player, the model, and
listeners.&lt;/p&gt;

&lt;p&gt;While &lt;a href=&quot;http://g.co/magenta/lyria-realtime&quot;&gt;Lyria RealTime&lt;/a&gt; gives developers
and users around the globe access to state-of-the-art live music generation,
the Magenta Project remains committed to providing more direct access to code
and models, so that researchers, artists, and creative coders can build upon
and adapt them to achieve their creative goals.&lt;/p&gt;

&lt;h2 id=&quot;known-limitations&quot;&gt;Known Limitations&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;Coverage of broad musical styles&lt;/strong&gt;. Magenta RT’s training data primarily consists of Western instrumental music. As a consequence, Magenta RT has incomplete coverage of both vocal performance and the broader landscape of rich musical traditions worldwide. For real-time generation with broader style coverage, we refer users to our &lt;a href=&quot;http://g.co/magenta/lyria-realtime&quot;&gt;Lyria RealTime API&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Vocals&lt;/strong&gt;. While the model is capable of generating non-lexical vocalizations
and humming, it is not conditioned on lyrics and is unlikely to generate actual
words. However, there remains some risk of generating explicit or
culturally-insensitive lyrical content.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Latency&lt;/strong&gt;. Because the Magenta RT LLM operates on two second chunks, user
inputs for the style prompt may take two or more seconds to influence the
musical output.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Limited context&lt;/strong&gt;. Because the Magenta RT encoder has a maximum audio context
window of ten seconds, the model is unable to directly reference music that has
been output earlier than that. While the context is sufficient to enable the
model to create melodies, rhythms, and chord progressions, the model is not
capable of automatically creating longer-term song structures.&lt;/p&gt;

&lt;h2 id=&quot;future-work&quot;&gt;Future Work&lt;/h2&gt;

&lt;p&gt;Magenta RT and Lyria RealTime are pushing the boundaries of live generative music, and
we are happy that Magenta RT marks the return of open releases from Magenta.&lt;/p&gt;

&lt;p&gt;We are hard at work making Magenta RT run locally on your own device - stay
tuned for more info!&lt;/p&gt;

&lt;p&gt;We are also working on the next generation of real-time models with higher
quality, lower latency, and more interactivity, to create truly playable
instruments and live accompaniment.&lt;/p&gt;

&lt;h2 id=&quot;how-to-cite&quot;&gt;How to cite&lt;/h2&gt;

&lt;p&gt;Please cite our technical report:&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;BibTeX:&lt;/strong&gt;&lt;/p&gt;

&lt;div class=&quot;language-plaintext highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;@article{gdmlyria2025live,
    title={Live Music Models},
    author={Caillon, Antoine and McWilliams, Brian and Tarakajian, Cassie and Simon, Ian and Manco, Ilaria and Engel, Jesse and Constant, Noah and Li, Pen and Denk, Timo I. and Lalama, Alberto and Agostinelli, Andrea and Huang, Anna and Manilow, Ethan and Brower, George and Erdogan, Hakan and Lei, Heidi and Rolnick, Itai and Grishchenko, Ivan and Orsini, Manu and Kastelic, Matej and Zuluaga, Mauricio and Verzetti, Mauro and Dooley, Michael and Skopek, Ondrej and Ferrer, Rafael and Borsos, Zal{\&apos;a}n and van den Oord, {\&quot;A}aron and Eck, Douglas and Collins, Eli and Baldridge, Jason and Hume, Tom and Donahue, Chris and Han, Kehang and Roberts, Adam},
    journal={arXiv:2508.04651},
    year={2025}
}
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
</description>
        <pubDate>Fri, 20 Jun 2025 07:00:01 -0700</pubDate>
        <link>https://magenta.withgoogle.com/magenta-realtime</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/magenta-realtime</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Introducing Lyria RealTime API</title>
        <description>

&lt;h1 id=&quot;lyria-realtime-api&quot;&gt;Lyria RealTime API&lt;/h1&gt;

&lt;p&gt;&lt;em&gt;Lyria team&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;For the last few years, we have continued to explore how different ways of interacting with generative AI technologies for music can lead to new creative possibilities. A primary focus has been on what we refer to as “&lt;a href=&quot;https://arxiv.org/abs/2508.04651&quot;&gt;live music models&lt;/a&gt;”, which can be controlled by a user in real-time.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://goo.gle/lyria-realtime&quot;&gt;Lyria RealTime&lt;/a&gt; is Google DeepMind’s latest model developed for this purpose, and we are excited to share an experimental API that &lt;strong&gt;anyone&lt;/strong&gt; can use to explore the technology, create some jams, develop an app, or build their own musical instruments. You can try a demo app now in &lt;a href=&quot;https://aistudio.google.com/app/apps/bundled/promptdj-midi?showPreview=true%3Futm_source%3Ddeepmind.google&amp;amp;utm_medium=referral&amp;amp;utm_campaign=gdm&amp;amp;utm_content=&quot;&gt;Google AI Studio&lt;/a&gt;, fork it to build your own, or have a look at the &lt;a href=&quot;https://ai.google.dev/gemini-api/docs/music-generation&quot;&gt;API documentation&lt;/a&gt;. For more details on how Lyria RealTime works, see our &lt;a href=&quot;https://arxiv.org/abs/2508.04651&quot;&gt;technical report&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;Here are a few interfaces we have open sourced in Google AI Studio for inspiration; you can easily fork them and make them your own:&lt;/p&gt;

&lt;style&gt;
  /* Style the tab */
  .tab {
    overflow: hidden;
    border: 1px solid #ccc;
    background-color: #f1f1f1;
  }

  /* Style the buttons that are used to open the tab content */
  .tab button {
    background-color: inherit;
    float: left;
    border: none;
    outline: none;
    cursor: pointer;
    padding: 14px 16px;
    transition: 0.3s;
    font-size: revert;
  }

  /* Change background color of buttons on hover */
  .tab button:hover {
    background-color: #ddd;
  }

  /* Create an active/current tablink class */
  .tab button.active {
    background-color: #ccc;
  }

  /* Style the tab content */
  .tabcontent_demos {
    display: none;
    padding: 6px 12px;
    border: 1px solid #ccc;
    border-top: none;
    justify-content: center;
    align-items: center;
    flex-direction: row;
    height: 20em;
  }

  .tabcontent_demos.active {
    display: flex;
  }

  @media screen and (max-width: 700px) {
    .tabcontent_demos {
      height: initial;
      flex-direction: column;
    }
  }

  @media screen and (max-width: 400px) {
    .tab button {
      font-size: 0.68em;
    }
  }
&lt;/style&gt;

&lt;script&gt;
  function openTab(events, tabName) {
    // Declare all variables
    var i;
    var tabcontent;
    var tablinks;

    // Get all elements with class=&quot;tabcontent&quot; and hide them
    tabcontent = document.getElementsByClassName(&quot;tabcontent_demos&quot;);
    for (i = 0; i &lt; tabcontent.length; i++) {
      tabcontent[i].style.display = &quot;none&quot;;
    }

    // Get all elements with class=&quot;tablinks&quot; and remove the class &quot;active&quot;
    tablinks = document.getElementsByClassName(&quot;tablinks_demos&quot;);
    for (i = 0; i &lt; tablinks.length; i++) {
      tablinks[i].className = tablinks[i].className.replace(&quot; active&quot;, &quot;&quot;);
    }

    // Show the current tab, and add an &quot;active&quot; class to the button that opened the tab
    document.getElementById(tabName).style.display = &quot;flex&quot;;
    events.currentTarget.className += &quot; active&quot;;
  }
&lt;/script&gt;

&lt;div class=&quot;tab&quot;&gt;
  &lt;button class=&quot;tablinks_demos&quot; onclick=&quot;openTab(event, &apos;pdj&apos;)&quot;&gt;
    PromptDJ
  &lt;/button&gt;
  &lt;button class=&quot;tablinks_demos&quot; onclick=&quot;openTab(event, &apos;pdjmidi&apos;)&quot;&gt;
    PromptDJ MIDI
  &lt;/button&gt;
  &lt;button class=&quot;tablinks_demos&quot; onclick=&quot;openTab(event, &apos;pdjpad&apos;)&quot;&gt;
    PromptDJ Pad
  &lt;/button&gt;
&lt;/div&gt;

&lt;div id=&quot;pdj&quot; class=&quot;tabcontent_demos active&quot;&gt;
  &lt;div style=&quot;flex: 1&quot;&gt;
    &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj?showPreview=true&quot;&gt;&lt;h3&gt;PromptDJ&lt;/h3&gt;&lt;/a&gt;
    &lt;p&gt;
      Our most fully-featured demo allows you to add prompts and use sliders to
      control their relative impact on the music. Advanced Settings let you try
      out manual overrides for different musical aspects like note density,
      tempo, and key.
    &lt;/p&gt;
    &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj?showPreview=true&quot;&gt;&lt;b&gt;Try it now!&lt;/b&gt;&lt;/a&gt;
  &lt;/div&gt;
  &lt;div style=&quot;flex: 1; display: flex; justify-content: flex-end&quot;&gt;
    &lt;video src=&quot;/assets/lyria_realtime/image2.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;div id=&quot;pdjmidi&quot; class=&quot;tabcontent_demos&quot;&gt;
  &lt;div style=&quot;flex: 1&quot;&gt;
    &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj-midi?showPreview=true&quot;&gt;&lt;h3&gt;PromptDJ MIDI&lt;/h3&gt;&lt;/a&gt;
    &lt;p&gt;
      With PromptDJ MIDI, you can use a virtual MIDI controller to mix together
      text descriptors (that you can edit) and produce a single stream of music.
      You can even map the knobs to a physical MIDI controller via WebMIDI, as
      Toro y Moi did during the &lt;a href=&quot;https://www.youtube.com/watch?v=thAhd82XnMc&quot;&gt;I/O preshow&lt;/a&gt;.
    &lt;/p&gt;
    &lt;a href=&quot;https://aistudio.google.com/apps/bundled/promptdj-midi?showPreview=true&quot;&gt;&lt;b&gt;Try it now!&lt;/b&gt;&lt;/a&gt;
  &lt;/div&gt;
  &lt;div style=&quot;flex: 1; display: flex; justify-content: flex-end&quot;&gt;
    &lt;video src=&quot;/assets/lyria_realtime/image4.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;div id=&quot;pdjpad&quot; class=&quot;tabcontent_demos&quot;&gt;
  &lt;div style=&quot;flex: 1&quot;&gt;
    &lt;a href=&quot;https://aistudio.google.com/app/prompts?state=%7B%22ids%22:%5B%221_pYvZFu7gFns_0w21GsW55moNR0gNmbS%22%5D,%22action%22:%22open%22,%22userId%22:%22103620588905886731599%22,%22resourceKeys%22:%7B%7D%7D&amp;amp;usp=sharing&quot;&gt;&lt;h3&gt;PromptDJ Pad&lt;/h3&gt;&lt;/a&gt;
    &lt;p&gt;
      PromptDJ Pad harkens back to our earlier experiments with latent-space
      interfaces like
      &lt;a href=&quot;https://nsynthsuper.withgoogle.com/&quot;&gt;NSynth Super&lt;/a&gt; and
      &lt;a href=&quot;https://experiments.withgoogle.com/ai/beat-blender/view/&quot;&gt;MusicVAE Beat Blender&lt;/a&gt;, allowing you to easily explore the space between four editable prompts.
    &lt;/p&gt;
    &lt;a href=&quot;https://aistudio.google.com/app/prompts?state=%7B%22ids%22:%5B%221_pYvZFu7gFns_0w21GsW55moNR0gNmbS%22%5D,%22action%22:%22open%22,%22userId%22:%22103620588905886731599%22,%22resourceKeys%22:%7B%7D%7D&amp;amp;usp=sharing&quot;&gt;&lt;b&gt;Try it now!&lt;/b&gt;&lt;/a&gt;
  &lt;/div&gt;
  &lt;div style=&quot;flex: 1; display: flex; justify-content: flex-end&quot;&gt;
    &lt;video src=&quot;/assets/lyria_realtime/image1.gif.mp4&quot; muted=&quot;&quot; autoplay=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
  &lt;/div&gt;
&lt;/div&gt;
&lt;p&gt;&lt;br /&gt;&lt;/p&gt;

&lt;p&gt;A key advantage of the API is its versatility: it can be called from various platforms, not just web apps. For instance, we’ve developed a VST plugin called &lt;a href=&quot;https://magenta.withgoogle.com/infinite-crate-announce&quot;&gt;The Infinite Crate&lt;/a&gt;, which enables seamless interaction between Lyria RealTime and the digital audio workstation of your choice!&lt;/p&gt;

&lt;style&gt;
  /* fallback */
  @font-face {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-style: normal;
    font-weight: 400;
    src: url(https://fonts.gstatic.com/s/materialsymbolsoutlined/v250/kJF1BvYX7BgnkSrUwT8OhrdQw4oELdPIeeII9v6oDMzByHX9rA6RzaxHMPdY43zj-jCxv3fzvRNU22ZXGJpEpjC_1v-p_4MrImHCIJIZrDCvHOejbd5zrDAt.woff2)
      format(&quot;woff2&quot;);
  }
  .material-symbols-outlined {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-weight: normal;
    font-style: normal;
    font-size: 36px;
    line-height: 1;
    letter-spacing: normal;
    text-transform: none;
    display: inline-block;
    white-space: nowrap;
    word-wrap: normal;
    direction: ltr;
    -webkit-font-feature-settings: &quot;liga&quot;;
    -webkit-font-smoothing: antialiased;
    color: black;
    border-radius: 100%;
    padding: 0.2em;
    background-color: rgb(241, 241, 241);
    transition: 0.1s;
  }
  .material-symbols-outlined:hover {
    background-color: rgb(160, 160, 160);
    color: white;
    transition: 0.1s;
    cursor: pointer;
  }
  .control-overlay {
    position: absolute;
    display: flex;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0);
    /* Align items to the end of the flex container (right) */
    justify-content: flex-end;
    /* Align items to the end of the cross axis (bottom) */
    align-items: flex-end;
    font-family: sans-serif;
    font-size: 1.2rem;
  }
  .video-container {
    position: relative;
    display: flex;
    justify-content: center;
  }
&lt;/style&gt;

&lt;div class=&quot;video-container&quot; id=&quot;video-container-infinitecrate&quot; style=&quot;width:100%;&quot;&gt;
  &lt;video height=&quot;100%&quot; src=&quot;/assets/infinite-crate/hero_loop.mp4&quot; id=&quot;video-infinitecrate&quot; muted=&quot;&quot; loop=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
  &lt;div class=&quot;control-overlay&quot;&gt;
    &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;unmute-button-infinitecrate&quot;&gt;volume_off&lt;/span&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    let videoContainer = document.getElementById(
      &quot;video-container-infinitecrate&quot;
    );
    let unmuteButton = document.getElementById(
      &quot;unmute-button-infinitecrate&quot;
    );
    let video = document.getElementById(&quot;video-infinitecrate&quot;);

    unmuteButton.addEventListener(&quot;click&quot;, function () {
      video.muted = !video.muted;
      if (video.muted) {
        unmuteButton.textContent = &quot;volume_off&quot;;
      } else {
        unmuteButton.textContent = &quot;volume_up&quot;;
      }
    });

    if (&quot;IntersectionObserver&quot; in window) {
      const observer = new IntersectionObserver(
        (entries) =&gt; {
          entries.forEach((entry) =&gt; {
            if (entry.isIntersecting) {
              if (unmuteButton.textContent == &quot;volume_up&quot;) {
                video.muted = false;
              }
            } else {
              video.muted = true;
            }
          });
        },
        {
          root: null,
          rootMargin: &quot;0px&quot;,
          threshold: 0.1,
        }
      );

      observer.observe(videoContainer);
    }
  });
&lt;/script&gt;

&lt;p&gt;&lt;br /&gt;&lt;/p&gt;

&lt;h2 id=&quot;capabilities&quot;&gt;Capabilities&lt;/h2&gt;

&lt;p&gt;With &lt;strong&gt;Lyria RealTime&lt;/strong&gt;, it is possible to traverse the space of multi-instrumental audio: explore the never-before-heard music between genres, unusual instrument combinations, or abstract concepts.&lt;/p&gt;


&lt;div class=&quot;video-container&quot; id=&quot;video-container-musicfxdj&quot; style=&quot;width:100%;&quot;&gt;
  &lt;video height=&quot;100%&quot; src=&quot;/assets/lyria_realtime/mixing_cropped.mp4&quot; id=&quot;video-musicfxdj&quot; muted=&quot;&quot; loop=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
  &lt;div class=&quot;control-overlay&quot;&gt;
    &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;unmute-button-musicfxdj&quot;&gt;volume_off&lt;/span&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    let videoContainer = document.getElementById(
      &quot;video-container-musicfxdj&quot;
    );
    let unmuteButton = document.getElementById(
      &quot;unmute-button-musicfxdj&quot;
    );
    let video = document.getElementById(&quot;video-musicfxdj&quot;);

    unmuteButton.addEventListener(&quot;click&quot;, function () {
      video.muted = !video.muted;
      if (video.muted) {
        unmuteButton.textContent = &quot;volume_off&quot;;
      } else {
        unmuteButton.textContent = &quot;volume_up&quot;;
      }
    });

    if (&quot;IntersectionObserver&quot; in window) {
      const observer = new IntersectionObserver(
        (entries) =&gt; {
          entries.forEach((entry) =&gt; {
            if (entry.isIntersecting) {
              if (unmuteButton.textContent == &quot;volume_up&quot;) {
                video.muted = false;
              }
            } else {
              video.muted = true;
            }
          });
        },
        {
          root: null,
          rootMargin: &quot;0px&quot;,
          threshold: 0.1,
        }
      );

      observer.observe(videoContainer);
    }
  });
&lt;/script&gt;

&lt;p&gt;The core capabilities of the model and API are as follows (a short control sketch appears after the list):&lt;/p&gt;

&lt;ul&gt;
  &lt;li&gt;Continuous generation of a 48kHz stereo music stream.&lt;/li&gt;
  &lt;li&gt;Low latency: at most 2 seconds between a control change and its effect.&lt;/li&gt;
  &lt;li&gt;Latent space steering based on a mixture of text descriptors.&lt;/li&gt;
  &lt;li&gt;Manual control over music features
    &lt;ul&gt;
      &lt;li&gt;Tempo, key.&lt;/li&gt;
      &lt;li&gt;Options to reduce or silence particular instrument groups (drums, bass, other).&lt;/li&gt;
      &lt;li&gt;Control for density of note onsets.&lt;/li&gt;
      &lt;li&gt;Control for spectral brightness.&lt;/li&gt;
    &lt;/ul&gt;
  &lt;/li&gt;
  &lt;li&gt;Sampling temperature and top-k settings (“chaos” control).&lt;/li&gt;
&lt;/ul&gt;
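
&lt;p&gt;As a sketch of how these capabilities surface in code (field names follow our reading of the music generation config in the API docs and may differ; &lt;code&gt;session&lt;/code&gt; is the Lyria RealTime session from the earlier sketch), manual controls can be adjusted while the stream keeps playing:&lt;/p&gt;

&lt;pre&gt;&lt;code class=&quot;language-javascript&quot;&gt;
// Nudge the music mid-stream without interrupting playback.
await session.setMusicGenerationConfig({
  musicGenerationConfig: {
    bpm: 128,          // manual tempo control
    density: 0.3,      // sparser note onsets
    brightness: 0.8,   // more spectral brightness
    muteDrums: true,   // silence the drum group
    temperature: 1.4,  // more &quot;chaos&quot;
    topK: 40,
  },
});
// Per the docs, some controls (e.g. tempo and key) only take effect
// after the generation context is reset.
await session.resetContext();
&lt;/code&gt;&lt;/pre&gt;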

&lt;div class=&quot;carousel__holder&quot; style=&quot;padding-bottom:50%;&quot;&gt;
  &lt;div id=&quot;carousel0&quot; class=&quot;carousel&quot;&gt;
    
    &lt;input class=&quot;carousel__activator&quot; type=&quot;radio&quot; name=&quot;carousel0&quot; id=&quot;slide-0-0&quot; checked=&quot;checked&quot; /&gt;
    
    &lt;input class=&quot;carousel__activator&quot; type=&quot;radio&quot; name=&quot;carousel0&quot; id=&quot;slide-0-1&quot; /&gt;
    
    &lt;input class=&quot;carousel__activator&quot; type=&quot;radio&quot; name=&quot;carousel0&quot; id=&quot;slide-0-2&quot; /&gt;
    
    &lt;input class=&quot;carousel__activator&quot; type=&quot;radio&quot; name=&quot;carousel0&quot; id=&quot;slide-0-3&quot; /&gt;
         

    &lt;div class=&quot;carousel__controls&quot;&gt;
      &lt;label class=&quot;carousel__control carousel__control--backward&quot; for=&quot;slide-0-3&quot;&gt;&lt;/label&gt;
      &lt;label class=&quot;carousel__control carousel__control--forward&quot; for=&quot;slide-0-1&quot;&gt;&lt;/label&gt;
    &lt;/div&gt;
       

    &lt;div class=&quot;carousel__controls&quot;&gt;
      &lt;label class=&quot;carousel__control carousel__control--backward&quot; for=&quot;slide-0-0&quot;&gt;&lt;/label&gt;
      &lt;label class=&quot;carousel__control carousel__control--forward&quot; for=&quot;slide-0-2&quot;&gt;&lt;/label&gt;
    &lt;/div&gt;
       

    &lt;div class=&quot;carousel__controls&quot;&gt;
      &lt;label class=&quot;carousel__control carousel__control--backward&quot; for=&quot;slide-0-1&quot;&gt;&lt;/label&gt;
      &lt;label class=&quot;carousel__control carousel__control--forward&quot; for=&quot;slide-0-3&quot;&gt;&lt;/label&gt;
    &lt;/div&gt;
       

    &lt;div class=&quot;carousel__controls&quot;&gt;
      &lt;label class=&quot;carousel__control carousel__control--backward&quot; for=&quot;slide-0-2&quot;&gt;&lt;/label&gt;
      &lt;label class=&quot;carousel__control carousel__control--forward&quot; for=&quot;slide-0-0&quot;&gt;&lt;/label&gt;
    &lt;/div&gt;
    

    &lt;div class=&quot;carousel__track&quot;&gt;
      &lt;ul&gt;
        
        &lt;li class=&quot;carousel__slide&quot;&gt;
          &lt;video id=&quot;video-0-0&quot; height=&quot;100%&quot; class=&quot;video-carousel-0&quot; src=&quot;https://deepmind.google/api/blob/website/media/Brightness.mp4&quot; loop=&quot;&quot; muted=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
        &lt;/li&gt;
        
        &lt;li class=&quot;carousel__slide&quot;&gt;
          &lt;video id=&quot;video-0-1&quot; height=&quot;100%&quot; class=&quot;video-carousel-0&quot; src=&quot;https://deepmind.google/api/blob/website/media/Chaos.mp4&quot; loop=&quot;&quot; muted=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
        &lt;/li&gt;
        
        &lt;li class=&quot;carousel__slide&quot;&gt;
          &lt;video id=&quot;video-0-2&quot; height=&quot;100%&quot; class=&quot;video-carousel-0&quot; src=&quot;https://deepmind.google/api/blob/website/media/Density.mp4&quot; loop=&quot;&quot; muted=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
        &lt;/li&gt;
        
        &lt;li class=&quot;carousel__slide&quot;&gt;
          &lt;video id=&quot;video-0-3&quot; height=&quot;100%&quot; class=&quot;video-carousel-0&quot; src=&quot;https://deepmind.google/api/blob/website/media/Instrumentation.mp4&quot; loop=&quot;&quot; muted=&quot;&quot; autoplay=&quot;&quot;&gt;&lt;/video&gt;
        &lt;/li&gt;
        
      &lt;/ul&gt;
    &lt;/div&gt;

    &lt;div class=&quot;carousel__indicators&quot;&gt;
      
      &lt;label class=&quot;carousel__indicator&quot; for=&quot;slide-0-0&quot;&gt;&lt;/label&gt;
      
      &lt;label class=&quot;carousel__indicator&quot; for=&quot;slide-0-1&quot;&gt;&lt;/label&gt;
      
      &lt;label class=&quot;carousel__indicator&quot; for=&quot;slide-0-2&quot;&gt;&lt;/label&gt;
      
      &lt;label class=&quot;carousel__indicator&quot; for=&quot;slide-0-3&quot;&gt;&lt;/label&gt;
      
    &lt;/div&gt;
  &lt;/div&gt;
  &lt;div class=&quot;mute-overlay&quot;&gt;
    &lt;div id=&quot;mute-button-0&quot; class=&quot;mute-button&quot;&gt;
      &lt;span class=&quot;material-symbols-outlined&quot; id=&quot;mute-label-0&quot;&gt;volume_off&lt;/span&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;style&gt;
  /* fallback */
  @font-face {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-style: normal;
    font-weight: 400;
    src: url(https://fonts.gstatic.com/s/materialsymbolsoutlined/v250/kJF1BvYX7BgnkSrUwT8OhrdQw4oELdPIeeII9v6oDMzByHX9rA6RzaxHMPdY43zj-jCxv3fzvRNU22ZXGJpEpjC_1v-p_4MrImHCIJIZrDCvHOejbd5zrDAt.woff2)
      format(&quot;woff2&quot;);
  }
  .material-symbols-outlined {
    font-family: &quot;Material Symbols Outlined&quot;;
    font-weight: normal;
    font-style: normal;
    font-size: 48px;
    line-height: 1;
    letter-spacing: normal;
    text-transform: none;
    display: inline-block;
    white-space: nowrap;
    word-wrap: normal;
    direction: ltr;
    -webkit-font-feature-settings: &quot;liga&quot;;
    -webkit-font-smoothing: antialiased;
    color: black;
    border-radius: 100%;
    padding: 0.2em;
    background-color: rgb(241, 241, 241);
    transition: 0.1s;
  }
  .material-symbols-outlined:hover {
    background-color: rgb(160, 160, 160);
    color: white;
    transition: 0.1s;
    cursor:pointer;
  }
  .mute-overlay {
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0);
    display: flex;
    /* Align items to the end of the flex container (right) */
    justify-content: flex-end;
    /* Align items to the end of the cross axis (bottom) */
    align-items: flex-end;
    font-family: sans-serif;
    font-size: 1.2rem;
    transition: opacity 0.3s ease;
  }
  .carousel__holder {width: 100%; position: relative;  margin: 1rem 0 1rem;}
  .carousel {
    height: 100%;
    width: 100%;
    overflow: hidden;
    text-align: center;
    position: absolute;
    padding: 0;
  }
  .carousel__controls,
  .carousel__activator {
    display: none;
  }
  
  .carousel__activator:nth-of-type(1):checked ~ .carousel__track {
    -webkit-transform: translateX(0);
            transform: translateX(0);
  }
  .carousel__activator:nth-of-type(1):checked ~ .carousel__slide:nth-of-type(1) {
    transition: opacity 0.5s, -webkit-transform 0.5s;
    transition: opacity 0.5s, transform 0.5s;
    transition: opacity 0.5s, transform 0.5s, -webkit-transform 0.5s;
    top: 0;
    left: 0;
    right: 0;
    opacity: 1;
    -webkit-transform: scale(1);
            transform: scale(1);
  }
  .carousel__activator:nth-of-type(1):checked ~ .carousel__controls:nth-of-type(1) {
    display: block;
    opacity: 1;
  }
  .carousel__activator:nth-of-type(1):checked ~ .carousel__indicators .carousel__indicator:nth-of-type(1) {
    opacity: 1;
  }
  
  .carousel__activator:nth-of-type(2):checked ~ .carousel__track {
    -webkit-transform: translateX(-100%);
            transform: translateX(-100%);
  }
  .carousel__activator:nth-of-type(2):checked ~ .carousel__slide:nth-of-type(2) {
    transition: opacity 0.5s, -webkit-transform 0.5s;
    transition: opacity 0.5s, transform 0.5s;
    transition: opacity 0.5s, transform 0.5s, -webkit-transform 0.5s;
    top: 0;
    left: 0;
    right: 0;
    opacity: 1;
    -webkit-transform: scale(1);
            transform: scale(1);
  }
  .carousel__activator:nth-of-type(2):checked ~ .carousel__controls:nth-of-type(2) {
    display: block;
    opacity: 1;
  }
  .carousel__activator:nth-of-type(2):checked ~ .carousel__indicators .carousel__indicator:nth-of-type(2) {
    opacity: 1;
  }
  
  .carousel__activator:nth-of-type(3):checked ~ .carousel__track {
    -webkit-transform: translateX(-200%);
            transform: translateX(-200%);
  }
  .carousel__activator:nth-of-type(3):checked ~ .carousel__slide:nth-of-type(3) {
    transition: opacity 0.5s, -webkit-transform 0.5s;
    transition: opacity 0.5s, transform 0.5s;
    transition: opacity 0.5s, transform 0.5s, -webkit-transform 0.5s;
    top: 0;
    left: 0;
    right: 0;
    opacity: 1;
    -webkit-transform: scale(1);
            transform: scale(1);
  }
  .carousel__activator:nth-of-type(3):checked ~ .carousel__controls:nth-of-type(3) {
    display: block;
    opacity: 1;
  }
  .carousel__activator:nth-of-type(3):checked ~ .carousel__indicators .carousel__indicator:nth-of-type(3) {
    opacity: 1;
  }
  
  .carousel__activator:nth-of-type(4):checked ~ .carousel__track {
    -webkit-transform: translateX(-300%);
            transform: translateX(-300%);
  }
  .carousel__activator:nth-of-type(4):checked ~ .carousel__slide:nth-of-type(4) {
    transition: opacity 0.5s, -webkit-transform 0.5s;
    transition: opacity 0.5s, transform 0.5s;
    transition: opacity 0.5s, transform 0.5s, -webkit-transform 0.5s;
    top: 0;
    left: 0;
    right: 0;
    opacity: 1;
    -webkit-transform: scale(1);
            transform: scale(1);
  }
  .carousel__activator:nth-of-type(4):checked ~ .carousel__controls:nth-of-type(4) {
    display: block;
    opacity: 1;
  }
  .carousel__activator:nth-of-type(4):checked ~ .carousel__indicators .carousel__indicator:nth-of-type(4) {
    opacity: 1;
  }
  

  .carousel__control {
    height: 30px;
    width: 30px;
    margin-top: -15px;
    top: 50%;
    position: absolute;
    display: block;
    cursor: pointer;
    border-width: 5px 5px 0 0;
    border-style: solid;
    border-color:rgb(84, 84, 84);
    opacity: 1;
    outline: 0;
    z-index: 3;
  }
  .carousel__control:hover {
    opacity: 1;
  }
  .carousel__control--backward {
    left: 20px;
    -webkit-transform: rotate(-135deg);
            transform: rotate(-135deg);
  }
  .carousel__control--forward {
    right: 20px;
    -webkit-transform: rotate(45deg);
            transform: rotate(45deg);
  }
  .carousel__indicators {
    position: absolute;
    bottom: 20px;
    width: 100%;
    text-align: center;
  }
  .carousel__indicator {
    height: 15px;
    width: 15px;
    border-radius: 100%;
    display: inline-block;
    z-index: 2;
    cursor: pointer;
    opacity: 0.35;
    margin: 0 2.5px 0 2.5px;
  }
  .carousel__indicator:hover {
    opacity: 0.75;
  }
  .carousel__track {
    position: absolute;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    padding: 0;
    margin: 0;
    transition: -webkit-transform 0.5s ease 0s;
    transition: transform 0.5s ease 0s;
    transition: transform 0.5s ease 0s, -webkit-transform 0.5s ease 0s;
  }
  .carousel__track .carousel__slide {
    display: block;
    top: 0;
    left: 0;
    right: 0;
    opacity: 1;
  }
  
  .carousel__track .carousel__slide:nth-of-type(1) {
    -webkit-transform: translateX(0);
            transform: translateX(0);
  }
  
  .carousel__track .carousel__slide:nth-of-type(2) {
    -webkit-transform: translateX(100%);
            transform: translateX(100%);
  }
  
  .carousel__track .carousel__slide:nth-of-type(3) {
    -webkit-transform: translateX(200%);
            transform: translateX(200%);
  }
  
  .carousel__track .carousel__slide:nth-of-type(4) {
    -webkit-transform: translateX(300%);
            transform: translateX(300%);
  }
  

  .carousel--scale .carousel__slide {
    -webkit-transform: scale(0);
            transform: scale(0);
  }
  .carousel__slide {
    height: 100%;
    position: absolute;
    opacity: 0;
    overflow: hidden;
  }
  .carousel__slide .overlay {height: 100%;}
  .carousel--thumb .carousel__indicator {
    height: 30px;
    width: 30px;
  }
  .carousel__indicator {
    background-color: rgb(84, 84, 84);
  }
  
  .carousel__slide:nth-of-type(1),
  .carousel--thumb .carousel__indicators .carousel__indicator:nth-of-type(1) {
    background-size: cover;
    background-position: center;
  }
  
  .carousel__slide:nth-of-type(2),
  .carousel--thumb .carousel__indicators .carousel__indicator:nth-of-type(2) {
    background-size: cover;
    background-position: center;
  }
  
  .carousel__slide:nth-of-type(3),
  .carousel--thumb .carousel__indicators .carousel__indicator:nth-of-type(3) {
    background-size: cover;
    background-position: center;
  }
  
  .carousel__slide:nth-of-type(4),
  .carousel--thumb .carousel__indicators .carousel__indicator:nth-of-type(4) {
    background-size: cover;
    background-position: center;
  }
  
&lt;/style&gt;

&lt;script&gt;
  document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {
    const carouselId = &quot;carousel0&quot;;
    const muteButtonId = &quot;mute-button-0&quot;;
    const muteLabelId = &quot;mute-label-0&quot;;
    const carouselNumber = &quot;0&quot;; // Capture the Liquid number for use in IDs

    const carouselEl = document.getElementById(carouselId);
    const muteButton = document.getElementById(muteButtonId);
    const muteLabel = document.getElementById(muteLabelId);

    if (!carouselEl) return;

    carouselEl.muted = true;

    // --- Mute/Unmute Button Logic ---
    muteButton.addEventListener(&quot;click&quot;, () =&gt; {
      // Toggle the mute state for the carousel
      carouselEl.muted = !carouselEl.muted;

      // Update the mute label to reflect the new state
      muteLabel.textContent = carouselEl.muted ? &quot;volume_off&quot; : &quot;volume_up&quot;;

      // Find the radio button for the currently visible slide
      const activeRadio = carouselEl.querySelector(
        &quot;.carousel__activator:checked&quot;
      );
      if (!activeRadio) return;

      // Extract the slide index from the active radio&apos;s ID
      const idParts = activeRadio.id.split(&quot;-&quot;);
      const videoIndex = idParts[idParts.length - 1];

      // Construct the corresponding video&apos;s ID and get the element
      const activeVideo = document.getElementById(
        `video-${carouselNumber}-${videoIndex}`
      );

      if (activeVideo) {
        // Set the muted property of the active video based on the new state
        activeVideo.muted = carouselEl.muted;
      }
    });

    // --- Slide Change and Intersection Observer Logic ---
    const videos = carouselEl.querySelectorAll(&quot;.video-carousel-0&quot;);

    if (&quot;IntersectionObserver&quot; in window) {
      for (const video of videos) {
        const observer = new IntersectionObserver(
          (entries) =&gt; {
            entries.forEach((entry) =&gt; {
              if (entry.isIntersecting) {
                video.currentTime = 0; // restart when scrolled back into view
                video.muted = carouselEl.muted;
              } else {
                video.muted = true;
              }
            });
          },
          {
            root: null,
            rootMargin: &quot;0px&quot;,
            threshold: 0.1,
          }
        );

        observer.observe(video);
      }
    }
  });
&lt;/script&gt;

&lt;h2 id=&quot;interfaces-for-live-music-models&quot;&gt;Interfaces for Live Music Models&lt;/h2&gt;

&lt;p&gt;One of the things we are most excited about with live music models is the number of novel interfaces they make possible by mapping human actions to musical controls. This harkens back to our earlier work with &lt;a href=&quot;http://g.co/magenta/js&quot;&gt;Magenta.js&lt;/a&gt; and the large number of &lt;a href=&quot;http://g.co/magenta/demos&quot;&gt;applications&lt;/a&gt; it and other earlier Magenta technologies spawned. We hope the Lyria RealTime API will empower even more creativity by developers.&lt;/p&gt;

&lt;p&gt;Live music models introduce a different interaction paradigm from text-to-song generators, which have impressive capabilities but lack the instantaneous feedback loops available to players of traditional instruments. The goal of models like Lyria RealTime is to put the human more deeply in the loop, centering the experience on the joy of the process rather than the final product. The higher-bandwidth channel of communication and control often results in outputs that are more unique and personal, as every action the player takes (or doesn’t) has an effect.&lt;/p&gt;

&lt;p&gt;In Lyria RealTime, the ability to adjust prompt mixtures and quickly hear the results allows players to efficiently explore the sonic landscape and find novel textures and loops. Real-time interactivity also makes this latent exploration a potential musical performance in its own right: interpolation through the latent space, combined with the anchoring of the audio context, produces a structure similar to a DJ set or an improvisation session. Beyond performance, it can also provide interactive soundscapes for physical spaces like art installations or virtual spaces like video games.&lt;/p&gt;

&lt;p&gt;Our first public experiment with Lyria RealTime was &lt;a href=&quot;http://labs.google/musicfx&quot;&gt;MusicFX DJ&lt;/a&gt;, which we developed last year in collaboration with Google Labs. MusicFX DJ allows you to create and conduct a continuous flow of music, and we worked with producers and artists to make the tool more inspiring and useful to musicians and amateurs alike.&lt;/p&gt;

&lt;p&gt;At this year’s I/O, &lt;a href=&quot;https://toroymoi.com/&quot;&gt;Toro y Moi&lt;/a&gt; (Chaz Bear) took Lyria RealTime for a spin &lt;a href=&quot;https://www.youtube.com/watch?v=thAhd82XnMc&quot;&gt;on stage before the keynote&lt;/a&gt;, using a &lt;a href=&quot;https://aistudio.google.com/app/apps/bundled/promptdj-midi&quot;&gt;different interface&lt;/a&gt; that he operated via a physical MIDI controller. Chaz’s performance leaned deeply into the live nature of the model, improvising with it to lead the crowd on a sonic journey full of surprises for himself and the audience.&lt;/p&gt;

&lt;figure&gt;
  &lt;iframe width=&quot;560&quot; height=&quot;315&quot; src=&quot;https://www.youtube.com/embed/thAhd82XnMc?si=8G1sl0tA90J4reFW&quot; title=&quot;YouTube video player&quot; frameborder=&quot;0&quot; allow=&quot;accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share&quot; referrerpolicy=&quot;strict-origin-when-cross-origin&quot; allowfullscreen=&quot;&quot;&gt;&lt;/iframe&gt;
  &lt;figcaption&gt;Chaz Bear&apos;s performance at Google I/O 2025.&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;h2 id=&quot;how-it-works&quot;&gt;How it Works&lt;/h2&gt;

&lt;p&gt;Live generative music is particularly difficult because it requires real-time generation (a real-time factor greater than 1, e.g. generating 2 seconds of audio in less than 2 seconds), causal streaming (i.e. online generation), and low-latency controllability.&lt;/p&gt;

&lt;figure&gt;
  &lt;video width=&quot;100%&quot; src=&quot;/assets/lyria_realtime/lyria_realtime_diagram.mp4&quot; autoplay=&quot;&quot; muted=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
  &lt;figcaption&gt;Lyria RealTime diagram&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;p&gt;Lyria RealTime overcomes these challenges by adapting the &lt;a href=&quot;https://google-research.github.io/seanet/musiclm/examples/&quot;&gt;MusicLM&lt;/a&gt; architecture to perform block autoregression. The model generates a continuous stream of music in sequential chunks, each steered by the previous audio output and a style embedding for the next chunk. By manipulating the style embedding (a weighted average of text or audio prompt embeddings), players can shape and morph the music in real time, mixing together different styles, instruments, and musical attributes.&lt;/p&gt;
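
&lt;p&gt;To make the steering concrete, here is a minimal sketch (ours, not the production implementation) of the mixing math: the style vector for the next chunk is a weight-normalized average of the active prompt embeddings.&lt;/p&gt;

&lt;pre&gt;&lt;code class=&quot;language-javascript&quot;&gt;
// prompts: [{ embedding: number[], weight: number }, ...]
function styleEmbedding(prompts) {
  const dim = prompts[0].embedding.length;
  const total = prompts.reduce((sum, p) =&gt; sum + p.weight, 0);
  const style = new Array(dim).fill(0);
  for (const p of prompts) {
    for (let i = 0; i &lt; dim; i++) {
      style[i] += (p.weight / total) * p.embedding[i];
    }
  }
  return style;
}

// Block autoregression, schematically: each chunk is conditioned on the
// previous audio plus the current style vector, so moving a slider is
// heard within a couple of seconds.
//   while (playing) {
//     chunk = model.generate(previousAudio, styleEmbedding(prompts));
//     stream.push(chunk);
//     previousAudio = chunk;
//   }
&lt;/code&gt;&lt;/pre&gt;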

&lt;h2 id=&quot;future-work&quot;&gt;Future Work&lt;/h2&gt;

&lt;p&gt;We are currently working on the next generation of real-time models with higher quality, lower latency, more interactivity, and on-device operability, to create truly playable instruments and live accompaniment. Stay tuned as we continue working with communities of musicians and developers on these technologies.&lt;/p&gt;
</description>
        <pubDate>Thu, 12 Jun 2025 07:00:01 -0700</pubDate>
        <link>https://magenta.withgoogle.com/lyria-realtime</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/lyria-realtime</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>Magenta Studio 2.0</title>
        <description>&lt;p&gt;TL;DR: &lt;a href=&quot;https://magenta.withgoogle.com/studio&quot;&gt;Magenta Studio&lt;/a&gt;, first released
in 2019, has been updated to integrate more seamlessly with Ableton Live. No
functionality has changed; there are only UI changes and internal fixes. Please
download and enjoy!&lt;/p&gt;

&lt;p&gt;If you’re new to Magenta Studio, please read our &lt;a href=&quot;https://magenta.withgoogle.com/studio-announce&quot;&gt;previous post&lt;/a&gt; about what it is and how it works.&lt;/p&gt;

&lt;h2 id=&quot;whats-new&quot;&gt;What’s New&lt;/h2&gt;

&lt;p&gt;In the previous version of Magenta Studio, the &lt;a href=&quot;https://www.ableton.com/en/live/max-for-live/&quot;&gt;Max for Live (M4L)&lt;/a&gt;
plugin launched a separate, operating-system-specific application for each of
the tools. Unfortunately, as operating systems were upgraded, the applications
sometimes stopped working. We therefore decided to integrate the tools directly
into the Max for Live environment to ensure longer-term stability. The machine
learning models remain embedded in the M4L plugin and do not require Internet
access to use.&lt;/p&gt;

&lt;h2 id=&quot;upgrading&quot;&gt;Upgrading&lt;/h2&gt;

&lt;p&gt;To upgrade from the &lt;a href=&quot;https://magenta.withgoogle.com/v1/studio&quot;&gt;previous version of Magenta Studio&lt;/a&gt;,
you can download the latest version and drop it into Live directly in place of
the old plugin. Only the interface and integration have changed, so it works in
exactly the same way.&lt;/p&gt;

&lt;h2 id=&quot;documentation&quot;&gt;Documentation&lt;/h2&gt;

&lt;p&gt;&lt;a href=&quot;https://magenta.withgoogle.com/studio&quot;&gt;The documentation&lt;/a&gt; has been updated to
reflect the new interface. The tool-specific videos have not been updated with
the new interface, but the functionality is identical.&lt;/p&gt;

&lt;h2 id=&quot;support&quot;&gt;Support&lt;/h2&gt;

&lt;p&gt;Please report any issues to the &lt;a href=&quot;https://github.com/magenta/magenta-studio&quot;&gt;GitHub repository&lt;/a&gt;. Thanks for using Magenta Studio!&lt;/p&gt;

&lt;h2 id=&quot;acknowledgements&quot;&gt;Acknowledgements&lt;/h2&gt;

&lt;p&gt;Magenta Studio is based on work by members of the Google DeepMind team’s Magenta project along with contributors to the Magenta and Magenta.js libraries. The plug-ins were implemented by &lt;a href=&quot;https://yotammann.info/&quot;&gt;Yotam Mann&lt;/a&gt; and extended by Cassie Tarakajian.&lt;/p&gt;
</description>
        <pubDate>Thu, 24 Aug 2023 07:00:01 -0700</pubDate>
        <link>https://magenta.withgoogle.com/studio-announce-2</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/studio-announce-2</guid>
        
        <category>studio</category>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>The 2023 I/O Preshow  – Composed by Dan Deacon (with some help from MusicLM)</title>
        <description>&lt;p&gt;TL;DR: Dan Deacon worked with Google’s latest music AI models to compose the preshow music.
Check out the MusicLM demo in the &lt;a href=&quot;https://g.co/aitestkitchen&quot;&gt;AI Test Kitchen app&lt;/a&gt;.
Read on for more details about our collaboration with Dan Deacon.&lt;/p&gt;

&lt;h1 id=&quot;dan-deacons-io-performance&quot;&gt;Dan Deacon’s I/O Performance&lt;/h1&gt;

&lt;p&gt;On several occasions, we have had the pleasure of working with musicians that perform at Google I/O.
This is an opportunity for us to bring our latest creative machine learning tools out of the lab and into the hands of the musicians.
In previous years, we have worked with &lt;a href=&quot;https://magenta.withgoogle.com/chain-tripping&quot;&gt;YACHT&lt;/a&gt; and The &lt;a href=&quot;https://magenta.withgoogle.com/fruitgenie&quot;&gt;Flaming&lt;/a&gt; &lt;a href=&quot;https://blog.google/technology/ai/behind-magenta-tech-rocked-io/&quot;&gt;Lips&lt;/a&gt;.
With YACHT we explored custom symbolic music generation models tailored to the band, and with The Flaming Lips we explored an interaction to bridge the audience and performers.&lt;/p&gt;

&lt;p&gt;This year’s I/O pre-show was performed by electronic musician and composer Dan Deacon.
With Dan we explored how artists might interact with generative models of music audio and incorporate them into their artistic process.
Check out his performance in the video below and read on to learn more about his process using Google’s latest music AI tools:&lt;/p&gt;

&lt;figure&gt;
  &lt;iframe width=&quot;560&quot; height=&quot;315&quot; src=&quot;https://www.youtube.com/embed/K_8N8w5CaOs&quot; frameborder=&quot;0&quot; allow=&quot;autoplay; encrypted-media&quot; style=&quot;max-width:100%&quot; allowfullscreen=&quot;&quot;&gt;
  &lt;/iframe&gt;
  &lt;figcaption&gt;Dan Deacon&apos;s performance at Google I/O 2023.&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;p&gt;Dan used two of our new generative models in his performance: &lt;a href=&quot;https://google-research.github.io/seanet/musiclm/examples/&quot;&gt;MusicLM&lt;/a&gt; (&lt;a href=&quot;https://arxiv.org/abs/2301.11325&quot;&gt;paper&lt;/a&gt;, &lt;a href=&quot;https://g.co/aitestkitchen&quot;&gt;demo&lt;/a&gt;), which produces music based on a text-based input prompt, and &lt;a href=&quot;https://g.co/magenta/singsong&quot;&gt;SingSong&lt;/a&gt; (&lt;a href=&quot;https://arxiv.org/abs/2301.12662&quot;&gt;paper&lt;/a&gt;), which generates an accompaniment track for a sung audio input.
Both of these models are part of the &lt;a href=&quot;https://ai.googleblog.com/2022/10/audiolm-language-modeling-approach-to.html&quot;&gt;AudioLM&lt;/a&gt; (&lt;a href=&quot;https://arxiv.org/abs/2209.03143&quot;&gt;paper&lt;/a&gt;) family, and they directly produce audio based on the input conditioning (i.e., text or singing) by autoregressively predicting &lt;a href=&quot;https://ai.googleblog.com/2021/08/soundstream-end-to-end-neural-audio.html&quot;&gt;SoundStream&lt;/a&gt; (&lt;a href=&quot;https://arxiv.org/abs/2107.03312&quot;&gt;paper&lt;/a&gt;) tokens with one or more Transformer language models.
SoundStream tokens can then be converted back to raw audio that can be used in conjunction with other audio editing software.&lt;/p&gt;
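
&lt;p&gt;Schematically (this is illustrative pseudocode, not the published implementations; &lt;code&gt;lm&lt;/code&gt; and &lt;code&gt;codec&lt;/code&gt; are hypothetical stand-ins for the Transformer language model and SoundStream), the shared recipe looks like this:&lt;/p&gt;

&lt;pre&gt;&lt;code class=&quot;language-javascript&quot;&gt;
// Conditioning (text or singing) -&gt; discrete audio tokens -&gt; waveform.
function generateAudio(conditioning, lm, codec, numTokens) {
  // Tokens derived from the conditioning input (in MusicLM, text
  // conditioning actually enters via embeddings rather than tokens).
  const tokens = codec.encode(conditioning);
  for (let i = 0; i &lt; numTokens; i++) {
    // Autoregressively predict the next SoundStream token.
    tokens.push(lm.sampleNextToken(tokens));
  }
  // Decode the tokens back to raw audio for use in other software.
  return codec.decode(tokens);
}
&lt;/code&gt;&lt;/pre&gt;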

&lt;p&gt;For his performance, Dan used MusicLM to create the chill, relaxing piano groove that’s heard behind his two meditations starring the Duck with Lips.
Additionally, Dan used both MusicLM and SingSong to create the Chiptune song.
Most excitingly, Dan didn’t just &lt;em&gt;use&lt;/em&gt; both SingSong and MusicLM, but actually &lt;em&gt;extended&lt;/em&gt; their capabilities to put his performance together.
We’ll discuss more of how Dan shaped the tools–and why it’s important that he did so–in the next section.&lt;/p&gt;

&lt;h1 id=&quot;working-with-dan&quot;&gt;Working with Dan&lt;/h1&gt;

&lt;p&gt;As Dan discusses at around 7 minutes into his performance, he has always been excited by the promise that new technologies bring to the compositional process.
Technology has a long and intertwined history with the art of making music.
We might not think of things like flutes, violins, or trombones in the same way we think of computers now, but these were revolutionary new technologies when they were first introduced!
They can also often seem disruptive at first–at one point in history, &lt;a href=&quot;https://journals.sagepub.com/doi/abs/10.1177/016344386008003002?journalCode=mcsa&quot;&gt;microphones caused quite a stir&lt;/a&gt; because they let vocalists sing much more softly (as opposed to singing so loudly that they could be heard over the band).
Yet in retrospect, microphones changed our relationship to music in many positive ways, enabling us to create, represent, and distribute music in ways that would have been inconceivable beforehand.
Importantly, each new technological development expanded the creative palette of musicians, bringing with them new textures, new techniques, and sometimes new conceptions of music itself.&lt;/p&gt;

&lt;p&gt;We view our new models as a continuation of music technology’s evolution.
We’re incredibly inspired by the opportunity for these new tools to bring new creative capabilities to humanity, while remaining conscious of–and working hard to mitigate–their potential negative consequences.
Our goal is and always has been to empower artists and musicians; a crucial piece of empowering musicians is understanding how these new tools situate themselves in different artists’ creative processes.
With that in mind, collaborating with Dan was a great opportunity for us to work towards embodying our goals of empowering musicians in the era of generative modeling.&lt;/p&gt;

&lt;figure&gt;
  &lt;iframe width=&quot;560&quot; height=&quot;315&quot; src=&quot;https://www.youtube.com/embed/2yMBycveWHk&quot; frameborder=&quot;0&quot; allow=&quot;autoplay; encrypted-media&quot; style=&quot;max-width:100%&quot; allowfullscreen=&quot;&quot;&gt;
  &lt;/iframe&gt;
  &lt;figcaption&gt;A glimpse of our in-person workshop where we showed our new tools to Dan Deacon.&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;p&gt;About a month before I/O, we had a workshop with Dan where we introduced him to MusicLM and SingSong.
Initially, Dan found many interesting text prompts for MusicLM, such as “a 600ft trombone.”
He started to push the tools past their limits by, for example, playing his synthesizer into SingSong, ignoring that the system was trained on only singing inputs.
These initial experiments turned out to be really fun and promising!&lt;/p&gt;

&lt;p&gt;As we kept working with Dan, he surprised us by pushing these tools even further.
Inspired by “&lt;a href=&quot;https://en.wikipedia.org/wiki/I_Am_Sitting_in_a_Room&quot;&gt;I Am Sitting in a Room&lt;/a&gt;” (&lt;a href=&quot;https://www.youtube.com/watch?v=fAxHlLK3Oyk&quot;&gt;click here to listen&lt;/a&gt;), he fed the output of the SingSong model back into itself… over and over and over.
Again, Dan moved beyond the model’s design of accepting singing input; by feeding its own output back into itself, the input audio was outside the distribution the model had seen during training, and we weren’t sure whether this would work at all.
Yet not only did it work, the feedback loop tended to produce music that still accompanied the input, keeping the same key, tempo, and style.
This was the interaction that Dan designed to compose the Chiptune song, above.&lt;/p&gt;

&lt;p&gt;Dan began with a handful of text prompts to MusicLM, used the generated audio as input to SingSong, and then fed that output back through SingSong for numerous iterations.
He was able to create hundreds of audio clips that complemented each other.
From these, he handpicked his favorite clips, edited them slightly, and performed them.&lt;/p&gt;
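
&lt;p&gt;Sketched as a loop (with hypothetical function names standing in for the models), the interaction Dan designed is remarkably simple:&lt;/p&gt;

&lt;pre&gt;&lt;code class=&quot;language-javascript&quot;&gt;
// Seed the loop with text-to-music output (hypothetical prompt)...
let audio = musicLM.generate(&apos;chiptune melody&apos;);
for (let i = 0; i &lt; numIterations; i++) {
  // ...then repeatedly feed SingSong its own accompaniment. The input is
  // deliberately out of distribution (SingSong was trained on singing),
  // yet each pass tends to keep the same key, tempo, and style.
  audio = singSong.accompany(audio);
}
&lt;/code&gt;&lt;/pre&gt;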

&lt;p&gt;We’re very proud to have been a part of Dan’s amazing performance.
We’re extremely excited for the direction that this research is headed, and we’re always looking for ways to give musicians new tools to interact with.
Check out the &lt;a href=&quot;https://blog.google/technology/ai/musiclm-google-ai-test-kitchen/&quot;&gt;Google Keyword blog post&lt;/a&gt; to learn more about MusicLM and you can try it yourself by &lt;a href=&quot;https://g.co/aitestkitchen&quot;&gt;signing up via the AI Test Kitchen app&lt;/a&gt;.&lt;/p&gt;

&lt;h3 id=&quot;acknowledgements&quot;&gt;Acknowledgements&lt;/h3&gt;

&lt;p&gt;&lt;em&gt;This year’s I/O pre-show was a huge collaborative effort. We would like to thank everyone involved in making the performance a success (in no particular order): Josh Christman, Daniel Chandler, Meghan Reinhardt, Carolyne De Bellefeuille, Adi Goodrich, Jon Barron, Irina Blok, Spencer Sterling, Ruben Beddeleem, Ben Poole, Cadie Desbiens-Desmeules, Chris Donahue, Jorge Gonzalez Mendez, Noah Constant, Jesse Engel, Timo Denk, Andrea Agostinelli, Neil Zeghidour, Christian Frank, Mauricio Zuluaga, Hema Manickavasagam, Tom Hume, and Lynn Cherry.&lt;/em&gt;&lt;/p&gt;

</description>
        <pubDate>Wed, 21 Jun 2023 13:00:00 -0700</pubDate>
        <link>https://magenta.withgoogle.com/dandeacon-io-preshow</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/dandeacon-io-preshow</guid>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>The Wordcraft Writers Workshop: Creative Co-Writing with AI</title>
        <description>&lt;p&gt;A core piece of Magenta’s mission is to empower creativity using AI and machine learning. In order to evaluate how well this goal is being achieved, it is important to put tools in the hands of creators, encouraging them to share honest and critical feedback. This feedback can help researchers to thoughtfully develop the next generations of ML-powered creative tools. Most of our prior efforts to engage with creators have been in the domain of music (for example, &lt;a href=&quot;https://magenta.withgoogle.com/studio&quot;&gt;Magenta Studio&lt;/a&gt; and &lt;a href=&quot;https://nsynthsuper.withgoogle.com/&quot;&gt;NSynth&lt;/a&gt;).&lt;/p&gt;

&lt;p&gt;However, human creativity encompasses far more than just music: visual artists paint, draw, and sculpt, and writers craft stories and poetry. In recent years, we’ve seen huge advancements in machine learning techniques that can facilitate creativity in these other modalities. Creative writing is an especially interesting domain because it is so challenging for AI to get right. Even short stories commonly have narrative arcs that span paragraphs or longer, multiple characters with diverging points of view, and a careful balance of familiar archetypes and novel storytelling–all difficult traits for state-of-the-art AI to replicate. At the same time, the omnipresent writer’s block is not a problem at all for neural language models like &lt;a href=&quot;https://ai.googleblog.com/2022/01/lamda-towards-safe-grounded-and-high.html&quot;&gt;LaMDA&lt;/a&gt;, which can effortlessly generate as many words as you ask them for.&lt;/p&gt;

&lt;p&gt;Earlier this year, we invited a cohort of 13 professional creative writers to try their hands at writing stories using &lt;a href=&quot;https://g.co/research/wordcraft&quot;&gt;Wordcraft&lt;/a&gt;, an AI-augmented text editor with a wide range of generative capabilities targeted at creative writing assistance. Wordcraft can suggest story ideas, rewrite text according to user-provided instructions, and elaborate on what has already been written. It also has a chatbot interface where users can engage with LaMDA, Google’s dialog-based language model, about their stories.&lt;/p&gt;

&lt;figure&gt;
  &lt;a href=&quot;https://g.co/research/wordcraft&quot; target=&quot;_blank&quot;&gt;
    &lt;video src=&quot;/assets/wordcraft/wordcraft.webm&quot; autoplay=&quot;&quot; loop=&quot;&quot;&gt;&lt;/video&gt;
  &lt;/a&gt;
  &lt;figcaption&gt;A demo of the Wordcraft web application&lt;/figcaption&gt;
&lt;/figure&gt;

&lt;p&gt;As in generative music, AI-assisted story writing can be a mixed bag. At its best, Wordcraft made suggestions that were inspiring and surrealistic, and writers applauded its usefulness for ideation and overcoming writer’s block. However, it also had a tendency to rehash tired tropes, and it could take wading through many dull suggestions before finding an interesting one.&lt;/p&gt;

&lt;p&gt;All of the writers’ stories are available in the Wordcraft Writers Workshop’s &lt;a href=&quot;https://g.co/research/wordcraft&quot;&gt;digital literary magazine&lt;/a&gt;, and a detailed writeup of what we learned about the role machine learning can play in creative writing can be found &lt;a href=&quot;https://arxiv.org/abs/2211.05030&quot;&gt;here&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;We hope you enjoy perusing through the stories, and we are excited to hear your ideas about how AI can create valuable creative writing tools.&lt;/p&gt;
</description>
        <pubDate>Thu, 01 Dec 2022 08:00:00 -0800</pubDate>
        <link>https://magenta.withgoogle.com/wordcraft-writers-workshop</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/wordcraft-writers-workshop</guid>
        
        <category>wordcraft</category>
        
        <category>lamda</category>
        
        <category>writing</category>
        
        
        <category>blog</category>
        
      </item>
    
      <item>
        <title>The Chamber Ensemble Generator and CocoChorales Dataset</title>
        <description>&lt;style&gt;
  table tr.wrap {
    display: flex;
    flex-direction: row;
    flex-wrap: wrap;
  }
  table tr.wrap &gt; td {
    display: block;
    flex: 1;
  }
  td {text-align: center !important}
  .from {background-color: #d3d3d3;}
  img.inline {
    vertical-align: middle;
    display: inline-block;
    max-height: 16px;
    width: auto !important;
    margin-right: 6px;
  }
  img.centered {
    max-width: 90%;
    margin: auto;
  }
&lt;/style&gt;

&lt;figure style=&quot;text-align: center;&quot;&gt;
  &lt;img src=&quot;assets/cocochorales/logos.png&quot; style=&quot;width: 90%; height: auto; margin: auto&quot; alt=&quot;Logos for the Chamber Ensemble Generator and CocoChorales Dataset.&quot; /&gt;
&lt;/figure&gt;

&lt;p&gt;In this post, we’re excited to introduce the &lt;strong&gt;Chamber Ensemble Generator&lt;/strong&gt;, a system for generating realistic chamber ensemble performances, and the corresponding &lt;strong&gt;CocoChorales Dataset&lt;/strong&gt;, which contains over 1,400 hours of audio mixes with corresponding source data and MIDI, multi-f&lt;sub&gt;0&lt;/sub&gt;, and per-note performance annotations.&lt;/p&gt;

&lt;table align=&quot;center&quot; class=&quot;overview&quot;&gt;
  &lt;tbody&gt;&lt;tr&gt;
    &lt;td&gt;🎵&lt;a href=&quot;https://lukewys.github.io/cocochorales/&quot;&gt;Audio Examples&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;📝&lt;a href=&quot;https://arxiv.org/abs/2209.14458&quot;&gt;arXiv Paper&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;📂&lt;a href=&quot;https://magenta.withgoogle.com/datasets/cocochorales&quot;&gt;Dataset Download Instructions&lt;/a&gt;&lt;/td&gt;
    &lt;td&gt;&lt;img alt=&quot;&quot; src=&quot;/assets/ddsp/github.png&quot; class=&quot;inline&quot; /&gt;&lt;a href=&quot;https://github.com/lukewys/chamber-ensemble-generator&quot;&gt;Github Code&lt;/a&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;

&lt;p&gt;Data is the bedrock that all machine learning systems are built upon. Historically, researchers applying machine learning to music have not had access to the same scale of data that other fields have. Whereas image and language machine learning researchers measure their datasets by the millions or billions of examples, music researchers feel extremely lucky if they can scrape together a few thousand examples for a given task.&lt;/p&gt;

&lt;p&gt;Modern machine learning systems require large quantities of &lt;em&gt;annotated&lt;/em&gt; data. With music systems, getting annotations for some tasks–like transcription or f&lt;sub&gt;0&lt;/sub&gt; estimation–requires tedious work by expert musicians. When annotating a single example correctly is difficult, how can we annotate hundreds of thousands of examples to make enough data to train a machine learning system?&lt;/p&gt;

&lt;p&gt;In this post, we introduce a new approach to solving these problems by using generative models to create large amounts of realistic-sounding, finely annotated, freely available music data. We combined two structured generative models–a note generation model, &lt;a href=&quot;https://magenta.withgoogle.com/coconet&quot;&gt;Coconet&lt;/a&gt;, and a notes-to-audio generative synthesis model, &lt;a href=&quot;https://magenta.withgoogle.com/midi-ddsp&quot;&gt;MIDI-DDSP&lt;/a&gt;–into a system we call the &lt;strong&gt;Chamber Ensemble Generator&lt;/strong&gt;. As its name suggests, the Chamber Ensemble Generator (or CEG) can generate performances of chamber ensembles playing in the style of four-part Bach chorales. Listen to the following examples performed by the CEG:&lt;/p&gt;

&lt;table align=&quot;center&quot;&gt;
  &lt;tbody&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;2&quot;&gt;String Ensemble Mixture:&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;2&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/strings/mix.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;1&quot;&gt;Soprano: Violin 1&lt;/td&gt;
    &lt;td colspan=&quot;1&quot;&gt;Alto: Violin 2&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr class=&quot;wrap&quot;&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/strings/1_violin.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/strings/2_violin.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;1&quot;&gt;Tenor: Viola&lt;/td&gt;
    &lt;td colspan=&quot;1&quot;&gt;Bass: Cello&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr class=&quot;wrap&quot;&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/strings/3_viola.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/strings/4_cello.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;

&lt;table align=&quot;center&quot;&gt;
  &lt;tbody&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;2&quot;&gt;Woodwind Ensemble Mixture:&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;2&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/woodwind/mix.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;1&quot;&gt;Soprano: Flute&lt;/td&gt;
    &lt;td colspan=&quot;1&quot;&gt;Alto: Oboe&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr class=&quot;wrap&quot;&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/woodwind/1_flute.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/woodwind/2_oboe.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
  &lt;tr class=&quot;wrap&quot;&gt;
    &lt;td colspan=&quot;1&quot;&gt;Tenor: Clarinet&lt;/td&gt;
    &lt;td colspan=&quot;1&quot;&gt;Bass: Bassoon&lt;/td&gt;
  &lt;/tr&gt;
&lt;tr class=&quot;wrap&quot;&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/woodwind/3_clarinet.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;td colspan=&quot;1&quot;&gt;&lt;audio controls=&quot;&quot;&gt; &lt;source src=&quot;assets/cocochorales/audio/woodwind/4_bassoon.wav?raw=true&quot; /&gt; &lt;/audio&gt;&lt;/td&gt;
  &lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;

&lt;p&gt;We then used the CEG to create a massive music dataset for machine learning systems. We call this dataset &lt;strong&gt;CocoChorales&lt;/strong&gt;. What’s exciting about the CEG is that it uses a set of structured generative models which provide annotations for many music machine learning applications like automatic music transcription, multi-f&lt;sub&gt;0&lt;/sub&gt; estimation, source separation, performance analysis, and more.&lt;/p&gt;

&lt;p&gt;Below, we dig deeper into each of these projects.&lt;/p&gt;

&lt;!--more--&gt;

&lt;h1 id=&quot;the-chamber-ensemble-generator&quot;&gt;The Chamber Ensemble Generator&lt;/h1&gt;

&lt;figure style=&quot;text-align: center;&quot;&gt;
  &lt;img src=&quot;assets/cocochorales/hero_diagram.png&quot; style=&quot;width: 100%; height: auto; margin: auto&quot; alt=&quot;Overview image of the Chamber Ensemble Generator.&quot; /&gt;
&lt;/figure&gt;

&lt;p&gt;As we mentioned, the Chamber Ensemble Generator (CEG) is a set of two structured generative models that work together to create new chamber ensemble performances of four-part &lt;a href=&quot;https://en.wikipedia.org/wiki/Chorale&quot;&gt;chorales&lt;/a&gt; in the style of &lt;a href=&quot;https://en.wikipedia.org/wiki/List_of_chorale_harmonisations_by_Johann_Sebastian_Bach&quot;&gt;J.S. Bach&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;As seen in the figure above, the constituent models in the CEG are two previous Magenta models: &lt;a href=&quot;https://magenta.withgoogle.com/coconet&quot;&gt;Coconet&lt;/a&gt; and &lt;a href=&quot;https://magenta.withgoogle.com/midi-ddsp&quot;&gt;MIDI-DDSP&lt;/a&gt;. Coconet is a generative model of notes, creating a set of four-instrument music pieces (“note sequences”), harmonized in the style of a Bach Chorale. Each of these four note sequences is then individually synthesized by MIDI-DDSP. MIDI-DDSP is a generative synthesis model that uses &lt;a href=&quot;https://magenta.withgoogle.com/ddsp&quot;&gt;Differentiable Digital Signal Processing (DDSP)&lt;/a&gt; to turn note sequences into realistic audio that can sound like a number of different instruments (e.g., violin, bassoon, or french horn).&lt;/p&gt;
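
&lt;p&gt;Conceptually (our sketch with hypothetical names, not the actual library calls), the CEG chains these two stages together:&lt;/p&gt;

&lt;pre&gt;&lt;code class=&quot;language-javascript&quot;&gt;
// Stage 1: Coconet samples a four-part chorale as note sequences.
const parts = coconet.sample(); // [soprano, alto, tenor, bass]

// Stage 2: MIDI-DDSP renders each part with a chosen instrument.
const instruments = [&apos;violin&apos;, &apos;violin&apos;, &apos;viola&apos;, &apos;cello&apos;];
const stems = parts.map((notes, i) =&gt;
  midiDdsp.synthesize(notes, instruments[i]));

// The intermediate representations (MIDI, f0 curves, per-note expression)
// double as annotations; summing the stems gives the audio mixture.
const mix = sumWaveforms(stems);
&lt;/code&gt;&lt;/pre&gt;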

&lt;p&gt;It’s important to note that the CEG is built on &lt;em&gt;structured&lt;/em&gt; generative models, i.e., models that have interpretable intermediate representations. On the one hand, this structure leads to a very opinionated view of music: the CEG is limited in ways that other generative music models are not. It cannot generate all styles of music (a rock-and-roll ensemble, for example); it can only generate chorales. On the other hand, many generative music models are notoriously “black boxes” whose internal structures are difficult to interpret. Because the CEG is built on a modular set of structured models, its internals are easy to understand and modify. This also allows us to create a dataset with many types of annotations that would be tedious or impossible to acquire with other kinds of generative models (such as annotations of the velocity and vibrato applied to each individual note in a performance). In the next section, we will showcase how these interpretable structures can be used to mitigate the biases of these generative models.&lt;/p&gt;

&lt;h1 id=&quot;the-cocochorales-dataset&quot;&gt;The CocoChorales Dataset&lt;/h1&gt;

&lt;p&gt;CocoChorales is a dataset of 240,000 examples totaling over 1,400 hours of mixture data. We created CocoChorales by sampling from the CEG’s two constituent generative models, Coconet and MIDI-DDSP. Using the CEG in this way is an example of dataset “amplification,” whereby a generative model trained on a small dataset is used to produce a much larger dataset. In this case, we are amplifying two very small datasets: Coconet is trained on the &lt;a href=&quot;https://github.com/czhuang/JSB-Chorales-dataset&quot;&gt;J.S. Bach Chorales Dataset&lt;/a&gt;, which contains 382 examples, and MIDI-DDSP is trained on &lt;a href=&quot;https://labsites.rochester.edu/air/projects/URMP.html&quot;&gt;URMP&lt;/a&gt;, which contains only 44 examples. But using the CEG, we were able to generate 240,000 examples!&lt;/p&gt;
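
&lt;p&gt;In code terms, amplification is just repeated sampling. Continuing the hypothetical &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;generate_example&lt;/code&gt; sketch from above (and assuming a placeholder &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;save_example&lt;/code&gt; writer), the loop might look like this:&lt;/p&gt;

&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;# Hypothetical amplification loop: two small training sets (Bach chorales
# and URMP) are turned into a much larger set of fully annotated examples
# simply by sampling the CEG many times.
NUM_EXAMPLES = 240_000

for i in range(NUM_EXAMPLES):
    mixture, stems, note_sequences = generate_example(coconet, midi_ddsp)
    save_example(i, mixture, stems, note_sequences)  # placeholder writer
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;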

&lt;p&gt;CocoChorales has examples performed by 13 different instruments (violin, viola, cello, double bass, flute, oboe, clarinet, bassoon, saxophone, trumpet, French horn, trombone, and tuba) organized into 4 different types of ensembles: a string ensemble, a brass ensemble, a woodwind ensemble, and a random ensemble (see the &lt;a href=&quot;https://magenta.withgoogle.com/datasets/cocochorales&quot;&gt;CocoChorales dataset page&lt;/a&gt; for more info). Each example contains an audio mixture, audio for each source, aligned MIDI, instrument labels, fundamental frequency (f&lt;sub&gt;0&lt;/sub&gt;) for each instrument, notewise performance characteristics (e.g., the vibrato, loudness, and brightness of each note), and raw synthesis parameters.&lt;/p&gt;
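
&lt;p&gt;As a rough illustration of how these annotations fit together, the snippet below sketches what a single example contains. The field names are descriptive placeholders rather than the actual file layout; the dataset page and the data pipeline instructions linked below document the real formats.&lt;/p&gt;

&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;# Illustrative (not literal) contents of one CocoChorales example from
# the woodwind ensemble; field names are placeholders for orientation.
example = {
    'mix': 'mixture audio (wav)',
    'stems': {
        'flute': 'soprano stem audio (wav)',
        'oboe': 'alto stem audio (wav)',
        'clarinet': 'tenor stem audio (wav)',
        'bassoon': 'bass stem audio (wav)',
    },
    'midi': 'note-aligned MIDI for all four parts',
    'f0': 'per-instrument fundamental-frequency curves',
    'note_expression': 'notewise vibrato, loudness, brightness, ...',
    'synthesis_params': 'raw DDSP synthesis parameters',
}
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;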

&lt;figure style=&quot;text-align: center;&quot;&gt;
  &lt;img src=&quot;assets/cocochorales/f0_distributions.png&quot; style=&quot;width: 50%; height: auto; margin: auto&quot; alt=&quot;Fundamental frequencies (f0&apos;s) histograms showing that we are able to correct for a bias in the model.&quot; /&gt;
&lt;/figure&gt;

&lt;p&gt;What’s cool about using the structured models in the CEG is that, because the system is modular, it is easy to interpret the output of each intermediate step. For example, the MIDI-DDSP model we used tended to produce performances that were out of tune and skewed sharp (i.e., the frequency of a note being played was often slightly higher than the “proper” tuned frequency of that note, according to a &lt;a href=&quot;https://en.wikipedia.org/wiki/12_equal_temperament&quot;&gt;12-TET scale&lt;/a&gt;). This is visualized by the orange histogram in the image above (labeled “w/o pitch aug”), which shows how far each note is from being in tune, measured once every 4 ms (here, 0.0 means perfectly “in tune”). We were able to correct for this systematic bias by directly adjusting the f&lt;sub&gt;0&lt;/sub&gt; curves output by the synthesis generation module of MIDI-DDSP, as shown by the blue histogram (labeled “w/ pitch aug”), whose distribution is much more tightly centered on 0.0. This level of control is hard to achieve with black-box generative models, and it is a big reason why we’re excited about building the CEG from structured models.&lt;/p&gt;
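
&lt;p&gt;As a rough sketch of what this kind of correction can look like, the snippet below measures each f&lt;sub&gt;0&lt;/sub&gt; sample’s deviation from the nearest 12-TET pitch and shifts the whole curve so that the mean deviation is zero. This is a simplified illustration of the idea, not the exact pitch-augmentation code used to build CocoChorales.&lt;/p&gt;

&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;import numpy as np

A4_HZ = 440.0

def cents_from_12tet(f0_hz):
    # Deviation (in cents) of each voiced f0 sample from the nearest
    # 12-tone-equal-temperament pitch, relative to A4 = 440 Hz.
    semitones = 12.0 * np.log2(f0_hz / A4_HZ)
    return 100.0 * (semitones - np.round(semitones))

def recenter_f0(f0_hz):
    # Shift the whole curve so its mean deviation from 12-TET is zero,
    # counteracting a systematic sharp bias in the synthesized pitch.
    # Assumes f0_hz contains only voiced (nonzero) frequency values.
    mean_cents = np.mean(cents_from_12tet(f0_hz))
    return f0_hz * 2.0 ** (-mean_cents / 1200.0)
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;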

&lt;h1 id=&quot;downloading-the-dataset&quot;&gt;Downloading the Dataset&lt;/h1&gt;

&lt;p&gt;We’re really excited to see what the research community can do with the CocoChorales dataset. Further details on the dataset can be found &lt;a href=&quot;https://magenta.withgoogle.com/datasets/cocochorales&quot;&gt;here&lt;/a&gt;, and instructions for downloading it are at &lt;a href=&quot;https://github.com/lukewys/chamber-ensemble-generator#dataset-download&quot;&gt;this GitHub link&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;If you want to learn more about either project, please see our &lt;a href=&quot;https://arxiv.org/abs/2209.14458&quot;&gt;arXiv paper&lt;/a&gt;. The code for the Chamber Ensemble Generator is available &lt;a href=&quot;https://github.com/lukewys/chamber-ensemble-generator&quot;&gt;here&lt;/a&gt;, and usage instructions are &lt;a href=&quot;https://github.com/lukewys/chamber-ensemble-generator/blob/master/data_pipeline.md&quot;&gt;here&lt;/a&gt;. If you use the Chamber Ensemble Generator or CocoChorales in a research publication, we kindly ask that you cite it with the following BibTeX entry:&lt;/p&gt;

&lt;div class=&quot;language-plaintext highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;@article{wu2022chamber,
  title = {The Chamber Ensemble Generator: Limitless High-Quality MIR Data via Generative Modeling},
  author = {Wu, Yusong and Gardner, Josh and Manilow, Ethan and Simon, Ian and Hawthorne, Curtis and Engel, Jesse},
  journal = {arXiv preprint arXiv:2209.14458},
  year = {2022},
}
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
</description>
        <pubDate>Fri, 30 Sep 2022 09:00:00 -0700</pubDate>
        <link>https://magenta.withgoogle.com/ceg-and-cocochorales</link>
        <guid isPermaLink="true">https://magenta.withgoogle.com/ceg-and-cocochorales</guid>
        
        <category>chamber-ensemble-generator</category>
        
        <category>cocochorales</category>
        
        <category>coconet</category>
        
        <category>midi-ddsp</category>
        
        <category>dataset</category>
        
        
        <category>blog</category>
        
      </item>
    
  </channel>
</rss>
