About OpenAI Realtime Blocks

OpenAI Realtime Blocks is a collection of components and styles that can be used to build web applications on top of the OpenAI Realtime API. It is designed to be simple and easy to integrate into your ReactJS and NextJS projects.

Features

  • Simple: OpenAI Realtime Blocks is designed to be simple and easy to use. It is built on top of TailwindCSS, which makes it easy to integrate into your projects.
  • Customizable: OpenAI Realtime Blocks is highly customizable. You can easily change the colors, fonts, and other styles to match your brand.
  • Responsive: OpenAI Realtime Blocks is designed to be responsive. It works on all devices, from mobile to desktop.
  • Open Source: OpenAI Realtime Blocks is open source. You can use it for free in your personal and commercial projects and contribute to its development.

Installation

You only need to install the dependencies listed below and import the components that you want to use in your project.

Create the WebRTC Hook

Add this to your project, for example in a @/hooks/use-webrtc.ts file.

//hooks/use-webrtc.ts
"use client";
 
import { useState, useRef, useEffect } from "react";
import { Tool } from "@/lib/tools";
 
const useWebRTCAudioSession = (voice: string, tools?: Tool[]) => {
  const [status, setStatus] = useState("");
  const [isSessionActive, setIsSessionActive] = useState(false);
  const audioIndicatorRef = useRef<HTMLDivElement | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const audioStreamRef = useRef<MediaStream | null>(null);
  const peerConnectionRef = useRef<RTCPeerConnection | null>(null);
  const dataChannelRef = useRef<RTCDataChannel | null>(null);
  const [msgs, setMsgs] = useState<any[]>([]);
  // Add function registry
  const functionRegistry = useRef<Record<string, Function>>({});
  const [currentVolume, setCurrentVolume] = useState(0);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const volumeIntervalRef = useRef<number | null>(null);
 
  // Add method to register tool functions
  const registerFunction = (name: string, fn: Function) => {
    functionRegistry.current[name] = fn;
  };
 
  // Add data channel configuration
  const configureDataChannel = (dataChannel: RTCDataChannel) => {
    const sessionUpdate = {
      type: 'session.update',
      session: {
        modalities: ['text', 'audio'],
        tools: tools || []
      }
    };
 
    dataChannel.send(JSON.stringify(sessionUpdate));
  };
 
  // Add data channel message handler
  const handleDataChannelMessage = async (event: MessageEvent) => {
    try {
      const msg = JSON.parse(event.data);
      if (msg.type === 'response.function_call_arguments.done') {
        const fn = functionRegistry.current[msg.name];
        if (fn) {
          const args = JSON.parse(msg.arguments);
          const result = await fn(args);
 
          const response = {
            type: 'conversation.item.create',
            item: {
              type: 'function_call_output',
              call_id: msg.call_id,
              output: JSON.stringify(result)
            }
          };
 
          dataChannelRef.current?.send(JSON.stringify(response));
        }
      }
      setMsgs(prevMsgs => [...prevMsgs, msg]);
      return msg;
    } catch (error) {
      console.error('Error handling data channel message:', error);
    }
  };
 
  useEffect(() => {
    return () => stopSession();
  }, []);
 
  const getEphemeralToken = async () => {
    const response = await fetch('/api/session', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
    });
    const data = await response.json();
    return data.client_secret.value;
  };
 
  const setupAudioVisualization = (stream: MediaStream) => {
    const audioContext = new AudioContext();
    const source = audioContext.createMediaStreamSource(stream);
    const analyzer = audioContext.createAnalyser();
    analyzer.fftSize = 256;
 
    source.connect(analyzer);
 
    const bufferLength = analyzer.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);
 
    const updateIndicator = () => {
      // Stop the animation loop once stopSession() has closed the audio context
      if (audioContext.state === "closed") return;
 
      analyzer.getByteFrequencyData(dataArray);
      const average = dataArray.reduce((a, b) => a + b) / bufferLength;
 
      if (audioIndicatorRef.current) {
        audioIndicatorRef.current.classList.toggle("active", average > 30);
      }
 
      requestAnimationFrame(updateIndicator);
    };
 
    updateIndicator();
    audioContextRef.current = audioContext;
  };
 
  const getVolume = (): number => {
    if (!analyserRef.current) return 0;
 
    const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
    analyserRef.current.getByteTimeDomainData(dataArray);
 
    // Calculate RMS (Root Mean Square)
    let sum = 0;
    for (let i = 0; i < dataArray.length; i++) {
      const float = (dataArray[i] - 128) / 128;
      sum += float * float;
    }
    
    return Math.sqrt(sum / dataArray.length);
  };
 
  const startSession = async () => {
    try {
      setStatus("Requesting microphone access...");
 
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      audioStreamRef.current = stream;
      setupAudioVisualization(stream);
 
      setStatus("Fetching ephemeral token...");
      const ephemeralToken = await getEphemeralToken();
 
      setStatus("Establishing connection...");
 
      const pc = new RTCPeerConnection();
      const audioEl = document.createElement("audio");
      audioEl.autoplay = true;
      
      pc.ontrack = (e) => {
        audioEl.srcObject = e.streams[0];
        
        // Set up audio analysis
        const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
        const source = audioContext.createMediaStreamSource(e.streams[0]);
        const analyser = audioContext.createAnalyser();
        analyser.fftSize = 256;
        
        source.connect(analyser);
        analyserRef.current = analyser;
 
        // Start volume monitoring
        volumeIntervalRef.current = window.setInterval(() => {
          const volume = getVolume();
          setCurrentVolume(volume);
          
          // Optional: Log when speech is detected
          if (volume > 0.1) {
            console.log('Speech detected with volume:', volume);
          }
        }, 100);
      };
 
      // Add data channel
      const dataChannel = pc.createDataChannel('response');
      dataChannelRef.current = dataChannel;
 
      dataChannel.onopen = () => {
        configureDataChannel(dataChannel);
      };
 
      dataChannel.onmessage = handleDataChannelMessage;
 
      pc.addTrack(stream.getTracks()[0]);
 
      const offer = await pc.createOffer();
      await pc.setLocalDescription(offer);
 
      const baseUrl = "https://api.openai.com/v1/realtime";
      const model = "gpt-4o-realtime-preview-2024-12-17";
      const response = await fetch(`${baseUrl}?model=${model}&voice=${voice}`, {
        method: "POST",
        body: offer.sdp,
        headers: {
          Authorization: `Bearer ${ephemeralToken}`,
          "Content-Type": "application/sdp",
        },
      });
 
      await pc.setRemoteDescription({
        type: "answer",
        sdp: await response.text(),
      });
 
      peerConnectionRef.current = pc;
      setIsSessionActive(true);
      setStatus("Session established successfully!");
    } catch (err) {
      console.error(err);
      setStatus(`Error: ${err}`);
      stopSession();
    }
  };
 
  const stopSession = () => {
    if (dataChannelRef.current) {
      dataChannelRef.current.close();
      dataChannelRef.current = null;
    }
 
    if (peerConnectionRef.current) {
      peerConnectionRef.current.close();
      peerConnectionRef.current = null;
    }
 
    if (audioContextRef.current) {
      audioContextRef.current.close();
      audioContextRef.current = null;
    }
 
    if (audioStreamRef.current) {
      audioStreamRef.current.getTracks().forEach((track) => track.stop());
      audioStreamRef.current = null;
    }
 
    if (audioIndicatorRef.current) {
      audioIndicatorRef.current.classList.remove("active");
    }
 
    if (volumeIntervalRef.current) {
      clearInterval(volumeIntervalRef.current);
      volumeIntervalRef.current = null;
    }
    
    analyserRef.current = null;
    
    setCurrentVolume(0);
    setIsSessionActive(false);
    setStatus("");
    setMsgs([]);
  };
 
  const handleStartStopClick = () => {
    if (isSessionActive) {
      stopSession();
    } else {
      startSession();
    }
  };
 
  return {
    status,
    isSessionActive,
    audioIndicatorRef,
    startSession,
    stopSession,
    handleStartStopClick,
    registerFunction,
    msgs,
    currentVolume
  };
};
 
export default useWebRTCAudioSession;
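
The hook imports a Tool type from @/lib/tools, which is not included above. A minimal definition, consistent with the tools array the hook sends in its session.update message, might look like the following sketch (the exact schema is an assumption, so check the OpenAI Realtime docs for the full shape):

//lib/tools.ts (minimal sketch)
export interface Tool {
  type: "function";
  name: string;
  description?: string;
  // JSON Schema describing the arguments the model should pass to your function
  parameters?: {
    type: "object";
    properties: Record<string, { type: string; description?: string }>;
    required?: string[];
  };
}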
 

Note: the hook above is just a sample showing only the parts you need to get started. Be sure to check out the OpenAI Realtime API (Beta) docs to learn more about its capabilities.
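
The getEphemeralToken call in the hook expects a backend route at /api/session that mints a short-lived client secret, so your real API key never reaches the browser. Below is a minimal sketch of such a route, assuming the NextJS App Router and the documented POST https://api.openai.com/v1/realtime/sessions endpoint; the model and voice values are only examples.

//app/api/session/route.ts (minimal sketch, assuming NextJS App Router)
import { NextResponse } from "next/server";

export async function POST() {
  try {
    // Create an ephemeral Realtime session using the server-side API key
    const response = await fetch("https://api.openai.com/v1/realtime/sessions", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-4o-realtime-preview-2024-12-17",
        voice: "alloy",
      }),
    });

    if (!response.ok) {
      throw new Error(`OpenAI responded with status ${response.status}`);
    }

    // The JSON payload contains client_secret.value, which the hook reads
    const data = await response.json();
    return NextResponse.json(data);
  } catch (error) {
    console.error("Failed to create Realtime session:", error);
    return NextResponse.json({ error: "Failed to create session" }, { status: 500 });
  }
}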

Dependencies:

  • ReactJS: OpenAI Realtime Blocks is built on top of ReactJS, so your project needs to have ReactJS installed; for example, you can use it with NextJS, Astro, or Create React App.
  • TailwindCSS: OpenAI Realtime Blocks uses TailwindCSS for styling, so you need to have TailwindCSS installed in your project.
  • WebRTC: OpenAI Realtime Blocks uses WebRTC for real-time audio communication. WebRTC is a built-in browser API, so there is nothing extra to install, but your app must run in a browser that supports it (all modern browsers do).
  • Framer Motion: OpenAI Realtime Blocks uses Framer Motion for animations, so you need to have Framer Motion installed in your project (see the usage sketch after this list).
  • Additional dependencies may be required depending on the components you use.
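
As a quick illustration of how these pieces fit together, here is a usage sketch of a component that consumes the hook, uses TailwindCSS classes for layout, and drives a simple Framer Motion animation from currentVolume. The component name and styling are illustrative, not part of the library.

//components/voice-demo.tsx (illustrative usage sketch)
"use client";

import { motion } from "framer-motion";
import useWebRTCAudioSession from "@/hooks/use-webrtc";

export default function VoiceDemo() {
  const { status, isSessionActive, handleStartStopClick, currentVolume } =
    useWebRTCAudioSession("alloy");

  return (
    <div className="flex flex-col items-center gap-4 p-8">
      {/* The circle pulses with the assistant's output volume */}
      <motion.div
        className="h-16 w-16 rounded-full bg-blue-500"
        animate={{ scale: 1 + currentVolume * 2 }}
        transition={{ duration: 0.1 }}
      />
      <button
        onClick={handleStartStopClick}
        className="rounded-md bg-black px-4 py-2 text-white"
      >
        {isSessionActive ? "End session" : "Start session"}
      </button>
      <p className="text-sm text-gray-500">{status}</p>
    </div>
  );
}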

Credits

OpenAI Realtime Blocks is inspired by other libraries like shadcn/ui, Aceternity, and MagicUI, so I want to give them credit for their work and inspiration. I also want to thank @gonzalochale/chonza for their amazing work and for providing the component library template that this project customizes.