import { getFeatureSettingNumber } from "@glide/common-core";
import { isDefined } from "@glide/support";
import { assert } from "@glideapps/ts-necessities";
import * as React from "react";

// Placeholder stored in `noiseFloor` until calibration overwrites it. Speech is never flagged
// before calibration completes, so this value does not take part in any speech decision.
// NOTE(review): -70 reads like a dB figure, but calibrated floors are averages of byte-scaled
// (0-255) analyser readings — confirm the intended unit.
const NOISE_FLOOR_DEFAULT = -70;
// Number of energy readings (one is taken per animation frame) that must be collected
// before the noise floor is estimated from them.
const MIN_CALIBRATION_SAMPLES = 30;
// Margin added to the noise floor: energy must exceed `noiseFloor + SILENCE_THRESHOLD_OFFSET`
// to count as speech.
const SILENCE_THRESHOLD_OFFSET = 15;

/**
 * Mutable bookkeeping for the silence detector. It is kept in a ref, so changing
 * any of these fields never triggers a re-render.
 */
interface SilenceDetectionState {
    /** Analyser the frequency data is read from; `null` until `setupAnalyser` runs and again after cleanup. */
    analyserNode: AnalyserNode | null;
    /** Audio context that owns the analysis graph; closed and set to `null` on cleanup. */
    audioContext: AudioContext | null;
    /** Handle returned by the most recent `requestAnimationFrame` call, or `null` once cancelled. */
    animationFrame: number | null;
    /** Animation-frame timestamp at which the current run of silence began; `null` when no run is being timed. */
    silenceStartTime: number | null;
    /** Energy level treated as background noise; replaced by the calibrated estimate once enough samples exist. */
    noiseFloor: number;
    /** Whether `noiseFloor` has been estimated from `calibrationSamples`. */
    calibrationComplete: boolean;
    /** Energy readings gathered while calibration is still in progress. */
    calibrationSamples: number[];
    /** Set once speech has been detected; silence is only timed while this is `true`. */
    startedSpeaking: boolean;
}

/** Inputs of the `useDetectSilence` hook. */
interface UseDetectSilenceProps {
    /** The detection loop only runs while this is `true`; when it is `false` the audio resources are released. */
    readonly isRecording: boolean;
    /**
     * Called when, after speech was detected, the signal has stayed below the speech threshold
     * for at least the configured silence duration. Passing `undefined` disables detection.
     */
    readonly onSilenceDetected: (() => void) | undefined;
}

/** Handles returned by the `useDetectSilence` hook. */
interface SilenceDetectionControls {
    /**
     * Builds the audio analysis graph for `stream` and resets calibration.
     * The hook's effect only checks for the analyser when it runs (it is stored in a ref and is
     * not an effect dependency), so call this before `isRecording` becomes `true`.
     */
    readonly setupAnalyser: (stream: MediaStream) => void;
}

/** Lower edge, in Hz, of the band whose energy is treated as speech. */
const SPEECH_BAND_MIN_HZ = 300;
/** Upper edge, in Hz, of the band whose energy is treated as speech. */
const SPEECH_BAND_MAX_HZ = 3400;
/** Position, as a fraction of the sorted calibration samples, of the sample used as the noise floor. */
const NOISE_FLOOR_PERCENTILE = 0.3;

/**
 * Maps the speech band onto inclusive FFT bin indices for `analyserNode`.
 *
 * Each bin spans `sampleRate / fftSize` Hz, so the indices depend on the rate the audio
 * context actually runs at. At 44.1 kHz with an FFT size of 1024 this yields bins 7..79.
 * Both indices are clamped to the analyser's bin range.
 */
function getSpeechBandBins(analyserNode: AnalyserNode): readonly [number, number] {
    const binWidthHz = analyserNode.context.sampleRate / analyserNode.fftSize;
    const lastBin = analyserNode.frequencyBinCount - 1;
    const clamp = (bin: number): number => Math.min(lastBin, Math.max(0, bin));

    const minBin = clamp(Math.round(SPEECH_BAND_MIN_HZ / binWidthHz));
    const maxBin = Math.max(minBin, clamp(Math.round(SPEECH_BAND_MAX_HZ / binWidthHz)));
    return [minBin, maxBin];
}

/** Averages the byte-scaled magnitudes of bins `minBin..maxBin` (both inclusive). */
function averageBinEnergy(frequencyData: Uint8Array, minBin: number, maxBin: number): number {
    let sum = 0;
    for (let bin = minBin; bin <= maxBin; bin++) {
        sum += frequencyData[bin];
    }
    return sum / (maxBin - minBin + 1);
}

/** Picks the noise floor from the calibration samples: the value at the 30% position once sorted ascending. */
function estimateNoiseFloor(samples: readonly number[]): number {
    const sorted = [...samples].sort((a, b) => a - b);
    return sorted[Math.floor(sorted.length * NOISE_FLOOR_PERCENTILE)];
}

/**
 * Detects when the user stops talking while recording.
 *
 * The required silence duration comes from the `silenceDetectionTimeMS` feature setting;
 * when it is missing or not positive, detection is disabled. Once per animation frame the
 * hook averages the analyser's magnitudes over the speech band. The first
 * `MIN_CALIBRATION_SAMPLES` readings calibrate a noise floor. After that, a reading above
 * `noiseFloor + SILENCE_THRESHOLD_OFFSET` counts as speech. When speech has been seen and the
 * signal then stays below the threshold for the configured duration, `onSilenceDetected` is
 * called and the detector waits for speech to start again before it can fire once more.
 *
 * The analyser lives in a ref, so `setupAnalyser` must be called before `isRecording`
 * becomes `true`; otherwise the effect finds no analyser and does not start the loop.
 *
 * @param isRecording Detection only runs while this is `true`.
 * @param onSilenceDetected Callback invoked on silence; `undefined` disables detection.
 * @returns `setupAnalyser`, which wires a `MediaStream` into the analysis graph.
 */
export function useDetectSilence({ isRecording, onSilenceDetected }: UseDetectSilenceProps): SilenceDetectionControls {
    const silenceDuration = getFeatureSettingNumber("silenceDetectionTimeMS") ?? 0;
    const shouldDetectSilence = silenceDuration > 0;

    const state = React.useRef<SilenceDetectionState>({
        analyserNode: null,
        audioContext: null,
        animationFrame: null,
        silenceStartTime: null,
        noiseFloor: NOISE_FLOOR_DEFAULT,
        calibrationComplete: false,
        calibrationSamples: [],
        startedSpeaking: false,
    });

    // Stops the loop, closes the audio context and returns every field to its initial value.
    const cleanupResources = React.useCallback(() => {
        const current = state.current;

        if (isDefined(current.animationFrame)) {
            cancelAnimationFrame(current.animationFrame);
            current.animationFrame = null;
        }

        if (isDefined(current.audioContext)) {
            void current.audioContext.close();
            current.audioContext = null;
        }

        current.analyserNode = null;
        current.silenceStartTime = null;
        current.noiseFloor = NOISE_FLOOR_DEFAULT;
        current.calibrationComplete = false;
        current.calibrationSamples = [];
        current.startedSpeaking = false;
    }, []);

    const setupAnalyser = React.useCallback((stream: MediaStream): void => {
        const current = state.current;
        const previousContext = current.audioContext;

        const audioContext = new AudioContext();
        const analyserNode = audioContext.createAnalyser();
        const source = audioContext.createMediaStreamSource(stream);

        const bandpassFilter = audioContext.createBiquadFilter();
        bandpassFilter.type = "bandpass";
        bandpassFilter.frequency.value = 1700; // Center frequency
        bandpassFilter.Q.value = 0.5; // Wider bandwidth

        source.connect(bandpassFilter);
        bandpassFilter.connect(analyserNode);

        analyserNode.fftSize = 1024; // Smaller FFT size for faster processing
        analyserNode.minDecibels = -90;
        analyserNode.maxDecibels = -10;
        analyserNode.smoothingTimeConstant = 0.5; // Less smoothing for faster response

        current.audioContext = audioContext;
        current.analyserNode = analyserNode;
        current.silenceStartTime = null;
        current.noiseFloor = NOISE_FLOOR_DEFAULT;
        current.calibrationComplete = false;
        current.calibrationSamples = [];
        // A new stream is recalibrated from scratch, so any speech seen on the old one no longer counts.
        current.startedSpeaking = false;

        // Release the graph from an earlier call. It is closed only after the new graph is
        // installed, so a failure while building the new one leaves the old state untouched.
        if (isDefined(previousContext)) {
            void previousContext.close();
        }
    }, []);

    const detectSilence = React.useCallback(
        (time: number) => {
            const current = state.current;
            const { analyserNode } = current;

            assert(isDefined(analyserNode) && isDefined(onSilenceDetected), "Invalid state for silence detection");

            const frequencyData = new Uint8Array(analyserNode.frequencyBinCount);
            analyserNode.getByteFrequencyData(frequencyData);

            const [minBin, maxBin] = getSpeechBandBins(analyserNode);
            const speechEnergy = averageBinEnergy(frequencyData, minBin, maxBin);

            if (!current.calibrationComplete) {
                current.calibrationSamples.push(speechEnergy);
                if (current.calibrationSamples.length >= MIN_CALIBRATION_SAMPLES) {
                    current.noiseFloor = estimateNoiseFloor(current.calibrationSamples);
                    current.calibrationComplete = true;
                }
            }

            const effectiveThreshold = current.noiseFloor + SILENCE_THRESHOLD_OFFSET;

            // Don't consider that we're "speaking" if we didn't yet calibrate the noise floor
            // otherwise we'll always be "speaking" at the start.
            const isSpeaking = current.calibrationComplete && speechEnergy > effectiveThreshold;

            // We only want to detect silence _after_ you started speaking.
            // otherwise if you open the mic and never talk we'll keep sending partial transcripts.
            if (isSpeaking) {
                current.startedSpeaking = true;
            }

            let silenceDetected = false;
            if (current.startedSpeaking && !isSpeaking) {
                if (!isDefined(current.silenceStartTime)) {
                    current.silenceStartTime = time;
                } else if (time - current.silenceStartTime >= silenceDuration) {
                    // After reporting silence we wait for the user to start speaking again.
                    current.startedSpeaking = false;
                    current.silenceStartTime = null;
                    silenceDetected = true;
                }
            } else {
                current.silenceStartTime = null;
            }

            // Schedule the next frame BEFORE notifying. The callback may synchronously trigger
            // the effect cleanup or `cleanupResources`, both of which cancel the handle stored
            // here. Scheduling afterwards could leave an uncancellable frame running against
            // released resources.
            current.animationFrame = requestAnimationFrame(detectSilence);

            if (silenceDetected) {
                onSilenceDetected();
            }
        },
        [onSilenceDetected, silenceDuration]
    );

    React.useEffect(() => {
        if (
            !isRecording ||
            !isDefined(state.current.analyserNode) ||
            !isDefined(onSilenceDetected) ||
            !shouldDetectSilence
        ) {
            cleanupResources();
            return;
        }

        const stateRef = state.current;
        stateRef.animationFrame = requestAnimationFrame(detectSilence);

        return () => {
            if (isDefined(stateRef.animationFrame)) {
                cancelAnimationFrame(stateRef.animationFrame);
                stateRef.animationFrame = null;
            }
        };
    }, [isRecording, cleanupResources, detectSilence, onSilenceDetected, shouldDetectSilence]);

    // Release everything when the component using the hook unmounts.
    React.useEffect(() => {
        return () => {
            cleanupResources();
        };
    }, [cleanupResources]);

    return { setupAnalyser };
}
