import * as sdk from "microsoft-cognitiveservices-speech-sdk";

import { recordingSettings, speechConfigSettings } from "@/settings";
import Storage from "@/core/utils/LocalStorage";
import { Pause, TranscriptionSegment } from "@/core/models";
import LocalStorage from "@/core/utils/LocalStorage";

/**
 * Live speech-to-text session built on the Microsoft Cognitive Services
 * Speech SDK.
 *
 * A session is opened with {@link LiveTranscription.start}, which wires the
 * recognizer events to the instance state (`text`, `isTranscribing`,
 * `duration`, ...) and to a per-result callback, and is closed with
 * {@link LiveTranscription.stop}, which resolves with the finalized segments
 * and the pauses detected between them.
 */
export class LiveTranscription {
  /** Finalized sentences (each followed by "\r\n") plus the current interim hypothesis. */
  text = "";
  /** True while the latest result was not reported as `RecognitionStatus: "Success"`. */
  isTranscribing = false;
  /** True once continuous recognition has been started successfully, false after `stop()`. */
  running = false;
  /** Elapsed session time, advanced by 100 every 100 ms while not paused. */
  duration = 0;
  /** Sum of the durations of all finalized segments of the current session. */
  recognizedDuration = 0;
  /** Reset by `start()`; not otherwise used inside this class. */
  offset = 0;
  /** Reset by `start()`; not otherwise used inside this class. */
  durations: { duration: number; offset: number }[] = [];
  /** Handle of the interval that advances `duration`. */
  durationInterval: any;
  /** Set by `pause()` / cleared by `resume()`, `start()` and `stop()`. */
  isPaused = false;
  /** Set when the SDK reports end of speech, cleared when a session starts. */
  lastCallback = false;

  lang = LocalStorage.getLocale();

  private segments: TranscriptionSegment[] = [];
  private pauses: Pause[] = [];
  private recognizer: sdk.SpeechRecognizer | null = null;
  private speechConfig: sdk.SpeechConfig;
  private readonly onDoneCallback: () => void = () => {};

  //private interimText = ""; // interim results from `onRecognizing`

  /**
   * @param cb Invoked after a running recognizer has been stopped by `stop()`.
   */
  constructor(cb: () => void = () => {}) {
    this.onDoneCallback = cb;

    this.speechConfig = sdk.SpeechConfig.fromEndpoint(
      new URL(speechConfigSettings.speechServiceEndpoint),
      "",
    );
  }

  /**
   * Starts a new continuous-recognition session.
   *
   * @param stream Forwarded to `setup()`, which currently ignores it.
   * @param lang Forwarded to the configuration step, which currently ignores
   *   it (the language is taken from `speechConfigSettings`).
   * @param device Microphone device id; the default microphone is used when omitted.
   * @param cb Called for every usable interim or final result with the
   *   current text, the transcribing flag, the most recent finalized segment
   *   (if any) and the pauses collected so far.
   */
  async start(
    stream?: MediaStream,
    lang: string = "en-US",
    device?: string,
    cb: (
      text: string,
      isTranscribing: boolean,
      segment?: TranscriptionSegment,
      pauses?: Pause[],
    ) => void = () => {},
  ): Promise<void> {
    // A second start() without stop() would otherwise leave the previous
    // recognizer alive, with its handlers still mutating this instance.
    if (this.recognizer) {
      this.recognizer.close();
      this.recognizer = null;
    }

    this.segments = [];
    this.pauses = [];
    this.duration = 0;
    this.recognizedDuration = 0;
    this.offset = 0;
    this.durations = [];
    // A stale pause flag from an earlier session would freeze `duration`.
    this.isPaused = false;

    const recognizer = this.setup(stream, lang, device);
    this.recognizer = recognizer;

    this.restartDurationTimer();

    let lastRecognized = "";
    let lastSegment: TranscriptionSegment | undefined;

    recognizer.sessionStarted = (_s, e) => {
      console.log("(sessionStarted) SessionId: " + e.sessionId);
      this.lastCallback = false;
      this.text = "";
      this.restartDurationTimer();
    };
    recognizer.sessionStopped = (_s, e) => {
      console.log("(sessionStopped) SessionId: " + e.sessionId);
      this.text = "";
      clearInterval(this.durationInterval);
    };

    recognizer.speechStartDetected = (_s, _e) => {};
    recognizer.speechEndDetected = (_s, _e) => {
      this.lastCallback = true;
    };

    recognizer.canceled = (_s, e) => {
      console.error(`Recognition canceled: ${e.errorDetails}`);
    };

    // Interim hypotheses and final results share one handler; they are told
    // apart by the result reason.
    recognizer.recognizing = recognizer.recognized = (_s, e) => {
      this.isTranscribing = true;

      const rawResponse = e.result.properties?.getProperty(
        sdk.PropertyId.SpeechServiceResponse_JsonResult,
      );
      if (!e.result.text || !rawResponse) {
        console.log("No valid recognition result received.");
        return;
      }

      // Text-only callback before the first sentence recognition
      if (!this.segments.length) cb(this.text, this.isTranscribing);

      // Parsed once and defensively: a malformed payload must not throw
      // inside the SDK event callback.
      const payload = LiveTranscription.parseServiceResponse(rawResponse);

      if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
        this.text = lastRecognized + e.result.text;
      } else {
        lastRecognized += e.result.text + "\r\n";
        this.text = lastRecognized;

        //this.logSpeechConfigProperties(this.speechConfig);

        const nBest = payload?.NBest;
        const confidence = nBest?.[0]?.Confidence;
        const displayWords = e.result.text.split(" ");
        // `NBest`/`Words` are absent unless the detailed output format with
        // word-level timestamps is active, so probe before mapping.
        const rawWords = nBest?.[0]?.Words;
        const words = Array.isArray(rawWords)
          ? rawWords.map((word: any, i: number) => ({
              word: word.Word,
              displayWord: displayWords[i] || undefined,
              // NOTE(review): presumably SDK ticks (100 ns) converted to
              // milliseconds - confirm against the SDK documentation.
              duration: word.Duration / 10_000,
              offset: word.Offset / 10_000,
            }))
          : undefined;

        // A pause is the gap between the end of the previous segment and the
        // start of this one, recorded only from `pauseLen` upwards.
        if (lastSegment) {
          const previousEnd = lastSegment.offset + lastSegment.duration;
          const gap = Math.round(e.result.offset / 10_000 - previousEnd);
          if (gap >= recordingSettings.pauseLen) {
            this.pauses.push({
              offset: Math.round(previousEnd),
              duration: gap,
            });
          }
        }

        const segment: TranscriptionSegment = {
          text: e.result.text,
          offset: Math.round(e.result.offset / 10_000),
          duration: Math.round(e.result.duration / 10_000),
          nBest,
          words,
          confidence,
        };
        this.segments.push(segment);
        lastSegment = segment;

        //this.recognizedDuration = this.duration;
        this.recognizedDuration += segment.duration;
      }

      this.isTranscribing = payload?.RecognitionStatus !== "Success";

      // The full followup callbacks after the first sentence recognition
      if (this.text) {
        cb(
          //this.lastCallback ? lastSegment.text : this.text,
          this.text,
          this.isTranscribing,
          lastSegment,
          this.pauses,
        );
      }
    };

    recognizer.startContinuousRecognitionAsync(
      () => {
        this.running = true;
      },
      (err) => {
        // Without this the duration timer would keep ticking for a session
        // that never started.
        console.error(`Failed to start recognition: ${err}`);
        this.running = false;
        clearInterval(this.durationInterval);
      },
    );
  }

  /**
   * Stops the session and releases the recognizer.
   *
   * The returned promise always resolves (never rejects and never stays
   * pending): immediately when no recognizer is active, otherwise once the
   * SDK has confirmed the stop or reported a stop error. The constructor
   * callback is invoked only when a recognizer was actually active.
   *
   * @returns The finalized segments and detected pauses of the session.
   */
  async stop() {
    return new Promise<{
      segments: TranscriptionSegment[];
      pauses: Pause[];
    }>((res, _rej) => {
      const recognizer = this.recognizer;

      const finish = (notify: boolean) => {
        this.running = false;
        if (recognizer) {
          try {
            recognizer.close();
          } catch (err) {
            console.error("Failed to close recognizer:", err);
          }
          // Leave a recognizer installed by a newer start() untouched.
          if (this.recognizer === recognizer) this.recognizer = null;
        }
        clearInterval(this.durationInterval);
        this.duration = 0;
        this.recognizedDuration = 0;
        this.isPaused = false;
        if (notify) this.onDoneCallback();
        res({ segments: this.segments, pauses: this.pauses });
      };

      if (!recognizer) {
        finish(false);
        return;
      }

      recognizer.stopContinuousRecognitionAsync(
        () => finish(true),
        (err) => {
          console.error(`Failed to stop recognition: ${err}`);
          finish(true);
        },
      );
    });
  }

  /** Stops the session when `running` is set, otherwise starts one with default arguments. */
  toggle() {
    if (this.running) void this.stop();
    else void this.start();
  }

  /** Suspends recognition and freezes the `duration` counter. */
  pause() {
    this.recognizer?.stopContinuousRecognitionAsync();
    this.isPaused = true;
  }

  /** Restarts recognition on the existing recognizer and unfreezes `duration`. */
  resume() {
    this.recognizer?.startContinuousRecognitionAsync();
    this.isPaused = false;
  }

  /*private logSpeechConfigProperties(speechConfig: any) {
    console.log("SpeechConfig - Current Settings:");

    // Example of reading properties
    const postProcessing = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_PostProcessingOption,
    );
    const profanityOption = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_ProfanityOption,
    );
    const language = speechConfig.speechRecognitionLanguage; // read the language directly
    const profanityFilter = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestProfanityFilterTrueFalse,
    );
    const stablePartialResultThreshold = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_StablePartialResultThreshold,
    );
    const requestPunctuationBoundary = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestPunctuationBoundary,
    );
    const requestSentenceBoundary = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
    );
    const requestWordBoundary = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
    );
    const requestWordLevelTimestamps = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestWordLevelTimestamps,
    );
    const outputFormat = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceResponse_OutputFormatOption,
    );
    const recognitionMode = speechConfig.getProperty(
      sdk.PropertyId.SpeechServiceConnection_RecoMode,
    );

    console.log(`Post processing: ${postProcessing}`);
    console.log(`Profanity Option: ${profanityOption}`);
    console.log(`Speech Recognition Language: ${language}`);
    console.log(`Profanity Filter: ${profanityFilter}`);
    console.log(
      `Stable Partial Result Threshold: ${stablePartialResultThreshold}`,
    );
    console.log(`Request Punctuation Boundary: ${requestPunctuationBoundary}`);
    console.log(`Request Sentence Boundary: ${requestSentenceBoundary}`);
    console.log(`Request Word Boundary: ${requestWordBoundary}`);
    console.log(`Request Word Level Timestamps: ${requestWordLevelTimestamps}`);
    console.log(`Output Format: ${outputFormat}`);
    console.log(`Recognition Mode: ${recognitionMode}`);
  }*/

  /**
   * Parses the JSON payload attached to a recognition result.
   *
   * @returns The parsed value, or `undefined` when the payload is not valid JSON.
   */
  private static parseServiceResponse(raw: string): any {
    try {
      return JSON.parse(raw);
    } catch (err) {
      console.error("Unparsable speech service response:", err);
      return undefined;
    }
  }

  /** (Re)starts the 100 ms ticker behind `duration`, replacing any running one. */
  private restartDurationTimer() {
    clearInterval(this.durationInterval);
    this.durationInterval = setInterval(() => {
      if (!this.isPaused) this.duration += 100;
    }, 100);
  }

  /**
   * Builds a recognizer on the selected (or default) microphone using the
   * shared, freshly re-configured speech config.
   */
  private setup(_stream?: MediaStream, lang = "en-US", device?: string) {
    const audio = device
      ? sdk.AudioConfig.fromMicrophoneInput(device)
      : sdk.AudioConfig.fromDefaultMicrophoneInput();
    /*this.speechConfig = sdk.SpeechConfig.fromEndpoint(
      new URL(speechConfigSettings.speechServiceEndpoint),
      "",
    );*/
    this.configureSpeechConfig(this.speechConfig, lang);

    return new sdk.SpeechRecognizer(this.speechConfig, audio);
  }

  /**
   * Applies `speechConfigSettings` and the stored access token to the given
   * speech config. `_lang` is not used: the recognition language comes from
   * `speechConfigSettings.speechRecognitionLanguage`.
   */
  private configureSpeechConfig(speechConfig: any, _lang: string) {
    // NOTE(review): in the public SDK typings `enableAudioLogging` and
    // `enableDictation` are methods, and `SpeechConfig` declares no
    // `enableTelemetry` member, so these three assignments probably have no
    // effect on the service request - confirm the intent before changing.
    speechConfig.enableAudioLogging = speechConfigSettings.enableAudioLogging;
    speechConfig.enableTelemetry = speechConfigSettings.enableTelemetry;
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs,
      speechConfigSettings.initialSilenceTimeoutMs,
    );
    speechConfig.speechRecognitionLanguage =
      speechConfigSettings.speechRecognitionLanguage;
    speechConfig.setProfanity(speechConfigSettings.profanityOption);
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestProfanityFilterTrueFalse,
      speechConfigSettings.enableProfanityFilter,
    );
    speechConfigSettings.requestWordLevelTimestamps &&
      speechConfig.requestWordLevelTimestamps();
    speechConfig.enableDictation = speechConfigSettings.enableDictation;
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceResponse_PostProcessingOption,
      speechConfigSettings.postProcessing,
    );
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestPunctuationBoundary,
      speechConfigSettings.requestPunctuationBoundary,
    );
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
      speechConfigSettings.requestSentenceBoundary,
    );
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
      speechConfigSettings.requestWordBoundary,
    );
    speechConfig.setProperty(
      sdk.PropertyId.SpeechServiceConnection_RecoMode,
      "INTERACTIVE",
    );
    speechConfig.outputFormat = speechConfigSettings.outputFormat;
    // The token is re-read on every setup, so a refreshed token is picked up
    // by the next session.
    speechConfig.authorizationToken = Storage.getAccessToken();
  }
}
