import { useState } from 'react';

import { franc } from 'franc';

import { getUserLanguageCode } from 'js/lib/language';

import type { TranslatedContentId } from 'bundles/ondemand-translation/components/TranslationContextProvider';
import {
  francCodeToLangCodeMap,
  francLangCodes,
} from 'bundles/ondemand-translation/language-detection/languageConstants';

/**
 * Values and registration callbacks returned by the `useLanguageDetector` hook.
 */
export type LanguageDetector = {
  /**
   * 2-character language code of the un-translated content, derived from the languages registered through
   * `registerDetectedLanguage`. It is `null` when no single most frequent language exists (nothing registered yet,
   * or several languages are tied).
   */
  detectedLanguageCode: string | null;
  /** Lower-cased 2-character language code of the user; `'en'` when the user language can't be determined. */
  userLanguageCode: string;
  /**
   * `true` when at least one text value was registered for frontend detection and the language detected on the
   * frontend is not equal to `userLanguageCode`. Always `false` while nothing has been registered.
   */
  isLanguageMismatch: boolean;
  /** Registers the original (un-translated) text of the content with the given id for frontend detection. */
  registerOriginalTextValue: (id: TranslatedContentId, value: string) => void;
  /** Registers the language detected for the content with the given id; only the first 2 characters are kept. */
  registerDetectedLanguage: (id: TranslatedContentId, detectedLanguage: string) => void;
};

/** Maps a translated content id to the language code detected for that content. */
type DetectedLanguageMap = Map<TranslatedContentId, string>;

/** Code returned by franc when it is not able to determine the language of a text. */
const FRANC_UNKNOWN_LANGUAGE = 'und';

/** Language code used when the user language can't be determined. */
const DEFAULT_USER_LANGUAGE_CODE = 'en';

/**
 * Returns user 2-character language code in lower case. It will return 'en' if the language can't be determined.
 */
const getUserShortLanguageCode = (): string => {
  const userLanguageCode = getUserLanguageCode();

  if (!userLanguageCode || userLanguageCode.length < 2) {
    return DEFAULT_USER_LANGUAGE_CODE;
  }

  return userLanguageCode.substring(0, 2).toLowerCase();
};

/**
 * Returns the single most frequent language code of the given list.
 *
 * @param languageList Language codes detected for the individual pieces of content (duplicates are expected).
 * @returns The most frequent code, or null when the list is empty or when several codes share the top count.
 */
const getContentLanguageCode = (languageList: readonly string[]): string | null => {
  const languageCount = new Map<string, number>();

  languageList.forEach((language) => {
    languageCount.set(language, (languageCount.get(language) ?? 0) + 1);
  });

  let topLanguages: string[] = []; // use array in case there are more than one top
  let topCount = 0;

  languageCount.forEach((count, language) => {
    if (count > topCount) {
      topCount = count;
      topLanguages = [language];
    } else if (count === topCount) {
      topLanguages.push(language);
    }
  });

  // If only a single top language is detected, return it. Otherwise, return null (unknown language).
  return topLanguages.length === 1 ? topLanguages[0] : null;
};

/**
 * The language detector stores detected languages of all content enabled for translation.
 *
 * Each component will register its text content for language detection on frontend. Frontend language detection is
 * not completely reliable and is only used to decide whether we should or shouldn't show the translation button.
 * These values are stored in the `feDetectedLanguages` state.
 *
 * When the translation query completes, the backend will return the detected language used for translation. Backend
 * language detection is much more reliable, therefore we will use it when we need to show the name of the original
 * language. E.g. "Translated from French" label when translating peer review submission. These values are stored
 * in the `beDetectedLanguages` state.
 *
 * Both states are only ever replaced by a new Map and never mutated in place. Only the first registration of each
 * content id is kept; later registrations with the same id are ignored.
 *
 * The language detector provides multiple properties that are exposed through the translation context:
 *
 * detectedLanguageCode: The detected language of un-translated content. This value uses detected language returned by
 * the backend service (when the translation query completes) and registered with the `registerDetectedLanguage`
 * function. Returns null if the language can't be determined.
 *
 * userLanguageCode: The configured user language code in the 2-character format. If user language can't be determined,
 * the default value ('en') is returned.
 *
 * isLanguageMismatch: Returns whether the user language doesn't match the language of un-translated content detected
 * on frontend. This value should be used to decide whether we should or shouldn't show the translation button.
 * If there aren't any registered values, it returns false. If values are registered but no single language can be
 * determined from them, it returns true.
 *
 * registerOriginalTextValue: Function to be used by child components to register their string values for frontend
 * language detection.
 *
 * registerDetectedLanguage: Function to be used by child components to register language detected on the backend
 * after the translation query finishes.
 */
export const useLanguageDetector = (): LanguageDetector => {
  /**
   * The Map stores content id as key and language detected on frontend as value. In this map, we are storing the
   * languages in the original 3-character format (ISO 639-3) that is used by the language detection library franc.
   * The initializer function makes sure the Map is created only for the first render.
   */
  const [feDetectedLanguages, setFeDetectedLanguages] = useState(() => new Map<TranslatedContentId, string>());

  /**
   * The Map stores content id as key and language detected on the backend during the translation as value. These
   * values are in the 2-character format.
   */
  const [beDetectedLanguages, setBeDetectedLanguages] = useState(() => new Map<TranslatedContentId, string>());

  /**
   * Function to be used by child components to register their string value for frontend language detection. This is
   * then used to determine whether the content language is different from the user language and therefore to show
   * the translation button.
   */
  const registerOriginalTextValue = (id: TranslatedContentId, value: string) => {
    // Fast path: entries are never removed, so an id known to this render is also known to the latest state and
    // we can skip the detection entirely.
    if (feDetectedLanguages.has(id)) {
      return;
    }

    // We will allow the library to detect any language that is supported by the Amazon Translate service. Allowing
    // library to detect other rare languages brings much more incorrect detections than expected.
    const detectedLanguage = franc(value, {
      only: francLangCodes,
      minLength: 1,
    });

    // The updater always works with the latest state, so a callback captured by an older render can't drop
    // entries that were registered in the meantime. Returning the same Map makes the update a no-op.
    setFeDetectedLanguages((previous: DetectedLanguageMap) =>
      previous.has(id) ? previous : new Map(previous).set(id, detectedLanguage)
    );
  };

  /**
   * Function used to register the language detected by the backend. This value is returned from the translation query.
   * Only the first 2 characters of the detected language are stored.
   */
  const registerDetectedLanguage = (id: TranslatedContentId, detectedLanguage: string) => {
    if (beDetectedLanguages.has(id)) {
      return;
    }

    const shortLanguageCode = detectedLanguage.substring(0, 2);

    // See registerOriginalTextValue: copy the latest state instead of mutating the Map captured by this render.
    setBeDetectedLanguages((previous: DetectedLanguageMap) =>
      previous.has(id) ? previous : new Map(previous).set(id, shortLanguageCode)
    );
  };

  /**
   * Returns detected 2-character language code of the text content. This is determined by frontend language detection
   * and is used only to determine if there is a language mismatch.
   */
  const getFeDetectedContentLanguageCode = (): string | null => {
    const languageList = Array.from(feDetectedLanguages.values())
      .filter((x) => x !== FRANC_UNKNOWN_LANGUAGE)
      .map((x) => francCodeToLangCodeMap.get(x))
      // filter out codes that have no 2-character counterpart in the map
      .filter((x): x is string => !!x);

    return getContentLanguageCode(languageList);
  };

  // Language of the original content as reported by the backend after the translation query completes.
  const detectedLanguageCode = getContentLanguageCode(Array.from(beDetectedLanguages.values()));

  const userLanguageCode = getUserShortLanguageCode();

  // Uses the content language detected on the frontend and compares it with configured user language. A null
  // (undetermined) frontend language never equals the user language, so it counts as a mismatch as long as at least
  // one value has been registered.
  const isLanguageMismatch = feDetectedLanguages.size > 0 && getFeDetectedContentLanguageCode() !== userLanguageCode;

  return {
    detectedLanguageCode,
    userLanguageCode,
    isLanguageMismatch,
    registerOriginalTextValue,
    registerDetectedLanguage,
  };
};
