import type { Descendant } from 'slate';

import { NEWLINES_REGEX } from 'bundles/cml/editor/html/constants';
import {
  isBold as isBoldGoogleDocs,
  isItalic as isItalicGoogleDocs,
  isSubscript as isSubscriptGoogleDocs,
  isSuperscript as isSuperscriptGoogleDocs,
  isUnderline as isUnderlineGoogleDocs,
} from 'bundles/cml/editor/html/googleDocsUtils';
import {
  isBold as isBoldMsWord,
  isItalic as isItalicMsWord,
  isSubscript as isSubscriptMsWord,
  isSuperscript as isSuperscriptMsWord,
  isUnderline as isUnderlineMsWord,
} from 'bundles/cml/editor/html/msWordUtils';
import type { Options } from 'bundles/cml/editor/html/types';
import { BLACKLIST_TEXT_REGEX, MARKS } from 'bundles/cml/shared/constants';
import type { Marks } from 'bundles/cml/shared/types/elementTypes';
import { deserializeMath, hasMathBlocks, normalizeTextWithMathBlocks } from 'bundles/cml/shared/utils/deserializeMath';

/**
 * Matches every run of one or more consecutive space characters (U+0020 only;
 * tabs and other whitespace are not matched). The `g` flag makes
 * `String.prototype.replace` act on all runs rather than just the first.
 *
 * Used by `getText` to collapse repeated spaces in text pasted from Word.
 */
export const WHITESPACE_REGEX = / +/g;

/**
 * Extracts the cleaned-up text content of a DOM node.
 *
 * Newline matches are replaced by a single space (line breaks are already
 * represented by <br> tags) and blacklisted invisible characters are removed.
 *
 * @param el node whose `textContent` is read; a missing/empty value is treated as ''
 * @param options paste options; only `isWord` is consulted
 * @returns the cleaned text, or `null` for Word content that is blank after trimming
 */
const getText = (el: HTMLElement, { isWord }: Options) => {
  const rawContent = el.textContent || '';
  const withoutNewlines = rawContent.replace(NEWLINES_REGEX, ' ');
  const cleaned = withoutNewlines.replace(BLACKLIST_TEXT_REGEX, '');

  // Everything below is Word-specific post-processing.
  if (!isWord) {
    return cleaned;
  }

  // CP-7098: dropping blank text prevents double blank lines when pasting from Word
  const isBlank = cleaned.trim() === '';
  if (isBlank) {
    return null;
  }

  // AUTHORING-497: collapse runs of spaces so words are not separated by extra whitespace
  return cleaned.replace(WHITESPACE_REGEX, ' ');
};

/**
 * Derives text marks for a node pasted from Word by inspecting inline styles.
 *
 * Bold, italic and underline are read from the `style` attribute of the nearest
 * `span.TextRun` (searching from the node's parent upwards); superscript and
 * subscript are read from the `style` attribute of the immediate parent.
 * A missing element or attribute is treated as an empty style string.
 *
 * @param el the node whose surrounding markup is inspected
 * @returns a marks object containing only the marks that were detected
 */
const deserializeWordTextMarks = (el: HTMLElement): Marks => {
  const parent = el.parentElement;
  const textRunStyle = parent?.closest('span.TextRun')?.getAttribute('style') || '';
  const parentStyle = parent?.getAttribute('style') ?? '';

  const bold = isBoldMsWord(textRunStyle);
  const italic = isItalicMsWord(textRunStyle);
  const underline = isUnderlineMsWord(textRunStyle);
  const superscript = isSuperscriptMsWord(parentStyle);
  const subscript = isSubscriptMsWord(parentStyle);

  const result: Marks = {};
  if (bold) {
    result[MARKS.BOLD] = true;
  }
  if (italic) {
    result[MARKS.ITALIC] = true;
  }
  if (underline) {
    result[MARKS.UNDERLINE] = true;
  }
  if (superscript) {
    result[MARKS.SUPERSCRIPT] = true;
  }
  if (subscript) {
    result[MARKS.SUBSCRIPT] = true;
  }
  return result;
};

/**
 * Derives text marks for a node pasted from Google Docs.
 *
 * Google Docs represents text formatting through the `style` attribute of the
 * wrapping <span>, so the parent's inline style is parsed for each mark.
 * A missing parent or attribute is treated as an empty style string.
 *
 * @param el the node whose parent element carries the formatting styles
 * @returns a marks object containing only the marks that were detected
 */
const deserializeGoogleDocsTextMarks = (el: HTMLElement): Marks => {
  const wrapper = el.parentElement;
  const inlineStyle = wrapper?.getAttribute('style') || '';
  const result: Marks = {};

  if (isBoldGoogleDocs(inlineStyle)) {
    result[MARKS.BOLD] = true;
  }

  if (isItalicGoogleDocs(inlineStyle)) {
    result[MARKS.ITALIC] = true;
  }

  // Underline is skipped when the wrapper sits directly inside an <a>,
  // because links have their own custom styling.
  if (isUnderlineGoogleDocs(inlineStyle) && wrapper?.parentElement?.tagName !== 'A') {
    result[MARKS.UNDERLINE] = true;
  }

  if (isSuperscriptGoogleDocs(inlineStyle)) {
    result[MARKS.SUPERSCRIPT] = true;
  }

  if (isSubscriptGoogleDocs(inlineStyle)) {
    result[MARKS.SUBSCRIPT] = true;
  }

  return result;
};

/**
 * Picks the source-specific mark deserializer for a pasted node.
 *
 * Word takes precedence over Google Docs when both flags are set; content from
 * any other source yields no marks.
 *
 * @param el the node being deserialized
 * @param options paste options; `isWord` and `isGoogleDocs` select the strategy
 * @returns the detected marks, or an empty object when no strategy applies
 */
const deserializeTextMarks = (el: HTMLElement, { isWord, isGoogleDocs }: Options): Marks => {
  if (isWord) {
    return deserializeWordTextMarks(el);
  }

  return isGoogleDocs ? deserializeGoogleDocsTextMarks(el) : {};
};

/**
 * Converts a DOM text node into Slate content.
 *
 * @param el the DOM node to convert; anything other than a text node is rejected
 * @param options paste options forwarded to text extraction and mark detection
 * @param prevMarks marks inherited from the caller; marks detected on this node
 *   are merged on top and win on conflict
 * @returns `null` when `el` is not a text node or `getText` yields no text;
 *   the first node from `deserializeMath` when it reports no math; otherwise the
 *   node list, passed through `normalizeTextWithMathBlocks` if `hasMathBlocks`
 *   is true for it
 */
export const deserializeText = (
  el: HTMLElement,
  options: Options,
  prevMarks: Marks
): Descendant | Descendant[] | null => {
  // Only text nodes carry text to extract; `getText` may also return null.
  const content = el.nodeType === Node.TEXT_NODE ? getText(el, options) : null;
  if (content === null) {
    return null;
  }

  const ownMarks = deserializeTextMarks(el, options);
  const combinedMarks = { ...prevMarks, ...ownMarks };
  const { nodes, hasMath } = deserializeMath(content, combinedMarks);

  if (!hasMath) {
    return nodes[0];
  }

  return hasMathBlocks(nodes) ? normalizeTextWithMathBlocks(nodes) : nodes;
};
