import { useMemo, useState } from "react";
import parse, { domToReact } from "html-react-parser";
import unidecode from "unidecode";


/**
 * Concatenates the `data` of every text node found beneath `domNode`,
 * depth-first and in child order.
 *
 * Whitespace is returned untouched; callers are expected to normalise it
 * afterwards. Nodes that are neither text nor carry a non-empty `children`
 * array (for example comments) contribute nothing to the result.
 *
 * @param domNode Node whose `children` array is walked.
 * @returns The concatenated, un-trimmed text content.
 */
export function accumulateText (domNode: Element): string {
  // Children are a mix of element-like and text-like nodes, hence the loose cast.
  return (domNode.children as Array<any>).reduce((acc: string, value) => {
    if (value.type === "text") {
      // don't trim() here yet: it would merge 2 words together sometimes
      return acc + value.data
    }
    // Only recurse into nodes that really own children; childless node kinds
    // (comments, directives, ...) have no `children` property to read.
    if (Array.isArray(value.children) && value.children.length > 0) {
      return acc + accumulateText(value)
    }
    return acc
  }, "")
}

// Tag names that count as one "line": list items, paragraphs and the
// headings h1 through h6.
const lineCanBe = new Set<string>([
  "li",
  "p",
  ...Array.from({ length: 6 }, (_, level) => `h${level + 1}`),
])


/**
 * React hook that parses an HTML document string and collects its searchable
 * text lines, plus the text of a `<style>` node found directly under `<head>`.
 *
 * Takes a single object argument with these properties:
 * - `htmlFile`: HTML source handed to the parser. When falsy, nothing is
 *   parsed and `searchableElements` is `null`.
 * - `keywords`: accepted for interface compatibility; not read by this hook.
 * - `activeMatch`: not read by the parse itself, but listed as a memo
 *   dependency, so changing it re-runs the parse.
 *
 * @returns An object with:
 * - `cssStyle`: text of the last `<style>` node seen in `<head>` ("" initially);
 * - `matches`: state array; the code visible here only ever sets it to an
 *   empty array;
 * - `searchableElements`: the collected lines (`text`, lower-cased
 *   `textPrepro`, and `idx`, the running index among line-like nodes), with
 *   the first and last collected entries removed, or `null` without input.
 */
export function useParser ({ htmlFile, keywords, activeMatch }) {

  const [cssStyle, setCssStyle] = useState<string>("")
  const [matches, setMatches] = useState<string[]>([])

  const [searchableElements] = useMemo(() => {
    if (!htmlFile) return [null];

    // Scratch state that lives for exactly one parse run.
    const foundMatches: string[] = []
    const collected: {text: string, textPrepro: string, idx: number}[] = []
    let nodeIdx = -1

    // Replace callback used below <body>: records the text of line-like nodes.
    function iterateDomNodes (domNode) {
      // well, each "clause" contains multiple spans -> it's always li, p, h1, ...
      if (lineCanBe.has(domNode.name)) {
        // The index advances for every line-like node, including empty ones.
        nodeIdx += 1
        // here, accumulate all text in this node into a string
        const text = accumulateText(domNode).replaceAll(/\s+/g, " ").trim()
        const textPrepro = text.toLowerCase()
        if (textPrepro.length > 0) {
          collected.push({"text": text, "textPrepro": textPrepro, "idx": nodeIdx})
          // The text is already recorded, so the node is swapped for an empty
          // fragment instead of being converted any further.
          return <></>
        }
      }
    }

    // Replace callback used at the top of the document: dispatches on
    // <html>, <body> and <head>.
    function replaceFunctionToplevel (domNode) {
      if (domNode.name === "html") {
        const domChildren = domToReact(domNode.children, { replace: replaceFunctionToplevel })
        return <div>{domChildren}</div>
      }
      if (domNode.name === "body") {
        // Renamed in place before conversion, so it is processed as a <div>.
        domNode.name = "div"
        const domChildren = domToReact([domNode], { replace: iterateDomNodes })
        return <div>{domChildren}</div>
      }
      if (domNode.name === "head") {
        // The converted result is discarded; the traversal runs only so the
        // callback gets to see each direct child of <head>.
        domToReact(domNode.children, { replace: function (childNode) {
            if (childNode.type === "style") {
              // An empty <style></style> has no first child to read from.
              const styleText = (childNode.children[0] as Text | undefined)?.data
              if (typeof styleText === "string") {
                // NOTE(review): state is updated from inside a useMemo
                // calculation; consider returning the style text from the
                // memo instead - confirm with the callers first.
                setCssStyle(styleText)
              }
            }
            return <></>
          }})
        return <></>
      }
    }

    parse(htmlFile, {replace: replaceFunctionToplevel})
    console.log(`In A: ${nodeIdx}`)
    // NOTE(review): nothing visible here ever fills `foundMatches`, so this
    // always stores an empty array.
    setMatches(foundMatches)
    // FIXME: here i just delete watermarks
    // (drops the first and the last collected entry unconditionally)
    return [collected.slice(1, collected.length - 1)]
  }, [setMatches, activeMatch, htmlFile])
  // NOTE(review): an earlier comment here insisted that `keywords` be part of
  // this dependency list, yet it is not listed and the memo never reads it.
  // `activeMatch` is listed without being read - confirm whether re-parsing
  // on every activeMatch change is intended.

  return { cssStyle, matches, searchableElements }
}
