

import {encloseInQuotes, removeQuotes, splitQuotedString} from "@/app/utils/StringUtils";
import XRegExp from "xregexp";
import {flattenFilterNodes, parseFilterString} from "@/dashboards/filter/FilterParser";
import {buildBasicFilter, convertExpToAttrs} from "@/dashboards/filter/BasicFilter";
import {notifyUserOfError} from "@/app/framework/notifications/Notifications";
import {MentionQAst} from "@/mentionq/mentionq";

/**
 * Should this term be quoted?
 *
 * A term needs quotes when it contains a space, '-', '&', '+' or '.' anywhere after its first
 * character. The first character is ignored so that the leading '-' of a negated term does not
 * force quoting on its own.
 *
 * @param {String} t The term to test. Empty / null / undefined terms never need quotes.
 * @return {boolean}
 */
export function phraseNeedsQuotes(t) {
    if (!t) return false;
    // Every character uses the same "> 0" test. A bare indexOf() result must not be used as a
    // boolean: it is -1 (truthy) when the character is absent.
    const quoteTriggers = [' ', '-', '&', '+', '.'];
    return quoteTriggers.some(ch => t.indexOf(ch) > 0);
}

/**
 * Tidies up a query by parsing it into terms and rebuilding it from those terms.
 *
 * @param {String|String[]} q A query string, or an array of already-parsed terms. An array is
 *      used as-is (it is not parsed again).
 * @return {String} The terms joined with single spaces. Terms for which phraseNeedsQuotes is
 *      true are wrapped in double quotes; for a negated term the leading '-' stays outside them.
 */
export function normalisePhrase(q) {
    const terms = Array.isArray(q) ? q : parsePhraseString(q);

    const quoteTerm = term => {
        if (!phraseNeedsQuotes(term)) return term;
        return term.charAt(0) === '-'
            ? '-"' + term.substring(1) + '"'
            : '"' + term + '"';
    };

    // Empty / missing terms are dropped before quoting.
    return terms
        .filter(Boolean)
        .map(term => quoteTerm(term))
        .join(" ");
}

/**
 * Splits a phrase into its terms. A term is either a run of characters ended by a space or a
 * double quote, or the text between a pair of double quotes. Terms preceded by '-' are returned
 * with a '-' prefix; surrounding quotes and leading '+' characters are dropped.
 *
 * @param {String} q The phrase to parse.
 * @return {String[]} The terms, in the order they appear.
 * @throws {Error} If q is empty or missing.
 */
export function parsePhraseString(q) {
    if (!q) throw new Error("No phrase provided");

    const BETWEEN_TERMS = 0;
    const IN_WORD = 1;
    const IN_QUOTES = 2;

    const terms = [];
    const end = q.length;
    let mode = BETWEEN_TERMS;
    let start = 0;          // index of the first character of the term being read
    let negated = false;    // a '-' was seen since the last completed term

    const pushTerm = text => terms.push(negated ? "-" + text : text);

    // Finishes the current term at `pos` (exclusive). Empty terms are not emitted, but the
    // negation flag is cleared either way.
    const closeTerm = pos => {
        if (pos > start) pushTerm(q.substring(start, pos));
        mode = BETWEEN_TERMS;
        negated = false;
    };

    let pos = 0;
    while (pos < end) {
        const ch = q.charAt(pos);
        if (mode === IN_QUOTES) {
            if (ch === '"') closeTerm(pos);
            pos++;
        } else if (mode === IN_WORD) {
            if (ch === '"') {
                // A quote ends the word and is then looked at again as an opening quote, so the
                // position is not advanced here.
                closeTerm(pos);
            } else {
                if (ch === ' ') closeTerm(pos);
                pos++;
            }
        } else {
            if (ch === '"') {
                mode = IN_QUOTES;
                start = pos + 1;
            } else if (ch === "-") {
                negated = true;
            } else if (ch !== " " && ch !== '+' && ch !== '\t' && ch !== '\r') {
                start = pos;
                mode = IN_WORD;
            }
            pos++;
        }
    }

    // A word or an unterminated quote that runs to the end of the string is still a term.
    if (mode !== BETWEEN_TERMS && pos > start) pushTerm(q.substring(start));

    return terms;
}

/**
 * Combines an array of terms into a single phrase string, passing every term through
 * encloseInQuotes (with '"' as the quote character) and joining the results with spaces.
 *
 * The array that is passed in is left untouched.
 *
 * @param {String[]} a The terms to combine.
 * @return {String}
 */
export function phraseCombiner(a) {
    return a
        .map(term => encloseInQuotes(term, '"'))
        .join(" ");
}


/**
 * Suggests words (and two-word sequences) to exclude: those that countWords finds in the
 * irrelevant mentions but not in the relevant ones, and that are not stopwords.
 *
 * @param {Array} relevantMentions Mentions with authorHandle, authorName and content fields.
 * @param {Array} irrelevantMentions Mentions with the same fields.
 * @return {String[]} At most 50 suggestions, most frequent in the irrelevant mentions first.
 *      Empty when there are no irrelevant mentions.
 */
export function getExclusionSuggestions(relevantMentions, irrelevantMentions) {
    if (!irrelevantMentions?.length) return [];

    // Prototype-free maps, so that a word such as "constructor" cannot be mistaken for an
    // inherited Object.prototype property.
    const relevant = Object.create(null);
    const irrelevant = Object.create(null);

    // Missing fields are skipped rather than concatenated as the words "undefined" / "null".
    const textOf = m => [m.authorHandle, m.authorName, m.content].filter(Boolean).join(" ");

    (relevantMentions ?? []).forEach(m => countWords(textOf(m), relevant));
    irrelevantMentions.forEach(m => countWords(textOf(m), irrelevant));

    const isStopword = word => Object.prototype.hasOwnProperty.call(PHRASE_STOPWORDS, word);

    return Object.keys(irrelevant)
        .filter(word => !relevant[word] && !isStopword(word))
        .map(word => ({word, count: irrelevant[word]}))
        // Highest count first; words with equal counts keep the order in which they were found.
        .sort((lhs, rhs) => rhs.count - lhs.count)
        .slice(0, 50)
        .map(d => d.word);
}

/** we cannot use the \\w character class of normal JS regex's as it doesn't handle unicode characters */
const NON_TOKEN_CHARS_REGEX = XRegExp("[^_@#\\p{L}]+", "g");

/**
 * Counts the words in text, and every pair of adjacent words, adding the counts to map.
 *
 * The text is lower-cased and split on NON_TOKEN_CHARS_REGEX. Only words of 3 to 18 characters
 * are counted; a shorter or longer word is skipped and also breaks the pair on either side of it.
 *
 * @param {String} text The text to tokenise.
 * @param {Object} map Updated in place: word (or "word word" pair) to number of occurrences.
 */
export function countWords(text, map) {
    // XRegExp split is very slow - it's much faster to replace and then use the native split
    // noinspection DuplicatedCode
    const words = XRegExp.replace(text.toLowerCase(), NON_TOKEN_CHARS_REGEX, " ").split(" ");

    // Only own properties count as existing entries, so that a word such as "constructor" is
    // not confused with a property the map inherits from Object.prototype.
    const increment = key => {
        const current = Object.prototype.hasOwnProperty.call(map, key) ? map[key] : 0;
        map[key] = (current || 0) + 1;
    };

    let previous = null;
    for (const word of words) {
        if (word.length > 2 && word.length <= 18) {
            increment(word);
            if (previous) increment(previous + " " + word);
            previous = word;
        } else {
            previous = null;
        }
    }
}

/**
 * Combines every phrase in lhs with every phrase in rhs.
 *
 * If either list is empty a copy of the other list is returned unchanged. A phrase that is
 * exactly `""` (two double-quote characters) acts as "nothing": combining with it yields the
 * other phrase on its own. Results are trimmed and each distinct result is returned once, in
 * the order it was first produced.
 *
 * @param {String[]} lhs
 * @param {String[]} rhs
 * @param {boolean} joinWords When true the two phrases are concatenated directly, otherwise
 *      they are joined with a space.
 * @return {String[]}
 *
 * See <a href="@/ui/tests/unit/phrases.spec.js">phrases.spec.js</a>
 */
export function mergePhraseLists(lhs, rhs, joinWords = false) {
    if (!lhs.length) return Array.from(rhs);
    if (!rhs.length) return Array.from(lhs);

    // Handles someone joining a phrase with the string "\"\""
    const emptyPhrase = "\"\"";

    const seen = new Set();
    const phrases = [];
    for (const a of lhs) {
        for (const b of rhs) {
            let p;
            if (a === emptyPhrase) p = b;
            else if (b === emptyPhrase) p = a;
            else if (joinWords) p = quoteIfNeeded(a + b);
            else p = quoteIfNeeded(a) + " " + quoteIfNeeded(b);

            // Trim before the duplicate check, so the check sees exactly what gets stored.
            p = p.trim();
            if (!seen.has(p)) {
                seen.add(p);
                phrases.push(p);
            }
        }
    }
    return phrases;
}

/**
 * Would wrap s in double quotes when it contains a space, but quoting is switched off by a
 * hard-coded flag, so s is always returned exactly as given.
 *
 * @param {String} s
 * @return {String}
 */
function quoteIfNeeded(s) {
    const quotingEnabled = false;
    if (quotingEnabled && s.indexOf(' ') >= 0) {
        return '"' + s + '"';
    }
    return s;
}

/**
 * Tests values returned by the goodness checker to see if the brand / phrase has been set up
 * okay enough for us to allow it to be used.
 *
 * @param {Brand|{id,mentionFilter, socialNetworks, feeds}} brand Must be a root brand.
 * @param {{mentionFilter}} phrase
 * @param {{level: String, mentionsTotal: number}} goodness Result from the goodness checker:
 *      its level, and its total number of mentions.
 * @return {boolean} true if the phrase may NOT be used, false if it is acceptable.
 * @throws {Error} If brand has no id or is not a root brand, if level is missing, or if
 *      mentionsTotal is not a finite, non-negative number.
 */
export function isPhraseANo(brand, phrase, {level, mentionsTotal}) {
    if (!brand.id) throw new Error("Expected a Brand object");
    if (brand.parent) throw new Error("Brand must be a root brand");
    if (!level) throw new Error("Expected volumes to be a goodness object");
    if (!Number.isFinite(mentionsTotal)) throw new Error("mentionTotal should be a number");
    if (mentionsTotal < 0) throw new Error("mentionTotal should be non-negative");

    // This constant relates to maximum usage relative to our twitter cap (in a month).
    const PER_MONTH_MAXIMUM = 100_000;

    if (level !== "NO" && mentionsTotal < PER_MONTH_MAXIMUM) return false;

    if (brand.socialNetworks) {
        const networks = brand.socialNetworks.split(" ");
        if (!networks.includes("TWITTER")) return false;       // We aren't filtering on twitter, so all good.
    }

    if (brand.feeds) {
        const feeds = brand.feeds.split(" ");
        if (!feeds.includes("TWITTER")) return false;           // We aren't filtering on twitter, so all good.
    }

    // Here if the volumes are above a threshold, we ALWAYS make sure that
    // authorLocation filtering is enabled (or that the brand samples the volume down far enough).
    if (mentionsTotal > PER_MONTH_MAXIMUM) {
        if (brand.mentionFilter) {
            const brandFilter = PhraseFilter.parse(brand.mentionFilter);
            if (brandFilter.authorLocation) return false;

            if (brandFilter.sampleRate) {
                // PhraseFilter.sampleRate is a percentage (it is divided by 100 when the filter
                // is turned back into a SAMPLE(...) term), so convert it to a proportion here.
                const sampledTotal = mentionsTotal * brandFilter.sampleRate / 100;
                if (sampledTotal < PER_MONTH_MAXIMUM) return false;
            }
        }

        if (phrase.mentionFilter) {
            const mentionFilter = PhraseFilter.parse(phrase.mentionFilter);
            if (mentionFilter.authorLocation) return false;
        }

        // If no authorLocation filtering is enabled, then no phrase may be added.
        return true;
    }

    // If volumes are low, we just need a filter on the brand or the phrase.
    // Don't be too picky.
    // noinspection RedundantIfStatementJS
    if (brand.mentionFilter || phrase.mentionFilter) {
        return false;
    }

    // There's no filtering happening at all.
    return true;
}


/**
 * The settings held in a brand or phrase mention filter. Converted to a filter string by
 * toString() and read back from one by PhraseFilter.parse().
 */
export class PhraseFilter {
    //region Is this just for twitter and / or webhose
    twitter;
    webhose;
    //endregion

    location;
    language;
    authorLocation;
    sampleRate;
    dissallowReshares;

    /**
     * @return {string} This filter as a filter string.
     */
    toString() {
        return phraseFilterToString(this);
    }

    /**
     * @param {string} string A brand or phrase mention filter string.
     * @return {PhraseFilter}
     */
    static parse(string) {
        return parseBrandAndPhraseMentionFilter(string);
    }

    /**
     * Builds a PhraseFilter from a plain object. Attributes that are missing (or null) become
     * null, except twitter and webhose, which become false.
     *
     * @param {{twitter: (string|boolean|*), webhose: (boolean|*), language, location, sampleRate: *, authorLocation: (string|*), dissallowReshares: (boolean|*)}} attrs
     * @return {PhraseFilter}
     */
    static from(attrs) {
        const filter = new PhraseFilter();
        for (const key of ["location", "language", "authorLocation", "sampleRate", "dissallowReshares"]) {
            filter[key] = attrs[key] ?? null;
        }
        for (const key of ["twitter", "webhose"]) {
            filter[key] = attrs[key] ?? false;
        }
        return filter;
    }
}

/**
 * Builds the filter string for a PhraseFilter.
 *
 * One clause is produced for each of location, language, authorLocation, sampleRate and
 * dissallowReshares that is set. When attrs.twitter and / or attrs.webhose is set, every clause
 * except the language one is rewritten as "(<clause> OR (site ISNT 'twitter.com'))",
 * "(<clause> OR (feed ISNT 'webhose'))", or with both terms joined by AND. The clauses are
 * then each wrapped in brackets and joined with " and ".
 *
 * @param {PhraseFilter} attrs
 * @return {string}
 */

function phraseFilterToString(attrs) {
    // The "not from this source" terms that get OR-ed onto a clause.
    const otherSourceTerms = [];
    if (attrs.twitter) otherSourceTerms.push("site ISNT 'twitter.com'");
    if (attrs.webhose) otherSourceTerms.push("feed ISNT 'webhose'");

    // Empty clauses, and all clauses when no source is selected, are passed through untouched.
    const restrictToSources = clause => {
        if (!clause || otherSourceTerms.length === 0) return clause;
        return "(" + clause + " OR (" + otherSourceTerms.join(" AND ") + "))";
    };

    // noinspection DuplicatedCode
    const clauses = [
        restrictToSources(buildBasicFilter({location: attrs.location})),
        attrs.language ? buildBasicFilter({language: attrs.language}) : "",
        restrictToSources(attrs.authorLocation ? buildBasicFilter({authorLocation: attrs.authorLocation}) : ""),
        // sampleRate is held as a percentage; SAMPLE takes a proportion.
        restrictToSources(attrs.sampleRate ? `SAMPLE(${attrs.sampleRate / 100})` : ""),
        restrictToSources(attrs.dissallowReshares ? "reshareof is unknown" : ""),
    ];

    return clauses
        .map(clause => clause.trim())
        .filter(clause => clause.length)
        .map(clause => `(${clause})`)
        .join(" and ");
}

/**
 * Parses a brand / phrase mention filter string into a PhraseFilter.
 *
 * The string is parsed with parseFilterString and flattened with flattenFilterNodes. `visit`
 * then pulls the "feed ISNT ...", "site ISNT 'twitter.com'" and SAMPLE terms out of the tree
 * into attrs, and what is left of the tree is handed to convertExpToAttrs, whose result is
 * merged into attrs.
 *
 * @param {string} filter The mention filter. If parseFilterString returns nothing for it, the
 *      result is a PhraseFilter with only twitter and webhose switched on.
 * @return {PhraseFilter}
 * @throws {Error} If attrs ends up with a truthy `errors` value. The user is notified and the
 *      errors are logged to the console first.
 */
function parseBrandAndPhraseMentionFilter(filter) {

    /**
     * Walks the tree rooted at node, recording the source / sampling terms it finds in attrs.
     * A node that has been recorded gets an empty children array; the loop over its parent's
     * children (below) leaves such nodes out of the list it keeps.
     */
    function visit(node, attrs) {
        if (node.operandType === MentionQAst.FEED && node.operationType === MentionQAst.ISNT) {
            // "feed ISNT x" sets attrs.x -- presumably literal is the feed name, e.g. "webhose".
            attrs[node.literal] = true;
            node.children = [];
        } else if (node.operandType === MentionQAst.SITE && node.operationType === MentionQAst.ISNT
            && node.literal === 'twitter.com') {
            attrs.twitter = true;
            node.children = [];
        }  else if (node.operandType === MentionQAst.SAMPLE) {
            // The filter holds a proportion; attrs.sampleRate is a percentage.
            attrs.sampleRate = node.literal.proportion * 100;
            node.children = [];
        } else if (node.children) {
            let keep = [];
            for (let i = 0; i < node.children.length; i++) {
                let c = node.children[i];
                visit(c, attrs);
                // Keep children without a children list, replace a child that has exactly one
                // child of its own by that child, and drop children left with no children.
                if (!c.children) keep.push(c);
                else if (c.children.length === 1) keep.push(c.children[0]);
                else if (c.children.length > 0) keep.push(c);
            }
            // NOTE(review): keep is only used when at least one child was dropped, so a
            // single-child replacement made above is discarded when nothing was dropped --
            // confirm that this is intended.
            if (keep.length < node.children.length) node.children = keep;
        }
    }


    let root = parseFilterString(filter);

    let attrs = {};
    if (root) {
        root = flattenFilterNodes(root);
        visit(root, attrs);
        // A "feed ISNT moreover" term counts as webhose.
        if (attrs.moreover) attrs.webhose = true;
        Object.assign(attrs, convertExpToAttrs(root, {noPublished: true}));
    }

    // authorLocation is turned into an array of values with their quotes removed.
    if (attrs.authorLocation) {
        attrs.authorLocation = splitQuotedString(attrs.authorLocation)
            .map(d => removeQuotes(d));
    }

    attrs.dissallowReshares = attrs.reshareOf === "unknown";

    // With none of these set, both twitter and webhose are switched on.
    if (!attrs.location && !attrs.authorLocation && !attrs.dissallowReshares && !attrs.sampleRate) {
        attrs.twitter = true;
        attrs.webhose = true;
    }

    // NOTE(review): attrs.errors is presumably filled in by convertExpToAttrs -- confirm.
    if (attrs.errors) {
        console.error("Brand or phrase filter has errors: ", attrs.errors);
        notifyUserOfError("Brand or phrase filters has errors. See javascript console for details.")
        throw new Error("mentionFilter has errors: " + attrs.errors);
    }

    return PhraseFilter.from(attrs);
}


/**
 * Words that getExclusionSuggestions never suggests as exclusions. The keys are the words, in
 * lower case (countWords lower-cases the text before the lookup); every value is `true`.
 * The object is frozen so that it cannot be modified at runtime.
 */
export const PHRASE_STOPWORDS = Object.freeze({
    able: true,
    about: true,
    above: true,
    abroad: true,
    according: true,
    accordingly: true,
    across: true,
    actually: true,
    adj: true,
    after: true,
    afterwards: true,
    again: true,
    against: true,
    ago: true,
    ahead: true,
    aint: true,
    all: true,
    allow: true,
    allows: true,
    almost: true,
    alone: true,
    along: true,
    alongside: true,
    already: true,
    also: true,
    although: true,
    always: true,
    amid: true,
    amidst: true,
    among: true,
    amongst: true,
    amp: true,
    and: true,
    another: true,
    any: true,
    anybody: true,
    anyhow: true,
    anyone: true,
    anything: true,
    anyway: true,
    anyways: true,
    anywhere: true,
    apart: true,
    appear: true,
    appreciate: true,
    appropriate: true,
    archive: true,
    are: true,
    arent: true,
    around: true,
    article: true,
    artid: true,
    aside: true,
    ask: true,
    asking: true,
    asp: true,
    aspx: true,
    associated: true,
    available: true,
    away: true,
    awfully: true,
    back: true,
    backtype: true,
    backward: true,
    backwards: true,
    became: true,
    because: true,
    become: true,
    becomes: true,
    becoming: true,
    been: true,
    before: true,
    beforehand: true,
    begin: true,
    behind: true,
    being: true,
    believe: true,
    below: true,
    beside: true,
    besides: true,
    best: true,
    better: true,
    between: true,
    beyond: true,
    bit: true,
    blog: true,
    blogger: true,
    blogspot: true,
    both: true,
    brand: true,
    brief: true,
    but: true,
    cmon: true,
    came: true,
    can: true,
    cant: true,
    cannot: true,
    caption: true,
    catid: true,
    cause: true,
    causes: true,
    certain: true,
    certainly: true,
    cgi: true,
    changes: true,
    clearly: true,
    click: true,
    com: true,
    come: true,
    comes: true,
    comment: true,
    comments: true,
    concerning: true,
    consequently: true,
    consider: true,
    considering: true,
    contain: true,
    containing: true,
    contains: true,
    content: true,
    corresponding: true,
    cot: true,
    could: true,
    couldnt: true,
    course: true,
    currently: true,
    dare: true,
    darent: true,
    day: true,
    default: true,
    definitely: true,
    described: true,
    despite: true,
    did: true,
    didnt: true,
    different: true,
    directly: true,
    dll: true,
    does: true,
    doesnt: true,
    doing: true,
    dont: true,
    done: true,
    down: true,
    downwards: true,
    during: true,
    each: true,
    edu: true,
    eight: true,
    eighty: true,
    either: true,
    else: true,
    elsewhere: true,
    end: true,
    ending: true,
    enough: true,
    entirely: true,
    especially: true,
    etc: true,
    even: true,
    ever: true,
    evermore: true,
    every: true,
    everybody: true,
    everyone: true,
    everything: true,
    everywhere: true,
    exactly: true,
    example: true,
    except: true,
    fairly: true,
    far: true,
    farther: true,
    few: true,
    fewer: true,
    fifth: true,
    first: true,
    five: true,
    followed: true,
    following: true,
    follows: true,
    for: true,
    forever: true,
    former: true,
    formerly: true,
    forth: true,
    forum: true,
    forums: true,
    forward: true,
    found: true,
    four: true,
    from: true,
    further: true,
    furthermore: true,
    get: true,
    gets: true,
    getting: true,
    given: true,
    gives: true,
    goes: true,
    going: true,
    gone: true,
    goo: true,
    got: true,
    gotten: true,
    gov: true,
    greetings: true,
    guid: true,
    had: true,
    hadnt: true,
    haha: true,
    half: true,
    happens: true,
    hardly: true,
    has: true,
    hasnt: true,
    have: true,
    havent: true,
    having: true,
    hed: true,
    hell: true,
    hes: true,
    heh: true,
    hello: true,
    help: true,
    hence: true,
    her: true,
    here: true,
    heres: true,
    hereafter: true,
    hereby: true,
    herein: true,
    hereupon: true,
    hers: true,
    herself: true,
    him: true,
    himself: true,
    his: true,
    hither: true,
    hopefully: true,
    how: true,
    howbeit: true,
    however: true,
    htm: true,
    html: true,
    http: true,
    https: true,
    hundred: true,
    ill: true,
    ive: true,
    ignored: true,
    immediate: true,
    inasmuch: true,
    inc: true,
    indeed: true,
    index: true,
    indicate: true,
    indicated: true,
    indicates: true,
    info: true,
    inner: true,
    inside: true,
    insofar: true,
    instead: true,
    into: true,
    inward: true,
    isnt: true,
    itd: true,
    itll: true,
    its: true,
    itemid: true,
    itself: true,
    jsp: true,
    just: true,
    keep: true,
    keeps: true,
    kept: true,
    know: true,
    known: true,
    knows: true,
    last: true,
    lately: true,
    later: true,
    latter: true,
    latterly: true,
    least: true,
    less: true,
    lest: true,
    let: true,
    lets: true,
    like: true,
    liked: true,
    likely: true,
    likewise: true,
    little: true,
    livejournal: true,
    lol: true,
    long: true,
    look: true,
    looking: true,
    looks: true,
    low: true,
    lower: true,
    ltd: true,
    made: true,
    mainly: true,
    make: true,
    makes: true,
    man: true,
    many: true,
    may: true,
    maybe: true,
    maynt: true,
    mean: true,
    meantime: true,
    meanwhile: true,
    merely: true,
    might: true,
    mightnt: true,
    mine: true,
    minus: true,
    miss: true,
    more: true,
    moreover: true,
    most: true,
    mostly: true,
    mrs: true,
    msan: true,
    much: true,
    must: true,
    mustnt: true,
    myself: true,
    name: true,
    namely: true,
    near: true,
    nearly: true,
    necessary: true,
    need: true,
    neednt: true,
    needs: true,
    neither: true,
    net: true,
    never: true,
    neverf: true,
    neverless: true,
    nevertheless: true,
    new: true,
    news: true,
    newsid: true,
    next: true,
    nine: true,
    ninety: true,
    nobody: true,
    non: true,
    none: true,
    nonetheless: true,
    noone: true,
    nor: true,
    normally: true,
    not: true,
    nothing: true,
    notwithstanding: true,
    novel: true,
    now: true,
    nowhere: true,
    obviously: true,
    off: true,
    often: true,
    okay: true,
    old: true,
    omg: true,
    once: true,
    one: true,
    ones: true,
    only: true,
    onto: true,
    opposite: true,
    org: true,
    other: true,
    others: true,
    otherwise: true,
    ought: true,
    oughtnt: true,
    our: true,
    ours: true,
    ourselves: true,
    out: true,
    outside: true,
    over: true,
    overall: true,
    own: true,
    particular: true,
    particularly: true,
    past: true,
    per: true,
    perhaps: true,
    php: true,
    placed: true,
    please: true,
    plus: true,
    possible: true,
    presumably: true,
    probably: true,
    provided: true,
    provides: true,
    que: true,
    quite: true,
    raquo: true,
    rather: true,
    really: true,
    reasonably: true,
    recent: true,
    recently: true,
    redirect: true,
    regarding: true,
    regardless: true,
    regards: true,
    relatively: true,
    respectively: true,
    right: true,
    rofl: true,
    round: true,
    rss: true,
    said: true,
    same: true,
    saw: true,
    say: true,
    saying: true,
    says: true,
    second: true,
    secondly: true,
    sectid: true,
    see: true,
    seeing: true,
    seem: true,
    seemed: true,
    seeming: true,
    seems: true,
    seen: true,
    self: true,
    selves: true,
    sensible: true,
    sent: true,
    serious: true,
    seriously: true,
    setid: true,
    seven: true,
    several: true,
    shall: true,
    shant: true,
    she: true,
    shed: true,
    shell: true,
    shes: true,
    should: true,
    shouldnt: true,
    since: true,
    siteid: true,
    six: true,
    some: true,
    somebody: true,
    someday: true,
    somehow: true,
    someone: true,
    something: true,
    sometime: true,
    sometimes: true,
    somewhat: true,
    somewhere: true,
    soon: true,
    sorry: true,
    specified: true,
    specify: true,
    specifying: true,
    src: true,
    statuses: true,
    still: true,
    story: true,
    storyid: true,
    sub: true,
    such: true,
    sup: true,
    sure: true,
    take: true,
    taken: true,
    taking: true,
    tell: true,
    tends: true,
    than: true,
    thank: true,
    thanks: true,
    thanx: true,
    that: true,
    thatll: true,
    thats: true,
    thatve: true,
    the: true,
    their: true,
    theirs: true,
    them: true,
    themselves: true,
    then: true,
    thence: true,
    there: true,
    thered: true,
    therell: true,
    therere: true,
    theres: true,
    thereve: true,
    thereafter: true,
    thereby: true,
    therefore: true,
    therein: true,
    thereupon: true,
    these: true,
    they: true,
    theyd: true,
    theyll: true,
    theyre: true,
    theyve: true,
    thing: true,
    things: true,
    think: true,
    third: true,
    thirty: true,
    this: true,
    thorough: true,
    thoroughly: true,
    those: true,
    though: true,
    thread: true,
    three: true,
    through: true,
    throughout: true,
    thru: true,
    thus: true,
    till: true,
    tiny: true,
    tinyurl: true,
    together: true,
    too: true,
    took: true,
    top: true,
    toward: true,
    towards: true,
    tried: true,
    tries: true,
    truly: true,
    try: true,
    trying: true,
    twice: true,
    twitpic: true,
    twitter: true,
    two: true,
    ure: true,
    under: true,
    underneath: true,
    undoing: true,
    unfortunately: true,
    unless: true,
    unlike: true,
    unlikely: true,
    until: true,
    unto: true,
    upon: true,
    upwards: true,
    url: true,
    use: true,
    used: true,
    useful: true,
    uses: true,
    using: true,
    usually: true,
    value: true,
    various: true,
    versus: true,
    very: true,
    via: true,
    view: true,
    viz: true,
    want: true,
    wants: true,
    was: true,
    wasnt: true,
    way: true,
    wed: true,
    well: true,
    were: true,
    weve: true,
    welcome: true,
    went: true,
    werent: true,
    what: true,
    whatll: true,
    whats: true,
    whatve: true,
    whatever: true,
    when: true,
    whence: true,
    whenever: true,
    where: true,
    wheres: true,
    whereafter: true,
    whereas: true,
    whereby: true,
    wherein: true,
    whereupon: true,
    wherever: true,
    whether: true,
    which: true,
    whichever: true,
    while: true,
    whilst: true,
    whither: true,
    who: true,
    whod: true,
    wholl: true,
    whos: true,
    whoever: true,
    whole: true,
    whom: true,
    whomever: true,
    whose: true,
    why: true,
    will: true,
    willing: true,
    wish: true,
    with: true,
    within: true,
    without: true,
    wont: true,
    wonder: true,
    wordpress: true,
    would: true,
    wouldnt: true,
    www: true,
    xml: true,
    yes: true,
    yet: true,
    you: true,
    youd: true,
    youll: true,
    youre: true,
    youve: true,
    your: true,
    yours: true,
    yourself: true,
    yourselves: true,
    zero: true,
    zomg: true
});
