/* fzy.c
 * C implementation of fzy matching
 *
 * original code by John Hawthorn, https://github.com/jhawthorn/fzy
 * modifications by
 *   Rom Grk, https://github.com/romgrk
 *   Seth Warn, https://github.com/swarn
 *
 * Taken from: https://luarocks.org/modules/sethwarn/fzy
 *
 * For the original fzy C library: Copyright (c) 2014 John Hawthorn, MIT licence.
 * For the Lua binding: Copyright (c) 2020 Seth Warn, MIT licence.
 */

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#define fzy_c
#define LUA_LIB

#include "agena.h"
#include "agnxlib.h"
#include "agenalib.h"
#include "lucase.def"

#include "fzy.h"


/* Scoring weights. Gap penalties are negative and are charged once per skipped haystack character;
   match bonuses are positive and depend on the character preceding the matched one. */
#define SCORE_GAP_LEADING (-0.005)     /* per haystack character before the first needle character matches */
#define SCORE_GAP_TRAILING (-0.005)    /* gap penalty used in the row of the last needle character */
#define SCORE_GAP_INNER (-0.01)        /* gap penalty used in all other rows */
#define SCORE_MATCH_CONSECUTIVE (1.0)  /* matched character directly follows the previous matched character */
#define SCORE_MATCH_SLASH (0.9)        /* matched character follows '/' or '\\' */
#define SCORE_MATCH_WORD (0.8)         /* matched character follows '-', '_' or ' ' */
#define SCORE_MATCH_CAPITAL (0.7)      /* matched uppercase letter follows a lowercase letter */
#define SCORE_MATCH_DOT (0.6)          /* matched character follows '.' */

/* clang-format off */
/* Designated-initializer fragment that sets the slot of every ASCII lowercase letter 'a'..'z' to (v).
   Spelled out letter by letter instead of the GNU range form ['a' ... 'z'] = (v); note that (v) is
   expanded once per letter. */
#define ASSIGN_LOWER(v) \
  ['a'] = (v), ['b'] = (v), ['c'] = (v), ['d'] = (v), ['e'] = (v), ['f'] = (v), ['g'] = (v), \
  ['h'] = (v), ['i'] = (v), ['j'] = (v), ['k'] = (v), ['l'] = (v), ['m'] = (v), ['n'] = (v), \
  ['o'] = (v), ['p'] = (v), ['q'] = (v), ['r'] = (v), ['s'] = (v), ['t'] = (v), ['u'] = (v), \
  ['v'] = (v), ['w'] = (v), ['x'] = (v), ['y'] = (v), ['z'] = (v)

/* Designated-initializer fragment that sets the slot of every ASCII uppercase letter 'A'..'Z' to (v).
   Spelled out letter by letter instead of the GNU range form ['A' ... 'Z'] = (v); note that (v) is
   expanded once per letter. */
#define ASSIGN_UPPER(v) \
  ['A'] = (v), ['B'] = (v), ['C'] = (v), ['D'] = (v), ['E'] = (v), ['F'] = (v), ['G'] = (v), \
  ['H'] = (v), ['I'] = (v), ['J'] = (v), ['K'] = (v), ['L'] = (v), ['M'] = (v), ['N'] = (v), \
  ['O'] = (v), ['P'] = (v), ['Q'] = (v), ['R'] = (v), ['S'] = (v), ['T'] = (v), ['U'] = (v), \
  ['V'] = (v), ['W'] = (v), ['X'] = (v), ['Y'] = (v), ['Z'] = (v)

/* Designated-initializer fragment that sets the slot of every ASCII digit '0'..'9' to (v).
   Spelled out digit by digit instead of the GNU range form ['0' ... '9'] = (v); note that (v) is
   expanded once per digit. */
#define ASSIGN_DIGIT(v) \
  ['0'] = (v), ['1'] = (v), ['2'] = (v), ['3'] = (v), ['4'] = (v), \
  ['5'] = (v), ['6'] = (v), ['7'] = (v), ['8'] = (v), ['9'] = (v)

/* Bonus lookup used by COMPUTE_BONUS. The first index is the class of the current character (taken from
   bonus_index), the second index is the character that precedes it. Entries that are not listed are 0,
   i.e. no bonus. */
static const score_t bonus_states[3][256] = {
  /* class 0: current character is neither an ASCII letter nor a digit - never a bonus */
  { 0 },
  /* class 1: current character is a lowercase letter or a digit - bonus after a path or word separator */
  { ['/'] = SCORE_MATCH_SLASH,
    ['\\'] = SCORE_MATCH_SLASH,
    ['-'] = SCORE_MATCH_WORD,
    ['_'] = SCORE_MATCH_WORD,
    [' '] = SCORE_MATCH_WORD,
    ['.'] = SCORE_MATCH_DOT,
  },
  /* class 2: current character is an uppercase letter - same as class 1, plus a bonus after a lowercase letter */
  { ['/'] = SCORE_MATCH_SLASH,
    ['\\'] = SCORE_MATCH_SLASH,
    ['-'] = SCORE_MATCH_WORD,
    ['_'] = SCORE_MATCH_WORD,
    [' '] = SCORE_MATCH_WORD,
    ['.'] = SCORE_MATCH_DOT,
    /* ['a' ... 'z'] = SCORE_MATCH_CAPITAL, */
    ASSIGN_LOWER(SCORE_MATCH_CAPITAL)
  }
};

/* Maps a character (as unsigned char) to the row of bonus_states to consult when that character is the
   current one: 2 for uppercase letters, 1 for lowercase letters and digits, 0 (the default) for everything else. */
static const ind_t bonus_index[256] = {
  /* ['A' ... 'Z'] = 2 */
  ASSIGN_UPPER(2),
  /* ['a' ... 'z'] = 1 */
  ASSIGN_LOWER(1),
  /* ['0' ... '9'] = 1 */
  ASSIGN_DIGIT(1)
};

/* clang-format on */

/* Bonus for matching character `ch` when it is preceded by `last_ch`. Both arguments are converted to
   unsigned char before indexing, so plain `char` values with the high bit set index the tables safely. */
#define COMPUTE_BONUS(last_ch, ch)                                                     \
  (bonus_states[bonus_index[(unsigned char)(ch)]][(unsigned char)(last_ch)])

/* Tests whether `needle` occurs in `haystack` as a subsequence, i.e. whether all needle characters appear in
   the haystack in the same order, not necessarily adjacent.

   needle, needlelen      the pattern and its length in bytes; scanning also stops at the first NUL byte
   haystack, haystacklen  the candidate and its length in bytes; never read beyond haystacklen
   case_sensitive         non-zero compares bytes as they are, zero folds both sides through tools_lowercase

   Returns 1 if the needle is a subsequence (an empty needle always is), 0 otherwise or if a length is negative.

   The scan works directly on the caller's buffers, so it has no length limit. Earlier versions copied both
   strings into MATCH_MAX_LEN-sized stack buffers without a bounds check and searched with the full haystack
   length from an already advanced position, which overflowed the stack for long inputs and read past the
   end of the haystack. */
int fzy_has_match (char const *needle, char const *haystack, int needlelen, int haystacklen, int case_sensitive) {
  int i;
  int pos = 0;  /* index of the first haystack byte that has not been consumed yet */
  if (needlelen < 0 || haystacklen < 0) return 0;
  for (i=0; i < needlelen; i++) {
    char const want = case_sensitive ? needle[i] : (char)tools_lowercase[uchar(needle[i])];
    if (!want) break;  /* a NUL byte terminates the needle */
    if (case_sensitive) {
      char const *hit;
      if (pos >= haystacklen) return 0;
      /* search only the part of the haystack that is still unread */
      hit = tools_memchr(haystack + pos, want, haystacklen - pos);
      if (!hit) return 0;
      pos = (int)(hit - haystack) + 1;
    } else {
      while (pos < haystacklen && (char)tools_lowercase[uchar(haystack[pos])] != want) pos++;
      if (pos >= haystacklen) return 0;
      pos++;  /* consume the matched byte */
    }
  }
  return 1;
}

/* Exchanges the values of the two lvalues `x` and `y`, both of type `T`.
   Expands to a single statement; each argument is evaluated twice. */
#define aux_SWAP(x, y, T)      \
  do {                         \
    T aux_swap_held = (x);     \
    (x) = (y);                 \
    (y) = aux_swap_held;       \
  } while (0)

/* Larger of `a` and `b`; yields `b` when the comparison `a > b` is false. The selected argument is
   evaluated twice, so do not pass expressions with side effects. */
#define max(a, b) (((a) > (b)) ? (a) : (b))

/* Working state shared by the scoring routines; filled in by setup_match_struct. */
struct match_struct {
  int needle_len;                          /* strlen() of the caller's needle */
  int haystack_len;                        /* strlen() of the caller's haystack */
  char const *needle;                      /* string compared by match_row: caller's needle or lower_needle */
  char const *haystack;                    /* string compared by match_row: caller's haystack or lower_haystack */
  char lower_needle[MATCH_MAX_LEN];        /* lowercased needle, filled for case-insensitive matching only; not NUL-terminated */
  char lower_haystack[MATCH_MAX_LEN];      /* lowercased haystack, filled for case-insensitive matching only; not NUL-terminated */
  score_t match_bonus[MATCH_MAX_LEN];      /* bonus for matching at each haystack position, see precompute_bonus */
};

/* Fills match_bonus[k] with the bonus for matching the k-th character of the NUL-terminated `haystack`.
   The bonus depends on the character and on its predecessor; the first character is treated as if it
   followed a '/'. The caller must provide at least strlen(haystack) slots. */
static void precompute_bonus (char const *haystack, score_t *match_bonus) {
  char const *cursor;
  char before = '/';  /* pretend the string starts right after a path separator */
  for (cursor = haystack; *cursor; cursor++) {
    match_bonus[cursor - haystack] = COMPUTE_BONUS(before, *cursor);
    before = *cursor;
  }
}

/* Prepares `match` for scoring `needle` against `haystack`.

   The two lengths are always stored. If the haystack is longer than MATCH_MAX_LEN or the needle is longer
   than the haystack, nothing else is initialised and the caller is expected to bail out after inspecting
   the lengths. Otherwise match->needle and match->haystack point either at the caller's strings
   (case-sensitive) or at lowercased copies inside `match`, and the per-position bonuses are computed from
   the original haystack so that capital letters keep their meaning. */
static void setup_match_struct (struct match_struct *match, char const *needle, char const *haystack, int is_case_sensitive) {
  int k;
  int const nlen = (int)strlen(needle);
  int const hlen = (int)strlen(haystack);
  match->needle_len = nlen;
  match->haystack_len = hlen;
  if (hlen > MATCH_MAX_LEN || nlen > hlen) return;
  if (!is_case_sensitive) {
    for (k=0; k < nlen; k++) {
      match->lower_needle[k] = (char)tools_lowercase[uchar(needle[k])];
    }
    for (k=0; k < hlen; k++) {
      match->lower_haystack[k] = (char)tools_lowercase[uchar(haystack[k])];
    }
    match->needle = match->lower_needle;
    match->haystack = match->lower_haystack;
  } else {
    match->needle = needle;
    match->haystack = haystack;
  }
  precompute_bonus(haystack, match->match_bonus);
}

/* Computes one row of the scoring tables, i.e. the scores for needle character `row` against every
   haystack position.

   curr_D[col]  receives the best score of a match that ends with needle[row] matched at haystack[col],
                or SCORE_MIN if the two characters differ
   curr_M[col]  receives the best score for needle[0..row] within haystack[0..col]
   last_D/M     the same two rows for needle character row - 1; only read when row > 0 */
static inline void match_row (struct match_struct const *match, int row,
  score_t *curr_D, score_t *curr_M, score_t const *last_D, score_t const *last_M) {
  int const cols = match->haystack_len;
  int const is_final_row = (row == match->needle_len - 1);
  char const *pattern = match->needle;
  char const *text = match->haystack;
  score_t const *bonus = match->match_bonus;
  score_t const gap = is_final_row ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
  score_t running = SCORE_MIN;  /* best score so far in this row, including gap penalties */
  int col;
  for (col=0; col < cols; col++) {
    score_t ending_here = SCORE_MIN;
    if (pattern[row] != text[col]) {
      /* no match at this position: carry the best score forward and pay for the gap */
      running = running + gap;
    } else {
      if (row == 0) {
        /* The bonus values are computed out to the length of the haystack in precompute_bonus and col is
           below that length, so the "garbage value" warning here is false.
           NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) */
        ending_here = (col * SCORE_GAP_LEADING) + bonus[col];
      } else if (col > 0) {
        /* row > 0 && col > 0: either start a new run here, or continue a run from the previous character;
           a consecutive match does not stack with the positional bonus.
           NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) */
        score_t const fresh = last_M[col - 1] + bonus[col];
        score_t const streak = last_D[col - 1] + SCORE_MATCH_CONSECUTIVE;
        ending_here = max(fresh, streak);
      }
      running = max(ending_here, running + gap);
    }
    curr_D[col] = ending_here;
    curr_M[col] = running;
  }
}

/* Scores how well `needle` matches `haystack`; higher is better.

   Returns 0 for an empty needle, for a haystack longer than MATCH_MAX_LEN, for a needle longer than the
   haystack, and when the computed score equals SCORE_MIN. Returns SCORE_MAX when both strings have the
   same length, which for a matching needle means they are the same string. The needle is expected to be
   a subsequence of the haystack. */
score_t fzy_match (char const *needle, char const *haystack, int case_sensitive) {
  struct match_struct match;
  /* D[][] stores the best score for a position ending with a match,
     M[][] stores the best possible score at a position. Only two rows are kept alive. */
  score_t D[2][MATCH_MAX_LEN];
  score_t M[2][MATCH_MAX_LEN];
  int row, needle_len, haystack_len;
  score_t best;
  if (needle[0] == '\0') return 0;
  setup_match_struct(&match, needle, haystack, case_sensitive);
  needle_len = match.needle_len;
  haystack_len = match.haystack_len;
  /* Unreasonably large candidate: give it no score. A valid match is still a match, it is just ranked
     below any reasonably sized candidate. */
  if (haystack_len > MATCH_MAX_LEN || needle_len > haystack_len) return 0;
  /* A subsequence of the same length as the haystack is the haystack itself. */
  if (needle_len == haystack_len) return SCORE_MAX;
  for (row=0; row < needle_len; row++) {
    int const cur = row & 1;   /* rows alternate between the two buffers */
    int const prev = cur ^ 1;
    match_row(&match, row, D[cur], M[cur], D[prev], M[prev]);
  }
  best = M[(needle_len - 1) & 1][haystack_len - 1];
  return best == SCORE_MIN ? 0 : best;
}

/* Scores `needle` against `haystack` like fzy_match and additionally reports where each needle character
   is matched in the optimal match.

   positions  may be NULL; otherwise it must have room for strlen(needle) entries. On a successful match
              positions[k] receives the 0-based haystack index matched by needle[k].

   Returns the same value as fzy_match: 0 for an empty needle, an over-long haystack, a needle longer than
   the haystack, a failed allocation or a SCORE_MIN result; SCORE_MAX when both strings have the same
   length. When 0 is returned early (empty needle, over-long haystack, needle longer than haystack)
   `positions` is not written at all. In every other case each of the strlen(needle) entries holds a defined
   value; entries that could not be determined are 0. */
score_t fzy_match_positions (char const *needle, char const *haystack, ind_t *positions, int is_case_sensitive) {
  int i, j, m, n;
  if (!*needle) return 0;
  struct match_struct match;
  setup_match_struct(&match, needle, haystack, is_case_sensitive);
  n = match.needle_len;
  m = match.haystack_len;
  /* Unreasonably large candidate; return no score. If it is a valid match, it will still be returned, it will just be ranked
     below any reasonably sized candidates */
  if (m > MATCH_MAX_LEN || n > m) return 0;
  /* If `needle` is a subsequence of `haystack` and the same length, then they are the same string. */
  if (n == m) {
    if (positions) {
      for (i=0; i < n; i++) positions[i] = i;
    }
    return SCORE_MAX;
  }
  /* Give every entry a defined value first, so that a needle which turns out not to be a subsequence, or a
     failed allocation below, never leaves indeterminate data in the caller's buffer. */
  if (positions) {
    for (i=0; i < n; i++) positions[i] = 0;
  }
  /* D[][] Stores the best score for this position ending with a match.
     M[][] Stores the best possible score at this position.
     All n rows are kept because the backtrace below needs them. */
  typedef score_t score_row_t[MATCH_MAX_LEN];
  score_row_t * const D = malloc(sizeof *D * (size_t)n);
  if (D == NULL) return 0;  /* 4.11.5 fix */
  score_row_t * const M = malloc(sizeof *M * (size_t)n);
  if (M == NULL) {  /* 4.11.5 fix */
    free(D);
    return 0;
  }
  score_t *last_D = NULL;  /* not read by match_row for row 0 */
  score_t *last_M = NULL;
  score_t *curr_D = NULL;
  score_t *curr_M = NULL;
  for (i=0; i < n; i++) {
    curr_D = &D[i][0];
    curr_M = &M[i][0];
    match_row(&match, i, curr_D, curr_M, last_D, last_M);
    last_D = curr_D;
    last_M = curr_M;
  }
  /* backtrace to find the positions of optimal matching */
  int match_required = 0;
  for (i=n - 1, j=m - 1; i >= 0; i--) {
    for (; j >= 0; j--) {
      /* There may be multiple paths which result in the optimal weight.
         For simplicity, we will pick the first one we encounter, the latest in the candidate string.
         NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult) */
      if (D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])) {
        /* If this score was determined using SCORE_MATCH_CONSECUTIVE,
           the previous character MUST be a match */
        match_required =
            i && j && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
        if (positions) positions[i] = j;
        j--;  /* the previous needle character has to match strictly before this position */
        break;
      }
    }
  }
  score_t result = M[n - 1][m - 1];
  free(M);
  free(D);
  /* report "no score" the same way fzy_match does */
  return result == SCORE_MIN ? 0 : result;
}

/* *******************************************************/
/* The lua wrapper to the native C implementation of fzy */
/* *******************************************************/

/* `fzy.has (haystack, needle [, true])`

Checks if string `needle` is a subsequence of string `haystack` and returns `true` or `false`. By default, the check is
case-insensitive; by passing optional `true` it will be case-sensitive.

> fzy.has('acB', 'ab'):
true

> fzy.has('ac', 'ab'):
false

> fzy.has('acB', 'ab', true):
false */

/* Lua entry point of `fzy.has`: argument 1 is the haystack, argument 2 the needle, optional argument 3
   switches to case-sensitive comparison. Pushes one boolean, the result of fzy_has_match. */
static int fzy_has (lua_State *L) {
  size_t hay_bytes = 0, needle_bytes = 0;
  char const *hay = luaL_checklstring(L, 1, &hay_bytes);
  char const *pattern = luaL_checklstring(L, 2, &needle_bytes);
  int const exact_case = agnL_optboolean(L, 3, 0);
  int const found = fzy_has_match(pattern, hay, needle_bytes, hay_bytes, exact_case);
  lua_pushboolean(L, found);
  return 1;
}


/* `fzy.score(haystack, needle [, true])`

Get a score for string `needle` that matches string `haystack`.

The `needle` must be a subsequence of `haystack`, or the result is undefined. You can verify this by calling
the `fzy.has` function before.

The return is a number, where higher numbers indicate better matches. `0` denotes no match or no score.

These functions are split for performance reasons. Either use `fzy.has` before each use of `fzy.score`, or
use the `fzy.filter` function to do it automatically.

By default, the processing is case-insensitive, by passing optional `true` it will be case-sensitive.

> fzy.score('app/models/user.rb', 'amuser'):
5.595

> fzy.score('app/models/customer.rb', 'amuser'):
3.655 */

/* Lua entry point of `fzy.score`: argument 1 is the haystack, argument 2 the needle, optional argument 3
   switches to case-sensitive comparison. Pushes one number, the result of fzy_match. */
static int fzy_score (lua_State *L) {
  char const *hay = luaL_checkstring(L, 1);
  char const *pattern = luaL_checkstring(L, 2);
  int const exact_case = agnL_optboolean(L, 3, 0);
  score_t const rating = fzy_match(pattern, hay, exact_case);
  lua_pushnumber(L, rating);
  return 1;
}


/* `fzy.positions(haystack, needle [, true])`

Determine where each character of string `needle` is matched to string `haystack` in the optimal match.

`needle` must be a subsequence of `haystack`, or the result will be undefined. You can verify this by calling the
`fzy.has` function before.

By default, the processing is case-insensitive, by passing optional `true` it will be case-sensitive.

Returns:
* indices, a table [int, ...], where `indices[n]` is the location of the `n`th character of `needle` in `haystack`.
* score, a number: the same matching score returned by `fzy.score`

> fzy.positions("app/models/user.rb", "amuser")
[1, 5, 12, 13, 14, 15]

> fzy.positions("app/models/customer.rb", "amuser")
[1, 5, 13, 14, 18, 19]
*/

/* Given an array of `count` 0-based indices, push a table with equivalent 1-based indices. */
/* Pushes a new table onto the Lua stack whose entries 1..count are the first `count` values of the
   0-based `indices` array, each incremented by one to make it 1-based. */
static void push_indices (lua_State *L, ind_t const * const indices, int count) {
  int done = 0;
  lua_createtable(L, count, 0);
  while (done < count) {
    lua_pushinteger(L, indices[done] + 1);  /* 0-based -> 1-based value */
    done++;
    lua_rawseti(L, -2, done);               /* 1-based table slot */
  }
}

/* Lua entry point of `fzy.positions`: argument 1 is the haystack, argument 2 the needle, optional argument 3
   switches to case-sensitive comparison. Pushes two values: a table with the 1-based haystack index of each
   needle character, and the score computed by fzy_match_positions.

   The index table is empty when the needle is longer than the haystack or the haystack is longer than
   MATCH_MAX_LEN: in both cases the scorer gives up without writing any position. This check also keeps
   the number of entries read from `result` within its MATCH_MAX_LEN slots. */
static int fzy_positions (lua_State *L) {
  size_t haylen, needlelen;
  char const *haystack = luaL_checklstring(L, 1, &haylen);
  char const *needle = luaL_checklstring(L, 2, &needlelen);
  int case_sensitive = agnL_optboolean(L, 3, 0);
  /* zero-filled so that no indeterminate value is ever pushed, even if the needle is not a subsequence */
  ind_t result[MATCH_MAX_LEN] = { 0 };
  score_t score = fzy_match_positions(needle, haystack, result, case_sensitive);
  int count = (needlelen <= haylen && haylen <= (size_t)MATCH_MAX_LEN) ? (int)needlelen : 0;
  push_indices(L, result, count);
  lua_pushnumber(L, score);
  return 2;
}


/* `fzy.filter(haystacks, needle [, true])`

Applies the `fzy.has` and `fzy.positions` functions to an array of `haystacks`. For large numbers of haystacks, this will have
better performance than iterating over the `haystacks` and calling those functions for each string.

> fzy.filter(['*ab', 'b', 'a*b'], 'ab'):
[1 ~ [0.995, [2, 3]], 3 ~ [0.89, [1, 3]]]

Parameters:
* haystacks ([string, ...])
* needle (string): unlike the other functions, the `needle` need not be a subsequence of any of the strings in `haystacks`.
* case_sensitive (bool, optional)  defaults to false

Returns:
* [idx ~ [score, positions], ...], a hash table with one entry per matching line in `haystacks`, each entry giving the index
  of the line in `haystacks` as well as the equivalent to the return value of `positions` for that line.

> haystacks := ['cab', 'ant/bat/cat', 'ant/bat/ace'];
> needle := 'abc';
> fzy.filter(haystacks, needle)
[2 ~ [2.63, [1, 5, 9]], 3 ~ [1.725, [1, 5, 10]]] */

/* Lua entry point of `fzy.filter`: argument 1 is a table of haystack strings, argument 2 the needle, optional
   argument 3 switches to case-sensitive comparison. Pushes one table that maps the index of every haystack
   containing the needle as a subsequence to a two-element table [score, positions].

   A matching haystack longer than MATCH_MAX_LEN is still listed, but the scorer does not rate it: its score
   is 0 and its positions table is empty. */
static int fzy_filter (lua_State *L) {
  int i;
  size_t haylen, needlelen;
  luaL_checktype(L, 1, LUA_TTABLE);
  int const haystacks_len = agn_size(L, 1);
  char const *needle = luaL_checklstring(L, 2, &needlelen);
  int case_sensitive = agnL_optboolean(L, 3, 0);
  lua_newtable(L);  /* result table */
  for (i=1; i <= haystacks_len; i++) {  /* call `positions` on each haystack string. */
    lua_rawgeti(L, 1, i);
    char const *haystack = luaL_checklstring(L, -1, &haylen);
    if (fzy_has_match(needle, haystack, needlelen, haylen, case_sensitive)) {
      lua_createtable(L, 2, 0);  /* make the idx ~ [score, positions] table. */
      /* generate the positions and the score; the buffer is zero-filled so that nothing indeterminate is
         pushed when the scorer returns without writing positions */
      ind_t result[MATCH_MAX_LEN] = { 0 };
      score_t score = fzy_match_positions(needle, haystack, result, case_sensitive);
      /* the scorer only fills positions when both strings fit into MATCH_MAX_LEN; reading more entries
         than that would run past the end of `result` */
      int count = (needlelen <= (size_t)MATCH_MAX_LEN && haylen <= (size_t)MATCH_MAX_LEN) ? (int)needlelen : 0;
      lua_pushnumber(L, score);
      lua_rawseti(L, -2, 1);  /* set the score */
      push_indices(L, result, count);
      lua_rawseti(L, -2, 2);  /* set the positions */
      lua_rawseti(L, -3, i);  /* add this table to the result */
    }
    /* pop the current haystack string */
    lua_pop(L, 1);
  }
  return 1;
}


/* Undocumented helper that exposes the scoring limits to scripts. Pushes five numbers, in this order:
   SCORE_MIN, SCORE_MAX, MATCH_MAX_LEN, MATCH_MAX_LEN * SCORE_GAP_INNER and
   MATCH_MAX_LEN * SCORE_MATCH_CONSECUTIVE. */
static int fzy_scores (lua_State *L) {  /* UNDOC */
  luaL_checkstack(L, 5, "not enough stack space");  /* five values are pushed below */
  lua_pushnumber(L, SCORE_MIN);
  lua_pushnumber(L, SCORE_MAX);
  lua_pushnumber(L, MATCH_MAX_LEN);
  lua_pushnumber(L, MATCH_MAX_LEN * SCORE_GAP_INNER);
  lua_pushnumber(L, MATCH_MAX_LEN * SCORE_MATCH_CONSECUTIVE);
  return 5;
}


/* Functions registered by luaopen_fzy: script-visible name and implementing C function.
   The {NULL, NULL} entry terminates the list. */
static const luaL_Reg fzy[] = {
  {"filter",    fzy_filter},
  {"has",       fzy_has},
  {"positions", fzy_positions},
  {"score",     fzy_score},
  {"scores",    fzy_scores},
  {NULL, NULL}
};

/*
** Open fzy library
*/
/* Library entry point: registers the functions listed in `fzy` under the name AGENA_FZYLIBNAME and
   returns 1, i.e. one value left on the stack for the caller. */
LUALIB_API int luaopen_fzy (lua_State *L) {
  luaL_register(L, AGENA_FZYLIBNAME, fzy);
  return 1;
}

