// Copyright 2010-2021, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef MOZC_PREDICTION_RESULT_H_
#define MOZC_PREDICTION_RESULT_H_

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/no_destructor.h"
#include "absl/base/nullability.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "composer/query.h"
#include "converter/inner_segment.h"
#include "dictionary/dictionary_token.h"

namespace mozc {
namespace prediction {

// Origins a prediction result can come from. Every enumerator other than
// NO_PREDICTION has a distinct single-bit value, so several of them can be
// OR-ed together into a PredictionTypes bitfield. Enumerators are listed in
// ascending bit order.
enum PredictionType {
  // Nothing needs to be suggested.
  NO_PREDICTION = 0,
  // Suggestions derived from the key the user is typing right now.
  UNIGRAM = 1 << 0,
  // Suggestions derived from the history key the user typed previously.
  BIGRAM = 1 << 1,
  // Suggestions produced by immutable_converter.
  REALTIME = 1 << 2,
  // Suffixes such as "さん" or "が" that match the previous context.
  SUFFIX = 1 << 3,
  // English words.
  ENGLISH = 1 << 4,
  // Predictions for type-corrected keys.
  TYPING_CORRECTION = 1 << 5,
  // Prefix candidates,
  // e.g. "今日" and "教" for the input "きょうは".
  PREFIX = 1 << 6,
  // Entries coming from the number decoder.
  NUMBER = 1 << 7,
  // Entries coming from the single kanji dictionary.
  SINGLE_KANJI = 1 << 8,
  // Entries coming from a supplemental model.
  SUPPLEMENTAL_MODEL = 1 << 9,
  // Typing completion generated by the typing corrector.
  TYPING_COMPLETION = 1 << 10,

  // The key was expanded inside the dictionary lookup.
  // Usually Kana-modifiers are expanded.
  // TODO(taku): This label should be migrated to TYPING_CORRECTION.
  KEY_EXPANDED_IN_DICTIONARY = 1 << 15,  // 32768

  // Suggestions from |converter_|. Unlike REALTIME, this goes through the
  // full converter with rewriter, history, etc.
  // TODO(noriyukit): This label should be integrated with REALTIME. The value
  // 65536 is deliberately far from the others to mark it as a temporary
  // assignment.
  REALTIME_TOP = 1 << 16,  // 65536
};

// Bitfield holding a set of PredictionType values.
using PredictionTypes = int32_t;

// One prediction result: the key/value pair of a candidate plus the costs,
// type bits and other bookkeeping attached to it. All members are public and
// carry in-class default initializers.
struct Result {
  // The two functions below are only declared here; their definitions live
  // outside this header.
  // NOTE(review): judging by the names, they fill this result from a
  // dictionary token and/or set `types` together with attributes derived from
  // the token attributes -- confirm against the definitions.
  void InitializeByTokenAndTypes(const dictionary::Token& token,
                                 PredictionTypes types);
  void SetTypesAndTokenAttributes(
      PredictionTypes prediction_types,
      dictionary::Token::AttributesBitfield token_attr);

  // Returns a reference to a shared, default-constructed Result. The instance
  // is a function-local static wrapped in absl::NoDestructor, so its destructor
  // never runs.
  inline static const Result& DefaultResult() {
    static const absl::NoDestructor<Result> kResult;
    return *kResult;
  }

  std::string key;
  std::string value;
  std::string description;
  std::string display_value;
  // Indicates which PredictionType created this instance.
  // UNIGRAM, BIGRAM, REALTIME, SUFFIX, ENGLISH or TYPING_CORRECTION
  // is set exclusively.
  PredictionTypes types = NO_PREDICTION;
  // Context "insensitive" candidate cost.
  int wcost = 0;
  // Context "sensitive" candidate cost.
  // TODO(noriyukit): The cost is basically calculated by the underlying LM, but
  // currently it is updated by other modules and heuristics at many locations;
  // e.g., see SetPredictionCostForMixedConversion() in
  // dictionary_predictor.cc. Ideally, such cost adjustments should be kept
  // separately from the original LM cost to perform rescoring in a rigorous
  // manner.
  int cost = 0;
  uint16_t lid = 0;
  uint16_t rid = 0;
  uint32_t candidate_attributes = 0;
  // Boundary information for realtime conversion.
  // This will be set only for realtime conversion result candidates.
  // This contains the inner segment sizes for key and value.
  // If the candidate key and value are
  // "わたしの|なまえは|なかのです", "私の|名前は|中野です",
  // |inner_segment_boundary| has [(4, 2), (4, 3), (5, 4)].
  converter::InnerSegmentBoundary inner_segment_boundary;
  size_t consumed_key_size = 0;
  // The total penalty added to this result.
  int penalty = 0;
  // The original cost before rescoring. Used for debugging purposes.
  int cost_before_rescoring = 0;
  // If removed is true, this result is not used for a candidate.
  bool removed = false;
  // Confidence score of typing correction. Larger is more confident.
  float typing_correction_score = 0.0;
  // Adjustment for `wcost` made by the typing correction. This value can be
  // zero, positive (penalty) or negative (bonus), and it is added to `wcost`.
  int typing_correction_adjustment = 0;
  // Free-form debug log. The member only exists when NDEBUG is not defined;
  // MOZC_WORD_LOG appends to it and AbslStringify prints it.
#ifndef NDEBUG
  std::string log;
#endif  // NDEBUG

  // Constructs a converter::InnerSegments from `key`, `value` and
  // `inner_segment_boundary`.
  converter::InnerSegments inner_segments() const {
    return converter::InnerSegments(key, value, inner_segment_boundary);
  }

  // Used to emulate positive infinity for cost. This value is set for those
  // candidates that are thought to be aggressive; thus we can eliminate such
  // candidates from suggestion or prediction. Note that for this purpose we
  // don't want to use INT_MAX because someone might further add penalty after
  // cost is set to INT_MAX, which leads to overflow and consequently aggressive
  // candidates would appear in the top results.
  // (2 << 20) == 2097152.
  inline static constexpr int kInvalidCost = (2 << 20);

  // Abseil stringification hook. Writes a one-line, comma-separated summary of
  // `r` into `sink`: key, value, types, the costs, lid/rid, candidate
  // attributes, the inner segment boundary joined with ",", consumed key size,
  // penalty, typing correction adjustment and the removed flag.
  // `description`, `display_value` and `typing_correction_score` are not
  // printed. When NDEBUG is not defined, `log` is appended as well, one
  // four-space-indented line per '\n'-separated piece.
  template <typename S>
  friend void AbslStringify(S& sink, const Result& r) {
    absl::Format(
        &sink,
        "key: %s, value: %s, types: %d, wcost: %d, cost: %d, cost_before: %d, "
        "lid: %d, rid: %d, attrs: %d, bdd: %s, consumed_key_size: %d, penalty: "
        "%d, tc_adjustment: %d, removed: %v",
        r.key, r.value, r.types, r.wcost, r.cost, r.cost_before_rescoring,
        r.lid, r.rid, r.candidate_attributes,
        absl::StrJoin(r.inner_segment_boundary, ","), r.consumed_key_size,
        r.penalty, r.typing_correction_adjustment, r.removed);
#ifndef NDEBUG
    sink.Append(", log:\n");
    for (absl::string_view line : absl::StrSplit(r.log, '\n')) {
      absl::Format(&sink, "    %s\n", line);
    }
#endif  // NDEBUG
  }
};

namespace result_internal {

// Returns true if `lhs` orders before `rhs`. The strings are compared first
// by their number of Unicode characters (fewer characters sorts first) and
// then, when the counts are equal, by value.
// Examples:
//  "ん" < "あいうえお"
//  "あいうえお" < "かきくけこ"
//  "テスト1" < "テスト00"
bool ValueLess(absl::string_view lhs, absl::string_view rhs);

}  // namespace result_internal

// Ordering functor for prediction candidates keyed on `wcost` (ascending).
// Results with equal `wcost` are ordered by result_internal::ValueLess applied
// to their values. For words A and AB, for example "六本木" and
// "六本木ヒルズ", it is assumed that cost(A) < cost(AB).
struct ResultWCostLess {
  bool operator()(const Result& lhs, const Result& rhs) const {
    // Different word costs decide the order on their own.
    if (lhs.wcost != rhs.wcost) {
      return lhs.wcost < rhs.wcost;
    }
    // Tie on word cost: fall back to the value comparison.
    return result_internal::ValueLess(lhs.value, rhs.value);
  }
};

// Ordering functor keyed on `cost` (ascending): returns true if `lhs` is less
// than `rhs`. Results with equal `cost` are ordered by
// result_internal::ValueLess applied to their values.
struct ResultCostLess {
  bool operator()(const Result& lhs, const Result& rhs) const {
    // Different costs decide the order on their own.
    if (lhs.cost != rhs.cost) {
      return lhs.cost < rhs.cost;
    }
    // Tie on cost: fall back to the value comparison.
    return result_internal::ValueLess(lhs.value, rhs.value);
  }
};

// Populates `result` (a prediction::Result) with the typing correction result
// held in `typing_corrected_result`. `result` is annotated absl_nonnull and
// must not be null.
// TODO(taku): rename `query` as it is not a query.
// NOTE(review): no parameter is called `query`; the TODO presumably refers to
// the "Query" in the function name and/or composer::TypeCorrectedQuery --
// confirm.
void PopulateTypeCorrectedQuery(
    const composer::TypeCorrectedQuery& typing_corrected_result,
    Result* absl_nonnull result);

// Returns a debug string built from `types`, a bitfield of PredictionType
// values. The exact format is determined by the definition, which is not part
// of this header.
std::string GetPredictionTypeDebugString(PredictionTypes types);

// Pushes the elements of `results` that satisfy `pred` out of the leading N
// positions.
//
// The function scans `results` from the front until it has seen N elements
// that do not match `pred` and reaches the next non-matching one (or the end
// of the span). Inside that scanned prefix the non-matching elements are
// compacted to the front and the matching ones are placed right after them.
// Relative order is preserved within both groups, and elements beyond the
// scanned prefix are not touched. When fewer than N non-matching elements
// exist, matching elements necessarily remain within the top N positions.
//
// `pred` is invoked exactly once per scanned element, in index order.
// Nothing happens when `results` is empty or N is 0.
//
// Example: (K: keep, D: demote)
// [K1, D1, K2, K3, D2, K4, K5 ] -> [K1, K2, K3, D1, D2, K4, K5]  (N = 3)
template <typename T, typename Pred>
void DemoteFirstN(absl::Span<T> results, size_t N, Pred pred) {
  if (N == 0 || results.empty()) return;

  // Pass 1: collect the indices of matching elements. `scan_end` ends up one
  // past the last index that takes part in the reordering.
  std::vector<size_t> matched_indices;
  size_t kept_so_far = 0;
  size_t scan_end = 0;
  while (scan_end < results.size()) {
    if (pred(results[scan_end])) {
      matched_indices.push_back(scan_end);
    } else if (kept_so_far == N) {
      // N non-matching elements are already secured; this one stays in place.
      break;
    } else {
      ++kept_so_far;
    }
    ++scan_end;
  }

  if (matched_indices.empty()) return;

  // Pass 2: park the matching elements aside while sliding the non-matching
  // ones toward the front of the scanned prefix.
  std::vector<T> parked;
  parked.reserve(matched_indices.size());

  size_t next_slot = 0;
  auto next_match = matched_indices.cbegin();
  for (size_t i = 0; i < scan_end; ++i) {
    const bool is_match =
        next_match != matched_indices.cend() && *next_match == i;
    if (is_match) {
      parked.push_back(std::move(results[i]));
      ++next_match;
      continue;
    }
    // Skip the self-move when the element is already where it belongs.
    if (next_slot != i) {
      results[next_slot] = std::move(results[i]);
    }
    ++next_slot;
  }

  // Pass 3: put the parked elements back, directly after the kept ones.
  for (T& item : parked) {
    results[next_slot] = std::move(item);
    ++next_slot;
  }
}

// MOZC_WORD_LOG(result, ...) appends a debug entry to `(result).log`.
//
// Debug builds (NDEBUG not defined): the entry has the form
// "<__FILE__>:<__LINE__> <args...>", with the variadic arguments concatenated
// by absl::StrAppend. A single space separates the entry from any text already
// in the log. `result` is evaluated exactly once and bound to a local
// reference, so an argument expression with side effects (e.g. `*it++`) is
// safe.
//
// Release builds (NDEBUG defined): the macro expands to an empty block and
// none of its arguments are evaluated, so do not rely on side effects in them.
//
// Note: the expansion is a brace-enclosed block rather than a single
// statement. `if (c) MOZC_WORD_LOG(r, "x"); else ...` therefore does not
// compile; put the call in braces in that situation.
#ifndef NDEBUG
#define MOZC_WORD_LOG(result, ...)                                        \
  {                                                                       \
    auto&& mozc_word_log_target_ = (result);                              \
    if (!mozc_word_log_target_.log.empty()) {                             \
      absl::StrAppend(&mozc_word_log_target_.log, " ");                   \
    }                                                                     \
    absl::StrAppend(&mozc_word_log_target_.log, __FILE__, ":", __LINE__,  \
                    " ", ##__VA_ARGS__);                                  \
  }
#else  // NDEBUG
#define MOZC_WORD_LOG(result, ...) \
  {                                \
  }
#endif  // NDEBUG

}  // namespace prediction
}  // namespace mozc

#endif  // MOZC_PREDICTION_RESULT_H_
