// index.js

/**
 * @typedef {Object} MarkovChainJSON - A JSON representation of a Markov chain object.
 * @property {number[][]} corpus - The corpus: each sentence is a list of indices into the dictionary.
 * @property {string[]} dictionary - The dictionary.
 * @property {GenerationConfig} config - The default config used for sentence generation.
 */

/**
 * @typedef {MarkovChainJSON | string} MarkovChainResolvable
 */

/**
 * @typedef {Object} GenerationConfig - A config object used to generate sentences.
 * @property {string} from - The word to start generating from
 * @property {number} grams - The sequence length to use.
 * @property {boolean} backward - Whether to generate backward or not.
 */

/**
 * A Markov chain.
 */
class MarkovChain {

  /**
   * Create a new MarkovChain.
   * 
   * @example
   * // Create a new chain
   * const chain = new MarkovChain()
   * 
   * // Add text
   * chain.update('some text')
   * 
   * // Export it to string or to JSON
   * const str = chain.toString()
   * const json = chain.toJSON()
   * 
   * // Create new chains based on our saves
   * const fromStr = new MarkovChain(str)
   * const fromJSON = new MarkovChain(json)
   * 
   * @param {MarkovChainResolvable} [base] - A saved chain from .toJSON or .toString.
   * @throws {TypeError} If base cannot be resolved to a JSON object.
   */
  constructor(base = {}) {
    const invalidBase = 'Invalid \'base\' argument. Needs type MarkovChainResolvable.'

    let data

    try {
      // Round-tripping through JSON both validates the input and deep-copies
      // it, so the chain never shares arrays with the caller's object.
      data = JSON.parse(typeof base === 'string' ? base : JSON.stringify(base))
    } catch {
      throw new TypeError(invalidBase)
    }

    // JSON such as 'null', '5' or '[]' parses fine but is not a saved chain.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new TypeError(invalidBase)
    }

    /**
     * The sentences learnt by the Markov chain, each stored as a list of
     * indices into the dictionary.
     * 
     * @type {number[][]}
     */
    this.corpus = Array.isArray(data.corpus) ? data.corpus : []

    /**
     * A list of all the words learnt.
     * 
     * @type {string[]}
     */
    this.dictionary = Array.isArray(data.dictionary) ? data.dictionary : []

    const savedConfig = data.config !== null && typeof data.config === 'object'
      ? data.config
      : {}

    /**
     * The default config to use for generation. A partially saved config is
     * completed with the built-in defaults.
     * 
     * @type {GenerationConfig}
     */
    this.config = {
      from: '',
      grams: 2,
      backward: false,
      ...savedConfig
    }
  }

  /**
   * Add a sentence to the Markov chain.
   * 
   * The sentence is split on single spaces; empty tokens (produced by
   * repeated, leading or trailing spaces) are dropped, and a sentence
   * without any word is ignored.
   * 
   * @example
   * const chain = new MarkovChain()
   * 
   * chain.update('Some text')
   * 
   * @param {string} sentence - The sentence to add.
   * @throws {TypeError} If sentence is not a string.
   */
  update(sentence) {
    if (typeof sentence !== 'string') throw new TypeError('The argument should be a string')

    const words = sentence.split(' ').filter(word => word !== '')

    if (words.length === 0) return

    const indices = []

    for (const word of words) {
      let index = this.dictionary.indexOf(word)

      // Unknown word: register it, its index is the new last position
      if (index === -1) index = this.dictionary.push(word) - 1

      indices.push(index)
    }

    this.corpus.push(indices)
  }

  /**
   * Search if a word is in the corpus.
   * 
   * @example
   * const chain = new MarkovChain()
   * 
   * console.log(chain.contains('OwO')) // false
   * 
   * chain.update('hi OwO')
   * 
   * console.log(chain.contains('OwO')) // true
   * 
   * @param {string} word - The word to search for.
   * @return {boolean}
   */
  contains(word) {
    return this.dictionary.includes(word)
  }

  /**
   * Generate a new sentence.
   * 
   * Every option left out (undefined or null) falls back to this.config,
   * then to the built-in default. An explicit `from: ''` or
   * `backward: false` overrides the stored default. A `grams` of 0 is
   * treated as "not set".
   * 
   * @example <caption>Generate a sentence</caption>
   * // Create a Markov chain
   * const chain = new MarkovChain()
   * 
   * // Add some words to it
   * chain.update('Some words')
   * chain.update('Some more words')
   * chain.update('Some OwO')
   * 
   * // Generate a sentence
   * const sentence = chain.generate()
   * console.log(sentence)
   * 
   * // Generate a sentence starting with 'OwO'
   * const owo = chain.generate({ from: 'OwO' })
   * console.log(owo)
   * 
   * // Generate a sentence backward
   * const backwardOwO = chain.generate({ from: 'OwO', backward: true })
   * console.log(backwardOwO)
   * 
   * @param {GenerationConfig} [config=this.config] - The config to use for the generation.
   * @return {string} The generated sentence, or '' when `from` is unknown or the corpus is empty.
   * @throws {RangeError} If the resolved `grams` is not a number greater than or equal to 1.
   */
  generate(config = {}) {
    const overrides = config || {}
    const defaults = this.config || {}
    const firstSet = (...values) => values.find(value => value !== undefined && value !== null)

    const from = firstSet(overrides.from, defaults.from, '')
    const backward = Boolean(firstSet(overrides.backward, defaults.backward, false))

    // Flooring does not change which words get selected for fractional
    // values >= 1, and it lets anything below 1 be rejected up front: such
    // values never advance through a sentence and could loop forever.
    const grams = Math.floor(overrides.grams || defaults.grams || 2)

    if (!(grams >= 1)) {
      throw new RangeError('The \'grams\' option should be a number greater than or equal to 1')
    }

    if (from !== '' && !this.contains(from)) return ''

    // Only non-empty sentences can seed or extend a generation
    const sentences = this.corpus.filter(s => Array.isArray(s) && s.length > 0)

    if (sentences.length === 0) return ''

    const sentence = []

    // If no 'from' is set, get a random first word
    if (from === '') {
      const seed = this._getRandom(sentences)

      sentence.push(backward ? seed[seed.length - 1] : seed[0])
    } else {
      sentence.push(this.dictionary.indexOf(from))
    }

    let done = false

    // Get next words until done
    while (!done) {
      const last = sentence[sentence.length - 1]

      // Get the possible chains
      const possibleChains = sentences.filter(s => s.includes(last))

      // A word known to the dictionary but absent from every sentence
      if (possibleChains.length === 0) break

      // Select a random one
      const chain = this._getRandom(possibleChains)

      // Pick a random occurrence of the word. Always taking the first one
      // would make sentences with repeated words (e.g. 'a b a c') cycle
      // forever without reaching their end.
      const occurrences = []

      chain.forEach((word, position) => {
        if (word === last) occurrences.push(position)
      })

      const wordIndex = this._getRandom(occurrences)

      // Keep the ngram length of it
      if (backward) {
        const sequence = chain
          .slice(Math.max(wordIndex - grams, 0), wordIndex)
          .reverse()

        sentence.push(...sequence)

        // Done once the start of the sentence has been reached
        done = wordIndex - grams <= 0
      } else {
        sentence.push(...chain.slice(wordIndex + 1, wordIndex + grams + 1))

        // Done once the end of the sentence has been reached
        done = wordIndex + grams >= chain.length - 1
      }
    }

    // Backward generation collects the words last-to-first
    const ordered = backward ? sentence.reverse() : sentence

    // Translate and return the result
    return ordered
      .map(word => this.dictionary[word])
      .join(' ')
  }

  /**
   * Get a JSON version of the chain object.
   * 
   * The returned arrays and config are copies, so editing them does not
   * affect the chain.
   * 
   * @return {MarkovChainJSON}
   */
  toJSON() {
    return {
      corpus: this.corpus.map(s => [...s]),
      dictionary: [...this.dictionary],
      config: { ...this.config }
    }
  }

  /**
   * Get a string version of the chain object.
   * 
   * @return {string}
   */
  toString() {
    return JSON.stringify(this.toJSON())
  }

  /**
   * Get a random element from an array.
   * 
   * @param {Array} arr - The array to get a random element from.
   * @return {*} A random element, or undefined when the array is empty.
   * @private
   */
  _getRandom(arr) {
    return arr[Math.floor(Math.random() * arr.length)]
  }
}

// Export the class for CommonJS consumers. The check is on `module` itself
// rather than on `window`: environments without either (ES modules, web
// workers) must not throw, and environments with both (bundlers, Electron)
// still need the export. In a plain browser script the class simply stays a
// script-level declaration.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = MarkovChain;
}