chatwoot/chatwoot

enterprise/app/models/enterprise/concerns/article.rb

Summary: Maintainability A (0 mins) · Test Coverage F (40%)
module Enterprise::Concerns::Article
  extend ActiveSupport::Concern

  included do
    after_save :add_article_embedding, if: -> { saved_change_to_title? || saved_change_to_description? || saved_change_to_content? }

    def self.add_article_embedding_association
      has_many :article_embeddings, dependent: :destroy_async
    end

    # Only wire up the association when the help-center embedding search feature is enabled.
    add_article_embedding_association if Features::HelpcenterEmbeddingSearchService.new.feature_enabled?
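
    # For reference, a minimal sketch (an assumption, not the actual definition)
    # of the ArticleEmbedding model this association points at, assuming the
    # neighbor gem, which provides the `nearest_neighbors` scope used below:
    #
    #   class ArticleEmbedding < ApplicationRecord
    #     belongs_to :article
    #     has_neighbors :embedding
    #   end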

    def self.vector_search(params)
      embedding = Openai::EmbeddingsService.new.get_embedding(params['query'], 'text-embedding-3-small')
      records = joins(:category)
                .search_by_category_slug(params[:category_slug])
                .search_by_category_locale(params[:locale])
                .search_by_author(params[:author_id])
                .search_by_status(params[:status])
      filtered_article_ids = records.pluck(:id)

      # Fetch the nearest-neighbor embeddings for the pre-filtered articles.
      #
      # Experiment: filter results by a distance threshold. If enabled, chain
      # this filter block onto the query below:
      #   distance_threshold = 0.2
      #   .filter { |ae| ae.neighbor_distance <= distance_threshold }

      article_ids = ArticleEmbedding.where(article_id: filtered_article_ids)
                                    .nearest_neighbors(:embedding, embedding, distance: 'cosine')
                                    .limit(5)
                                    .pluck(:article_id)

      # Fetch the articles by the IDs obtained from the nearest neighbors search
      where(id: article_ids)
    end
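
    # Hypothetical usage sketch, assuming this concern is mixed into the Article
    # model (parameter values are illustrative only). Note that the method reads
    # params['query'] with a string key but the filter scopes with symbol keys,
    # so a plain Hash caller needs both forms (ActionController::Parameters
    # accepts either):
    #
    #   Article.vector_search(
    #     'query' => 'how do I reset my password',
    #     category_slug: 'account',
    #     locale: 'en',
    #     author_id: nil,
    #     status: 'published'
    #   )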
  end

  def add_article_embedding
    return unless account.feature_enabled?('help_center_embedding_search')

    Portal::ArticleIndexingJob.perform_later(self)
  end
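
  # Assumption: Portal::ArticleIndexingJob is defined elsewhere in the codebase.
  # It presumably ends up calling #generate_and_save_article_seach_terms on the
  # article, roughly along these lines (a hypothetical sketch, not the real job):
  #
  #   class Portal::ArticleIndexingJob < ApplicationJob
  #     def perform(article)
  #       article.generate_and_save_article_seach_terms
  #     end
  #   end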

  def generate_and_save_article_seach_terms
    terms = generate_article_search_terms
    article_embeddings.destroy_all
    terms.each { |term| article_embeddings.create!(term: term) }
  end
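
  # After this runs, the article's embedding rows are rebuilt from the freshly
  # generated terms, e.g. (illustrative values only):
  #
  #   article.article_embeddings.pluck(:term)
  #   #=> ["reset password", "two factor authentication"]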

  def article_to_search_terms_prompt
    <<~SYSTEM_PROMPT_MESSAGE
      For the provided article content, generate potential search query keywords and snippets that can be used to generate the embeddings.
      Ensure the search terms are as diverse as possible while still capturing the essence of the article and staying closely related to it.
      Return no search terms if none are relevant.
      Always return the result as valid JSON in the following format:
      {
        "search_terms": []
      }
    SYSTEM_PROMPT_MESSAGE
  end
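
  # Example of the JSON shape the prompt asks the model to return
  # (terms shown are illustrative only):
  #
  #   {
  #     "search_terms": ["reset password", "update billing details", "invite teammates"]
  #   }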

  def generate_article_search_terms
    messages = [
      { role: 'system', content: article_to_search_terms_prompt },
      { role: 'user', content: "title: #{title} \n description: #{description} \n content: #{content}" }
    ]
    headers = { 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{ENV.fetch('OPENAI_API_KEY', nil)}" }
    body = { model: 'gpt-4o', messages: messages, response_format: { type: 'json_object' } }.to_json
    Rails.logger.info "Requesting Chat GPT with body: #{body}"
    response = HTTParty.post('https://api.openai.com/v1/chat/completions', headers: headers, body: body)
    Rails.logger.info "Chat GPT response: #{response.body}"
    JSON.parse(response.parsed_response['choices'][0]['message']['content'])['search_terms']
  end
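
  # For context, the OpenAI chat completions response parsed on the line above
  # looks roughly like this (heavily abbreviated, illustrative values only):
  #
  #   {
  #     "choices" => [
  #       { "message" => { "content" => "{\"search_terms\": [\"reset password\"]}" } }
  #     ]
  #   }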
end