app/models/investor.rb

Summary

Maintainability
F
3 days
Test Coverage
class Investor < ApplicationRecord
  # Shared behaviour mixed in from project concerns.
  # Ignorable is both extended and included, so whatever it defines is
  # available on the class as well as on instances (ignore_invalid is called
  # in both contexts in this file — presumably it comes from this concern).
  extend Concerns::Ignorable
  include Concerns::AttributeArrayable
  include Concerns::Cacheable
  include Concerns::Ignorable
  include Concerns::TimeZonable
  include Concerns::Locationable
  include Concerns::Graphable
  include Concerns::Socialable
  include Concerns::Entityable

  # Names accepted by the gender enum, in declaration order. Frozen so the
  # shared constant cannot be mutated at runtime.
  GENDERS = %w(unknown male female).freeze

  # Associations. Investments, news and posts are destroyed together with the
  # investor; target investors are kept and only have their reference nullified.
  belongs_to :competitor
  has_many :target_investors, dependent: :nullify
  has_many :notes, as: :subject
  has_many :investments, dependent: :destroy
  has_many :companies, through: :investments
  belongs_to :university
  has_many :news, dependent: :destroy
  has_many :emails
  has_many :posts, dependent: :destroy
  has_many :intro_requests

  # A competitor and both names are required; first_name must be unique within
  # the same last_name and competitor. Contact details, external ids and
  # profile URLs must be unique whenever they are set (nil is always allowed).
  validates :competitor, presence: true
  validates :first_name, presence: true, uniqueness: { scope: [:last_name, :competitor_id] }
  validates :last_name, presence: true
  validates :email, uniqueness: { allow_nil: true }
  validates :crunchbase_id, uniqueness: { allow_nil: true }
  validates :al_id, uniqueness: { allow_nil: true }
  validates :facebook, uniqueness: { allow_nil: true }
  validates :linkedin, uniqueness: { allow_nil: true }
  validates :twitter, uniqueness: { allow_nil: true }
  validates :homepage, uniqueness: { allow_nil: true }
  validates :photo, uniqueness: { allow_nil: true }

  # NOTE(review): `array` is not defined in this file — presumably a macro
  # from Concerns::AttributeArrayable; confirm there.
  array :industry
  array :fund_type
  enum gender: GENDERS

  # Callbacks. The Crunchbase job is only enqueued after the creating
  # transaction has committed.
  # NOTE(review): fix_linkedin! is not defined in this file — presumably
  # supplied by one of the included concerns.
  before_save :titleize_role
  after_commit :start_crunchbase_job, on: :create
  after_update :check_competitor_domain
  before_validation :normalize_location, :set_first_name, :fix_linkedin!

  # Investors whose token equals the given value. Rows with a NULL token are
  # excluded explicitly, so a nil argument matches nothing.
  scope :with_token, Proc.new { |token| where.not(token: nil).where(token: token) }

  # Resolves a parsed mail address to an Investor.
  #
  # An exact match on the email address wins. Otherwise the display name is
  # split into first/last name, searched, and the first candidate whose
  # competitor domain equals the address domain is returned.
  #
  # addr - object responding to address, name and domain.
  #
  # Returns the matching Investor, or nil when nothing matches or the address
  # carries no display name.
  def self.from_addr(addr)
    by_email = where(email: addr.address).first
    return by_email if by_email.present?
    return nil if addr.name.blank?

    first, last = Util.split_name(addr.name)
    candidates = search(first_name: first, last_name: last).includes(:competitor)
    candidates.detect { |candidate| candidate.competitor.domain == addr.domain }
  end

  # Full display name: first and last name separated by a single space.
  def name
    [first_name, last_name].join(' ')
  end

  # Assigns the homepage, prefixing "http://" when a non-blank value contains
  # no "//" (i.e. it was given without a scheme). Blank values and values that
  # already contain "//" are stored unchanged.
  def homepage=(homepage)
    missing_scheme = homepage.present? && !homepage.include?('//')
    super(missing_scheme ? "http://#{homepage}" : homepage)
  end

  # Copies the core profile fields from the Crunchbase person onto this
  # record (nothing is saved here).
  #
  # Role and competitor come from the person's affiliation when it is flagged
  # as an investor; otherwise from the most recently updated affiliated
  # company whose organization is flagged as an investor. Does nothing when no
  # Crunchbase person is available.
  def populate_from_cb_basic!
    person = crunchbase_person
    return if person.blank? || !person.found?

    self.first_name = person.first_name
    self.last_name = person.last_name

    if person.affiliation.present? && person.affiliation.investor
      affiliation = person.affiliation
      self.role = affiliation.role
      if affiliation.permalink.present?
        self.competitor = Competitor.from_crunchbase!(affiliation.permalink, affiliation.name)
      end
    else
      investor_job = person.affiliated_companies
        .sort_by(&:updated_at)
        .reverse
        .find { |candidate| candidate.organization.role_investor }
      if investor_job.present?
        self.role = investor_job.title
        self.competitor = Competitor.from_crunchbase!(investor_job.organization.permalink, investor_job.organization.name)
      end
    end

    self.description = person.bio
    self.location = person.location&.name
    self.country = person.location&.country_code2
    # Keep the current gender when Crunchbase does not report one.
    self.gender = person.gender || self.gender
    self.university = University.from_name(person.university) if person.university.present?
  end

  # Full Crunchbase import: photo, social handles, the basic profile fields,
  # entities extracted from the description, featured portfolio companies and
  # the person's news items. Does nothing when no Crunchbase person is
  # available; stops after the entity step when no competitor is set.
  def populate_from_cb!
    person = crunchbase_person
    return if person.blank? || !person.found?

    self.photo = person.image
    Founder::SOCIAL_KEYS.each do |key|
      # Only overwrite a handle when Crunchbase actually has a value for it.
      self[key] = person.public_send(key) if person.public_send(key).present?
    end

    populate_from_cb_basic!

    add_entities! Entity.from_html(description)

    return if competitor.blank?

    person.affiliated_companies.each do |job|
      organization = job.organization
      next unless organization.role_company
      portfolio = competitor.companies
      # Match the competitor's companies by Crunchbase permalink or by name.
      match = portfolio.where(crunchbase_id: organization.permalink).or(portfolio.where(name: organization.name)).first
      next if match.blank?
      assign_company! match, featured: true
    end

    articles_by_url = person.news.map { |article| [article['url'], article] }.to_h
    Http::Fetch.get(articles_by_url.keys).each do |url, body|
      next if body.blank?
      ignore_invalid { import_news_with_body(url, body, articles_by_url[url]['posted_on']) }
    end
  end

  # Imports news items returned by Newsriver for this investor and their
  # competitor. Items without text, or whose text mentions neither name
  # (case-insensitively), are skipped. The fetched page body is preferred over
  # the item's own text.
  def fetch_news!
    # Disabled Bing-based implementation, kept for reference:
    # news = Http::Bing.news("#{name} + #{competitor.name}").map { |n| [n['url'], n] }.to_h
    # Http::Fetch.get(news.keys).each do |url, body|
    #   next unless body.present?
    #   next unless name.downcase.in?(body.downcase) || competitor.name.downcase.in?(body.downcase)
    #   meta = news[url]
    #   next unless meta.present?
    #   import_news_with_attrs(url, body, title: meta['name'], description: meta['description'], published_at: meta['datePublished'])
    # end

    Http::Newsriver.news(name, competitor.name).each do |item|
      next if item['text'].blank?
      mentions_investor = item['text'].downcase.include?(name.downcase)
      next unless mentions_investor || item['text'].downcase.include?(competitor.name.downcase)
      body = Http::Fetch.get_one(item['url']) || item['text']
      import_news_with_attrs(item['url'], body, title: item['title'], published_at: item['discoverDate'])
    end
  end

  # Looks up the time zone for the investor's location via Google Maps and
  # persists its name. Does nothing without a location or a lookup result.
  def set_timezone!
    return if location.blank?
    timezone = Http::GoogleMaps.new.timezone(location)
    return if timezone.blank?
    update!(time_zone: timezone.name)
  end

  # Fetches the investor's homepage, extracts entities from it and marks every
  # competitor company whose name appears in the page as a featured
  # investment. When the page cannot be fetched, the homepage attribute is
  # cleared in memory (not saved here).
  def crawl_homepage!
    return if homepage.blank?

    page = Http::Fetch.get_one(homepage)
    if page.blank?
      self.homepage = nil
      return
    end

    add_entities! Entity.from_html(page)
    competitor.companies.find_each do |company|
      next unless page.include?(company.name)
      assign_company! company, featured: true
    end
  end

  # Imports blog posts from the investor's feed.
  #
  # Posts whose URL already exists in the posts table are skipped. For each
  # remaining entry the page is fetched, a description is extracted, the post
  # is created, entities found in the body are attached to it, and each feed
  # category that resolves to an entity is linked as featured. Entries are
  # skipped when they have no body, when description extraction raises, or
  # when the post fails validation.
  def crawl_posts!
    candidates = fetch_posts!
    known_urls = Set.new Post.where(url: candidates.map { |p| p[:url] }).pluck(:url)

    candidates.reject { |p| known_urls.include? p[:url] }.each do |meta|
      url = meta[:url]
      html = Http::Fetch.get_one(url)
      body = meta[:content] || Util.text_content(html)
      next if body.blank?

      description = begin
        Util.fix_encoding(MetaInspector.new(url, document: html).best_description)
      rescue
        next
      end

      post = begin
        posts.where(url: url).first_or_create!(title: meta[:title], published_at: meta[:published], description: description)
      rescue ActiveRecord::RecordInvalid
        next
      end

      add_entities! Entity.from_html(body), owner: post, bump_counts: true

      next if meta[:categories].blank?
      meta[:categories].each do |category|
        entity = Entity.from_name(category)
        next if entity.blank?
        PersonEntity.where(entity: entity, person: post).first_or_create!(featured: true)
      end
    end
  end

  # Copies profile data from the AngelList user onto this record (nothing is
  # saved here). Does nothing when no AngelList user is available.
  #
  # Names, homepage and location are only filled in when currently nil;
  # photo and social handles are overwritten whenever AngelList has a value.
  # The bio replaces the description only when the description is blank, or
  # is shorter than 50 characters and shorter than the bio. A missing bio,
  # name or location list is tolerated instead of raising.
  def populate_from_al!
    user = angelist_user
    return unless user.present? && user.found?

    self.al_url = user.angellist_url

    name_parts = user.name.to_s.split(' ')
    self.first_name ||= name_parts.first
    self.last_name ||= name_parts.drop(1).join(' ')

    self.photo = user.image if user.image.present?
    self.twitter = user.twitter if user.twitter.present?
    self.facebook = user.facebook if user.facebook.present?
    self.linkedin = user.linkedin if user.linkedin.present?

    bio = user.bio
    if bio.present? && (description.blank? || (description.length < 50 && bio.length > description.length))
      self.description = bio
    end
    self.homepage ||= user.homepage
    self.location ||= user.locations&.first

    # Union rather than concatenation, so repeated imports do not pile up
    # duplicate fund types.
    self.fund_type = (fund_type || []) | user.fund_types if user.fund_types.present?
  end

  # Records an investment of this investor in the given company by delegating
  # to Investment.assign_to_investor, forwarding both flags unchanged.
  def assign_company!(company, featured: false, no_replace: false)
    flags = { featured: featured, no_replace: no_replace }
    Investment.assign_to_investor(self, company, **flags)
  end

  # Guesses the gender from the first name and persists it, but only while the
  # current gender is still "unknown".
  #
  # The current value and the detector result are compared as strings: the
  # enum reader yields a String and GENDERS holds Strings, so comparing
  # against Symbols would never match. A "mostly_" prefix on the detector
  # result is dropped; results outside GENDERS are ignored.
  def set_gender!
    return unless gender.to_s == 'unknown'
    detected = GenderDetector.new.get_gender(first_name).to_s.sub(/\Amostly_/, '')
    update! gender: detected if GENDERS.include?(detected)
  end

  # Saves the record (when it has changes) and tries to resolve uniqueness
  # conflicts instead of failing.
  #
  # When validation fails and every error is a :taken error, the record that
  # holds all of the conflicting values is looked up. That record is
  # destroyed; if it cannot be destroyed because of a foreign key, its
  # conflicting attributes (plus email and al_id) are set to nil instead and a
  # Crunchbase job is enqueued for it. The save is then retried once inside
  # ignore_invalid.
  #
  # Re-raises ActiveRecord::RecordInvalid when any error is not :taken.
  # Returns without retrying when no single record holds all conflicting values.
  def save_and_fix_duplicates!
    begin
      self.save! if self.changed?
    rescue ActiveRecord::RecordInvalid => e
      # Only uniqueness conflicts are handled here; anything else propagates.
      raise unless e.record.errors.details.all? { |k,v| v.all? { |e| e[:error].to_sym == :taken } }
      # attribute => conflicting value, taken from the first error of each attribute
      attrs = e.record.errors.details.transform_values { |v| v.first[:value] }
      other = Investor.where(attrs).first
      return unless other.present?
      begin
        other.destroy!
        # Re-import the destroyed record when it pointed at a different Crunchbase id.
        Investor.from_crunchbase(other.crunchbase_id) if other.crunchbase_id.present? && other.crunchbase_id != self.crunchbase_id
      rescue ActiveRecord::InvalidForeignKey
        # The other record is still referenced: release the conflicting values instead.
        other.update! attrs.transform_values { |v| nil }.merge(email: nil, al_id: nil)
        InvestorCrunchbaseJob.perform_later(other.id)
      end
      ignore_invalid { self.save! }
    end
  end

  # Fuzzy investor search on a free-text query.
  #
  # The query is split into first/last name (the first name doubles as last
  # name when there is no second part) and also used as firm name. Investors
  # are joined against the SQL produced by Search.search_investors and
  # ordered by rank, then by featured flag. At most 10 rows are returned.
  #
  # q            - search text.
  # existing_ids - object responding to to_sql; when that SQL is non-blank,
  #                investors whose id it selects are excluded.
  def self.custom_fuzzy_search(q, existing_ids)
    first_name, last_name = Util.split_name(q)
    last_name = first_name if last_name.blank?
    ranked_sql = Search.search_investors({ first_name: first_name, last_name: last_name, firm_name: q })

    matches = Investor.joins("INNER JOIN (#{ranked_sql}) AS results ON results.match_id = investors.id")
    matches = matches.joins("INNER JOIN competitors ON results.id = competitors.id")
    if existing_ids.to_sql.present?
      matches = matches.where("investors.id NOT IN (#{existing_ids.to_sql})")
    end

    matches
      .select('investors.*')
      .order('results.rank DESC', 'investors.featured DESC')
      .limit(10)
  end

  # Finds the investor with the given Crunchbase id, or creates one populated
  # with the basic Crunchbase fields. The lookup runs inside retry_unique and
  # ignore_invalid. Returns nil for a blank id.
  def self.from_crunchbase(cb_id)
    return nil if cb_id.blank?

    ignore_invalid do
      retry_unique do
        where(crunchbase_id: cb_id).first_or_create!(&:populate_from_cb_basic!)
      end
    end
  end

  # Finds the investor with the given AngelList id, or creates one populated
  # from AngelList. Returns nil for a blank id or when the new record fails
  # validation.
  def self.from_angelist(al_id)
    return nil if al_id.blank?

    begin
      where(al_id: al_id).first_or_create!(&:populate_from_al!)
    rescue ActiveRecord::RecordInvalid
      nil
    end
  end

  # Resolves a name to an investor, trying in order: a local fuzzy search, a
  # Crunchbase lookup, then an AngelList lookup. Later sources are only
  # consulted when the earlier ones produce nothing.
  def self.from_name(name)
    fuzzy_search(first_name: name, last_name: name).first.presence ||
      from_crunchbase(Http::Crunchbase::Person.find_investor_id(name)).presence ||
      from_angelist(Http::AngelList::User.find_id(name))
  end

  # Returns the investor with this exact name at the given competitor,
  # creating it when none exists. Raises if the new record is invalid.
  def self.create_for_competitor!(competitor, first_name, last_name)
    identity = { competitor: competitor, first_name: first_name, last_name: last_name }
    where(identity).first_or_create!
  end

  # Column names exposed for searching.
  def self.searchable_columns
    %i[first_name last_name]
  end

  # True when this investor's email equals the DEMO_EMAIL environment variable.
  def demo?
    ENV['DEMO_EMAIL'] == email
  end

  # True only when opted_in is explicitly false; nil (undecided) does not
  # count as opted out.
  def opted_out?
    false == opted_in
  end

  # AngelList username: the last path segment of al_url, or nil when al_url
  # is blank.
  def al_username
    return if al_url.blank?
    al_url.split('/').last
  end

  # Sets al_url from an AngelList username. The attribute is marked as
  # changing before the new URL is assigned.
  def al_username=(al_username)
    al_url_will_change!
    profile_url = "https://angel.co/#{al_username}"
    self.al_url = profile_url
  end

  # Default JSON representation: a whitelist of attributes plus a set of
  # computed methods. Caller-supplied options take precedence over these
  # defaults.
  def as_json(options = {})
    exposed_attributes = %i[
      id role first_name last_name description industry fund_type
      industry_highlight photo twitter facebook linkedin homepage location
      time_zone al_url verified review tags
    ]
    exposed_methods = %i[
      competitor popular_entities recent_investments recent_news university
      tweets public_posts utc_offset
    ]
    super options.reverse_merge(only: exposed_attributes, methods: exposed_methods)
  end

  # Compact representation for search results: identity, role, photo,
  # industry, fund type and initials, with the competitor's own search
  # representation merged in under :competitor.
  def as_search_json
    summary = as_json(
      only: %i[id role first_name last_name photo industry fund_type],
      methods: [:initials]
    )
    summary.merge(competitor: competitor.as_search_json)
  end

  # Minimal representation: name, photo, id and role, with no computed methods.
  def as_light_json
    as_json(only: %i[first_name last_name photo id role], methods: [])
  end

  # Crunchbase person for this investor's crunchbase_id, memoized once found.
  # Returns nil when there is no crunchbase_id or the person is not found
  # (a miss is not memoized, so the lookup is repeated on the next call).
  def crunchbase_person
    return if crunchbase_id.blank?
    return @crunchbase_person if @crunchbase_person

    candidate = Http::Crunchbase::Person.new(crunchbase_id, nil)
    @crunchbase_person = candidate.found? ? candidate : nil
  end

  # AngelList user for this investor's al_id, memoized once found. Returns
  # nil when there is no al_id or the user is not found.
  #
  # safe - when true, a rate-limit error yields nil instead of propagating.
  #
  # The rescued constant lives under Http::, the namespace used for every
  # other client in this file; the previous HTTP:: spelling pointed at a
  # different top-level constant.
  def angelist_user(safe: false)
    return if al_id.blank?

    @angelist_user ||= begin
      user = Http::AngelList::User.new al_id
      user if user.found?
    end
  rescue Http::AngelList::Errors::RateLimited
    raise unless safe
    nil
  end

  # Best guess at the investor's blog URL, memoized on the instance and
  # wrapped in cache_for_a_month. Preference order: AngelList blog,
  # Crunchbase blog, homepage, then a Medium profile built from the Twitter
  # handle.
  def blog_url
    @blog_url ||= cache_for_a_month do
      angelist_user(safe: true)&.blog ||
        crunchbase_person&.blog ||
        homepage ||
        (twitter.present? ? "https://medium.com/@#{twitter}" : nil)
    end
  end

  # The n most recently published posts (default 3), newest first.
  def public_posts(n: 3)
    newest_first = posts.order(published_at: :desc)
    newest_first.limit(n)
  end

  # Returns the investor's token, generating and persisting one first when
  # none is stored yet.
  def token
    update!(token: Util.token) if super.blank?
    super
  end

  # The n most recent tweets (default 3) of the associated tweeter, newest
  # first; an empty array when there is no tweeter.
  def tweets(n = 3)
    return [] if tweeter.blank?
    tweeter.tweets.order(tweeted_at: :desc).limit(n)
  end

  # Classifies a city relative to the investor:
  #   :working            - the city equals the investor's location
  #   :work_traveling     - the city is included in the competitor's location
  #   :pleasure_traveling - anything else
  def travel_status(city)
    return :working if city == location

    firm_locations = competitor.location || []
    firm_locations.include?(city) ? :work_traveling : :pleasure_traveling
  end

  # Recomputes average_response_time from this investor's emails via
  # Util.average_response_time and assigns it (not saved here).
  def set_average_response_time!
    average = Util.average_response_time(emails, :founder_id)
    self.average_response_time = average
  end

  # The founder's entities that are also among this investor's top 50
  # popular entities.
  def founder_overlap(founder)
    founder_entities = founder.entities
    founder_entities.where(id: n_popular_entities(50))
  end

  # Looks the investor up on KnowYourVC and stores the review when it is
  # published and rated at least 4.
  #
  # The name is percent-encoded as a single query component (spaces as %20),
  # so characters such as "&" or "#" in a name cannot break the query string.
  # A missing rating counts as 0 rather than raising. Does nothing when the
  # request yields no body or the response reports errors.
  def fetch_review!
    encoded_name = URI.encode_www_form_component(name).gsub('+', '%20')
    url = "https://knapi-prod.knowyourvc.com/api/investors/search?name=#{encoded_name}"
    value = Http::Fetch.get_one(url)
    return unless value.present?

    response = JSON.parse(value).with_indifferent_access
    return if response[:errors].present?

    review = response[:review]
    return if review.blank? || !review[:publishedAt] || review[:overall].to_f < 4

    update! review: { text: review[:comment], id: response[:investorId] }
  end

  # Summary of this investor's interactions with a founder.
  #
  # Always contains :entities (top 3 popular entities, with a scope of
  # entities that have a wiki). When a founder is given it also contains the
  # open/travel data of the most recently created opened outgoing email (or,
  # failing that, of the founder's first intro request), the time of the
  # latest incoming email, the entity overlap (nil when empty) and the path
  # count from the founder to this investor.
  def interactions(founder)
    summary = { entities: n_popular_entities(3, Entity.where.not(wiki: nil)) }
    return summary if founder.blank?

    conversation = emails.where(founder: founder).order(created_at: :desc)
    latest_incoming = conversation.where(direction: :incoming).first
    latest_opened = conversation
      .where(direction: :outgoing)
      .joins(:tracking_pixel)
      .where('tracking_pixels.opened_at IS NOT NULL')
      .first
    openable = latest_opened&.tracking_pixel || intro_requests.where(founder: founder).first
    shared = founder_overlap(founder)

    summary.merge(
      opened_at: openable&.opened_at,
      open_city: openable&.open_city,
      travel_status: openable&.travel_status,
      last_contact: latest_incoming&.created_at,
      overlap: shared.present? ? shared : nil,
      paths: founder.count_paths_to(self)
    )
  end

  # Flags this instance so that start_crunchbase_job enqueues the Crunchbase
  # job on the :long_now queue instead of :long. The flag lives only on this
  # in-memory instance.
  def start_job_now!
    @start_job_now = true
  end

  private

  # after_update hook: when the investor has an email and the competitor has
  # no domain yet, stores the email's domain on the competitor.
  def check_competitor_domain
    return if email.blank? || competitor.domain.present?
    competitor.update!(domain: Mail::Address.new(email).domain)
  end

  # before_validation hook: passes a present location through
  # Util.normalize_city; blank locations are left alone.
  def normalize_location
    return if location.blank?
    self.location = Util.normalize_city(location)
  end

  # Finds or initializes this investor's news item for the URL (attrs apply
  # only to a new record), sets its body, saves it inside ignore_invalid and
  # links it to matching companies.
  def import_news_with_attrs(url, body, attrs)
    article = News.where(investor: self, url: url).first_or_initialize(attrs)
    article.body = body
    ignore_invalid { article.save! }
    import_news article
  end

  # Creates a news item for this investor via News.create_with_body and
  # links it to matching companies.
  def import_news_with_body(url, body, published_at)
    article = News.create_with_body(url, body, investor: self, attrs: { published_at: published_at })
    import_news(article)
  end

  # Links a news item to every competitor company whose name appears in its
  # body and records the investment (without replacing an existing one).
  # Companies for which the news update fails validation are skipped.
  def import_news(news)
    competitor.companies.find_each do |company|
      next unless news.body.include?(company.name)

      begin
        news.update! company: company
      rescue ActiveRecord::RecordInvalid
        next
      end
      assign_company! company, no_replace: true
    end
  end

  # Downloads and parses the feed advertised by the investor's blog.
  #
  # Returns an array of hashes limited to the title, url, categories,
  # published and content keys, or [] when the blog URL, page, feed URL or
  # feed body is missing, or no parser is available for the feed.
  def fetch_posts!
    return [] if blog_url.blank?
    page = Http::Fetch.get_one blog_url
    return [] if page.blank?
    feed_url = MetaInspector.new(blog_url, document: page).feed
    return [] if feed_url.blank?
    raw_feed = Http::Fetch.get_one feed_url
    return [] if raw_feed.blank?

    begin
      parsed = Feedjira::Feed.parse raw_feed
    rescue Feedjira::NoParserAvailable
      return []
    end

    parsed.entries.map do |entry|
      entry.to_h.with_indifferent_access.slice(:title, :url, :categories, :published, :content)
    end
  end

  # The n latest news items (default 5), ordered by publication date and then
  # creation date, newest first.
  def recent_news(n = 5)
    newest_first = news.order('published_at DESC, created_at DESC')
    newest_first.limit(n)
  end

  # Entities for the ids of n_popular_entities, with the id list going
  # through `cached`. A nil cache result is treated as an empty list.
  def popular_entities
    cached_ids = cached { n_popular_entities.pluck(:id) }
    Entity.find(cached_ids || [])
  end

  # Up to n popular entities. Entities taken from posts come first; when
  # there are fewer than n, the shortfall is filled with the investor's own
  # entities whose count exceeds 1, highest count first, and the result is
  # merged with `scope`.
  #
  # NOTE(review): `scope` is only applied on the fill-up path — when posts
  # already yield n entities they are returned unscoped. Confirm intended.
  def n_popular_entities(n = 3, scope = Entity.all)
    from_posts = post_entities(n)
    found = from_posts.count
    return from_posts if found >= n

    frequent = entities
      .where('person_entities.count > ?', 1)
      .order('person_entities.count DESC')
      .limit(n - found)
    Entity.where(id: frequent).or(from_posts).merge(scope)
  end

  # Up to n entities attached to this investor's posts, most frequent first.
  # An entity must account for at least 5% (rounded down) of all post/entity
  # links to qualify.
  def post_entities(n = 3)
    tagged_posts = posts.joins(:entities)
    threshold = (tagged_posts.count * 0.05).to_i

    ranked_ids = posts
      .joins(:entities)
      .group('entities.id')
      .order('count(*) DESC')
      .having('count(*) >= ?', threshold)
      .limit(n)
      .select('entities.id')
    Entity.where(id: ranked_ids)
  end

  # Up to n portfolio companies (default 3), grouped per company and ordered
  # by: any featured investment, capital raised, number of investments, then
  # latest funding date (NULLs last).
  def recent_investments(n = 3)
    ordering = [
      'bool_or(investments.featured) DESC, companies.capital_raised DESC',
      'count(investments.id) DESC',
      'MAX(investments.funded_at) DESC NULLS LAST'
    ]
    companies
      .group('companies.id')
      .joins(:investments)
      .order(*ordering)
      .select('companies.*', 'MAX(investments.funded_at)')
      .limit(n)
  end

  # Upper-cased first letters of the first and last name, concatenated.
  def initials
    [first_name, last_name].map { |part| part.first.upcase }.join
  end

  # before_validation hook: gives an investor with neither first nor last
  # name a placeholder first name.
  #
  # The first placeholder at a competitor is 'Investor'; later ones are
  # numbered by how many placeholders already exist there ('Investor 1',
  # 'Investor 2', ...). Already numbered placeholders are counted too, so the
  # numbering keeps advancing.
  def set_first_name
    return unless first_name.blank? && last_name.blank?

    siblings = self.class.where(competitor_id: competitor_id)
    taken = siblings
      .where(first_name: 'Investor')
      .or(siblings.where('first_name LIKE ?', 'Investor %'))
      .count
    self.first_name = taken.zero? ? 'Investor' : "Investor #{taken}"
  end

  # before_save hook: titleizes the role, unless it is blank or written
  # entirely in upper case (such roles are left as they are).
  def titleize_role
    return if role.blank?
    return if role.upcase == role
    self.role = role.titleize
  end

  # after_commit (create) hook: enqueues the Crunchbase import for this
  # investor, on :long_now when start_job_now! was called on this instance
  # and on :long otherwise.
  def start_crunchbase_job
    queue = @start_job_now ? :long_now : :long
    InvestorCrunchbaseJob.set(queue: queue).perform_later(id)
  end
end