petervandenabeele/dbd

View on GitHub
bin/test_5.rb

Summary

Maintainability
A
0 mins
Test Coverage
#!/usr/bin/env ruby

# This implementation now streams to disk.
#
# Some performance figures (ruby 2.0 on MacBook Pro):
# /Users/peter_v/dbd/bin $ time ./test_5.rb 100 t_5_100
# added resource 0 to the graph
# ...
# added resource 99 to the graph
# Graph is ready (took 4.285428s), now starting the write to disk
#
# real  0m8.515s
# user  0m8.331s
# sys 0m0.181s
# ...
# /Users/peter_v/dbd/bin $ time ./test_6.rb t_5_100
# Graph is ready (took 14.455278s).
# graph.size is 100100
#
# real  0m14.922s
# user  0m14.728s
# sys 0m0.189s
#
# From version 0.0.13 with newline escaping, the times went up:
# writing (test_5)
# real  0m11.656s
#
# reading back (test_6)
# real  0m18.442s

# Number of facts added to each generated resource.
FACTS_PER_RESOURCE = 1000

# Command line: test_5.rb <count> <filename>
#
# count    - first argument; read with String#to_i, so a missing or
#            non-numeric value becomes 0 and is rejected by the check below
#            (negative values are rejected as well).
# filename - second argument; only its presence is checked here.
#
# Usage errors are reported with Kernel#abort, which prints the message to
# standard error (keeping standard output free of error text) and terminates
# the script with exit status 1.
count = ARGV[0].to_i
abort 'Give a "count" as first argument.' unless count > 0

filename = ARGV[1]
abort 'Give a "filename" as second argument.' unless filename

require 'dbd'

# Build one graph containing `count` contexts and resources, report how long
# the build took, then hand the graph to Graph#to_CSV_file with `filename`.
started_at = Time.now
graph = Dbd::Graph.new

count.times do |resource_index|
  # Every resource gets its own context holding a single 'prov:test' fact.
  context = Dbd::Context.new
  context << Dbd::ContextFact.new(predicate: 'prov:test',
                                  object_type: 's',
                                  object: 'A' * 10)

  resource = Dbd::Resource.new(context_subject: context.subject)

  FACTS_PER_RESOURCE.times do |fact_index|
    # Running number that differs for every fact across all resources.
    serial = resource_index * FACTS_PER_RESOURCE + fact_index
    # NOTE(review): the embedded "\n" presumably exercises the newline
    # escaping mentioned in the timing notes at the top of the file.
    payload = "#{'B' * 75} #{serial} \n CD"
    resource << Dbd::Fact.new(predicate: 'test', object_type: 's', object: payload)
  end

  graph << context << resource
  puts "added resource #{resource_index} to the graph"
end

elapsed = Time.now - started_at
puts "Graph is ready (took #{elapsed}s), now starting the write to disk"

graph.to_CSV_file(filename)