external/nina.no/seatrack/bin/seatrack-csv-to-json
#!/usr/bin/env ruby
# $ cd ~/npolar/api.npolar.no/external/nina.no/seatrack
# $ cat seatrack-db/v2/2016/seatrack_export_2016-05-13.csv | ./bin/seatrack-csv-to-json > seed/seatrack-db-v2.geojson
require "csv"
require "time"
require "json"
require "logger"
require "digest/sha1"
require "hashie"
begin
# Diagnostics are written to STDERR at DEBUG verbosity.
log = Logger.new(STDERR).tap { |logger| logger.level = Logger::DEBUG }
# The CSV data is read from standard input, with fields separated by commas.
separator = ","
input = STDIN
# Translates an English species name into its scientific name.
# The patterns are tried in the order listed (case-insensitively) and the
# first one that matches wins.
# Raises RuntimeError ("Missing species: <name>") when no pattern matches.
species_lambda = lambda {|name|
  known_species = [
    [/^Common eider/i, "Somateria mollissima"], # Ærfugl
    [/^Common guillemot/i, "Uria aalge"], # Lomvi
    [/Atlantic puffin/i, "Fratercula arctica"] # Lunde
  ]
  # Regexp#=== is used on purpose: it simply returns false for non-string input.
  hit = known_species.find {|pattern, _latin| pattern === name }
  raise "Missing species: #{name}" if hit.nil?
  hit.last
}
# Returns a lambda that maps the ASCII colony names listed below to their
# spelling with native characters. Any other value (including nil) is
# returned unchanged.
def colony_lambda
  native_spelling = {
    "Rost" => "Røst",
    "Bjornoya" => "Bjørnøya",
    "Hornoya" => "Hornøya",
    "Iceland" => "Ísland",
    "Hjelmsoya" => "Hjelmsøya"
  }
  ->(colony) { native_spelling.fetch(colony, colony) }
end
# Converts one CSV row into a GeoJSON-style feature (a Hashie::Mash).
#
# row - the row's fields, read through method-style accessors
#       (the caller wraps each CSV row in a Hashie::Mash).
# i   - index of the row; accepted but not used.
#
# The position is chosen in order of preference: "eqfilter3", "smooth2",
# "raw". When none of them is usable, geometry, latitude, longitude and
# algorithm are all nil.
# Raises RuntimeError (message: the row as JSON) when eqfilter3 is set and
# lat_smooth2_eqfilt3 is neither "NA" nor equal to lat_smooth2.
row_lambda = lambda {|row, i|
  log.debug row.to_json

  # Whitespace in date_time is replaced by "T" and "Z" is appended before
  # parsing. Anything that fails here (including a missing value) falls back
  # to the placeholder date 1000-01-01.
  stamp = row.date_time
  measured = begin
    Time.parse("#{stamp.gsub(/\s/, "T")}Z")
  rescue
    Time.parse("1000-01-01")
  end

  # Sanity check: with eqfilter3 set, the filtered latitude has to be either
  # "NA" or identical to the smooth2 latitude.
  filtered_lat = row.lat_smooth2_eqfilt3
  if !row.eqfilter3.nil? && filtered_lat != row.lat_smooth2 && filtered_lat != "NA"
    log.warn filtered_lat
    log.warn row.lat_smooth2
    log.warn row.to_json
    log.warn "*" * 80
    raise row.to_json
  end

  # Pick the best available position together with the algorithm behind it.
  latitude, longitude, algorithm =
    if row.eqfilter3 == "1" && filtered_lat != "NA" && row.lon_smooth2 != "NA"
      [filtered_lat.to_f, row.lon_smooth2.to_f, "eqfilter3"]
    elsif row.lat_smooth2? && row.lon_smooth2? && row.lat_smooth2 != "NA" && row.lon_smooth2 != "NA"
      [row.lat_smooth2.to_f, row.lon_smooth2.to_f, "smooth2"]
    elsif row.lat_raw? && row.lon_raw? && row.lat_raw != "NA" && row.lon_raw != "NA"
      [row.lat_raw.to_f, row.lon_raw.to_f, "raw"]
    else
      [nil, nil, nil]
    end

  # GeoJSON points are [longitude, latitude]; no algorithm means no geometry.
  geometry = algorithm && { type: "Point", coordinates: [longitude, latitude] }
  log.debug geometry if %w[raw smooth2].include?(algorithm)

  #id = Digest::SHA1.hexdigest(row.to_json)
  created = "#{row.import_date}T12:00:00Z"

  # NOTE: the values below are evaluated top to bottom. year/month/day must be
  # read before `measured.utc` (under properties), because Time#utc converts
  # the receiver in place.
  feature = Hashie::Mash.new(
    id: row.id,
    created: Time.parse(created).utc.iso8601,
    geometry: geometry,
    # Time and position are duplicated at the top level to facilitate easier search
    latitude: latitude,
    longitude: longitude,
    #genus: species_lambda.call(row.species).split(" ").first,
    year: measured.year,
    month: measured.month,
    day: measured.day,
    ## End of data duplication
    type: "Feature",
    properties: {
      time: measured.utc.iso8601,
      species: row.latin_name,
      confidence: row.conf.to_i,
      colony: colony_lambda.call(row.colony),
      technology: "gls",
      algorithm: algorithm,
      individual: row.ring_number.to_s,
      project: "seatrack",
      object: row.species,
      data_version: row.data_version.to_i
    }
  )
  #log.info feature.to_json
  feature
}
# Options for the CSV parser:
# - col_sep / row_sep: field separator from `separator`, row separator auto-detected
# - headers / return_headers: the first line is the header and is also
#   returned as a row
# - header_converters: applied to every header name, in the order given
csv_options = {
  col_sep: separator,
  row_sep: :auto,
  headers: true,
  return_headers: true,
  header_converters: [:symbol, :downcase],
  #field_size_limit: 100
}
# Read the CSV from `input`, convert every data row into a feature with
# row_lambda, drop the features without geometry and print the result as
# JSON on STDOUT.
features = []
# The options are passed as keyword arguments (`**`): on Ruby 3+ a positional
# options hash is no longer converted to keywords, so CSV() would raise
# ArgumentError. The double splat works on older Rubies as well.
csv = CSV(input, **csv_options)
csv.each_with_index do |row, i|
  # csv_options asks for the header line to be returned as a row; skip it.
  next if row.header_row?
  features << row_lambda.call(Hashie::Mash.new(row.to_hash), i)
end
log.debug "#{__FILE__} parsed #{features.size} rows"
# Only rows for which a position could be determined are kept.
features = features.reject { |feature| feature["geometry"].nil? }
log.debug "#{__FILE__} rows #{features.size} contained geometry"
geojson = {
  type: "FeatureCollection",
  features: features
}
# Toggle: true prints a GeoJSON FeatureCollection, false a bare array of features.
output_geojson = true
if output_geojson
  puts geojson.to_json
else
  puts features.to_json
end
rescue => e
# Report the failure on STDERR, not STDOUT: STDOUT carries the JSON output
# (typically redirected into a file), so error text must never be mixed into it.
STDERR.puts e
STDERR.puts e.backtrace
# A non-zero exit status signals the failure to the calling shell.
exit(1)
end