rdfio/rdf2smw

View on GitHub
main.go

Summary

Maintainability
A
1 hr
Test Coverage
/*
rdf2smw is a commandline tool to convert from RDF data to MediaWiki XML Dump
files, for import using MediaWiki's built in importDump.php script.
 
Usage
 
./rdf2smw -in <infile> -out <outfile>
 
Flags
 
-in Input file in RDF N-triples format
-out Output file in (MediaWiki) XML format
 
Example usage
 
./rdf2smw -in mydata.nt -out mydata.xml
 
For importing the generated XML Dumps into MediaWiki, see this page:
https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps
*/
package main
 
import (
"flag"
"fmt"
"github.com/rdfio/rdf2smw/components"
"os"
 
str "strings"
 
"github.com/flowbase/flowbase"
)
 
const (
exported const BUFSIZE should have comment (or a comment on this block) or be unexported
BUFSIZE = 16
)
 
Function `main` has 56 lines of code (exceeds 50 allowed). Consider refactoring.
func main() {
//flowbase.InitLogDebug()
 
inFileName := flag.String("in", "", "The input file name")
outFileName := flag.String("out", "", "The output file name")
flag.Parse()
 
doExit := false
if *inFileName == "" {
fmt.Println("No filename specified to --in")
doExit = true
} else if *outFileName == "" {
fmt.Println("No filename specified to --out")
doExit = true
}
 
if doExit {
os.Exit(1)
}
 
// ------------------------------------------
// Initialize processes
// ------------------------------------------
 
// Create a pipeline runner
net := flowbase.NewNet()
 
// Read in-file
ttlFileRead := components.NewOsTurtleFileReader()
net.AddProcess(ttlFileRead)
 
// TripleAggregator
aggregator := components.NewTripleAggregator()
net.AddProcess(aggregator)
 
// Create an subject-indexed "index" of all triples
indexCreator := components.NewResourceIndexCreator()
net.AddProcess(indexCreator)
 
// Fan-out the triple index to the converter and serializer
indexFanOut := components.NewResourceIndexFanOut()
net.AddProcess(indexFanOut)
 
// Serialize the index back to individual subject-tripleaggregates
indexToAggr := components.NewResourceIndexToTripleAggregates()
net.AddProcess(indexToAggr)
 
// Convert TripleAggregate to WikiPage
triplesToWikiConverter := components.NewTripleAggregateToWikiPageConverter()
net.AddProcess(triplesToWikiConverter)
 
//categoryFilterer := components.NewCategoryFilterer([]string{"DataEntry"})
//net.AddProcess(categoryFilterer)
 
// Pretty-print wiki page data
//wikiPagePrinter := components.NewWikiPagePrinter()
//net.AddProcess(wikiPagePrinter)
 
useTemplates := true
xmlCreator := components.NewMWXMLCreator(useTemplates)
net.AddProcess(xmlCreator)
 
//printer := components.NewStringPrinter()
//net.AddProcess(printer)
templateWriter := components.NewStringFileWriter(str.Replace(*outFileName, ".xml", "_templates.xml", 1))
net.AddProcess(templateWriter)
 
propertyWriter := components.NewStringFileWriter(str.Replace(*outFileName, ".xml", "_properties.xml", 1))
net.AddProcess(propertyWriter)
 
pageWriter := components.NewStringFileWriter(*outFileName)
net.AddProcess(pageWriter)
 
snk := flowbase.NewSink()
net.AddProcess(snk)
 
// ------------------------------------------
// Connect network
// ------------------------------------------
 
ttlFileRead.OutTriple = aggregator.In
 
aggregator.Out = indexCreator.In
 
indexCreator.Out = indexFanOut.In
indexFanOut.Out["serialize"] = indexToAggr.In
indexFanOut.Out["conv"] = triplesToWikiConverter.InIndex
 
indexToAggr.Out = triplesToWikiConverter.InAggregate
 
//triplesToWikiConverter.OutPage = categoryFilterer.In
//categoryFilterer.Out = xmlCreator.InWikiPage
 
triplesToWikiConverter.OutPage = xmlCreator.InWikiPage
 
xmlCreator.OutTemplates = templateWriter.In
xmlCreator.OutProperties = propertyWriter.In
xmlCreator.OutPages = pageWriter.In
 
snk.Connect(templateWriter.OutDone)
snk.Connect(propertyWriter.OutDone)
snk.Connect(pageWriter.OutDone)
 
// ------------------------------------------
// Send in-data and run
// ------------------------------------------
 
go func() {
defer close(ttlFileRead.InFileName)
ttlFileRead.InFileName <- *inFileName
}()
 
net.Run()
}