src/main/kotlin/ltd/guimc/lgzbot/utils/LL4JUtils.kt
package ltd.guimc.lgzbot.utils
import huzpsb.ll4j.model.Model
import huzpsb.ll4j.nlp.token.Tokenizer
import huzpsb.ll4j.utils.data.DataSet
import java.io.BufferedReader
import java.io.InputStreamReader
/**
 * Process-wide holder for an LL4J [Model] / [Tokenizer] pair together with thin
 * convenience wrappers for prediction and incremental training.
 *
 * [init] (or a successful [downloadModel]) must run before any of the prediction or
 * training functions are used; otherwise accessing the `lateinit` properties throws
 * [UninitializedPropertyAccessException].
 *
 * NOTE(review): no synchronization is performed here; confirm how callers use this
 * object from multiple threads before relying on concurrent access.
 */
object LL4JUtils {
    private const val TOKENIZER_RESOURCE = "/ts.model"
    private const val MODEL_RESOURCE = "/anti-ad.model"
    private const val REMOTE_REPOSITORY = "siuank/ADDetector4J"
    private const val REMOTE_MODEL_URL =
        "https://raw.githubusercontent.com/siuank/ADDetector4J/main/anti-ad.model"
    private const val REMOTE_TOKENIZER_URL =
        "https://raw.githubusercontent.com/siuank/ADDetector4J/main/t1.tokenized.txt"

    /** Classifier currently in use; replaced by [init] and [downloadModel]. */
    lateinit var model: Model

    /** Tokenizer paired with [model]; replaced by [init] and [downloadModel]. */
    lateinit var tokenizer: Tokenizer

    /**
     * Short tag identifying the loaded model: the first three letters of the month name
     * followed by the day of month of the last upstream commit (for example `FEB25`).
     */
    var version = "FEB25"

    /**
     * Loads the tokenizer and the model from the resources bundled on the classpath.
     *
     * @throws NullPointerException if either bundled resource is missing.
     */
    fun init() {
        // `!!` is deliberate: both files ship inside the artifact, so a missing resource
        // is a packaging error that should fail loudly at startup.
        val tokenizerStream = LL4JUtils.javaClass.getResourceAsStream(TOKENIZER_RESOURCE)!!
        val modelStream = LL4JUtils.javaClass.getResourceAsStream(MODEL_RESOURCE)!!
        // Close both readers once loading is done (the tokenizer stream used to leak).
        tokenizer = tokenizerStream.bufferedReader(Charsets.UTF_8).use { Tokenizer.load(it) }
        model = modelStream.bufferedReader(Charsets.UTF_8).use { Model.read(it) }
    }

    /** Tokenizes [text] under label [type] after removing every `\n` character. */
    private fun tokenize(type: Int, text: String) =
        tokenizer.tokenize(type, text.replace("\n", ""))

    /**
     * Returns `true` when the label predicted for [string] is `1`.
     *
     * NOTE(review): label 1 presumably means "advertisement" — confirm against the model.
     */
    fun predict(string: String): Boolean = predictDebug(string).first == 1

    /**
     * Returns the pair produced by [Model.predictDebug] for [string]: the predicted label
     * first, then a numeric value (presumably its score — confirm against LL4J).
     */
    fun predictDebug(string: String): Pair<Int, Double> =
        model.predictDebug(tokenize(0, string).values)

    /** Returns the array produced by [Model.predictAllResult] for [string]. */
    fun predictAllResult(string: String): DoubleArray =
        model.predictAllResult(tokenize(0, string).values)

    /**
     * Trains the current [model] on a single sample.
     *
     * @param type label to attach to the sample.
     * @param string raw text of the sample; `\n` characters are removed before tokenizing.
     */
    fun learn(type: Int, string: String) {
        val dataSet = DataSet()
        dataSet.split.add(tokenize(type, string))
        model.trainOn(dataSet)
    }

    /**
     * Best-effort refresh of [model], [tokenizer] and [version] from the upstream repository.
     *
     * The model and the tokenizer are replaced together, and only after both were fetched
     * and parsed, so a failure half-way never leaves a new model paired with an old
     * tokenizer. [version] is updated afterwards; if that lookup fails the freshly loaded
     * pair is kept and [version] retains its previous value.
     *
     * Never throws for [Exception]s: failures are reported on standard error and the
     * previously loaded state stays in place.
     */
    fun downloadModel() {
        try {
            val freshModel = Model.read(HttpUtils.getResponse(REMOTE_MODEL_URL))
            val freshTokenizer = Tokenizer.load(HttpUtils.getResponse(REMOTE_TOKENIZER_URL).reader())
            model = freshModel
            tokenizer = freshTokenizer

            val time = GithubUtils.getLastCommit(REMOTE_REPOSITORY).commitTime
            // take(3) yields "FEB", "MAY", ...; the former substring(0..3) took four
            // characters and threw for the three-letter month name "MAY".
            version = "${time.month.name.take(3)}${time.dayOfMonth}"
        } catch (e: Exception) {
            // Still best-effort, but no longer silent.
            System.err.println("LL4JUtils.downloadModel failed: $e")
        }
    }
}