.github/templates/model_regression_test_results.tmpl
{{- /*
The template reads a file with a report (the report file is available
as an artifact in the model regression tests workflow) and returns
a markdown table with a summary of the tests.
*/ -}}
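{{- /*
The print_result_nlu template prints the branch score followed by its
difference to the main score. Which metric is printed (micro avg f1-score
or accuracy) depends on the fields available in the results.
*/ -}}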
{{ define "print_result_nlu" -}}
{{- /*
Input: a dict with the keys "branch" and "main", each holding one result map.
Output: the branch score with 4 decimals, followed in parentheses by
(branch score - main score) with 2 decimals, or by `no data` when the main
results offer no usable metric. Prints only `no data` when the branch results
contain neither a micro avg f1-score nor an accuracy.
*/ -}}
{{- $branch_micro := index .branch "micro avg" -}}
{{- $branch_has_f1 := has $branch_micro "f1-score" -}}
{{- $branch_has_acc := has .branch "accuracy" -}}
{{- if not (or $branch_has_f1 $branch_has_acc) -}}
`no data`
{{- else -}}
{{- /* The main results are only inspected once a branch score exists. */ -}}
{{- $main_has_f1 := has (index .main "micro avg") "f1-score" -}}
{{- $main_has_acc := has .main "accuracy" -}}
{{- /*
Branch side: the f1-score is used when main also has an f1-score or when the
branch has no accuracy; in every other case the branch accuracy is used.
*/ -}}
{{- $branch_score := 0.0 -}}
{{- if and $branch_has_f1 (or $main_has_f1 (not $branch_has_acc)) -}}
{{- $branch_score = index $branch_micro "f1-score" -}}
{{- else -}}
{{- $branch_score = .branch.accuracy -}}
{{- end -}}
{{ printf "%.4f" $branch_score }} (
{{- /*
Main side: f1 against f1 has the highest priority, then the main accuracy,
then the main f1-score as a last resort.
*/ -}}
{{- if and $branch_has_f1 $main_has_f1 -}}
{{ printf "%.2f" (math.Sub $branch_score (index (index .main "micro avg") "f1-score")) }}
{{- else if $main_has_acc -}}
{{ printf "%.2f" (math.Sub $branch_score .main.accuracy) }}
{{- else if $main_has_f1 -}}
{{ printf "%.2f" (math.Sub $branch_score (index (index .main "micro avg") "f1-score")) }}
{{- else -}}
`no data`
{{- end -}}
)
{{- end -}}
{{- end -}}
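{{- /*
The print_result_core_micro_avg template prints the branch score followed by
its difference to the main score. Which metric is printed (micro avg f1-score
or accuracy) depends on the fields available in the results.
*/ -}}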
{{ define "print_result_core_micro_avg" -}}
{{- /*
Input: a dict with the keys "branch" and "main", each holding one result map.
Output: the branch score with 4 decimals, followed in parentheses by
(branch score - main score) with 2 decimals, or `no data` where a value is
missing.

The metric selection is the same as for the NLU results, so this template
delegates to print_result_nlu instead of keeping a second copy of the logic.
*/ -}}
{{- template "print_result_nlu" . -}}
{{- end -}}
{{ define "print_result_core_conversation_accuracy" -}}
{{- /*
Input: a dict with the keys "branch" and "main", each holding one result map.
Output: the branch value of conversation_accuracy.accuracy with 4 decimals,
followed in parentheses by (branch - main) with 2 decimals, or by `no data`
when main has no such value. Prints only `no data` when the branch results
do not contain conversation_accuracy.accuracy.
*/ -}}
{{- $branch_conv := index .branch "conversation_accuracy" -}}
{{- if not (has $branch_conv "accuracy") -}}
`no data`
{{- else -}}
{{- $branch_acc := index $branch_conv "accuracy" -}}
{{ printf "%.4f" $branch_acc }} (
{{- if has (index .main "conversation_accuracy") "accuracy" -}}
{{ printf "%.2f" (math.Sub $branch_acc (index (index .main "conversation_accuracy") "accuracy")) }}
{{- else -}}
`no data`
{{- end -}}
)
{{- end -}}
{{- end -}}
{{ define "print_table_nlu" }}
{{- /*
Renders the Markdown table with the NLU results of one dataset.

Input: a dict with
  "results_for_dataset"      - map of configuration name to a list of result
                               entries produced on the branch,
  "results_for_dataset_main" - the same structure used as the comparison
                               baseline (may be missing or empty).
Nothing is printed when no entry of the dataset has the type "nlu".

For every metric the baseline is the first main entry of the same
configuration that actually contains that metric. The first list element is
not assumed to be an NLU result, so an unrelated or incomplete entry on the
main side results in `no data` for the difference instead of a failed render.
*/ -}}
{{- $available_types := (index .results_for_dataset | jsonpath `@..type`) -}}
{{- /* A single match comes back as a plain string; wrap it so that has works on a list. */ -}}
{{- if isKind "string" $available_types }}{{- $available_types = (index .results_for_dataset | jsonpath `@..type` | slice) -}}{{- end -}}
{{- if has $available_types "nlu" -}}
| Configuration | Intent Classification Micro F1 | Entity Recognition Micro F1 | Response Selection Micro F1 |
|---------------|-----------------|-----------------|-------------------|
{{ range $config_name, $config_data_array := .results_for_dataset -}}
{{ range $config_data := $config_data_array }}
{{- if eq $config_data.type "nlu" -}}
| `{{ $config_name }}`<br> test: `{{ $config_data.test_run_time }}`, train: `{{ $config_data.train_run_time }}`, total: `{{ $config_data.total_run_time }}`|
{{- if has $config_data "intent_classification" -}}
{{- $intent_class_main := dict -}}
{{- $intent_main_found := false -}}
{{- if has $.results_for_dataset_main $config_name -}}
{{- range $main_data := (index $.results_for_dataset_main $config_name) -}}
{{- if and (not $intent_main_found) (has $main_data "intent_classification") -}}
{{- $intent_class_main = $main_data.intent_classification -}}
{{- $intent_main_found = true -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- $intent_class := $config_data.intent_classification -}}
{{ template "print_result_nlu" (dict "branch" $intent_class "main" $intent_class_main) }}|
{{- else -}}
`no data`|
{{- end -}}
{{- if has $config_data "entity_prediction" -}}
{{- $entity_class_main := dict -}}
{{- $entity_main_found := false -}}
{{- if has $.results_for_dataset_main $config_name -}}
{{- range $main_data := (index $.results_for_dataset_main $config_name) -}}
{{- if and (not $entity_main_found) (has $main_data "entity_prediction") -}}
{{- $entity_class_main = $main_data.entity_prediction -}}
{{- $entity_main_found = true -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- $entity_class := $config_data.entity_prediction -}}
{{ template "print_result_nlu" (dict "branch" $entity_class "main" $entity_class_main) }}|
{{- else -}}
`no data`|
{{- end -}}
{{- if has $config_data "response_selection" -}}
{{- $response_class_main := dict -}}
{{- $response_main_found := false -}}
{{- if has $.results_for_dataset_main $config_name -}}
{{- range $main_data := (index $.results_for_dataset_main $config_name) -}}
{{- if and (not $response_main_found) (has $main_data "response_selection") -}}
{{- $response_class_main = $main_data.response_selection -}}
{{- $response_main_found = true -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- $response_class := $config_data.response_selection -}}
{{ template "print_result_nlu" (dict "branch" $response_class "main" $response_class_main) }}|
{{- else -}}
`no data`|
{{- end }}
{{end}}
{{- end}}
{{- end}}
{{- end -}}
{{- end -}}
{{- define "print_table_core" -}}
{{- /*
Renders the Markdown table with the Core (dialog policy) results of one
dataset.

Input: a dict with
  "results_for_dataset"      - map of configuration name to a list of result
                               entries produced on the branch,
  "results_for_dataset_main" - the same structure used as the comparison
                               baseline (may be missing or empty).
Nothing is printed when no entry of the dataset has the type "core".

The baseline is the first main entry of the same configuration that contains
story_prediction. The first list element is not assumed to be a Core result,
so an unrelated or incomplete entry on the main side results in `no data` for
the difference instead of a failed render. The baseline is looked up once per
row and shared by both metric cells.
*/ -}}
{{- $available_types := (index .results_for_dataset | jsonpath `@..type`) -}}
{{- /* A single match comes back as a plain string; wrap it so that has works on a list. */ -}}
{{- if isKind "string" $available_types }}{{- $available_types = (index .results_for_dataset | jsonpath `@..type` | slice) -}}{{- end -}}
{{- if has $available_types "core" -}}
| Dialog Policy Configuration | Action Level Micro Avg. F1 | Conversation Level Accuracy | Run Time Train | Run Time Test |
|---------------|-----------------|-----------------|-------------------|-------------------|
{{ range $config_name, $config_data_array := .results_for_dataset -}}
{{ range $config_data := $config_data_array }}
{{- if eq $config_data.type "core" -}}
{{- $story_prediction_main := dict -}}
{{- $story_main_found := false -}}
{{- if has $.results_for_dataset_main $config_name -}}
{{- range $main_data := (index $.results_for_dataset_main $config_name) -}}
{{- if and (not $story_main_found) (has $main_data "story_prediction") -}}
{{- $story_prediction_main = $main_data.story_prediction -}}
{{- $story_main_found = true -}}
{{- end -}}
{{- end -}}
{{- end -}}
| `{{ $config_name }}` |
{{- if has $config_data "story_prediction" -}}
{{- $story_prediction := $config_data.story_prediction -}}
{{ template "print_result_core_micro_avg" (dict "branch" $story_prediction "main" $story_prediction_main) }}|
{{- template "print_result_core_conversation_accuracy" (dict "branch" $story_prediction "main" $story_prediction_main) }}|
{{- else -}}
`no data`|`no data`|
{{- end -}}
`{{ $config_data.train_run_time }}`| `{{ $config_data.test_run_time }}`|
{{ end }}
{{- end}}
{{- end}}
{{- end -}}
{{- end -}}
{{- /*
Report body: for every dataset found in the "data" datasource, print a header
line with the dataset name, repository branch and commit, followed by the NLU
table and the Core table. The "results_main" datasource provides the results
the branch results are compared against (presumably produced on the main
branch - confirm against the workflow that supplies the datasource).
*/ -}}
{{- $results_main := (datasource "results_main") -}}
{{ range $dataset, $results_for_dataset := (datasource "data")}}
{{ $results_for_dataset_main := (index $results_main $dataset) -}}
{{- /*
The header information is read from the first result entry of the first
configuration key returned by keys; this assumes all entries of a dataset
share the same repository metadata - TODO confirm.
*/ -}}
{{ $content_dicts := index $results_for_dataset (index (keys $results_for_dataset) 0) -}}
{{ $one_content_dict := index $content_dicts 0 -}}
{{- if ($one_content_dict).external_dataset_repository -}}
Dataset: `{{$dataset}}`, Dataset repository branch: `{{ ($one_content_dict).dataset_repository_branch }}` (external repository), commit: `{{ ($one_content_dict).dataset_commit }}`
Configuration repository branch: `{{ ($one_content_dict).config_repository_branch }}`
{{ else -}}
Dataset: `{{$dataset}}`, Dataset repository branch: `{{ ($one_content_dict).dataset_repository_branch }}`, commit: `{{ ($one_content_dict).dataset_commit }}`
{{ end -}}
{{ template "print_table_nlu" (dict "results_for_dataset" $results_for_dataset "results_for_dataset_main" $results_for_dataset_main) }}
{{ template "print_table_core" (dict "results_for_dataset" $results_for_dataset "results_for_dataset_main" $results_for_dataset_main) }}
{{- end }}