# Repository: stephensolis/kameris
# File: demo/hiv1-lanl.yml
# Name of this job configuration.
# NOTE(review): presumably used by the tool to label the run/outputs — confirm.
name: hiv1-lanl-whole

# Experiment definitions, keyed by experiment name. This file defines a single
# experiment, `subtype`. The keys below are exposed to the embedded lambdas
# through their `options` argument (see `groups` here and `pick_group` in the
# `select` step).
experiments:
  subtype:
    # NOTE(review): presumably the tool expands the range and runs the
    # experiment once per value (k = 5 and k = 6) — confirm against the
    # tool's documentation.
    expand_options:
      k: 5..6
    # Read by the `groups` lambda as options['min_group_pts']: a selection-key
    # value must occur at least this many times in the metadata to become a
    # group.
    min_group_pts: 18
    # Read by the `pick_group` lambda in the `select` step: when true, entries
    # are kept regardless of their 'recombinant' field.
    include_recombinants: true
    # Dataset to run on.
    # NOTE(review): presumably `archive`/`archive_folder` locate the sequence
    # files and `metadata` names the matching metadata file — confirm.
    dataset:
      archive: hiv1
      archive_folder: lanl-whole
      metadata: hiv1-lanl-whole
    # Metadata field used for grouping; read as options['selection_key'] by
    # the `groups` lambda.
    selection_key: subtype
    # Python lambda, stored as a literal block string, that builds the groups.
    # It counts how often each value of the selection key occurs in `metadata`
    # and returns a dict with one entry per non-empty value occurring at least
    # `min_group_pts` times. Each entry is keyed by that value and has the form
    # {'selection_key': <key>, 'values': [<value>]}.
    # NOTE(review): a "lambda ...:" with a multi-statement body is not plain
    # Python syntax; presumably the tool rewrites it into a function before
    # evaluating it — confirm.
    groups: |
      lambda options, metadata:
        import collections
        counts = collections.Counter(x[options['selection_key']] for x in metadata)
        return {v: {'selection_key': options['selection_key'], 'values': [v]} for v in counts if v and counts[v] >= options['min_group_pts']}

# Processing steps: select, then kmers, then classify.
# NOTE(review): presumably executed in the order listed — confirm.
steps:
  # Selects the metadata entries that belong to a group.
  - type: select
    # NOTE(review): presumably tells the tool to repeat/copy this step's
    # result for each expanded value of option `k` — confirm.
    copy_for_options: [k]
    # Python lambda, stored as a literal block string, that returns the
    # metadata entries whose value for group_options['selection_key'] is in
    # group_options['values']. Entries with a truthy 'recombinant' field are
    # dropped unless options['include_recombinants'] is truthy.
    pick_group: |
      lambda metadata, group_options, options:
        return [x for x in metadata if (options['include_recombinants'] or not x['recombinant']) and
                                       x[group_options['selection_key']] in group_options['values']]

  # k-mer step. Its output file, `cgrs.mm-repr`, is the same file the
  # `classify` step reads as `features_file`.
  - type: kmers
    output_file: cgrs.mm-repr
    # NOTE(review): presumably emits k-mer frequencies rather than raw
    # counts — confirm.
    mode: frequencies
    # NOTE(review): `from_options` presumably means k is taken from the
    # experiment options (5..6 under `expand_options`) — confirm.
    k: from_options
    # NOTE(review): presumably the storage width, in bits, of each output
    # element — confirm.
    bits_per_element: 16

  # Classification step. Reads `cgrs.mm-repr` (the output file of the `kmers`
  # step) and writes its results to `classification-kmers.json`.
  - type: classify
    features_file: cgrs.mm-repr
    output_file: classification-kmers.json
    # NOTE(review): presumably the number of cross-validation folds — confirm.
    validation_count: 10
    # Classifiers to run. Only the uncommented entries are active
    # (linear-svm and multilayer-perceptron).
    # NOTE(review): the commented-out entries are presumably other classifier
    # names the tool accepts; uncomment a line to enable it — confirm.
    classifiers:
      # - 10-nearest-neighbors
      # - nearest-centroid-mean
      # - nearest-centroid-median
      # - logistic-regression
      # - sgd
      - linear-svm
      # - quadratic-svm
      # - cubic-svm
      # - decision-tree
      # - random-forest
      # - adaboost
      # - gaussian-naive-bayes
      # - lda
      # - qda
      - multilayer-perceptron