mfinzi/OMGchess

View on GitHub
chess/testrun.py

Summary

Maintainability
A
1 hr
Test Coverage

import os
import numpy as np
import torch
from oil.tuning.study import Study, train_trial
from oil.tuning.configGenerator import uniform,logUniform,sample_config
from oil.utils.utils import LoaderTo, cosLr, recursively_update
from oil.tuning.study import train_trial
from chess_dataset import ChessDataset,ChessDatasetWOpp
from chess_network import ChessResnet,ChessDensenet,ChessDRN
from torch.utils.data import DataLoader
#import oil.augLayers as augLayers

from gameTrainer2D import GameTrainer2D, baseGameTrainTrial

# Directory that receives this run's logs; makeTrainer forwards it as `log_dir`.
logdir = os.path.expanduser('~/OMGchess/chess/runs/gpu1_again')

# Settings for the Adam experiment. makeTrainer layers these over its defaults.
adam_config = {
    # Optional trainer override, currently disabled:
    # 'trainer_config': {'log_suffix': 'adam/'},
    'optimizer': torch.optim.Adam,
    'opt_config': {'lr': 2e-3},
    'num_epochs': 8,
    'network': ChessResnet,
    'net_config': {
        'coords': True,
        'num_blocks': 20,
        'k': 128,
        'drop_rate': .2,
    },
    # Alternative DenseNet backbone, currently disabled:
    # 'network': ChessDensenet,
    # 'net_config': {'M': 5, 'N': 20, 'k': 20, 'drop_rate': 0, 'coords': True},
}
# Settings for the SGD (Nesterov momentum) experiment.
#
# The original literal listed 'trainer_config' twice; Python keeps only the
# last occurrence of a duplicated key, so the 'log_suffix' entry was silently
# discarded. Both trainer options now live in a single dict.
#
# NOTE: makeTrainer reads cfg['network'] and cfg['net_config'], which this
# config does not supply -- add them before enabling Trial(sgd_config).
sgd_config = {
    'trainer_config': {'log_suffix': 'sgd/', 'value_weight': 1},
    'optimizer': torch.optim.SGD,
    'opt_config': {
        'lr': .2,
        'momentum': .9,
        'weight_decay': 2e-6,
        'nesterov': True,
    },
    'num_epochs': 8,
}

def makeTrainer(config):
    """Build a GameTrainer2D from `config` layered over the defaults below.

    The defaults ('dataset', 'datadir', 'bs', 'trainer_config') are combined
    with `config` via `recursively_update` (presumably a nested dict merge --
    confirm against oil.utils.utils).

    Args:
        config: dict of overrides. After merging, these keys must be present
            because no default is provided here: 'num_epochs', 'network',
            'net_config', 'optimizer', 'opt_config'. A missing key raises
            KeyError.

    Returns:
        A GameTrainer2D built from the network, the 'train' / 'train_' /
        'val' dataloaders, an optimizer constructor, the schedule returned by
        `cosLr`, and cfg['trainer_config'] expanded as keyword arguments.

    Note:
        The network and loaders are bound to 'cuda:0', so a CUDA device is
        required.
    """
    cfg = {
        'dataset': 'chess_3000k_0.1s',
        'datadir': os.path.expanduser('~/OMGchess/chess/data/'),
        'bs': 128,
        'trainer_config': {'log_dir': logdir, 'value_weight': 2.5},
        # Former network defaults, currently disabled:
        # 'network':ChessResnet,'net_config': {'coords':True,'num_blocks':20,'k':128},
    }
    cfg = recursively_update(cfg, config)
    lr_sched = cosLr(cfg['num_epochs'])

    def split_path(suffix):
        # os.path.join (rather than '+') keeps the path valid even when an
        # overriding 'datadir' has no trailing separator.
        return os.path.join(cfg['datadir'], cfg['dataset'] + suffix)

    trainset = ChessDatasetWOpp(split_path('_train_0.pkl'))
    train_small = ChessDatasetWOpp(split_path('_trainsmall.pkl'))
    val = ChessDatasetWOpp(split_path('_val.pkl'))

    device = torch.device('cuda:0')
    torch.backends.cudnn.benchmark = True
    # Multi-GPU scaling, currently disabled:
    #num_gpus = torch.cuda.device_count()
    #print("Using {} gpus".format(num_gpus))
    fullCNN = cfg['network'](**cfg['net_config']).to(device)
    #cfg['bs'] *= num_gpus
    #cfg['opt_config']['lr'] *=num_gpus #not for adam

    dataloaders = {
        # Only the main training loader shuffles, drops the last partial
        # batch, pins memory and uses worker processes.
        'train': DataLoader(trainset, batch_size=cfg['bs'], shuffle=True,
                            drop_last=True, pin_memory=True, num_workers=2),
        'train_': DataLoader(train_small, batch_size=cfg['bs'], shuffle=False),
        'val': DataLoader(val, batch_size=cfg['bs'], shuffle=False),
    }
    # Wrap every loader with LoaderTo for the target device.
    dataloaders = {k: LoaderTo(v, device) for k, v in dataloaders.items()}
    # The trainer receives a constructor, not an optimizer instance.
    opt_constr = lambda params: cfg['optimizer'](params, **cfg['opt_config'])
    return GameTrainer2D(fullCNN, dataloaders, opt_constr, lr_sched,
                         **cfg['trainer_config'])

# Wrap makeTrainer into a callable trial. Kept at module level so `Trial`
# remains available to anything that imports this file.
Trial = train_trial(makeTrainer,strict=True)

# Launch training only when this file is executed as a script. The train
# DataLoader uses worker processes (num_workers=2); under the spawn/forkserver
# start methods each worker re-imports the main module, and an unguarded call
# here would start training again inside every worker.
if __name__ == '__main__':
    Trial(adam_config)
    #Trial(sgd_config)

# Completed Improvements:
# Coordinate convolutions in all layers (+.3% acc, +17% time)
# Feed legal moves as input to the network (major boost)
# Use both start and end legal move encodings for input (small improvement?)
# No Improvement: remove tanh on value network, train on cp value directly (weights too much on extreme states?)
# Minor to no Improvement: Add opponent move encoding to input features (yields worse or similar accs?)
# Dual encoding policy network (+.7% acc)
# Encode partial move history into the input tensors (+x% acc)
# Dropout p=0.2: (+.5% acc)
# Why does Adam work better?

# TODO: Replace resnet backbone with a densenet  (in progress, helps but more so with value function)
# TODO: Get SWA setup and working
# TODO: Yarin Gal's multitask uncertainty loss for balancing policy & value
# TODO: Add in a FiLM layer using (to_move,num_moves,castling rights features)
# TODO: Temperature scaling on human data to encourage more diversity

# TODO: Weight sharing with repeating layers (aka RNN) for planning (investigate CTC)
# TODO: Add (flip board, swap white for black pieces and tomove, negate cp) data aug (only 1.6x data though?)

# TODO: Primitive elo evaluation
# TODO: Measure speed of inference as function of batch size
# TODO: Move to lower precision inference (16 bits)
# TODO: Move to TensorRT with onnx

# Asynchronous MCTS
# Base cython implementation
# + Multithreading (GPU queue)
# + Transposition table (get zobrist keys)
# + Opening Book
# + Endgame database

# Visualizations
# Graph of acc,mse value pairs vs elo
# Table of improvements, e.g. + dual-head: + 1.5 acc, -.5 mse, +200 elo
# Detailed plot of elo vs temperature
# Visualization of the search tree: histogram for branch depth
# Graph of performance vs size of dataset 10^5 -> 10^8

# Extensions
# Static elo evaluation task:
# Use trained network & train from scratch
# Try to predict elos of black and white players
# Alternative approach with interactive agent
# CPNS feature visualization

# See how well static elo evaluation correlates with elo of trained net