def prepare_data(nb_seq=20):
    all_xs = []
    all_ys = []
    all_words = set()
    all_labels = set()