# Walk the four 2-bit fields of `control`, lowest bits first, and for each one
# append the matching slice of `src` (at most 16 items) to `out`.
for i in range(4):
        # Field i is bits [2*i, 2*i + 1] of `control`; scaling by 16 turns the
        # field value into the start offset of the slice to copy.
        # NOTE(review): for an integer `control` the offsets are 0/16/32/48,
        # so `src` presumably holds at least 64 items - confirm with caller.
        shift = ((control >> (2 * i)) & 0b11) * 16
        out.append(src[shift:shift + 16])