for category in categories:
            # Select 1 instance for test and insert the whole category to test set
            # Keep only the rows of input_df whose `category` column equals the
            # category handled in this iteration.
            category_df = input_df[input_df.category == category]
            # Largest value found in this category's `instance_number` column.
            max_instance = np.max(category_df.instance_number)
            # np.random.randint(high) draws from [0, high), so adding 1 shifts the
            # draw to the inclusive range [1, max_instance].
            # NOTE(review): this presumes instance numbers are 1-based and contiguous;
            # if there are gaps the drawn number may match no rows, and a
            # max_instance of 0 makes randint raise — confirm against the data.
            test_instance = np.random.randint(max_instance) + 1