| from dreamcoder.utilities import eprint |
| import random |
|
|
|
|
class DefaultTaskBatcher:
    """Iterates through task batches of the specified size. Defaults to all tasks if taskBatchSize is None."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the window of tasks that belongs to iteration ``currIteration``.

        The window starts at ``(taskBatchSize * currIteration) % len(tasks)``
        and wraps around to the beginning of ``tasks`` when it runs past the
        end.

        Args:
            ec_result: Not used by this batcher.
            tasks: Sequence of tasks; must support ``len``, ``+`` and slicing.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Iteration index used to position the window.

        Returns:
            A slice of ``tasks + tasks`` holding ``taskBatchSize`` tasks, or an
            empty slice of ``tasks`` when there are no tasks at all.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Raised explicitly (rather than ``assert False``) so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(message)

        # Nothing to batch; also avoids a modulo-by-zero below.
        if numTasks == 0:
            return tasks[:0]

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        # Doubling the sequence lets a plain slice wrap past the end.
        return (tasks + tasks)[start:end]
|
|
class RandomTaskBatcher:
    """Draws each batch as a random sample (without replacement) of the tasks.

    A taskBatchSize of None means the batch contains every task.
    """

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return ``taskBatchSize`` tasks sampled from ``tasks`` via ``random.sample``.

        ``ec_result`` and ``currIteration`` are not used. Requesting more
        tasks than exist logs a message and fails an assertion.
        """
        totalTasks = len(tasks)
        batchSize = totalTasks if taskBatchSize is None else taskBatchSize

        if batchSize > totalTasks:
            eprint("Task batch size is greater than total number of tasks, aborting.")
            assert False

        return random.sample(tasks, batchSize)
|
|
class RandomShuffleTaskBatcher:
    """Randomly shuffles the task batch first, and then iterates through task batches of the specified size like DefaultTaskBatcher.
    Reshuffles across iterations - intended as benchmark comparison to test the task ordering."""

    def __init__(self, baseSeed=0):
        # Seed offset; the shuffle for a given epoch is seeded with baseSeed + epoch.
        self.baseSeed = baseSeed

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return the batch for ``currIteration`` from a per-epoch seeded shuffle.

        The epoch is ``(currIteration * taskBatchSize) // len(tasks)``. The
        batch is a window over the current epoch's shuffle; a window that runs
        past the end continues into the next epoch's shuffle. Tasks that appear
        twice in such a wrapped window are returned once, so the result may be
        shorter than ``taskBatchSize``.

        Args:
            ec_result: Not used by this batcher.
            tasks: List of hashable tasks (copied, never mutated).
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Iteration index used to pick the epoch and window.

        Returns:
            A list of distinct tasks in shuffled-window order.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        numTasks = len(tasks)
        if taskBatchSize is None:
            taskBatchSize = numTasks
        elif taskBatchSize > numTasks:
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError(message)

        # Nothing to batch; also avoids dividing by zero below.
        if numTasks == 0:
            return []

        # Integer division avoids a round-trip through float.
        currEpoch = int(currIteration * taskBatchSize) // numTasks

        shuffledTasks = tasks.copy()
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        # Uses the next epoch's seed, so a wrapped window lines up with the
        # shuffle that the following epoch will start from.
        shuffledTasksWrap = tasks.copy()
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % numTasks
        end = start + taskBatchSize
        taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]

        # Drop duplicates while keeping the seeded order; a plain set() would
        # return the tasks in hash order and discard the shuffle.
        return list(dict.fromkeys(taskBatch))
|
|
class UnsolvedTaskBatcher:
    """At a given epoch, returns only batches of the tasks that have not been solved at least twice"""

    def __init__(self):
        # task -> number of getTaskBatch calls in which the task's frontier
        # in ec_result.allFrontiers was non-empty.
        self.timesSolved = {}
        # Not read anywhere in this class; kept as existing instance state.
        self.start = 0

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Update the solve counters and return the tasks counted fewer than twice.

        Every call walks ``ec_result.allFrontiers`` and increments the counter
        of each task whose frontier is non-empty, so a task's count also grows
        when its frontier simply stays non-empty across calls.

        Args:
            ec_result: Object whose ``allFrontiers`` maps task -> frontier,
                where each frontier has a boolean ``empty`` attribute.
            tasks: Iterable of candidate tasks.
            taskBatchSize: Must be None; batch sizes are not supported.
            currIteration: Not used by this batcher.

        Returns:
            The tasks from ``tasks`` whose counter is below 2, in input order.

        Raises:
            AssertionError: If ``taskBatchSize`` is not None.
        """
        if taskBatchSize is not None:
            # Explicit raise so the check is not stripped when running with -O.
            raise AssertionError("This batching strategy does not support batch sizes")

        for task, frontier in ec_result.allFrontiers.items():
            if frontier.empty:
                self.timesSolved[task] = max(0, self.timesSolved.get(task, 0))
            else:
                self.timesSolved[task] = 1 + self.timesSolved.get(task, 0)
        return [t for t in tasks if self.timesSolved.get(t, 0) < 2]
| |
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    """Build a batch that mixes the lowest-entropy tasks with randomly drawn ones.

    Tasks are ranked by the values returned from
    ``ec_result.recognitionModel.taskGrammarEntropies(tasks)`` (ascending).
    The first ``taskBatchSize - int(randomRatio * taskBatchSize)`` ranked tasks
    are taken, and the rest of the batch is sampled at random from the
    remaining ranked tasks.

    Args:
        ec_result: Object with a ``recognitionModel`` providing
            ``taskGrammarEntropies(tasks)``, which must return a mapping with
            ``.items()`` -- presumably task -> entropy; confirm against the
            recognition model.
        tasks: Tasks to choose from.
        taskBatchSize: Requested batch size.
        randomRatio: Fraction of the batch to fill by random sampling.

    Returns:
        A list with the lowest-entropy tasks first, then the random ones. It
        is shorter than ``taskBatchSize`` when too few tasks are available.
    """
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom

    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))
    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])
    rankedTasks = [task for (task, entropy) in sortedEntropies]

    entropyBatch = rankedTasks[:numEntropy]
    remainingTasks = rankedTasks[numEntropy:]
    # random.sample raises ValueError when asked for more items than exist,
    # which happens once fewer tasks are left than the random share wants.
    randomBatch = random.sample(remainingTasks, min(numRandom, len(remainingTasks)))

    return entropyBatch + randomBatch
|
|
def kNearestNeighbors(ec_result, tasks, k, task):
    """Return the k tasks ranked closest to ``task`` by the recognition model.

    Scores come from
    ``ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)``
    and the tasks with the largest scores are returned first. The scores are
    assumed to be a numpy array aligned position-by-position with ``tasks``
    -- confirm against the recognition model.
    """
    import numpy as np

    scores = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
    # Negating before the ascending argsort puts the largest scores first.
    ranking = np.argsort(-scores)
    return list(np.array(tasks)[ranking[:k]])
|
|
|
|
class RandomkNNTaskBatcher:
    """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a randomly chosen seed task followed by its nearest neighbors.

        Without a recognition model the batch is a plain random sample.

        Args:
            ec_result: Object with a ``recognitionModel`` attribute (may be
                None), passed on to ``kNearestNeighbors``.
            tasks: Sequence of tasks to choose from.
            taskBatchSize: Number of tasks per batch, or None for all tasks.
            currIteration: Not used by this batcher.

        Returns:
            A list starting with the seed task; the seed is not repeated
            among the neighbors. Empty when ``tasks`` is empty.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError(message)

        # random.choice cannot pick from an empty sequence.
        if not tasks:
            return []

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" % taskBatchSize)
            return random.sample(tasks, taskBatchSize)

        randomTask = random.choice(tasks)
        # The seed task is itself among the candidates, so ask for one extra
        # neighbor and filter the seed out instead of returning it twice.
        neighbors = kNearestNeighbors(ec_result, tasks, taskBatchSize, randomTask)
        kNN = [t for t in neighbors if t != randomTask][:taskBatchSize - 1]
        return [randomTask] + kNN
|
|
class RandomLowEntropykNNTaskBatcher:
    """Chooses a random task from the taskBatchSize unsolved tasks with the lowest entropy, and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return a low-entropy unsolved seed task followed by its nearest neighbors.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true. Neighbors are drawn from all of ``tasks``, not only the
        unsolved ones.

        Args:
            ec_result: Object with ``allFrontiers`` (task -> frontier with an
                ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of tasks to choose from.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            A list of tasks; empty when no task is unsolved. Without a
            recognition model it is a random sample of the unsolved tasks,
            capped at the number of unsolved tasks.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError(message)

        # Nothing left to work on; random.sample/choice below would raise.
        if not unsolvedTasks:
            return []

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            # Cap the sample: fewer unsolved tasks may remain than requested.
            return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))

        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
        if not lowEntropyUnsolved:
            return []
        randomTask = random.choice(lowEntropyUnsolved)
        # The seed task is itself among the candidates, so ask for one extra
        # neighbor and filter the seed out instead of returning it twice.
        neighbors = kNearestNeighbors(ec_result, tasks, taskBatchSize, randomTask)
        kNN = [t for t in neighbors if t != randomTask][:taskBatchSize - 1]
        return [randomTask] + kNN
|
|
|
|
class UnsolvedEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.
    Given a task batch size, returns the unsolved tasks with the lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return up to ``taskBatchSize`` unsolved tasks, lowest entropy first.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true.

        Args:
            ec_result: Object with ``allFrontiers`` (task -> frontier with an
                ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of tasks to choose from.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            A list of unsolved tasks; empty when none remain. Without a
            recognition model it is a random sample, capped at the number of
            unsolved tasks.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError(message)

        # Nothing left to select from.
        if not unsolvedTasks:
            return []

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            # Cap the sample: fewer unsolved tasks may remain than requested,
            # and random.sample raises ValueError in that case.
            return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))

        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
|
|
class UnsolvedRandomEntropyTaskBatcher:
    """Returns tasks that have never been solved at any previous iteration.
    Given a task batch size, returns a mix of unsolved tasks with half
    selected randomly and the remaining selected by lowest entropy."""

    def __init__(self):
        pass

    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        """Return unsolved tasks, mixing lowest-entropy and randomly chosen ones.

        A task counts as unsolved when ``ec_result.allFrontiers[task].empty``
        is true. The mix is produced by ``entropyRandomBatch`` with a random
        ratio of 0.5.

        Args:
            ec_result: Object with ``allFrontiers`` (task -> frontier with an
                ``empty`` attribute) and ``recognitionModel`` (may be None).
            tasks: Sequence of tasks to choose from.
            taskBatchSize: Number of tasks per batch; None returns every
                unsolved task.
            currIteration: Not used by this batcher.

        Returns:
            A list of unsolved tasks, at most ``taskBatchSize`` long and never
            longer than the number of unsolved tasks. Without a recognition
            model it is a plain random sample.

        Raises:
            AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
        """
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            message = "Task batch size is greater than total number of tasks, aborting."
            eprint(message)
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError(message)

        # Nothing left to select from.
        if not unsolvedTasks:
            return []

        # Fewer unsolved tasks may remain than requested; sampling more than
        # exist raises ValueError, so work with the size that is achievable.
        batchSize = min(taskBatchSize, len(unsolvedTasks))

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, batchSize)

        return entropyRandomBatch(ec_result, unsolvedTasks, batchSize, randomRatio=.5)
|
|
|
|
|
|
|
|
|
|
| |
|
|
|
|