Skip to content

Commit

Permalink
FIX potential problem related to automl#69
Browse files (browse the repository at this point in the history)
  • Loading branch information
mfeurer committed Sep 23, 2016
1 parent 5a7207f commit 3542fc7
Showing 1 changed file with 16 additions and 11 deletions.
27 changes: 16 additions & 11 deletions autosklearn/ensemble_builder.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -124,6 +124,9 @@ def main(self):
# over time!
old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
dir_ensemble_list_mtimes = []
+ # The ensemble dir can contain non-model files. We filter them and
+ # use the following list instead
+ dir_ensemble_model_files = []

for dir_ensemble_file in dir_ensemble_list:
if dir_ensemble_file.endswith("/"):
Expand All @@ -132,18 +135,19 @@ def main(self):
self.logger.warning('Error loading file (not .npy): %s', dir_ensemble_file)
continue

+ dir_ensemble_model_files.append(dir_ensemble_file)
basename = os.path.basename(dir_ensemble_file)
dir_ensemble_file = os.path.join(dir_ensemble, basename)
mtime = os.path.getmtime(dir_ensemble_file)
dir_ensemble_list_mtimes.append(mtime)

- if len(dir_ensemble_list) == 0:
+ if len(dir_ensemble_model_files) == 0:
self.logger.debug('Directories are empty')
time.sleep(2)
used_time = watch.wall_elapsed('ensemble_builder')
continue

- if len(dir_ensemble_list) <= current_num_models and \
+ if len(dir_ensemble_model_files) <= current_num_models and \
old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
self.logger.debug('Nothing has changed since the last time')
time.sleep(2)
Expand All @@ -169,7 +173,7 @@ def main(self):
model_names_to_scores = dict()

model_idx = 0
- for model_name in dir_ensemble_list:
+ for model_name in dir_ensemble_model_files:
if model_name.endswith("/"):
model_name = model_name[:-1]
basename = os.path.basename(model_name)
Expand Down Expand Up @@ -254,7 +258,7 @@ def main(self):

indices_to_model_names = dict()
indices_to_run_num = dict()
- for i, model_name in enumerate(dir_ensemble_list):
+ for i, model_name in enumerate(dir_ensemble_model_files):
match = model_and_automl_re.search(model_name)
automl_seed = int(match.group(1))
num_run = int(match.group(2))
Expand All @@ -265,7 +269,8 @@ def main(self):

try:
all_predictions_train, all_predictions_valid, all_predictions_test =\
- self.get_all_predictions(dir_ensemble, dir_ensemble_list,
+ self.get_all_predictions(dir_ensemble,
+ dir_ensemble_model_files,
dir_valid, dir_valid_list,
dir_test, dir_test_list,
include_num_runs,
Expand Down Expand Up @@ -314,7 +319,7 @@ def main(self):

# Set this variable here to avoid re-running the ensemble builder
# every two seconds in case the ensemble did not change
- current_num_models = len(dir_ensemble_list)
+ current_num_models = len(dir_ensemble_model_files)

ensemble_predictions = ensemble.predict(all_predictions_train)
if sys.version_info[0] == 2:
Expand Down Expand Up @@ -342,7 +347,7 @@ def main(self):
backend.save_ensemble(ensemble, index_run, self.seed)

# Save predictions for valid and test data set
- if len(dir_valid_list) == len(dir_ensemble_list):
+ if len(dir_valid_list) == len(dir_ensemble_model_files):
all_predictions_valid = np.array(all_predictions_valid)
ensemble_predictions_valid = ensemble.predict(all_predictions_valid)
if self.task_type == BINARY_CLASSIFICATION:
Expand Down Expand Up @@ -379,11 +384,11 @@ def main(self):
else:
self.logger.info('Could not find as many validation set predictions (%d)'
'as ensemble predictions (%d)!.',
- len(dir_valid_list), len(dir_ensemble_list))
+ len(dir_valid_list), len(dir_ensemble_model_files))

del all_predictions_valid

- if len(dir_test_list) == len(dir_ensemble_list):
+ if len(dir_test_list) == len(dir_ensemble_model_files):
all_predictions_test = np.array(all_predictions_test)
ensemble_predictions_test = ensemble.predict(all_predictions_test)
if self.task_type == BINARY_CLASSIFICATION:
Expand Down Expand Up @@ -420,11 +425,11 @@ def main(self):
else:
self.logger.info('Could not find as many test set predictions (%d) as '
'ensemble predictions (%d)!',
- len(dir_test_list), len(dir_ensemble_list))
+ len(dir_test_list), len(dir_ensemble_model_files))

del all_predictions_test

- current_num_models = len(dir_ensemble_list)
+ current_num_models = len(dir_ensemble_model_files)
watch.stop_task('index_run' + str(index_run))
time_iter = watch.get_wall_dur('index_run' + str(index_run))
used_time = watch.wall_elapsed('ensemble_builder')
Expand Down

0 comments on commit 3542fc7

Please sign in to comment.