This repository has been archived by the owner on Jun 3, 2020. It is now read-only.

Merge pull request #158 from Oslandia/rde-refactor-output-folder-management

Include every output path in the prepare_output_folder() function
delhomer authored May 14, 2020
2 parents 8b8108e + bbd34ed commit 1cf5142
Showing 12 changed files with 100 additions and 115 deletions.
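
In short, `utils.prepare_output_folder()` no longer returns a single checkpoint directory: it now takes the image size as an extra argument and returns a dictionary of ready-made output paths, which the inference, post-processing and training modules consume directly. A minimal before/after sketch, reconstructed from the diff below (variable names are illustrative):

# Before this commit: callers rebuilt file names by hand
output_folder = utils.prepare_output_folder(datapath, dataset, "semseg")
checkpoint_path = os.path.join(output_folder, "best-model-" + str(image_size) + ".h5")

# After this commit: every useful path comes straight out of the helper
output_folder = utils.prepare_output_folder(datapath, dataset, image_size, "semseg")
checkpoint_path = output_folder["best-model"]
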
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ Les sections conserveront leur nom en anglais.

### Changed

- `utils.prepare_output_folder()` returns now a dictionary of all useful output paths
- Some dependency updates (Tensorflow, opencv, pillow, keras, daiquiri)
- The preprocessing has been modified for geographic datasets: `-t`, `-v` and `-T` now
refer to raw images, the amount of preprocessed tiles being obtained by a combination
2 changes: 1 addition & 1 deletion deeposlandia/datasets/__init__.py
@@ -16,7 +16,7 @@
from osgeo import gdal
from PIL import Image

from deeposlandia import geometries, utils
from deeposlandia import geometries

logger = daiquiri.getLogger(__name__)

2 changes: 1 addition & 1 deletion deeposlandia/datasets/shapes.py
@@ -98,7 +98,7 @@ def populate(
output_dir=None,
input_dir=None,
nb_images=10000,
nb_tiles_per_images=None,
nb_tiles_per_image=None,
aggregate=False,
labelling=True,
buf=8,
4 changes: 2 additions & 2 deletions deeposlandia/geometries.py
@@ -10,7 +10,7 @@

import cv2
import daiquiri
import fiona
from fiona.crs import from_epsg
import geopandas as gpd
import numpy as np
import shapely.geometry as shgeom
@@ -265,7 +265,7 @@ def extract_tile_items(
raster_features, min_x, min_y, tile_width, tile_height
)
bdf = gpd.GeoDataFrame(
crs=fiona.crs.from_epsg(raster_features["srid"]), geometry=[area]
crs=from_epsg(raster_features["srid"]), geometry=[area]
)
reproj_labels = labels.to_crs(epsg=raster_features["srid"])
tile_items = gpd.sjoin(reproj_labels, bdf)
13 changes: 5 additions & 8 deletions deeposlandia/inference.py
@@ -167,9 +167,8 @@ def predict(
"Please generate a valid dataset before calling the program."
)

output_folder = utils.prepare_output_folder(datapath, dataset, problem)
instance_filename = "best-instance-" + str(model_input_size) + ".json"
instance_path = os.path.join(output_folder, instance_filename)
output_folder = utils.prepare_output_folder(datapath, dataset, model_input_size, problem)
instance_path = output_folder["best-instance"]
dropout, network = utils.recover_instance(instance_path)
model = init_model(
problem,
@@ -179,13 +178,11 @@
dropout,
network,
)
checkpoint_filename = "best-model-" + str(model_input_size) + ".h5"
checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
if os.path.isfile(checkpoint_full_path):
model.load_weights(checkpoint_full_path)
if os.path.isfile(output_folder["best-model"]):
model.load_weights(output_folder["best-model"])
logger.info(
"Model weights have been recovered from %s",
checkpoint_full_path,
output_folder["best-model"],
)
else:
logger.info(
82 changes: 20 additions & 62 deletions deeposlandia/postprocess.py
@@ -25,19 +25,15 @@
logger = daiquiri.getLogger(__name__)


def get_image_paths(datapath, dataset, image_size, image_basename):
def get_image_paths(testing_folder, image_basename):
"""Returns a list with the path of every image that belongs to the
`dataset`, preprocessed in `image_size`-pixelled images, that were
extracted from an original image named as `image_basename`.
Parameters
----------
datapath : str
Path of the data on the file system
dataset : str
Name of the dataset
image_size : int
Size of preprocessed images, in pixels
testing_folder : str
Path of the testing image folder
image_basename : str
Original image name
@@ -46,15 +42,7 @@ def get_image_paths(datapath, dataset, image_size, image_basename):
list
List of image full paths
"""
image_raw_paths = os.path.join(
datapath,
dataset,
"preprocessed",
str(image_size),
"testing",
"images",
image_basename + "*",
)
image_raw_paths = os.path.join(testing_folder, "images", image_basename + "*")
return [glob.glob(f) for f in [image_raw_paths]][0]
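
A hedged usage sketch of the slimmed-down helper, mirroring the call added to `main()` further down; the "testing" key comes from the dictionary returned by `utils.prepare_preprocessed_folder()`, and the basename is illustrative:

prepro_folder = utils.prepare_preprocessed_folder(datapath, dataset, image_size)
# collect every preprocessed tile cut from the raw test image "some_raw_image"
image_paths = get_image_paths(prepro_folder["testing"], "some_raw_image")
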


@@ -121,16 +109,14 @@ def get_labels(datapath, dataset, tile_size):
return [l for l in test_config["labels"] if l["is_evaluate"]]


def get_trained_model(datapath, dataset, image_size, nb_labels):
def get_trained_model(model_filepath, image_size, nb_labels):
"""Recover model weights stored on the file system, and assign them into
the `model` structure
Parameters
----------
datapath : str
Path of the data on the file system
dataset : str
Name of the dataset
model_filepath : str
Path of the model on the file system
image_size : int
Image size, in pixels (height=width)
nb_labels : int
@@ -150,16 +136,9 @@ def get_trained_model(datapath, dataset, image_size, nb_labels):
architecture="unet",
)
model = Model(net.X, net.Y)
output_folder = utils.prepare_output_folder(
datapath, dataset, "semseg"
)
checkpoint_filename = "best-model-" + str(image_size) + ".h5"
checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
if os.path.isfile(checkpoint_full_path):
model.load_weights(checkpoint_full_path)
logger.info(
"Model weights have been recovered from %s" % checkpoint_full_path
)
if os.path.isfile(model_filepath):
model.load_weights(model_filepath)
logger.info("Model weights have been recovered from %s" % model_filepath)
else:
logger.info(
(
@@ -392,23 +371,26 @@ def get_image_features(datapath, dataset, filename):

def main(args):

logger.info("Postprocess %s...", args.image_basename)
features = get_image_features(
args.datapath, args.dataset, args.image_basename
)

img_width, img_height = features["width"], features["height"]
logger.info("Raw image size: %s, %s" % (img_width, img_height))

image_paths = get_image_paths(
args.datapath, args.dataset, args.image_size, args.image_basename
)
prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset, args.image_size)
image_paths = get_image_paths(prepro_folder["testing"], args.image_basename)
logger.info("The image will be splitted into %s tiles" % len(image_paths))
images = extract_images(image_paths)
coordinates = extract_coordinates_from_filenames(image_paths)
labels = get_labels(args.datapath, args.dataset, args.image_size)

output_folder = utils.prepare_output_folder(
args.datapath, args.dataset, args.image_size, "semseg"
)
model = get_trained_model(
args.datapath, args.dataset, args.image_size, len(labels)
output_folder["best-model"], args.image_size, len(labels)
)

logger.info("Predict labels for input images...")
@@ -429,16 +411,8 @@ def main(args):
colored_data = draw_grid(
colored_data, img_width, img_height, args.image_size
)
predicted_label_folder = os.path.join(
args.datapath,
args.dataset,
"output",
"semseg",
"predicted_labels"
)
os.makedirs(predicted_label_folder, exist_ok=True)
predicted_label_file = os.path.join(
predicted_label_folder,
output_folder["labels"],
args.image_basename + "_" + str(args.image_size) + ".png",
)
Image.fromarray(colored_data).save(predicted_label_file)
Expand All @@ -449,16 +423,8 @@ def main(args):
gdf = gpd.GeoDataFrame(
{"labels": vectorized_labels, "geometry": vectorized_data}
)
predicted_geom_folder = os.path.join(
args.datapath,
args.dataset,
"output",
"semseg",
"predicted_geometries",
)
os.makedirs(predicted_geom_folder, exist_ok=True)
predicted_geom_file = os.path.join(
predicted_geom_folder,
output_folder["geometries"],
args.image_basename + "_" + str(args.image_size) + ".geojson",
)
if os.path.isfile(predicted_geom_file):
Expand All @@ -473,16 +439,8 @@ def main(args):
colored_raster_data = draw_grid(
colored_raster_data, img_width, img_height, args.image_size
)
predicted_raster_folder = os.path.join(
args.datapath,
args.dataset,
"output",
"semseg",
"predicted_rasters",
)
os.makedirs(predicted_raster_folder, exist_ok=True)
predicted_raster_file = os.path.join(
predicted_raster_folder,
output_folder["rasters"],
args.image_basename + "_" + str(args.image_size) + ".png",
)
Image.fromarray(colored_raster_data).save(predicted_raster_file)
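
With these changes, the three prediction artefacts are written into folders taken straight from the path dictionary. A hedged sketch of the resulting layout, assuming `aerial` is one of the geographic datasets and using illustrative values:

output_folder = utils.prepare_output_folder(datapath, "aerial", 256, "semseg")
# <datapath>/aerial/output/semseg/predicted_labels/<basename>_256.png
# <datapath>/aerial/output/semseg/predicted_geometries/<basename>_256.geojson
# <datapath>/aerial/output/semseg/predicted_rasters/<basename>_256.png
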
20 changes: 7 additions & 13 deletions deeposlandia/train.py
@@ -251,6 +251,9 @@ def run_model(
def main(args):
# Grid search
model_output = []
output_folder = utils.prepare_output_folder(
args.datapath, args.dataset, args.image_size, args.model
)
for batch_size in args.batch_size:
logger.info("Generating data with batch of %s images...", batch_size)
# Data generator building
@@ -283,16 +286,14 @@ def main(args):
learning_rate_decay,
]
instance_name = utils.list_to_str(instance_args, "_")
output_folder = utils.prepare_output_folder(
args.datapath, args.dataset, args.model, instance_name
)
instance_folder = os.path.join(output_folder["checkpoints"], instance_name)
# Model running
model_output.append(
run_model(
train_gen,
valid_gen,
args.model,
output_folder,
instance_folder,
instance_name,
args.image_size,
nb_labels,
@@ -309,15 +310,8 @@
best_instance = max(model_output, key=lambda x: x["val_acc"])

# Save best model
output_folder = utils.prepare_output_folder(
args.datapath, args.dataset, args.model
)
instance_name = os.path.join(
output_folder,
"best-{}-" + str(args.image_size) + ".{}",
)
best_instance["model"].save(instance_name.format("model", "h5"))
with open(instance_name.format("instance", "json"), "w") as fobj:
best_instance["model"].save(output_folder["best-model"])
with open(output_folder["best-instance"], "w") as fobj:
json.dump(
{
key: best_instance[key]
45 changes: 29 additions & 16 deletions deeposlandia/utils.py
@@ -9,6 +9,8 @@
import pandas as pd
from PIL import Image

from deeposlandia.datasets import GEOGRAPHIC_DATASETS


logger = daiquiri.getLogger(__name__)

@@ -186,7 +188,7 @@ def prepare_preprocessed_folder(
}


def prepare_output_folder(datapath, dataset, model, instance_name=None):
def prepare_output_folder(datapath, dataset, image_size, model):
"""Dataset and repository management; create and return the dataset output
path
@@ -196,27 +198,38 @@
Data root directory, contain all used the datasets
dataset : str
Dataset name, *e.g.* `mapillary` or `shapes`
image_size : int
Size of the considered images (height and width are equal)
model : str
Research problem that is tackled, *e.g.* `feature_detection` or
`semantic_segmentation`
instance_name : str
Instance name, used to create the accurate output folders
Research problem that is tackled, *e.g.* `featdet` or `semseg`
Returns
-------
str
Dataset output path
dict
Dataset output paths
"""
if instance_name is not None:
output_folder = os.path.join(
datapath, dataset, "output", model, "checkpoints", instance_name
)
else:
output_folder = os.path.join(
datapath, dataset, "output", model, "checkpoints"
)
output_folder = os.path.join(datapath, dataset, "output", model)
os.makedirs(output_folder, exist_ok=True)
return output_folder
checkpoint_folder = os.path.join(output_folder, "checkpoints")
os.makedirs(checkpoint_folder, exist_ok=True)
best_model_filename = "best-model-" + str(image_size) + ".h5"
best_instance_filename = "best-instance-" + str(image_size) + ".json"
label_folder = os.path.join(output_folder, "predicted_labels")
os.makedirs(label_folder, exist_ok=True)
geometry_folder = raster_folder = None
if dataset in GEOGRAPHIC_DATASETS:
geometry_folder = os.path.join(output_folder, "predicted_geometries")
raster_folder = os.path.join(output_folder, "predicted_rasters")
os.makedirs(geometry_folder, exist_ok=True)
os.makedirs(raster_folder, exist_ok=True)
return {
"best-model": os.path.join(checkpoint_folder, best_model_filename),
"best-instance": os.path.join(checkpoint_folder, best_instance_filename),
"checkpoints": checkpoint_folder,
"geometries": geometry_folder,
"labels": label_folder,
"rasters": raster_folder,
}


def recover_instance(instance_path):
Expand Down
5 changes: 5 additions & 0 deletions tests/conftest.py
@@ -170,6 +170,11 @@ def aerial_nb_images():
return 1


@pytest.fixture
def nb_tiles_per_image():
return 10


@pytest.fixture
def aerial_nb_output_training_images():
return 10