From 2099786033ffc1342bee763add1d4b34eb56442d Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi
Date: Mon, 8 Jun 2020 11:02:14 -0700
Subject: [PATCH] Fix for some docs not parsing.

This started out by looking at why some documentation didn't have
short_descriptions on the collection index pages.  Looking into it led
to a long chain of behaviours that eventually arrived at that bug.

* I wanted to run pydantic validation and normalization in parallel for
  all of the plugins that need to be read in, but that step would be
  CPU-bound, so I used EventLoop.run_in_executor() with a
  concurrent.futures.ProcessPoolExecutor.  That way each worker would be
  a separate process and could hopefully take better advantage of the
  CPUs on the system.
* Under the hood, ProcessPoolExecutor uses multiprocessing to hand off
  work to the worker processes.  multiprocessing has to be able to
  pickle the Python objects that are sent to and received from the
  workers.
* It turns out that there are a few bugs in the Python pickle library
  that cause pydantic exceptions to fail in a specific way: they pickle
  fine but they can't be unpickled.
* That means that my code would encounter a validation error, raise a
  pydantic.ValidationError, and the worker process would pickle that and
  send it to the parent process.  Once in the parent process, unpickling
  the error would itself traceback.
* That traceback would be unexpected, so ProcessPoolExecutor would
  assume that things were in an unknown state and cancel all of the
  pending tasks.
* So instead of getting a few informative error messages about the few
  plugins whose documentation was broken on the remote side, 30-50% of
  the plugins were cancelled and gave back BrokenProcessPool, which said
  nothing about what the real problem was.

The workaround for this is very simple: catch the pydantic exception,
extract the information we care about, and then reraise it as a
different, picklable exception so that the asyncio framework can operate
on it properly.  (A simplified sketch of this pattern follows the patch
below.)

Fixes #86
---
 antsibull/cli/doc_commands/stable.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/antsibull/cli/doc_commands/stable.py b/antsibull/cli/doc_commands/stable.py
index a4ed4682..52127b9a 100644
--- a/antsibull/cli/doc_commands/stable.py
+++ b/antsibull/cli/doc_commands/stable.py
@@ -107,12 +107,16 @@ def normalize_plugin_info(plugin_type: str,
         except ValidationError as e:
             if field == 'doc':
                 # We can't recover if there's not a doc field
-                raise
+                # pydantic exceptions are not picklable (probably due to bugs in the pickle module)
+                # so convert it to an exception type which is picklable
+                raise ValueError(str(e))
+
             # But we can use the default value (some variant of "empty") for everything else
-            # Note: We looped through doc first and raised an exception if doc did not normalize
+            # Note: We looped through doc first and returned an exception if doc did not normalize
             # so we're able to use it in the error message here.
             errors.append(f'Unable to normalize {new_info["doc"]["name"]}: {field}'
                           f' due to: {str(e)}')
+
             field_model = DOCS_SCHEMAS[plugin_type][field].parse_obj({})
             new_info.update(field_model.dict(by_alias=True))
 
@@ -177,7 +181,7 @@ def get_collection_contents(plugin_info: t.Mapping[str, t.Mapping[str, t.Any]],
                             nonfatal_errors: PluginErrorsRT
                             ) -> t.DefaultDict[str, t.DefaultDict[str, t.Dict[str, str]]]:
     """
-    Return the contents plugins which are in each collection.
+    Return the plugins which are in each collection.
 
     :arg plugin_info: Mapping of plugin type to a mapping of plugin name to plugin record.
         The plugin_type, plugin_name, and short_description from plugin_records are used.
@@ -273,6 +277,10 @@ def generate_docs(args: 'argparse.Namespace') -> int:
         import json
         json.dump(plugin_info, f)
     flog.debug('Finished dumping raw plugin_info')
+
+    with open('dump_formatted_plugin_info.json', 'r') as f:
+        import json
+        plugin_info = json.load(f)
     """
 
     plugin_info, nonfatal_errors = asyncio_run(normalize_all_plugin_info(plugin_info))
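
For anyone hitting the same failure mode, here is a minimal sketch of
the pattern.  It is not antsibull's actual code: the PluginDoc model and
the normalize_one/normalize_all names are illustrative, and it assumes
pydantic v1 with everything defined at module level so the worker
processes can unpickle it.

    import asyncio
    import concurrent.futures

    from pydantic import BaseModel, ValidationError

    class PluginDoc(BaseModel):
        # Illustrative stand-in for one of the real DOCS_SCHEMAS models.
        name: str
        short_description: str

    def normalize_one(raw_doc):
        # Runs in a worker process.
        try:
            return PluginDoc.parse_obj(raw_doc).dict(by_alias=True)
        except ValidationError as e:
            # ValidationError pickles but fails to unpickle on the trip
            # back to the parent process, which makes ProcessPoolExecutor
            # cancel every pending task.  Re-raise as a plain, picklable
            # exception carrying the same message instead.
            raise ValueError(str(e))

    async def normalize_all(raw_docs):
        loop = asyncio.get_running_loop()
        with concurrent.futures.ProcessPoolExecutor() as pool:
            futures = [loop.run_in_executor(pool, normalize_one, doc)
                       for doc in raw_docs]
            # return_exceptions=True keeps each per-plugin ValueError as a
            # result instead of aborting the batch on the first failure.
            return await asyncio.gather(*futures, return_exceptions=True)

Driven as, say, asyncio.run(normalize_all(docs)) from under an
if __name__ == '__main__': guard (required so spawned workers can import
the module), the plugins with broken documentation come back as
individual ValueError results while the rest normalize as usual, rather
than most of the batch dying with BrokenProcessPool.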