Skip to content

Commit

Permalink
Merge branch 'ocrd-tool-schema-filegrp-cardinality'
Browse files Browse the repository at this point in the history
# Conflicts:
#	CHANGELOG.md
  • Loading branch information
kba committed Aug 15, 2024
2 parents e30d86c + 1b912fd commit 282d5e5
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Changed:

- Deprecate `input_file_grp` and `output_file_grp`, #255
- Introduce `input_file_grp_cardinality` and `ouptut_file_grp_cardinality`, #255

Fixed:

- sorted imports in `scripts/yaml-to-json.py`, #252
Expand Down
2 changes: 1 addition & 1 deletion ocrd_tool.schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"description": "Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"description": "Output fileGrp@USE this tool produces by default", "type": "array", "items": {"type": "string"}}, "parameters": {"description": "Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. `default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", "archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}}
{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp_cardinality", "output_file_grp_cardinality"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"deprecated": true, "description": "(DEPRECATED) Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"deprecated": true, "description": "(DEPRECATED) Output fileGrp@USE this tool produces by default", "type": "array", "items": {"type": "string"}}, "input_file_grp_cardinality": {"description": "Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "output_file_grp_cardinality": {"description": "Number of (comma-separated) output fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "parameters": {"description": "Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. `default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", "archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}}
35 changes: 30 additions & 5 deletions ocrd_tool.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,50 @@ properties:
- steps
- executable
- categories
- input_file_grp
# Not required because not all processors produce output files
# - output_file_grp
- input_file_grp_cardinality
- output_file_grp_cardinality
properties:
executable:
description: The name of the CLI executable in $PATH
type: string
input_file_grp:
description: Input fileGrp@USE this tool expects by default
deprecated: true
description: (DEPRECATED) Input fileGrp@USE this tool expects by default
type: array
items:
type: string
# pattern: '^OCR-D-[A-Z0-9-]+$'
output_file_grp:
description: Output fileGrp@USE this tool produces by default
deprecated: true
description: (DEPRECATED) Output fileGrp@USE this tool produces by default
type: array
items:
type: string
# pattern: '^OCR-D-[A-Z0-9-]+$'
input_file_grp_cardinality:
description: Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)
oneOf:
- type: number
multipleOf: 1
- type: array
items:
type: number
multipleOf: 1
minItems: 2
maxItems: 2
default: 1
output_file_grp_cardinality:
description: Number of (comma-separated) output fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)
oneOf:
- type: number
multipleOf: 1
- type: array
items:
type: number
multipleOf: 1
minItems: 2
maxItems: 2
default: 1
parameters:
description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.
type: object
Expand Down

0 comments on commit 282d5e5

Please sign in to comment.