diff --git a/docs/using.rst b/docs/using.rst index a735eb1..0b8f469 100644 --- a/docs/using.rst +++ b/docs/using.rst @@ -59,7 +59,8 @@ Configuring Components ---------------------- Most Stetl Components, i.e. inputs, filters, outputs, have properties that can be configured within their respective [section] in the config file. But what are the possible properties, values and defaults? -This is documented within each Component class using the `@Config` decorator much similar to `@property`, only with +This is documented within each Component class using the ``@Config`` decorator much similar to the standard Python +``@property``, only with some more intelligence for type conversions, defaults, required presence and documentation. It is loosely based on https://wiki.python.org/moin/PythonDecoratorLibrary#Cached_Properties and Bruce Eckel's http://www.artima.com/weblogs/viewpost.jsp?thread=240845 with a fix/hack for Sphinx documentation. @@ -67,7 +68,7 @@ http://www.artima.com/weblogs/viewpost.jsp?thread=240845 with a fix/hack for Sph See for example the :class:`stetl.inputs.fileinput.FileInput` documentation. For class authors: this information is added -via the Python Decorators much similar to `@property`. The :class:`stetl.component.Config` +via the Python Decorators much similar to ``@property``. The :class:`stetl.component.Config` is used to define read-only properties for each Component instance. For example, :: class FileInput(Input): @@ -112,8 +113,8 @@ is used to define read-only properties for each Component instance. For example, self.file_list = Util.make_file_list(self.file_path, None, self.filename_pattern, self.depth_search) This defines two configurable properties for the class FileInput. -Each ``@Config`` has three parameters: ``p_type``, the Python type (``str``, ``list``, ``dict``, ``bool``,``int``), -`default` (default value if not present) and `required` (if property in mandatory or optional). 
+Each ``@Config`` has three parameters: ``ptype``, the Python type (``str``, ``list``, ``dict``, ``bool``, ``int``), +``default`` (default value if not present) and ``required`` (if property is mandatory or optional). Within the config one can set specific config values like, :: @@ -122,23 +123,23 @@ config values like, :: class = inputs.fileinput.XmlFileInput file_path = input/cities.xml -This automagically assigns `file_path` to `self.file_path` without any custom code and assigns the -default value to `filename_pattern`. Automatic checks are performed: if `file_path` (``required=True``) is present, if its type is string. -In some cases type conversions may be applied e.g. when type is `dict` or `list`. It is guarded that the value is not -overwritten and the docstrings will appear in the auto-generated documentation each property with a ``CONFIG`` tag. +This automagically assigns ``file_path`` to ``self.file_path`` without any custom code and assigns the +default value to ``filename_pattern``. Automatic checks are performed: if ``file_path`` (``required=True``) is present, if its type is string. +In some cases type conversions may be applied e.g. when type is ``dict`` or ``list``. It is guarded that the value is not +overwritten and the docstrings will appear in the auto-generated documentation, each entry prepended with a ``CONFIG`` tag. Running Stetl ------------- -The above ETL spec can be found in the file `etl.cfg`. Now Stetl can be run, simply by typing :: +The above ETL spec can be found in the file ``etl.cfg``. Now Stetl can be run, simply by typing :: stetl -c etl.cfg -Stetl will parse `etl.cfg`, create all Components by their class name and link them in a Chain and execute +Stetl will parse ``etl.cfg``, create all Components by their class name and link them in a Chain and execute that Chain. Of course this example is very trivial, as we could just call XSLT without Stetl. But it becomes interesting with more complex transformations. 
-Suppose we want to convert the resulting GML to an ESRI Shapefile. As we cannot use GDAL `ogr2ogr` on the input +Suppose we want to convert the resulting GML to an `ESRI Shapefile`. As we cannot use GDAL ``ogr2ogr`` on the input file, we need to combine XSLT and `ogr2ogr`. See example `3_shape `_. Now we replace the output by using `outputs.ogroutput.Ogr2OgrOutput`, which can execute any `ogr2ogr` command, converting diff --git a/examples/bgt/etl-bgt-postgis-anonymous.sh b/examples/bgt/etl-bgt-postgis-anonymous.sh old mode 100644 new mode 100755 index e31f906..69e5eb7 --- a/examples/bgt/etl-bgt-postgis-anonymous.sh +++ b/examples/bgt/etl-bgt-postgis-anonymous.sh @@ -10,13 +10,13 @@ #input_file="input/20130814_GML_crotec_input.gml" input_file="input/Maastricht_GML_20130924.gml" -# Databasse connection +# Database connection host="localhost" port="5432" -user="myusername" -database="mydatabasename" -password="mypassword" +user="postgres" +database="bgt" +password="postgres" schema="public" -stetl -c etl-bgt-postgis.cfg -a "gml_input=$input_file host=$host port=$port user=$user database=$database password=$password schema=$schema max_in_memory_features=100000" +stetl -c etl-bgt-postgis.cfg -a "gml_input=$input_file host=$host port=$port user=$user database=$database password=$password schema=$schema max_in_memory_features=10000" diff --git a/examples/bgt/etl-bgt-shp.sh b/examples/bgt/etl-bgt-shp.sh old mode 100644 new mode 100755 index eb27787..b964caa --- a/examples/bgt/etl-bgt-shp.sh +++ b/examples/bgt/etl-bgt-shp.sh @@ -13,5 +13,5 @@ input_file="input/Maastricht_GML_20130924.gml" output_file="output/bgt-gml-test.shp" # output_file="/Users/just/project/stetl/contrib/duiv/data/20130814_GML_crotec_output.gml" -stetl -c etl-bgt-shp.cfg -a "gml_input=$input_file shp_output=$output_file max_in_memory_features=100000" +stetl -c etl-bgt-shp.cfg -a "gml_input=$input_file shp_output=$output_file max_in_memory_features=10000" diff --git 
a/examples/inspire/ad-bag-nl/readme.txt b/examples/inspire/ad-bag-nl/readme.txt index 77fe94a..b89f753 100644 --- a/examples/inspire/ad-bag-nl/readme.txt +++ b/examples/inspire/ad-bag-nl/readme.txt @@ -3,3 +3,7 @@ at the INSPIRE-FOSS project: https://code.google.com/p/inspire-foss/source/browse/#svn%2Ftrunk%2Fetl%2FNL.Kadaster%2FAddresses +NEW (23.9.2014) +See also the examples/basics/10_jinja2_templating for a more compact INSPIRE example. + + diff --git a/examples/ordnancesurvey/etl.sh b/examples/ordnancesurvey/etl.sh index d2e357f..cdc1238 100755 --- a/examples/ordnancesurvey/etl.sh +++ b/examples/ordnancesurvey/etl.sh @@ -4,4 +4,7 @@ PYTHONPATH=.:$PYTHONPATH GML_FILES=input # GML_FILES=/Users/just/project/stetl/contrib/astun/osdata/58116-SX9192-2c1.gml -stetl -c stetl.cfg -a "database=ordsurvey host=localhost port=5432 user=postgres password=postgres schema=osmm temp_dir=temp max_features=5000 gml_files=$GML_FILES" \ No newline at end of file +stetl=stetl +# stetl=../../stetl/main.py + +$stetl -c stetl.cfg -a "database=ordsurvey host=localhost port=5432 user=postgres password=postgres schema=osmm temp_dir=temp max_features=5000 gml_files=$GML_FILES" \ No newline at end of file diff --git a/examples/ordnancesurvey/output/osmm_topo_prepared.gml b/examples/ordnancesurvey/output/osmm_topo_prepared.gml index cb5d4b9..9c87edb 100644 --- a/examples/ordnancesurvey/output/osmm_topo_prepared.gml +++ b/examples/ordnancesurvey/output/osmm_topo_prepared.gml @@ -1,20 +1,16 @@ - - Ordnance Survey, (c) Crown Copyright. All rights reserved, 2009-07-30 - - unknown - - 2009-07-30T02:35:17 - - - 291000.000,92000.000 293000.000,94000.000 - - - + + Ordnance Survey, (c) Crown Copyright. 
All rights reserved, 2009-07-30 + + unknown + + 2009-07-30T02:35:17 + + + 291000.000,92000.000 293000.000,94000.000 + + + 10123 10 @@ -223,13 +219,9 @@ - osgb1000000347738391 - stetl - Roads Tracks And Paths - Path - + osgb1000000347738391stetlRoads Tracks And PathsPath - + 10056 9 @@ -310,13 +302,9 @@ - osgb1000000347738429 - stetl - Land - General Surface - + osgb1000000347738429stetlLandGeneral Surface - + 10046 3 @@ -341,13 +329,9 @@ - osgb1000000347735642 - stetl - Land - General Feature - + osgb1000000347735642stetlLandGeneral Feature - + 10046 2 @@ -369,13 +353,9 @@ - osgb1000000347735857 - stetl - Land, Roads Tracks And Paths - General Feature - + osgb1000000347735857stetlLand, Roads Tracks And PathsGeneral Feature - + 10128 6 @@ -417,14 +397,9 @@ - osgb1000000738106555 - stetl - Administrative Boundaries - Political Or Administrative - Electoral - + osgb1000000738106555stetlAdministrative BoundariesPolitical Or AdministrativeElectoral - + 10128 9 @@ -538,14 +513,9 @@ - osgb1000000738106597 - stetl - Administrative Boundaries - Political Or Administrative - Electoral - + osgb1000000738106597stetlAdministrative BoundariesPolitical Or AdministrativeElectoral - + 10128 8 @@ -639,14 +609,9 @@ - osgb1000000738106616 - stetl - Administrative Boundaries - Political Or Administrative - Electoral - + osgb1000000738106616stetlAdministrative BoundariesPolitical Or AdministrativeElectoral - + 10128 11 @@ -737,14 +702,9 @@ - osgb1000000738107869 - stetl - Administrative Boundaries - Political Or Administrative - Electoral - + osgb1000000738107869stetlAdministrative BoundariesPolitical Or AdministrativeElectoral - + 10165 1 @@ -763,15 +723,9 @@ 291781.470,92943.570 - osgb1000000729439973 - stetl - 300 - Rail - Rail - Switch - + osgb1000000729439973stetl300RailRailSwitch - + 10066 3 @@ -794,15 +748,9 @@ 291706.100,92740.500 - osgb1000000729439974 - stetl - 223 - Terrain And Height - Height Control - Bench Mark - + osgb1000000729439974stetl223Terrain And HeightHeight 
ControlBench Mark - + 10066 3 @@ -825,15 +773,9 @@ 291976.100,92634.800 - osgb1000000729439975 - stetl - 303 - Terrain And Height - Height Control - Bench Mark - + osgb1000000729439975stetl303Terrain And HeightHeight ControlBench Mark - + 10066 3 @@ -856,15 +798,9 @@ 291744.300,92717.000 - osgb1000000729439976 - stetl - 25 - Terrain And Height - Height Control - Bench Mark - + osgb1000000729439976stetl25Terrain And HeightHeight ControlBench Mark - + 10066 3 @@ -887,15 +823,9 @@ 291530.200,92918.750 - osgb1000000729439977 - stetl - 40 - Terrain And Height - Height Control - Bench Mark - + osgb1000000729439977stetl40Terrain And HeightHeight ControlBench Mark - + 10026 1 @@ -918,16 +848,11 @@ 2 1.500 545 - 54 - + 54 44 - osgb1000000729439996 - stetl - Buildings - Buildings Or Structure - + osgb1000000729439996stetlBuildingsBuildings Or Structure - + 10074 3 @@ -957,16 +882,11 @@ 2 1.750 3230 - 323 - + 323 North Gate - osgb1000000729439997 - stetl - Heritage And Antiquities - Historic Interest - + osgb1000000729439997stetlHeritage And AntiquitiesHistoric Interest - + 10026 1 @@ -989,16 +909,11 @@ 2 1.500 1811 - 181 - + 181 34 - osgb1000000729439998 - stetl - Buildings - Buildings Or Structure - + osgb1000000729439998stetlBuildingsBuildings Or Structure - + 10026 1 @@ -1021,16 +936,11 @@ 1 1.500 0 - 0 - + 0 PH - osgb1000000729439999 - stetl - Buildings - Buildings Or Structure - + osgb1000000729439999stetlBuildingsBuildings Or Structure - + 10026 1 @@ -1053,16 +963,11 @@ 2 1.500 2141 - 214 - + 214 6 - osgb1000000729440000 - stetl - Buildings - Buildings Or Structure - + osgb1000000729440000stetlBuildingsBuildings Or Structure - + 10184 3 @@ -1084,16 +989,11 @@ 1 1.500 0 - 0 - + 0 LBs - osgb1000000729440001 - stetl - Structures - Structure - + osgb1000000729440001stetlStructuresStructure - + 10169 2 @@ -1120,17 +1020,11 @@ 1 1.750 473 - 47 - + 47 HALDON ROAD - osgb1000000729440002 - stetl - Roads Tracks And Paths - Road Or Track - Road Name Or Classification - + 
osgb1000000729440002stetlRoads Tracks And PathsRoad Or TrackRoad Name Or Classification - + 10026 2 @@ -1153,16 +1047,11 @@ 1 1.750 605 - 60 - + 60 St David's - osgb1000000729440003 - stetl - Buildings - Buildings Or Structure - + osgb1000000729440003stetlBuildingsBuildings Or Structure - + 10026 1 @@ -1185,16 +1074,11 @@ 2 1.500 1298 - 129 - + 129 29 - osgb1000000729440004 - stetl - Buildings - Buildings Or Structure - + osgb1000000729440004stetlBuildingsBuildings Or Structure - + 10026 1 @@ -1217,16 +1101,11 @@ 1 2.000 0 - 0 - + 0 School - osgb1000000729440005 - stetl - Buildings - Buildings Or Structure - + osgb1000000729440005stetlBuildingsBuildings Or Structure - + 10197 1 @@ -1249,14 +1128,9 @@ 291548.750,92569.850 - osgb1000000732239646 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239646stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1279,14 +1153,9 @@ 291574.550,92876.700 - osgb1000000732239647 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239647stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1309,14 +1178,9 @@ 291698.500,92598.250 - osgb1000000732239649 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239649stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1339,14 +1203,9 @@ 291700.450,92751.650 - osgb1000000732239650 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239650stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1369,14 +1228,9 @@ 291596.050,92720.150 - osgb1000000732239652 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239652stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1399,14 +1253,9 @@ 291875.050,92591.750 - osgb1000000732239653 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239653stetlTerrain And HeightTerrain And HeightSpot Height - + 
10197 1 @@ -1429,14 +1278,9 @@ 291676.700,92535.000 - osgb1000000732239659 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239659stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1459,14 +1303,9 @@ 291798.950,92670.350 - osgb1000000732239660 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239660stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1489,14 +1328,9 @@ 291858.950,92921.850 - osgb1000000732239661 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239661stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1519,14 +1353,9 @@ 291619.800,92634.500 - osgb1000000732239663 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239663stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1549,14 +1378,9 @@ 291809.550,92778.550 - osgb1000000732239664 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239664stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1579,14 +1403,9 @@ 291998.000,92813.400 - osgb1000000732239666 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239666stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1609,14 +1428,9 @@ 291761.800,92705.050 - osgb1000000732239667 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239667stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1639,14 +1453,9 @@ 291893.950,92881.850 - osgb1000000732239669 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239669stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1669,14 +1478,9 @@ 291650.200,92797.300 - osgb1000000732239670 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239670stetlTerrain And HeightTerrain And HeightSpot Height - + 10197 1 @@ -1699,11 
+1503,6 @@ 291927.550,92988.750 - osgb1000000732239671 - stetl - Terrain And Height - Terrain And Height - Spot Height - + osgb1000000732239671stetlTerrain And HeightTerrain And HeightSpot Height diff --git a/stetl/component.py b/stetl/component.py index 061cbd0..143bbee 100644 --- a/stetl/component.py +++ b/stetl/component.py @@ -26,7 +26,7 @@ def __init__(self, ptype=str, default=None, required=False): to be decorated is passed to the constructor. """ # print "Inside __init__()" - self.python_type = ptype + self.ptype = ptype self.default = default self.required = required @@ -53,23 +53,23 @@ def __get__(self, comp_inst, owner): # print "Inside __get__() owner=%s" % owner """ descr.__get__(obj[, type]) -> value """ if self.property_name not in comp_inst.cfg_vals: - cfg, name, value = comp_inst.cfg, self.property_name, self.default + cfg, name, default_value = comp_inst.cfg, self.property_name, self.default # Do type conversion where needed from the string values - if self.python_type is str: - value = cfg.get(name, value) - elif self.python_type is bool: - value = cfg.get_bool(name, value) - elif self.python_type is list: - value = cfg.get_list(name, value) - elif self.python_type is dict: - value = cfg.get_dict(name, value) - elif self.python_type is int: - value = cfg.get_int(name, value) - elif self.python_type is tuple: - value = cfg.get_tuple(name, value) + if self.ptype is str: + value = cfg.get(name, default=default_value) + elif self.ptype is bool: + value = cfg.get_bool(name, default=default_value) + elif self.ptype is list: + value = cfg.get_list(name, default=default_value) + elif self.ptype is dict: + value = cfg.get_dict(name, default=default_value) + elif self.ptype is int: + value = cfg.get_int(name, default=default_value) + elif self.ptype is tuple: + value = cfg.get_tuple(name, default=default_value) else: - value = cfg.get(name, value) + value = cfg.get(name, default=default_value) if self.required is True and value is None: raise 
Exception('Config property: %s is required in config for %s' % (name, str(comp_inst))) diff --git a/stetl/inputs/fileinput.py b/stetl/inputs/fileinput.py index 51fccd0..1a0c019 100644 --- a/stetl/inputs/fileinput.py +++ b/stetl/inputs/fileinput.py @@ -229,6 +229,7 @@ def __init__(self, configdict, section): self.root = None self.cur_file_path = None self.elem_count = 0 + log.info("Element tags to be matched: %s" % self.element_tags) def read(self, packet): event = None