diff --git a/src/bin/main.ml b/src/bin/main.ml
index a52f4443..004529f3 100644
--- a/src/bin/main.ml
+++ b/src/bin/main.ml
@@ -123,21 +123,29 @@ let edit ~proc ~net ~fs () file port =
   Cohttp_eio.Server.run socket server ~on_error:log_warning
 
 let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs
-    src_dir env_override =
+    src_dir env_override input_override =
+  let import_map =
+    List.map
+      (fun (k, v) ->
+        match Fpath.of_string v with
+        | Ok p -> (k, p)
+        | Error (`Msg msg) -> Fmt.failwith "Not a valid path %s: %s" v msg)
+      input_override
+  in
+
   run_eventloop @@ fun () ->
   let ((_, store) as s) = store_or_default store in
   let (Builder ((module Builder), _builder) as obuilder) =
     create_builder ~fs ~net ~domain_mgr fetcher s conf
   in
   Fun.protect ~finally:(fun () -> ()) @@ fun () ->
-  let doc =
-    In_channel.with_open_bin file @@ fun ic ->
-    Cmarkit.Doc.of_string (In_channel.input_all ic)
-  in
-
   let file_path = Eio.Path.(fs / file) in
   let template_markdown = Eio.Path.load file_path in
-  let ast = Shark.Ast.of_sharkdown ~template_markdown in
+  let ast, markdown =
+    Shark.Ast.of_sharkdown ~concrete_paths:import_map template_markdown
+  in
+
+  let doc = Cmarkit.Doc.of_string markdown in
 
   let pool = Eio.Pool.create jobs (fun () -> ()) in
   let store = Lwt_eio.run_lwt @@ fun () -> store in
@@ -151,7 +159,12 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs
             (* First we translate the import statement to a build block *)
             let uid = string_of_int !import_uid in
             incr import_uid;
-            let cb, blk = Shark.Md.translate_import_block ~uid block in
+            let (cb, blk), src_dir_opt =
+              Shark.Md.translate_import_block ~uid block
+            in
+            let import_src_dir =
+              match src_dir_opt with Some x -> x | None -> src_dir
+            in
             (* Now we build the block *)
             (* Import block digests need to be mapped to this build hash *)
             let hb =
@@ -166,7 +179,8 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs
             let _alias, _id, cb =
               Shark.Build_cache.with_build build_cache @@ fun _build_cache ->
               let cb, blk =
-                Shark.Md.process_build_block ~src_dir ~hb obuilder ast (cb, blk)
+                Shark.Md.process_build_block ~src_dir:import_src_dir ~hb obuilder
+                  ast (cb, blk)
               in
               ( Shark.Block.alias blk,
                 option_get ~msg:"Block hash for import" (Shark.Block.hash blk),
@@ -337,6 +351,11 @@ let env_override =
          KEY=VALUE."
       ~docv:"ENVIRONMENT" [ "e" ]
 
+let input_override =
+  Arg.value
+  @@ Arg.(opt_all (pair ~sep:'=' string string)) []
+  @@ Arg.info ~doc:"Provide input file names KEY=VALUE." ~docv:"INPUT" [ "i" ]
+
 let build ~fs ~net ~domain_mgr ~clock =
   let doc = "Build a spec file." in
  let info = Cmd.info "build" ~doc in
@@ -361,7 +380,8 @@ let md ~fs ~net ~domain_mgr ~proc ~clock =
     Term.(
       const (md ~fs ~net ~domain_mgr ~proc ~clock)
       $ setup_log $ no_run $ store $ Obuilder.Native_sandbox.cmdliner
-      $ markdown_file $ port $ fetcher $ jobs $ src_dir $ env_override)
+      $ markdown_file $ port $ fetcher $ jobs $ src_dir $ env_override
+      $ input_override)
 
 let editor ~proc ~net ~fs ~clock =
   let doc = "Run the editor for a markdown file" in
diff --git a/src/lib/ast/ast.ml b/src/lib/ast/ast.ml
index c81c4561..b7152799 100644
--- a/src/lib/ast/ast.ml
+++ b/src/lib/ast/ast.ml
@@ -216,12 +216,40 @@ let pass_one_on_list inputs section_list =
 
 let to_list ast = List.map snd ast.nodes
 
-let of_sharkdown ~template_markdown =
-  let metadata, sections =
+let synthesize_import_block input_map input_override_map =
+  let imports =
+    List.map
+      (fun (k, p) ->
+        let dest = List.assoc k input_map in
+        (p, dest))
+      input_override_map
+  in
+  let codeblock =
+    List.fold_left
+      (fun acc (src, dst) ->
+        acc
+        ^ Printf.sprintf "%s %s\n" (Fpath.to_string src) (Fpath.to_string dst))
+      "" imports
+  in
+  let block = Block.import codeblock in
+  ("imports", [ block_to_superblock block ])
+
+let synthesize_unmapped_import_block input_map =
+  let codeblock =
+    List.fold_left
+      (fun acc (src, dst) ->
+        acc ^ Printf.sprintf "%s %s\n" src (Fpath.to_string dst))
+      "" input_map
+  in
+  let block = Block.import codeblock in
+  ("imports", [ block_to_superblock block ])
+
+let of_sharkdown ?concrete_paths template_markdown =
+  let metadata, sections, markdown =
     match String.cuts ~sep:"---" template_markdown with
     | [ frontmatter; markdown ] | [ ""; frontmatter; markdown ] ->
-        (parse_frontmatter frontmatter, parse_markdown markdown)
-    | [ markdown ] -> (Frontmatter.empty, parse_markdown markdown)
+        (parse_frontmatter frontmatter, parse_markdown markdown, markdown)
+    | [ markdown ] -> (Frontmatter.empty, parse_markdown markdown, markdown)
     | _ -> failwith "Malformed frontmatter/markdown file"
   in
 
@@ -239,11 +267,40 @@ let of_sharkdown ~template_markdown =
       sections
   in
 
+  let input_map = Frontmatter.input_map metadata in
+  let synthesized_sections =
+    match input_map with
+    | [] -> []
+    | _ -> (
+        match concrete_paths with
+        | Some concrete_paths ->
+            [ synthesize_import_block input_map concrete_paths ]
+        | None -> [ synthesize_unmapped_import_block input_map ])
+  in
+
+  let expanded_markdown =
+    List.fold_left
+      (fun acc (name, bs) ->
+        let title = Printf.sprintf "# %s\n\n" name in
+        let body =
+          List.fold_left
+            (fun acc b ->
+              Printf.sprintf "```%s\n%s\n```\n\n"
+                (Block.to_info_string b.block)
+                (Block.body b.block)
+              ^ acc)
+            "\n" bs
+        in
+
+        (title ^ body) ^ acc)
+      markdown synthesized_sections
+  in
+
+  let expanded_sections = synthesized_sections @ detailed_sections in
+
   (* we can only infer the dependency graph globally, so we need to do this at
      the top level before then working out the DAG. *)
-  let pass1 =
-    pass_one_on_list (Frontmatter.inputs metadata) detailed_sections
-  in
+  let pass1 = pass_one_on_list [] expanded_sections in
 
   (* Now I have the global graph implicitly, turn the list into a graph of
      blocks *)
   let all_hyperblocks = List.concat_map Section.blocks pass1 in
@@ -273,7 +330,7 @@ let of_sharkdown ~template_markdown =
         id_all_hyperblocks)
   in
 
-  { nodes = id_all_hyperblocks; edges }
+  ({ nodes = id_all_hyperblocks; edges }, expanded_markdown)
 
 let find_id_of_block ast ib =
   let d = Block.digest ib in
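Note on the `Ast.of_sharkdown` change above: the function now takes the document positionally, accepts the override map as an optional argument, and returns the expanded markdown alongside the AST. A minimal sketch of a call site, assuming an input named "boundary" and an invented local path:

```ocaml
(* Hypothetical usage of the new signature. The second result is the
   document body with the synthesized "imports" section prepended. *)
let parse template_markdown =
  let concrete_paths = [ ("boundary", Fpath.v "/home/user/123.geojson") ] in
  let ast, expanded_markdown =
    Shark.Ast.of_sharkdown ~concrete_paths template_markdown
  in
  (Shark.Ast.to_list ast, expanded_markdown)
```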
diff --git a/src/lib/ast/ast.mli b/src/lib/ast/ast.mli
index f15201b9..d48ae007 100644
--- a/src/lib/ast/ast.mli
+++ b/src/lib/ast/ast.mli
@@ -30,7 +30,14 @@ type t [@@deriving sexp]
 (** An AST instance *)
 
 val pp : t Fmt.t
-val of_sharkdown : template_markdown:string -> t
+
+val of_sharkdown :
+  ?concrete_paths:(string * Fpath.t) list -> string -> t * string
+(** [of_sharkdown] takes a sharkdown document and generates an AST. If the
+    frontmatter declares external inputs, they can be overridden by supplying
+    [concrete_paths], which maps input names to file paths. Alongside the AST,
+    the sharkdown document is returned, with its body updated to include any
+    autogenerated blocks. *)
+
 val find_id_of_block : t -> Block.t -> block_id option
 val block_by_id : t -> block_id -> Hyperblock.t option
 val find_hyperblock_from_block : t -> Block.t -> Hyperblock.t option
diff --git a/src/lib/block.ml b/src/lib/block.ml
index 000e0825..7a0738c6 100644
--- a/src/lib/block.ml
+++ b/src/lib/block.ml
@@ -80,7 +80,7 @@ let to_info_string = function
       ^ match hash with Some hash -> ":" ^ hash | None -> "")
   | Publish _ -> "shark-publish"
   | Import { hash; alias; _ } -> (
-      Fmt.str "shark-run"
+      Fmt.str "shark-import"
       ^ (match alias with Some alias -> ":" ^ alias | None -> "")
       ^ match hash with Some hash -> ":" ^ hash | None -> "")
 
@@ -138,11 +138,14 @@ let imports = function
   | Import { body; _ } ->
       let cut_import s =
         match String.cut ~sep:" " s with
-        | Some (url, path) -> (url, path)
-        | None -> Fmt.failwith "Invalid import statement %s" s
+        | Some (url, path) -> (
+            match Fpath.of_string path with
+            | Ok path -> (Uri.of_string url, path)
+            | Error (`Msg msg) ->
+                Fmt.failwith "Error parsing path %s: %s" path msg)
+        | None -> Fmt.failwith "Invalid import statement '%s'" s
       in
-      let imports = String.cuts ~sep:"\n" body in
-      List.map cut_import imports
+      String.cuts ~sep:"\n" (String.trim body) |> List.map cut_import
 
 let digest : t -> string = function
   | Import { body; _ }
@@ -150,3 +153,52 @@ let digest : t -> string = function
   | Run { body; _ }
   | Build { body; _ } ->
       Digest.string body
+
+let import_spec b =
+  let open Obuilder_spec in
+  (* TODO: Support multi-import statements *)
+  let url, target_path = imports b |> List.hd in
+  match Uri.scheme url with
+  | None | Some "file" ->
+      (* Choose better image, just need tools to import? *)
+      let fpath =
+        match Fpath.of_string (Uri.path url) with
+        | Ok p -> p
+        | Error (`Msg msg) ->
+            Fmt.failwith "Failed to parse path %s: %s" (Uri.path url) msg
+      in
+      let src_dir, path = Fpath.split_base fpath in
+      let src_dir = Fpath.rem_empty_seg src_dir in
+      ( stage ~from:(`Image "alpine")
+          [
+            (* shell [ "/bin/sh"; "-c" ]; *)
+            (* run "mkdir -p %s" (Fpath.to_string (Fpath.parent path)); *)
+            copy [ Fpath.to_string path ] ~dst:(Fpath.to_string target_path);
+          ],
+        Some (Fpath.to_string src_dir) )
+  | Some "http" | Some "https" -> (
+      let src_path = Uri.path url in
+      match String.cut ~rev:true ~sep:"." src_path with
+      | Some (_, "git") ->
+          (* Choose better image, just need tools to import? *)
+          ( stage ~from:(`Image "alpine")
+              [
+                shell [ "/bin/sh"; "-c" ];
+                run ~network:[ "host" ] "apk add --no-cache git";
+                run ~network:[ "host" ] "mkdir -p /data && git clone %s %s"
+                  (Uri.to_string url)
+                  (Fpath.to_string target_path);
+              ],
+            None )
+      | _ ->
+          (* Choose better image, just need tools to import? *)
+          ( stage ~from:(`Image "alpine")
+              [
+                shell [ "/bin/sh"; "-c" ];
+                run ~network:[ "host" ] "apk add --no-cache curl";
+                run ~network:[ "host" ] "mkdir -p /data && curl -o %s %s"
+                  (Fpath.to_string target_path)
+                  (Uri.to_string url);
+              ],
+            None ))
+  | Some scheme -> Fmt.failwith "Unsupported import scheme %s" scheme
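Note on `Block.import_spec` above: filesystem imports become a `copy` stage whose build context is the file's parent directory, while git and http(s) imports fetch inside the container and need no context. A sketch under invented paths, mirroring the assertions in the tests added below:

```ocaml
(* Hypothetical: a no-scheme import is treated as a local file, so the
   returned src_dir is Some "/home/user" and the spec contains a copy op. *)
let () =
  let block = Shark.Block.import "/home/user/file.csv /data/file.csv" in
  let spec, src_dir = Shark.Block.import_spec block in
  assert (src_dir = Some "/home/user");
  print_endline (Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec))
```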
diff --git a/src/lib/block.mli b/src/lib/block.mli
index 0cb868c1..6edc4f89 100644
--- a/src/lib/block.mli
+++ b/src/lib/block.mli
@@ -68,7 +68,10 @@ val body : t -> string
 val output : t -> [ `Directory of string ]
 (** The output of a publish block *)
 
-val imports : t -> (string * string) list
+val imports : t -> (Uri.t * Fpath.t) list
 (** The imports from an import block, i.e. a list of [URL, Path] pairs. *)
 
 val digest : t -> string
+
+val import_spec : t -> Obuilder_spec.t * string option
+(** For a shark-import block, generate the spec to execute to enact the import,
+    along with the optional source directory needed when importing from the
+    local filesystem. *)
diff --git a/src/lib/dotrenderer.ml b/src/lib/dotrenderer.ml
index 3115b08c..b1654407 100644
--- a/src/lib/dotrenderer.ml
+++ b/src/lib/dotrenderer.ml
@@ -40,6 +40,27 @@ let render_publish_to_dot ppf command =
         (Datafile.id datafile) process_index label)
     (Leaf.inputs command)
 
+let render_import_to_dot ppf command =
+  let process_index = Leaf.id command in
+  List.iter
+    (fun datafile ->
+      let label =
+        match Datafile.subpath datafile with
+        | Some x -> Fmt.str ",label=\"%s\"" x
+        | None -> ""
+      in
+      Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n"
+        (Datafile.id datafile) process_index label)
+    (Leaf.inputs command);
+  Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n" process_index
+    (Uri.pct_encode (Command.name (Leaf.command command)));
+  List.iter
+    (fun datafile ->
+      Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" process_index
+        (Datafile.id datafile))
+    (Leaf.outputs command);
+  Format.fprintf ppf "\n"
+
 let datafile_to_dot ppf datafile =
   Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n"
     (Datafile.id datafile)
@@ -83,6 +104,7 @@ let render_ast_to_dot ppf hyperblocks : unit =
 
       let renderer =
         match kind with
+        | `Import -> render_import_to_dot
         | `Run -> render_command_to_dot
        | `Publish -> render_publish_to_dot
         | _ -> fun _a _b -> ()
@@ -94,7 +116,7 @@ let render_ast_to_dot ppf hyperblocks : unit =
   Format.fprintf ppf "}\n"
 
 let render ~template_markdown =
-  Ast.of_sharkdown ~template_markdown
-  |> Ast.to_list
+  Ast.of_sharkdown template_markdown
+  |> fst |> Ast.to_list
   |> render_ast_to_dot Format.str_formatter;
   Format.flush_str_formatter ()
diff --git a/src/lib/dune b/src/lib/dune
index 4d86d9df..79fda625 100644
--- a/src/lib/dune
+++ b/src/lib/dune
@@ -3,6 +3,6 @@
 (library
  (name shark)
  (public_name shark)
- (libraries eio str yaml lwt_eio cmarkit obuilder fpath)
+ (libraries eio str yaml lwt_eio cmarkit obuilder fpath uri)
  (preprocess
   (pps ppx_sexp_conv)))
diff --git a/src/lib/frontmatter.ml b/src/lib/frontmatter.ml
index 8193cd62..76f78d6c 100644
--- a/src/lib/frontmatter.ml
+++ b/src/lib/frontmatter.ml
@@ -65,3 +65,4 @@ let of_toplevel_yaml = function
 let of_string s = String.trim s |> Yaml.of_string |> Result.map of_toplevel_yaml
 let variables t = t.variables
 let inputs t = List.map (fun (_, v) -> v) t.inputs
+let input_map t = t.inputs
diff --git a/src/lib/frontmatter.mli b/src/lib/frontmatter.mli
index 90591df2..ba1ba70c 100644
--- a/src/lib/frontmatter.mli
+++ b/src/lib/frontmatter.mli
@@ -6,3 +6,4 @@ val empty : t
 val of_string : string -> (t, [ `Msg of string ]) result
 val variables : t -> (string * string list) list
 val inputs : t -> Fpath.t list
+val input_map : t -> (string * Fpath.t) list
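Note on the new accessor: `input_map` exposes the input names that `-i` overrides are matched against, while `inputs` keeps its old shape. A small sketch of the relationship between the two:

```ocaml
(* Hypothetical helper: both accessors read the same underlying list, so the
   names from input_map pair up positionally with the paths from inputs. *)
let named_inputs (fm : Shark.Frontmatter.t) =
  List.combine
    (List.map fst (Shark.Frontmatter.input_map fm))
    (Shark.Frontmatter.inputs fm)
```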
diff --git a/src/lib/md.ml b/src/lib/md.ml
index 411a334f..3084d0d4 100644
--- a/src/lib/md.ml
+++ b/src/lib/md.ml
@@ -215,7 +215,7 @@ let process_run_block ?(env_override = []) ~fs ~build_cache ~pool store ast
     cmdstr =
   Eio.Pool.use pool @@ fun () ->
   let command = Leaf.command leaf in
-  Logs.debug (fun f -> f "Processing command: %a" Command.pp command);
+  Logs.info (fun f -> f "Processing command: %a" Command.pp command);
   match Command.name command with
   | "cd" ->
       (* If a command block is a call to `cd` we treat this similarly to Docker's
@@ -223,13 +223,20 @@
       (* If the dir is in the inputs we should substitute it, otherwise we
         assume it's a new dir in this current image. *)
-      let path = Fpath.to_string (List.nth (Command.file_args command) 0) in
+      let args = Command.file_args command in
       let inspected_path =
-        match List.assoc_opt path file_subs_map with
-        | None -> path
-        | Some pl -> List.nth pl 0
+        match args with
+        | [] ->
+            (* no /data path in this, so just pull the path directly as the
+               AST only works with /data paths *)
+            String.cut ~sep:" " (Command.to_string command)
+            |> Option.get ~err:"Failed to get path in cd"
+            |> snd
+        | _ -> (
+            let path = Fpath.to_string (List.nth args 0) in
+            match List.assoc_opt path file_subs_map with
+            | None -> path
+            | Some pl -> (
+                match pl with [] -> path | _ -> List.nth pl 0))
       in
 
       let cmd_result = CommandResult.v ~build_hash cmdstr in
@@ -437,20 +444,8 @@ let process_publish_block (Obuilder.Store_spec.Store ((module Store), store))
 let translate_import_block ~uid block =
   match Block.kind block with
   | `Import ->
-      (* TODO: Support multi-import statements *)
-      let git_url, git_path = Block.imports block |> List.hd in
-      (* We just need to get the data into the OBuilder store -- HACK *)
-      let spec =
-        let open Obuilder_spec in
-        (* Choose better image, just need tools to import? *)
-        stage ~from:(`Image "alpine")
-          [
-            shell [ "/bin/sh"; "-c" ];
-            run ~network:[ "host" ] "apk add --no-cache git";
-            run ~network:[ "host" ] "mkdir -p /data && git clone %s %s" git_url
-              git_path;
-          ]
-      in
+      let spec, src_dir_opt = Block.import_spec block in
+      Logs.info (fun f -> f "import spec: %a" Obuilder_spec.pp spec);
       let body = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in
       let alias = Fmt.str "import-statement-%s" uid in
       let block = Block.build_or_run ~alias ~body `Build in
@@ -459,5 +454,5 @@ let translate_import_block ~uid block =
           ~info_string:(Fmt.str "shark-build:%s" alias, Cmarkit.Meta.none)
           (Cmarkit.Block_line.list_of_string body)
       in
-      (code_block, block)
+      ((code_block, block), src_dir_opt)
   | _ -> failwith "Expected Import Block"
diff --git a/src/lib/md.mli b/src/lib/md.mli
index 9a786300..49fc87a0 100644
--- a/src/lib/md.mli
+++ b/src/lib/md.mli
@@ -48,4 +48,9 @@ val process_publish_block :
   Cmarkit.Block.Code_block.t * Block.t
 
 val translate_import_block :
-  uid:string -> Block.t -> Cmarkit.Block.Code_block.t * Block.t
+  uid:string ->
+  Block.t ->
+  (Cmarkit.Block.Code_block.t * Block.t) * string option
+(** [translate_import_block uid block] generates an expanded code block
+    containing a shark-build spec that carries out the actual import when
+    evaluated. If the import is from the file system, the optional second
+    return value is the src_dir needed for the file system context when the
+    spec is evaluated. *)
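Note on the changed return type above: the optional string threads a build context directory back to the caller. A minimal sketch of the driver logic this enables, mirroring the fallback added in src/bin/main.ml:

```ocaml
(* Hypothetical driver: translate an import block, then fall back to the
   CLI-supplied src_dir when the import carries no context of its own. *)
let build_import ~uid ~src_dir block =
  let (code_block, blk), src_dir_opt =
    Shark.Md.translate_import_block ~uid block
  in
  let import_src_dir = Option.value ~default:src_dir src_dir_opt in
  (code_block, blk, import_src_dir)
```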
change" None src_dir + +let test_http_import_block () = + let body = "https://example.com/data/something.csv /data/src.csv" in + let block = Shark.Block.import body in + let expected = + [ ("https://example.com/data/something.csv", "/data/src.csv") ] + in + let test = + List.map + (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) + (Shark.Block.imports block) + in + Alcotest.(check (list (pair string string))) "Single import" expected test; + + let spec, src_dir = Shark.Block.import_spec block in + let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in + Alcotest.(check bool) + "Found git command" true + (Astring.String.is_infix ~affix:"curl -O" specbody); + Alcotest.(check (option string)) "No src_dir change" None src_dir + +let test_file_import_block_no_schema () = + let body = "/home/michael/file.csv /data/file.csv" in + let block = Shark.Block.import body in + let expected = [ ("/home/michael/file.csv", "/data/file.csv") ] in + let test = + List.map + (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) + (Shark.Block.imports block) + in + Alcotest.(check (list (pair string string))) "Single import" expected test; + + let spec, src_dir = Shark.Block.import_spec block in + let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in + Alcotest.(check bool) + "Found git command" true + (Astring.String.is_infix ~affix:"copy" specbody); + Alcotest.(check (option string)) + "Src_dir change" (Some "/home/michael") src_dir + +let test_file_import_block_with_schema () = + let body = "file:///home/michael/file.csv /data/file.csv" in + let block = Shark.Block.import body in + let expected = [ ("file:///home/michael/file.csv", "/data/file.csv") ] in + let test = + List.map + (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) + (Shark.Block.imports block) + in + Alcotest.(check (list (pair string string))) "Single import" expected test; + + let spec, src_dir = Shark.Block.import_spec block in + let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in + Alcotest.(check bool) + "Found git command" true + (Astring.String.is_infix ~affix:"copy" specbody); + Alcotest.(check (option string)) + "Src_dir change" (Some "/home/michael") src_dir + let tests = [ ("shark build block, no hash", `Quick, test_shark_build_block_no_hash); @@ -48,4 +128,12 @@ let tests = ("shark run block", `Quick, test_shark_run_block_no_hash); ("parsing multiple commands", `Quick, test_shark_run_multiple_commands); ("parsing multiline command", `Quick, test_shark_run_multiline_command); + ("parsing basic git import", `Quick, test_git_import_block); + ("parsing basic http import", `Quick, test_http_import_block); + ( "parsing basic file import no schema", + `Quick, + test_file_import_block_no_schema ); + ( "parsing basic file import with schema", + `Quick, + test_file_import_block_with_schema ); ] diff --git a/src/test/expect/test_dot.expected b/src/test/expect/test_dot.expected index 3a221e41..b763f234 100644 --- a/src/test/expect/test_dot.expected +++ b/src/test/expect/test_dot.expected @@ -1,93 +1,95 @@ digraph{ n0[shape="cylinder",label="/data/tmf/project_boundaries/123.geojson"]; - n1[shape="cylinder",label="/data/tmf/project_boundaries"]; - n2[shape="cylinder",label="/data/tmf/jrc/zips"]; - n3[shape="cylinder",label="/data/tmf/jrc/tif"]; - n5[shape="cylinder",label="/data/tmf/fcc-cpcs"]; - n7[shape="cylinder",label="/data/tmf/ecoregions/ecoregions.geojson"]; - n9[shape="cylinder",label="/data/tmf/ecoregions"]; - n11[shape="cylinder",label="/data/tmf/access/raw.tif"]; 
-	n13[shape="cylinder",label="/data/tmf/access"];
-	n15[shape="cylinder",label="/data/tmf/osm_borders.geojson"];
-	n17[shape="cylinder",label="/data/tmf/123/buffer.geojson"];
-	n19[shape="cylinder",label="/data/tmf/123/leakage.geojson"];
-	n21[shape="cylinder",label="/data/tmf/123/luc.tif"];
-	n23[shape="cylinder",label="/data/tmf/gedi"];
-	n26[shape="cylinder",label="/data/tmf/123/carbon-density.csv"];
-	n28[shape="cylinder",label="/data/tmf/123/country-list.json"];
-	n30[shape="cylinder",label="/data/tmf/123/matching-area.geojson"];
-	n32[shape="cylinder",label="/data/tmf/srtm/zip"];
-	n33[shape="cylinder",label="/data/tmf/srtm/tif"];
-	n35[shape="cylinder",label="/data/tmf/slopes"];
-	n37[shape="cylinder",label="/data/tmf/rescaled-elevation"];
-	n39[shape="cylinder",label="/data/tmf/rescaled-slopes"];
-	n41[shape="cylinder",label="/data/tmf/123/countries.tif"];
-	n43[shape="cylinder",label="/data/tmf/123/k.parquet"];
-	n45[shape="cylinder",label="/data/tmf/123/matches"];
-	n47[shape="cylinder",label="/data/tmf/123/matches.tif"];
-	n49[shape="cylinder",label="/data/tmf/123/matches.parquet"];
-	n51[shape="cylinder",label="/data/tmf/123/pairs"];
-	n53[shape="cylinder",label="/data/tmf/123/additionality.csv"];
+	n2[shape="cylinder",label="/data/tmf/project_boundaries"];
+	n4[shape="cylinder",label="/data/tmf/jrc/zips"];
+	n5[shape="cylinder",label="/data/tmf/jrc/tif"];
+	n7[shape="cylinder",label="/data/tmf/fcc-cpcs"];
+	n9[shape="cylinder",label="/data/tmf/ecoregions/ecoregions.geojson"];
+	n11[shape="cylinder",label="/data/tmf/ecoregions"];
+	n13[shape="cylinder",label="/data/tmf/access/raw.tif"];
+	n15[shape="cylinder",label="/data/tmf/access"];
+	n17[shape="cylinder",label="/data/tmf/osm_borders.geojson"];
+	n19[shape="cylinder",label="/data/tmf/123/buffer.geojson"];
+	n21[shape="cylinder",label="/data/tmf/123/leakage.geojson"];
+	n23[shape="cylinder",label="/data/tmf/123/luc.tif"];
+	n25[shape="cylinder",label="/data/tmf/gedi"];
+	n28[shape="cylinder",label="/data/tmf/123/carbon-density.csv"];
+	n30[shape="cylinder",label="/data/tmf/123/country-list.json"];
+	n32[shape="cylinder",label="/data/tmf/123/matching-area.geojson"];
+	n34[shape="cylinder",label="/data/tmf/srtm/zip"];
+	n35[shape="cylinder",label="/data/tmf/srtm/tif"];
+	n37[shape="cylinder",label="/data/tmf/slopes"];
+	n39[shape="cylinder",label="/data/tmf/rescaled-elevation"];
+	n41[shape="cylinder",label="/data/tmf/rescaled-slopes"];
+	n43[shape="cylinder",label="/data/tmf/123/countries.tif"];
+	n45[shape="cylinder",label="/data/tmf/123/k.parquet"];
+	n47[shape="cylinder",label="/data/tmf/123/matches"];
+	n49[shape="cylinder",label="/data/tmf/123/matches.tif"];
+	n51[shape="cylinder",label="/data/tmf/123/matches.parquet"];
+	n53[shape="cylinder",label="/data/tmf/123/pairs"];
+	n55[shape="cylinder",label="/data/tmf/123/additionality.csv"];
 
 subgraph "cluster_0" {
 	style = solid
-	label = "JRC"
-	n4[shape="box",label="methods.inputs.download_jrc_data"];
-	n4->n2[penwidth="2.0"];
-	n4->n3[penwidth="2.0"];
+	label = "imports"
+	n1[shape="cylinder",label="boundary"];
+	n1->n0[penwidth="2.0"];
+
+	n3[shape="cylinder",label="projects"];
+	n3->n2[penwidth="2.0"];
 }
 subgraph "cluster_1" {
 	style = solid
 	label = "JRC"
-	n3->n6[penwidth="2.0"];
-	n6[shape="box",label="methods.inputs.generate_fine_circular_coverage"];
+	n6[shape="box",label="methods.inputs.download_jrc_data"];
+	n6->n4[penwidth="2.0"];
 	n6->n5[penwidth="2.0"];
 }
 subgraph "cluster_2" {
 	style = solid
-	label = "Ecoregions"
n8[shape="box",label="methods.inputs.download_shapefiles"]; + label = "JRC" + n5->n8[penwidth="2.0"]; + n8[shape="box",label="methods.inputs.generate_fine_circular_coverage"]; n8->n7[penwidth="2.0"]; } subgraph "cluster_3" { style = solid label = "Ecoregions" - n7->n10[penwidth="2.0"]; - n3->n10[penwidth="2.0"]; - n10[shape="box",label="methods.inputs.generate_ecoregion_rasters"]; + n10[shape="box",label="methods.inputs.download_shapefiles"]; n10->n9[penwidth="2.0"]; } subgraph "cluster_4" { style = solid - label = "ACCESS" - n12[shape="box",label="methods.inputs.download_accessibility"]; + label = "Ecoregions" + n9->n12[penwidth="2.0"]; + n5->n12[penwidth="2.0"]; + n12[shape="box",label="methods.inputs.generate_ecoregion_rasters"]; n12->n11[penwidth="2.0"]; } subgraph "cluster_5" { style = solid label = "ACCESS" - n11->n14[penwidth="2.0"]; - n3->n14[penwidth="2.0"]; - n14[shape="box",label="methods.inputs.generate_access_tiles"]; + n14[shape="box",label="methods.inputs.download_accessibility"]; n14->n13[penwidth="2.0"]; } subgraph "cluster_6" { style = solid - label = "Country boarders" - n16[shape="box",label="methods.inputs.osm_countries"]; + label = "ACCESS" + n13->n16[penwidth="2.0"]; + n5->n16[penwidth="2.0"]; + n16[shape="box",label="methods.inputs.generate_access_tiles"]; n16->n15[penwidth="2.0"]; } subgraph "cluster_7" { style = solid - label = "Make variations on project shapes" - n0->n18[penwidth="2.0"]; - n18[shape="box",label="methods.inputs.generate_boundary"]; + label = "Country boarders" + n18[shape="box",label="methods.inputs.osm_countries"]; n18->n17[penwidth="2.0"]; } @@ -95,49 +97,48 @@ subgraph "cluster_8" { style = solid label = "Make variations on project shapes" n0->n20[penwidth="2.0"]; - n20[shape="box",label="methods.inputs.generate_leakage"]; + n20[shape="box",label="methods.inputs.generate_boundary"]; n20->n19[penwidth="2.0"]; } subgraph "cluster_9" { style = solid - label = "Make LUC tiles" - n17->n22[penwidth="2.0"]; - n3->n22[penwidth="2.0"]; - n22[shape="box",label="methods.inputs.generate_luc_layer"]; + label = "Make variations on project shapes" + n0->n22[penwidth="2.0"]; + n22[shape="box",label="methods.inputs.generate_leakage"]; n22->n21[penwidth="2.0"]; } subgraph "cluster_10" { style = solid - label = "GEDI data" - n17->n24[penwidth="2.0"]; - n24[shape="box",label="methods.inputs.download_gedi_data"]; + label = "Make LUC tiles" + n19->n24[penwidth="2.0"]; + n5->n24[penwidth="2.0"]; + n24[shape="box",label="methods.inputs.generate_luc_layer"]; n24->n23[penwidth="2.0"]; } subgraph "cluster_11" { style = solid label = "GEDI data" - n23->n25[penwidth="2.0"]; - n25[shape="box",label="methods.inputs.import_gedi_data"]; + n19->n26[penwidth="2.0"]; + n26[shape="box",label="methods.inputs.download_gedi_data"]; + n26->n25[penwidth="2.0"]; } subgraph "cluster_12" { style = solid label = "GEDI data" - n17->n27[penwidth="2.0"]; - n21->n27[penwidth="2.0"]; - n27[shape="box",label="methods.inputs.generate_carbon_density"]; - n27->n26[penwidth="2.0"]; + n25->n27[penwidth="2.0"]; + n27[shape="box",label="methods.inputs.import_gedi_data"]; } subgraph "cluster_13" { style = solid label = "GEDI data" - n0->n29[penwidth="2.0"]; - n15->n29[penwidth="2.0"]; - n29[shape="box",label="methods.inputs.generate_country_list"]; + n19->n29[penwidth="2.0"]; + n23->n29[penwidth="2.0"]; + n29[shape="box",label="methods.inputs.generate_carbon_density"]; n29->n28[penwidth="2.0"]; } @@ -145,84 +146,77 @@ subgraph "cluster_14" { style = solid label = "GEDI data" n0->n31[penwidth="2.0"]; - 
-	n28->n31[penwidth="2.0"];
-	n15->n31[penwidth="2.0"];
-	n7->n31[penwidth="2.0"];
-	n1->n31[penwidth="2.0"];
-	n31[shape="box",label="methods.inputs.generate_matching_area"];
+	n17->n31[penwidth="2.0"];
+	n31[shape="box",label="methods.inputs.generate_country_list"];
 	n31->n30[penwidth="2.0"];
 }
 subgraph "cluster_15" {
 	style = solid
-	label = "Elevation and slope data"
-	n0->n34[penwidth="2.0"];
-	n30->n34[penwidth="2.0"];
-	n34[shape="box",label="methods.inputs.download_srtm_data"];
-	n34->n32[penwidth="2.0"];
-	n34->n33[penwidth="2.0"];
+	label = "GEDI data"
+	n0->n33[penwidth="2.0"];
+	n30->n33[penwidth="2.0"];
+	n17->n33[penwidth="2.0"];
+	n9->n33[penwidth="2.0"];
+	n2->n33[penwidth="2.0"];
+	n33[shape="box",label="methods.inputs.generate_matching_area"];
+	n33->n32[penwidth="2.0"];
 }
 subgraph "cluster_16" {
 	style = solid
 	label = "Elevation and slope data"
-	n33->n36[penwidth="2.0"];
-	n36[shape="box",label="methods.inputs.generate_slope"];
+	n0->n36[penwidth="2.0"];
+	n32->n36[penwidth="2.0"];
+	n36[shape="box",label="methods.inputs.download_srtm_data"];
+	n36->n34[penwidth="2.0"];
 	n36->n35[penwidth="2.0"];
 }
 subgraph "cluster_17" {
 	style = solid
 	label = "Elevation and slope data"
-	n3->n38[penwidth="2.0"];
-	n33->n38[penwidth="2.0"];
-	n38[shape="box",label="methods.inputs.rescale_tiles_to_jrc"];
+	n35->n38[penwidth="2.0"];
+	n38[shape="box",label="methods.inputs.generate_slope"];
 	n38->n37[penwidth="2.0"];
-	n3->n40[penwidth="2.0"];
+}
+subgraph "cluster_18" {
+	style = solid
+	label = "Elevation and slope data"
+	n5->n40[penwidth="2.0"];
 	n35->n40[penwidth="2.0"];
 	n40[shape="box",label="methods.inputs.rescale_tiles_to_jrc"];
 	n40->n39[penwidth="2.0"];
-}
-subgraph "cluster_18" {
-	style = solid
-	label = "Country raster"
-	n3->n42[penwidth="2.0"];
-	n30->n42[penwidth="2.0"];
-	n15->n42[penwidth="2.0"];
-	n42[shape="box",label="methods.inputs.generate_country_raster"];
+	n5->n42[penwidth="2.0"];
+	n37->n42[penwidth="2.0"];
+	n42[shape="box",label="methods.inputs.rescale_tiles_to_jrc"];
 	n42->n41[penwidth="2.0"];
 }
 subgraph "cluster_19" {
 	style = solid
-	label = "Calculate set K"
-	n0->n44[penwidth="2.0"];
-	n3->n44[penwidth="2.0"];
+	label = "Country raster"
 	n5->n44[penwidth="2.0"];
-	n9->n44[penwidth="2.0"];
-	n37->n44[penwidth="2.0"];
-	n39->n44[penwidth="2.0"];
-	n13->n44[penwidth="2.0"];
-	n41->n44[penwidth="2.0"];
-	n44[shape="box",label="methods.matching.calculate_k"];
+	n32->n44[penwidth="2.0"];
+	n17->n44[penwidth="2.0"];
+	n44[shape="box",label="methods.inputs.generate_country_raster"];
 	n44->n43[penwidth="2.0"];
 }
 subgraph "cluster_20" {
 	style = solid
-	label = "Calculate set M"
-	n43->n46[penwidth="2.0"];
-	n30->n46[penwidth="2.0"];
-	n3->n46[penwidth="2.0"];
+	label = "Calculate set K"
+	n0->n46[penwidth="2.0"];
 	n5->n46[penwidth="2.0"];
-	n9->n46[penwidth="2.0"];
-	n37->n46[penwidth="2.0"];
+	n7->n46[penwidth="2.0"];
+	n11->n46[penwidth="2.0"];
 	n39->n46[penwidth="2.0"];
-	n13->n46[penwidth="2.0"];
 	n41->n46[penwidth="2.0"];
-	n46[shape="box",label="methods.matching.find_potential_matches"];
+	n15->n46[penwidth="2.0"];
+	n43->n46[penwidth="2.0"];
+	n46[shape="box",label="methods.matching.calculate_k"];
 	n46->n45[penwidth="2.0"];
 }
 
@@ -230,7 +224,15 @@
 subgraph "cluster_21" {
 	style = solid
 	label = "Calculate set M"
 	n45->n48[penwidth="2.0"];
-	n48[shape="box",label="methods.matching.build_m_raster"];
+	n32->n48[penwidth="2.0"];
+	n5->n48[penwidth="2.0"];
+	n7->n48[penwidth="2.0"];
+	n11->n48[penwidth="2.0"];
+	n39->n48[penwidth="2.0"];
+	n41->n48[penwidth="2.0"];
+	n15->n48[penwidth="2.0"];
+	n43->n48[penwidth="2.0"];
+	n48[shape="box",label="methods.matching.find_potential_matches"];
 	n48->n47[penwidth="2.0"];
 }
 
@@ -238,35 +240,43 @@
 subgraph "cluster_22" {
 	style = solid
 	label = "Calculate set M"
 	n47->n50[penwidth="2.0"];
-	n30->n50[penwidth="2.0"];
-	n3->n50[penwidth="2.0"];
-	n5->n50[penwidth="2.0"];
-	n9->n50[penwidth="2.0"];
-	n37->n50[penwidth="2.0"];
-	n39->n50[penwidth="2.0"];
-	n13->n50[penwidth="2.0"];
-	n41->n50[penwidth="2.0"];
-	n50[shape="box",label="methods.matching.build_m_table"];
+	n50[shape="box",label="methods.matching.build_m_raster"];
 	n50->n49[penwidth="2.0"];
 }
 subgraph "cluster_23" {
 	style = solid
-	label = "Find pairs"
-	n43->n52[penwidth="2.0"];
+	label = "Calculate set M"
 	n49->n52[penwidth="2.0"];
+	n32->n52[penwidth="2.0"];
+	n5->n52[penwidth="2.0"];
+	n7->n52[penwidth="2.0"];
+	n11->n52[penwidth="2.0"];
+	n39->n52[penwidth="2.0"];
+	n41->n52[penwidth="2.0"];
+	n15->n52[penwidth="2.0"];
+	n43->n52[penwidth="2.0"];
-	n52[shape="box",label="methods.matching.find_pairs"];
+	n52[shape="box",label="methods.matching.build_m_table"];
 	n52->n51[penwidth="2.0"];
 }
 subgraph "cluster_24" {
 	style = solid
-	label = "Calculate additionality"
-	n0->n54[penwidth="2.0"];
-	n26->n54[penwidth="2.0"];
+	label = "Find pairs"
+	n45->n54[penwidth="2.0"];
 	n51->n54[penwidth="2.0"];
-	n54[shape="box",label="methods.outputs.calculate_additionality"];
+	n54[shape="box",label="methods.matching.find_pairs"];
 	n54->n53[penwidth="2.0"];
+}
+subgraph "cluster_25" {
+	style = solid
+	label = "Calculate additionality"
+	n0->n56[penwidth="2.0"];
+	n28->n56[penwidth="2.0"];
+	n53->n56[penwidth="2.0"];
+	n56[shape="box",label="methods.outputs.calculate_additionality"];
+	n56->n55[penwidth="2.0"];
+
 }
 }
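Finally, an end-to-end sketch in the style of src/test/ast.ml. The frontmatter key layout is an assumption (it is not shown in this diff), so treat this as illustrative only:

```ocaml
(* Hypothetical: a document declaring an input, combined with a -i style
   override, should gain a synthesized shark-import block in the returned
   markdown. The "inputs:" YAML shape below is assumed, not confirmed. *)
let () =
  let template_markdown =
    "---\ninputs:\n  boundary: /data/tmf/project_boundaries/123.geojson\n\
     ---\n# Analysis\n\nSome text.\n"
  in
  let _ast, markdown =
    Shark.Ast.of_sharkdown
      ~concrete_paths:[ ("boundary", Fpath.v "/home/user/123.geojson") ]
      template_markdown
  in
  assert (Astring.String.is_infix ~affix:"shark-import" markdown)
```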