From 8014d8b0c0938d1ad76911aa8c7256d81fcf7496 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 17:03:03 +0530 Subject: [PATCH 01/24] create lib_ccxr and libccxr_exports --- .gitignore | 3 +- src/rust/Cargo.lock | 144 +++++++++++++++++----------- src/rust/Cargo.toml | 1 + src/rust/lib_ccxr/Cargo.lock | 7 ++ src/rust/lib_ccxr/Cargo.toml | 17 ++++ src/rust/lib_ccxr/src/lib.rs | 1 + src/rust/lib_ccxr/src/util/mod.rs | 1 + src/rust/src/lib.rs | 1 + src/rust/src/libccxr_exports/mod.rs | 1 + 9 files changed, 117 insertions(+), 59 deletions(-) create mode 100644 src/rust/lib_ccxr/Cargo.lock create mode 100644 src/rust/lib_ccxr/Cargo.toml create mode 100644 src/rust/lib_ccxr/src/lib.rs create mode 100644 src/rust/lib_ccxr/src/util/mod.rs create mode 100644 src/rust/src/libccxr_exports/mod.rs diff --git a/.gitignore b/.gitignore index a1ef235b5..3f8a2ed82 100644 --- a/.gitignore +++ b/.gitignore @@ -149,7 +149,8 @@ src/rust/CMakeCache.txt src/rust/Makefile src/rust/cmake_install.cmake src/rust/target/ +src/rust/lib_ccxr/target/ windows/ccx_rust.lib windows/*/debug/* windows/*/CACHEDIR.TAG -windows/.rustc_info.json \ No newline at end of file +windows/.rustc_info.json diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 89013e51f..d42a787f1 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a" dependencies = [ "memchr", ] @@ -99,9 +99,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "camino" -version = "1.1.4" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c530edf18f37068ac2d977409ed5cd50d53d73bc653c7647b48eb78976ac9ae2" 
+checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" [[package]] name = "ccx_rust" @@ -111,6 +111,7 @@ dependencies = [ "env_logger", "iconv", "leptonica-sys", + "lib_ccxr", "log", "palette", "rsmpeg", @@ -123,7 +124,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" dependencies = [ - "nom 5.1.2", + "nom 5.1.3", ] [[package]] @@ -143,9 +144,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clang-sys" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" dependencies = [ "glob", "libc", @@ -175,9 +176,9 @@ checksum = "74c57ab96715773d9cb9789b38eb7cbf04b3c6f5624a9d98f51761603376767c" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "env_logger" @@ -246,20 +247,24 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "leptonica-sys" -version = "0.4.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "811a92997ff15e0d7323c1e8fa7190331dd02ea50d9d7cfaa4fdc2b21a613a2e" +checksum = "eff3f1dc2f0112411228f8db99ca8a6a1157537a7887b28b1c91fdc4051fb326" dependencies = [ "bindgen 0.64.0", "pkg-config", "vcpkg", ] +[[package]] +name = "lib_ccxr" +version = "0.1.0" + [[package]] name = "libc" -version = "0.2.140" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libloading" @@ -273,12 +278,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "memchr" @@ -294,9 +296,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "nom" -version = "5.1.2" +version = "5.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" dependencies = [ "memchr", "version_check", @@ -314,18 +316,18 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "palette" @@ -353,9 +355,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = 
"de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "peeking_take_while" @@ -365,9 +367,9 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "phf" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ "phf_macros", "phf_shared", @@ -375,9 +377,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", "rand", @@ -385,46 +387,46 @@ dependencies = [ [[package]] name = "phf_macros" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" dependencies = [ "phf_generator", "phf_shared", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.29", ] [[package]] name = "phf_shared" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = 
"proc-macro2" -version = "1.0.52" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -446,9 +448,21 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" [[package]] name = "regex" -version = "1.7.1" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" dependencies = [ "aho-corasick", "memchr", @@ -457,9 +471,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "rsmpeg" @@ -495,9 +509,23 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.158" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + 
"serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "771d4d9c4163ee138805e12c710dd365e4f44be8be0503cb1bb9eb989425d9c9" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] [[package]] name = "shlex" @@ -507,9 +535,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "siphasher" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "strsim" @@ -530,9 +558,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.4" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c622ae390c9302e214c31013517c2061ecb2699935882c60a9b37f82f8625ae" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ "proc-macro2", "quote", @@ -571,22 +599,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.4", + "syn 2.0.29", ] [[package]] @@ -600,9 +628,9 @@ dependencies = [ [[package]] name = 
"unicode-ident" -version = "1.0.8" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index e8edc2096..f736deb3a 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -18,6 +18,7 @@ palette = "0.6.0" rsmpeg = { version = "0.14.1", optional = true, features = ["link_system_ffmpeg"] } tesseract-sys = { version = "0.5.14", optional = true, default-features = false} leptonica-sys = { version = "0.4.3", optional = true, default-features = false} +lib_ccxr = { path = "lib_ccxr" } [build-dependencies] bindgen = "0.58.1" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock new file mode 100644 index 000000000..7532d4515 --- /dev/null +++ b/src/rust/lib_ccxr/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "lib_ccxr" +version = "0.1.0" diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml new file mode 100644 index 000000000..ca3612505 --- /dev/null +++ b/src/rust/lib_ccxr/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "lib_ccxr" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[features] +default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] +enable_sharing = [] +wtv_debug = [] +enable_ffmpeg = [] +debug_out = [] +debug = [] +with_libcurl = [] diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs new file mode 100644 index 000000000..812d1edf2 --- /dev/null +++ b/src/rust/lib_ccxr/src/lib.rs @@ -0,0 +1 @@ +pub mod util; diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs new file mode 100644 index 000000000..daf5935ac --- /dev/null +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -0,0 +1 @@ +//! Provides basic utilities used throughout the program. diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index a76faa419..1fec8fa5e 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -15,6 +15,7 @@ pub mod bindings { pub mod decoder; #[cfg(feature = "hardsubx_ocr")] pub mod hardsubx; +pub mod libccxr_exports; pub mod utils; #[cfg(windows)] diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs new file mode 100644 index 000000000..e365e0fb2 --- /dev/null +++ b/src/rust/src/libccxr_exports/mod.rs @@ -0,0 +1 @@ +//! Provides C-FFI functions that are direct equivalent of functions available in C. 
From 4f0dc69673ed52f683b22e19f8b3e2c57f66dea2 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 17:45:59 +0530 Subject: [PATCH 02/24] add bits and levenshtein module --- src/lib_ccx/utility.c | 18 +++ src/rust/Cargo.lock | 12 ++ src/rust/lib_ccxr/Cargo.lock | 18 +++ src/rust/lib_ccxr/Cargo.toml | 1 + src/rust/lib_ccxr/src/util/bits.rs | 153 ++++++++++++++++++++++ src/rust/lib_ccxr/src/util/c_functions.rs | 19 +++ src/rust/lib_ccxr/src/util/levenshtein.rs | 27 ++++ src/rust/lib_ccxr/src/util/mod.rs | 18 +++ src/rust/src/libccxr_exports/mod.rs | 61 +++++++++ 9 files changed, 327 insertions(+) create mode 100644 src/rust/lib_ccxr/src/util/bits.rs create mode 100644 src/rust/lib_ccxr/src/util/c_functions.rs create mode 100644 src/rust/lib_ccxr/src/util/levenshtein.rs diff --git a/src/lib_ccx/utility.c b/src/lib_ccx/utility.c index cb3cb6152..c110a1ec3 100644 --- a/src/lib_ccx/utility.c +++ b/src/lib_ccx/utility.c @@ -9,6 +9,12 @@ int temp_debug = 0; // This is a convenience variable used to enable/disable debug on variable conditions. Find references to understand. 
volatile sig_atomic_t change_filename_requested = 0; +#ifndef DISABLE_RUST +extern int ccxr_verify_crc32(uint8_t *buf, int len); +extern int ccxr_levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, unsigned s2len); +extern int ccxr_levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsigned s2len); +#endif + static uint32_t crc32_table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, @@ -77,6 +83,10 @@ static uint32_t crc32_table[] = { int verify_crc32(uint8_t *buf, int len) { +#ifndef DISABLE_RUST + return ccxr_verify_crc32(buf, len); +#endif + int i = 0; int32_t crc = -1; for (i = 0; i < len; i++) @@ -151,6 +161,10 @@ void timestamp_to_vtttime(uint64_t timestamp, char *buffer) int levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, unsigned s2len) { +#ifndef DISABLE_RUST + return ccxr_levenshtein_dist(s1, s2, s1len, s2len); +#endif + unsigned int x, y, v, lastdiag, olddiag; unsigned int *column = (unsigned *)malloc((s1len + 1) * sizeof(unsigned int)); for (y = 1; y <= s1len; y++) @@ -172,6 +186,10 @@ int levenshtein_dist(const uint64_t *s1, const uint64_t *s2, unsigned s1len, uns int levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsigned s2len) { +#ifndef DISABLE_RUST + return ccxr_levenshtein_dist_char(s1, s2, s1len, s2len); +#endif + unsigned int x, y, v, lastdiag, olddiag; unsigned int *column = (unsigned *)malloc((s1len + 1) * sizeof(unsigned int)); for (y = 1; y <= s1len; y++) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d42a787f1..fec82b594 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -168,6 +168,15 @@ dependencies = [ "vec_map", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "dyn_buf" 
version = "0.1.0" @@ -259,6 +268,9 @@ dependencies = [ [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "crc32fast", +] [[package]] name = "libc" diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 7532d4515..facf9ae64 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -2,6 +2,24 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "crc32fast", +] diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index ca3612505..6b6fe8164 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +crc32fast = "1.3.2" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/bits.rs b/src/rust/lib_ccxr/src/util/bits.rs new file mode 100644 index 000000000..9fbe7c7c1 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/bits.rs @@ -0,0 +1,153 @@ +#[rustfmt::skip] +const PARITY_TABLE: [bool; 256] = [ + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, 
true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, + false, true, true, false, true, false, false, true, + false, true, true, false, true, false, false, true, + true, false, false, true, false, true, true, false, +]; + +const BIT_REVERSE_TABLE: [u8; 256] = [ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 
0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +]; + +const HAMMING_8_4_DECODER_TABLE: [u8; 256] = [ + 0x01, 0xff, 0x01, 0x01, 0xff, 0x00, 0x01, 0xff, 0xff, 0x02, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x07, + 0xff, 0x00, 0x01, 0xff, 0x00, 0x00, 0xff, 0x00, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x03, 0xff, + 0xff, 0x0c, 0x01, 0xff, 0x04, 0xff, 0xff, 0x07, 0x06, 0xff, 0xff, 0x07, 0xff, 0x07, 0x07, 0x07, + 0x06, 0xff, 0xff, 0x05, 0xff, 0x00, 0x0d, 0xff, 0x06, 0x06, 0x06, 0xff, 0x06, 0xff, 0xff, 0x07, + 0xff, 0x02, 0x01, 0xff, 0x04, 0xff, 0xff, 0x09, 0x02, 0x02, 0xff, 0x02, 0xff, 0x02, 0x03, 0xff, + 0x08, 0xff, 0xff, 0x05, 0xff, 0x00, 0x03, 0xff, 0xff, 0x02, 0x03, 0xff, 0x03, 0xff, 0x03, 0x03, + 0x04, 0xff, 0xff, 0x05, 0x04, 0x04, 0x04, 0xff, 0xff, 0x02, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x07, + 0xff, 0x05, 0x05, 0x05, 0x04, 0xff, 0xff, 0x05, 0x06, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x03, 0xff, + 0xff, 0x0c, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x09, 0x0a, 0xff, 0xff, 0x0b, 0x0a, 0x0a, 0x0a, 0xff, + 0x08, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x0d, 0xff, 0xff, 0x0b, 0x0b, 0x0b, 0x0a, 0xff, 0xff, 
0x0b, + 0x0c, 0x0c, 0xff, 0x0c, 0xff, 0x0c, 0x0d, 0xff, 0xff, 0x0c, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x07, + 0xff, 0x0c, 0x0d, 0xff, 0x0d, 0xff, 0x0d, 0x0d, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x0d, 0xff, + 0x08, 0xff, 0xff, 0x09, 0xff, 0x09, 0x09, 0x09, 0xff, 0x02, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x09, + 0x08, 0x08, 0x08, 0xff, 0x08, 0xff, 0xff, 0x09, 0x08, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x03, 0xff, + 0xff, 0x0c, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x09, 0x0f, 0xff, 0x0f, 0x0f, 0xff, 0x0e, 0x0f, 0xff, + 0x08, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x0d, 0xff, 0xff, 0x0e, 0x0f, 0xff, 0x0e, 0x0e, 0xff, 0x0e, +]; + +/// Returns the parity of the given byte. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(parity(0x00), false); +/// assert_eq!(parity(0x01), true); +/// ``` +pub fn parity(value: u8) -> bool { + PARITY_TABLE[value as usize] +} + +/// Returns the input byte with the order of its bits reversed. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(reverse(0x00), 0x00); +/// assert_eq!(reverse(0x01), 0x80); +/// ``` +pub fn reverse(value: u8) -> u8 { + BIT_REVERSE_TABLE[value as usize] +} + +/// Returns the decoded byte given a \[8,4\] hamming code byte if no error is present. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(decode_hamming_8_4(0x00), Some(0x01)); +/// assert_eq!(decode_hamming_8_4(0x01), None); +/// ``` +pub fn decode_hamming_8_4(value: u8) -> Option { + // ETS 300 706, chapter 8.2 + let decoded = HAMMING_8_4_DECODER_TABLE[value as usize]; + if decoded == 0xff { + None + } else { + Some(decoded) + } +} + +/// Returns the decoded byte given a \[24,18\] hamming code byte if no error is present.
+/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::*; +/// assert_eq!(decode_hamming_24_18(0x00000000), Some(0x00000000)); +/// assert_eq!(decode_hamming_24_18(0x00000001), None); +/// ``` +pub fn decode_hamming_24_18(mut value: u32) -> Option { + // ETS 300 706, chapter 8.3 + let mut test: u8 = 0; + + // Tests A-F correspond to bits 0-5 respectively in 'test'. + for i in 0..23 { + test ^= (((value >> i) & 0x01) as u8) * (i + 33); + } + + // Only parity bit is tested for bit 24 + test ^= (((value >> 23) & 0x01) as u8) * 32u8; + + if (test & 0x1f) != 0x1f { + // Not all tests A-E correct + if (test & 0x20) == 0x20 { + // F correct: Double error + return None; + } + // Test F incorrect: Single error + value ^= 1 << (30 - test); + } + + Some( + (value & 0x000004) >> 2 + | (value & 0x000070) >> 3 + | (value & 0x007f00) >> 4 + | (value & 0x7f0000) >> 5, + ) +} diff --git a/src/rust/lib_ccxr/src/util/c_functions.rs b/src/rust/lib_ccxr/src/util/c_functions.rs new file mode 100644 index 000000000..e3fcc7923 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/c_functions.rs @@ -0,0 +1,19 @@ +//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. + +use super::*; +use crc32fast::hash; + +/// Rust equivalent for `verify_crc32` function in C. Uses Rust-native types as input and output. +pub fn verify_crc32(buf: &[u8]) -> bool { + hash(buf) == 0 +} + +/// Rust equivalent for `levenshtein_dist` function in C. Uses Rust-native types as input and output. +pub fn levenshtein_dist(s1: &[u64], s2: &[u64]) -> usize { + levenshtein(s1, s2) +} + +/// Rust equivalent for `levenshtein_dist_char` function in C. Uses Rust-native types as input and output.
+pub fn levenshtein_dist_char(s1: &[T], s2: &[T]) -> usize { + levenshtein(s1, s2) +} diff --git a/src/rust/lib_ccxr/src/util/levenshtein.rs b/src/rust/lib_ccxr/src/util/levenshtein.rs new file mode 100644 index 000000000..0e58049d7 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/levenshtein.rs @@ -0,0 +1,27 @@ +use std::cmp::min; + +/// Calculates the levenshtein distance between two slices. +/// +/// # Examples +/// ```rust +/// # use lib_ccxr::util::levenshtein; +/// assert_eq!(levenshtein(&[1,2,3,4,5], &[1,3,2,4,5,6]), 3); +/// ``` +pub fn levenshtein(a: &[T], b: &[T]) -> usize { + let mut column: Vec = (0..).take(a.len() + 1).collect(); + + for x in 1..=b.len() { + column[0] = x; + let mut lastdiag = x - 1; + for y in 1..=a.len() { + let olddiag = column[y]; + column[y] = min( + min(column[y] + 1, column[y - 1] + 1), + lastdiag + (if a[y - 1] == b[x - 1] { 0 } else { 1 }), + ); + lastdiag = olddiag; + } + } + + column[a.len()] +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index daf5935ac..7e787762e 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -1 +1,19 @@ //! Provides basic utilities used throughout the program. +//! +//! # Conversion Guide +//! +//! | From | To | +//! |--------------------------------------------|--------------------------------| +//! | `PARITY_8` | [`parity`] | +//! | `REVERSE_8` | [`reverse`] | +//! | `UNHAM_8_4` | [`decode_hamming_8_4`] | +//! | `unham_24_18` | [`decode_hamming_24_18`] | +//! | `levenshtein_dist`, `levenshtein_dist_char` | [`levenshtein`](levenshtein()) | + +mod bits; +mod levenshtein; + +pub mod c_functions; + +pub use bits::*; +pub use levenshtein::*; diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index e365e0fb2..7ef56432f 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -1 +1,62 @@ //!
Provides C-FFI functions that are direct equivalent of functions available in C. + +use lib_ccxr::util::c_functions::*; +use std::convert::TryInto; +use std::os::raw::{c_char, c_int, c_uint}; + +/// Rust equivalent for `verify_crc32` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `buf` should not be a NULL pointer and the length of buffer pointed by `buf` should be equal to +/// or greater than `len`. +#[no_mangle] +pub unsafe extern "C" fn ccxr_verify_crc32(buf: *const u8, len: c_int) -> c_int { + let buf = std::slice::from_raw_parts(buf, len as usize); + if verify_crc32(buf) { + 1 + } else { + 0 + } +} + +/// Rust equivalent for `levenshtein_dist` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s1` and `s2` must be valid slices of data with lengths of `s1len` and `s2len` respectively. +#[no_mangle] +pub unsafe extern "C" fn ccxr_levenshtein_dist( + s1: *const u64, + s2: *const u64, + s1len: c_uint, + s2len: c_uint, +) -> c_int { + let s1 = std::slice::from_raw_parts(s1, s1len.try_into().unwrap()); + let s2 = std::slice::from_raw_parts(s2, s2len.try_into().unwrap()); + + let ans = levenshtein_dist(s1, s2); + + ans.try_into().unwrap() +} + +/// Rust equivalent for `levenshtein_dist_char` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s1` and `s2` must be valid slices of data and therefore not be null. They must have lengths +/// of `s1len` and `s2len` respectively.
+#[no_mangle] +pub unsafe extern "C" fn ccxr_levenshtein_dist_char( + s1: *const c_char, + s2: *const c_char, + s1len: c_uint, + s2len: c_uint, +) -> c_int { + let s1 = std::slice::from_raw_parts(s1, s1len.try_into().unwrap()); + let s2 = std::slice::from_raw_parts(s2, s2len.try_into().unwrap()); + + let ans = levenshtein_dist_char(s1, s2); + + ans.try_into().unwrap() +} From 8f348fe2d9dc0ca382542e5d5ec6537b8a995116 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 18:20:30 +0530 Subject: [PATCH 03/24] add log module --- src/lib_ccx/lib_ccx.c | 8 + src/rust/Cargo.lock | 15 +- src/rust/build.rs | 1 + src/rust/lib_ccxr/Cargo.lock | 9 + src/rust/lib_ccxr/Cargo.toml | 1 + src/rust/lib_ccxr/src/util/log.rs | 546 ++++++++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 2 + src/rust/src/lib.rs | 1 + src/rust/src/libccxr_exports/mod.rs | 31 ++ 9 files changed, 611 insertions(+), 3 deletions(-) create mode 100644 src/rust/lib_ccxr/src/util/log.rs diff --git a/src/lib_ccx/lib_ccx.c b/src/lib_ccx/lib_ccx.c index 74a0bc88a..48a0ed7a6 100644 --- a/src/lib_ccx/lib_ccx.c +++ b/src/lib_ccx/lib_ccx.c @@ -6,6 +6,10 @@ #include "ccx_decoders_708.h" #include "ccx_decoders_isdb.h" +#ifndef DISABLE_RUST +extern void ccxr_init_basic_logger(); +#endif + struct ccx_common_logging_t ccx_common_logging; static struct ccx_decoders_common_settings_t *init_decoder_setting( struct ccx_s_options *opt) @@ -100,6 +104,10 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt) ccx_common_logging.log_ftn = &mprint; ccx_common_logging.gui_ftn = &activity_library_process; +#ifndef DISABLE_RUST + ccxr_init_basic_logger(); +#endif + struct lib_ccx_ctx *ctx = malloc(sizeof(struct lib_ccx_ctx)); if (!ctx) ccx_common_logging.fatal_ftn(EXIT_NOT_ENOUGH_MEMORY, "init_libraries: Not enough memory allocating lib_ccx_ctx context."); diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d42a787f1..cf9f55bdf 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ 
-52,7 +52,7 @@ version = "0.58.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8523b410d7187a43085e7e064416ea32ded16bd0a4e6fc025e21616d01258f" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr 0.4.0", "clang-sys", "clap", @@ -75,7 +75,7 @@ version = "0.64.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cexpr 0.6.0", "clang-sys", "lazy_static", @@ -97,6 +97,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "camino" version = "1.1.6" @@ -161,7 +167,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim", "textwrap", "unicode-width", @@ -259,6 +265,9 @@ dependencies = [ [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "bitflags 2.4.0", +] [[package]] name = "libc" diff --git a/src/rust/build.rs b/src/rust/build.rs index f8ecc04c8..4df835845 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -26,6 +26,7 @@ fn main() { "lib_cc_decode", "cc_subtitle", "ccx_output_format", + "ccx_s_options", ]); #[cfg(feature = "hardsubx_ocr")] diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 7532d4515..d7ac327fc 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "bitflags", +] diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index ca3612505..fb032a7ce 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bitflags = "2.3.1" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/log.rs b/src/rust/lib_ccxr/src/util/log.rs new file mode 100644 index 000000000..13c7870c4 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/log.rs @@ -0,0 +1,546 @@ +//! Provides primitives for logging functionality +//! +//! The interface of this module is highly inspired by the famous log crate of rust. +//! +//! The first step before using any of the logging functionality is to setup a logger. This can be +//! done by creating a [`CCExtractorLogger`] and calling [`set_logger`] with it. To gain access to +//! the instance of [`CCExtractorLogger`], [`logger`] or [`logger_mut`] can be used. +//! +//! There are 4 types of logging messages based on its importance and severity denoted by their +//! respective macros. +//! - [`fatal!`] +//! - [`error!`] +//! - [`info!`] +//! - [`debug!`] +//! +//! Hex dumps can be logged for debugging by [`hex_dump`] and [`hex_dump_with_start_idx`]. Communication +//! with the GUI is possible through [`send_gui`]. +//! +//! # Conversion Guide +//! +//! | From | To | +//! |------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------| +//! 
| `EXIT_*`, `CCX_COMMON_EXIT_*` | [`ExitCause`] | +//! | `CCX_MESSAGES_*` | [`OutputTarget`] | +//! | `CCX_DMT_*`, `ccx_debug_message_types` | [`DebugMessageFlag`] | +//! | `temp_debug`, `ccx_options.debug_mask`, `ccx_options.debug_mask_on_debug` | [`DebugMessageMask`] | +//! | `ccx_options.messages_target`, `temp_debug`, `ccx_options.debug_mask`, `ccx_options.debug_mask_on_debug`, `ccx_options.gui_mode_reports` | [`CCExtractorLogger`] | +//! | `fatal`, `ccx_common_logging.fatal_ftn` | [`fatal!`] | +//! | `mprint`, `ccx_common_logging.log_ftn` | [`info!`] | +//! | `dbg_print`, `ccx_common_logging.debug_ftn` | [`debug!`] | +//! | `activity_library_process`, `ccx_common_logging.gui_ftn` | [`send_gui`] | +//! | `dump` | [`hex_dump`] | +//! | `dump` | [`hex_dump_with_start_idx`] | + +use bitflags::bitflags; +use std::fmt::Arguments; +use std::sync::{OnceLock, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +static LOGGER: OnceLock> = OnceLock::new(); + +/// The possible targets for logging messages. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OutputTarget { + Stdout, + Stderr, + Quiet, +} + +bitflags! { + /// A bitflag for the types of a Debug Message. + /// + /// Each debug message can belong to one or more of these types. The + /// constants of this struct can be used as bitflags for one message to + /// belong to more than one type. 
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct DebugMessageFlag: u16 { + /// Show information related to parsing the container + const PARSE = 0b0000000000000001; + /// Show video stream related information + const VIDEO_STREAM = 0b0000000000000010; + /// Show GOP and PTS timing information + const TIME = 0b0000000000000100; + /// Show lots of debugging output + const VERBOSE = 0b0000000000001000; + /// Show CC-608 decoder debug + const DECODER_608 = 0b0000000000010000; + /// Show CC-708 decoder debug + const DECODER_708 = 0b0000000000100000; + /// Show XDS decoder debug + const DECODER_XDS = 0b0000000001000000; + /// Show Caption blocks with FTS timing + const CB_RAW = 0b0000000010000000; + /// Generic, always displayed even if no debug is selected + const GENERIC_NOTICE = 0b0000000100000000; + /// Show teletext debug + const TELETEXT = 0b0000001000000000; + /// Show Program Allocation Table dump + const PAT = 0b0000010000000000; + /// Show Program Map Table dump + const PMT = 0b0000100000000000; + /// Show Levenshtein distance calculations + const LEVENSHTEIN = 0b0001000000000000; + /// Show DVB debug + const DVB = 0b0010000000000000; + /// Dump defective TS packets + const DUMP_DEF = 0b0100000000000000; + /// Extracted captions sharing service + #[cfg(feature = "enable_sharing")] + const SHARE = 0b1000000000000000; + } +} + +/// All possible causes for crashing the program instantly. Used in `cause` key of [`fatal!`] macro. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ExitCause { + Ok, + Failure, + NoInputFiles, + TooManyInputFiles, + IncompatibleParameters, + UnableToDetermineFileSize, + MalformedParameter, + ReadError, + NoCaptions, + WithHelp, + NotClassified, + ErrorInCapitalizationFile, + BufferFull, + MissingAsfHeader, + MissingRcwtHeader, + + FileCreationFailed, + Unsupported, + NotEnoughMemory, + Bug, +} + +/// A message to be sent to GUI for XDS. Used in [`send_gui`]. 
+pub enum GuiXdsMessage<'a> { + ProgramName(&'a str), + ProgramIdNr { + minute: u8, + hour: u8, + date: u8, + month: u8, + }, + ProgramDescription { + line_num: i32, + desc: &'a str, + }, + CallLetters(&'a str), +} + +/// A mask to filter the debug messages based on their type specified by [`DebugMessageFlag`]. +/// +/// This operates in one of two modes: Normal Mode and Debug Mode. The mask used when in Debug Mode is a superset +/// of the mask used when in Normal Mode. One can switch between the two modes by [`DebugMessageMask::set_debug_mode`]. +#[derive(Debug)] +pub struct DebugMessageMask { + debug_mode: bool, + mask_on_normal: DebugMessageFlag, + mask_on_debug: DebugMessageFlag, +} + +/// A global logger used throughout CCExtractor that stores the settings related to logging. +/// +/// A global logger can be set up initially using [`set_logger`]. Use the following convenience +/// macros for logging: [`fatal!`], [`error!`], [`info!`] and [`debug!`]. +#[derive(Debug)] +pub struct CCExtractorLogger { + target: OutputTarget, + debug_mask: DebugMessageMask, + gui_mode: bool, +} + +impl DebugMessageMask { + /// Creates a new [`DebugMessageMask`] given a mask to be used for Normal Mode and an additional mask to be + /// used in Debug Mode. + /// + /// Note that while in Debug Mode, the mask for Normal Mode will still be valid. + /// `extra_mask_on_debug` only specifies additional flags to be set on Debug Mode. + pub const fn new( + mask_on_normal: DebugMessageFlag, + extra_mask_on_debug: DebugMessageFlag, + ) -> DebugMessageMask { + DebugMessageMask { + debug_mode: false, + mask_on_normal, + mask_on_debug: extra_mask_on_debug.union(mask_on_normal), + } + } + + /// Set the mode to Normal or Debug Mode based on `false` or `true` respectively. + pub fn set_debug_mode(&mut self, mode: bool) { + self.debug_mode = mode; + } + + /// Check if the current mode is set to Debug Mode. 
+ pub fn debug_mode(&self) -> bool { + self.debug_mode + } + + /// Return the mask according to its mode. + pub fn mask(&self) -> DebugMessageFlag { + if self.debug_mode { + self.mask_on_debug + } else { + self.mask_on_normal + } + } +} + +impl ExitCause { + /// Returns the exit code associated with the cause of the error. + /// + /// The GUI depends on these exit codes. + /// Exit code of 0 means OK as usual. + /// Exit code below 100 means display whatever was output to stderr as a warning. + /// Exit code above or equal to 100 means display whatever was output to stdout as an error. + pub fn exit_code(&self) -> i32 { + match self { + ExitCause::Ok => 0, + ExitCause::Failure => 1, + ExitCause::NoInputFiles => 2, + ExitCause::TooManyInputFiles => 3, + ExitCause::IncompatibleParameters => 4, + ExitCause::UnableToDetermineFileSize => 6, + ExitCause::MalformedParameter => 7, + ExitCause::ReadError => 8, + ExitCause::NoCaptions => 10, + ExitCause::WithHelp => 11, + ExitCause::NotClassified => 300, + ExitCause::ErrorInCapitalizationFile => 501, + ExitCause::BufferFull => 502, + ExitCause::MissingAsfHeader => 1001, + ExitCause::MissingRcwtHeader => 1002, + + ExitCause::FileCreationFailed => 5, + ExitCause::Unsupported => 9, + ExitCause::NotEnoughMemory => 500, + ExitCause::Bug => 1000, + } + } +} + +impl<'a> CCExtractorLogger { + /// Returns a new instance of CCExtractorLogger with the provided settings. + /// + /// `gui_mode` is used to determine if the log messages are intercepted by a GUI. + /// `target` specifies the location for printing the log messages. + /// `debug_mask` is used to filter debug messages based on their type. + pub const fn new( + target: OutputTarget, + debug_mask: DebugMessageMask, + gui_mode: bool, + ) -> CCExtractorLogger { + CCExtractorLogger { + target, + debug_mask, + gui_mode, + } + } + + /// Set the mode to Normal or Debug Mode based on `false` or `true` respectively for the + /// underlying [`DebugMessageMask`]. 
+ /// + /// This method switches the mask used for filtering debug messages. + /// Similar to [`DebugMessageMask::set_debug_mode`]. + pub fn set_debug_mode(&mut self, mode: bool) { + self.debug_mask.set_debug_mode(mode) + } + + /// Check if the current mode is set to Debug Mode. + /// + /// Similar to [`DebugMessageMask::debug_mode`]. + pub fn debug_mode(&self) -> bool { + self.debug_mask.debug_mode() + } + + /// Returns the currently set target for logging messages. + pub fn target(&self) -> OutputTarget { + self.target + } + + /// Check if the messages are intercepted by GUI. + pub fn is_gui_mode(&self) -> bool { + self.gui_mode + } + + fn print(&self, args: &Arguments<'a>) { + match &self.target { + OutputTarget::Stdout => print!("{}", args), + OutputTarget::Stderr => eprint!("{}", args), + OutputTarget::Quiet => {} + } + } + + /// Log a fatal error message. Use [`fatal!`] instead. + /// + /// Used for logging errors dangerous enough to crash the program instantly. + pub fn log_fatal(&self, exit_cause: ExitCause, args: &Arguments<'a>) -> ! { + self.log_error(args); + println!(); // TODO: print end message + std::process::exit(exit_cause.exit_code()) + } + + /// Log an error message. Use [`error!`] instead. + /// + /// Used for logging general errors occurring in the program. + pub fn log_error(&self, args: &Arguments<'a>) { + if self.gui_mode { + eprint!("###MESSAGE#") + } else { + eprint!("\rError: ") + } + + eprintln!("{}", args); + } + + /// Log an informational message. Use [`info!`] instead. + /// + /// Used for logging extra information about the execution of the program. + pub fn log_info(&self, args: &Arguments<'a>) { + // TODO: call activity_header + self.print(&format_args!("{}", args)); + } + + /// Log a debug message. Use [`debug!`] instead. + /// + /// Used for logging debug messages throughout the program. 
+ pub fn log_debug(&self, message_type: DebugMessageFlag, args: &Arguments<'a>) { + if self.debug_mask.mask().intersects(message_type) { + self.print(&format_args!("{}", args)); + } + } + + /// Send a message to GUI. Use [`send_gui`] instead. + /// + /// Used for sending information related to XDS to the GUI. + pub fn send_gui(&self, _message_type: GuiXdsMessage) { + todo!() + } + + /// Log a hex dump which is helpful for debugging purposes. + /// Use [`hex_dump`] or [`hex_dump_with_start_idx`] instead. + /// + /// Setting `clear_high_bit` to true will ignore the highest bit when displaying the + /// characters. This makes visual CC inspection easier since the highest bit is usually used + /// as a parity bit. + /// + /// The output will contain byte numbers which can be made to start from any number using + /// `start_idx`. This is usually `0`. + pub fn log_hex_dump( + &self, + message_type: DebugMessageFlag, + data: &[u8], + clear_high_bit: bool, + start_idx: usize, + ) { + if self.debug_mask.mask().intersects(message_type) { + let chunked_data = data.chunks(16); + + for (id, chunk) in chunked_data.enumerate() { + self.print(&format_args!("{:05} | ", id * 16 + start_idx)); + for x in chunk { + self.print(&format_args!("{:02X} ", x)); + } + + for _ in 0..(16 - chunk.len()) { + self.print(&format_args!(" ")); + } + + self.print(&format_args!(" | ")); + + for x in chunk { + let c = if x >= &b' ' { + // 0x7F < remove high bit, convenient for visual CC inspection + x & if clear_high_bit { 0x7F } else { 0xFF } + } else { + b' ' + }; + + self.print(&format_args!("{}", c as char)); + } + + self.print(&format_args!("\n")); + } + } + } +} + +/// Set up the global logger. +/// +/// This function can only be called once throughout the execution of the program. The logger can then be +/// accessed by [`logger`] and [`logger_mut`]. 
+pub fn set_logger(logger: CCExtractorLogger) -> Result<(), CCExtractorLogger> { + LOGGER + .set(logger.into()) + .map_err(|x| x.into_inner().unwrap()) +} + +/// Get an immutable instance of the global logger. +/// +/// This function will return [`None`] if the logger is not set up initially by [`set_logger`] or if +/// the underlying RwLock fails to generate a read lock. +/// +/// Use [`logger_mut`] to get a mutable instance. +pub fn logger() -> Option> { + LOGGER.get()?.read().ok() +} + +/// Get a mutable instance of the global logger. +/// +/// This function will return [`None`] if the logger is not set up initially by [`set_logger`] or if +/// the underlying RwLock fails to generate a write lock. +/// +/// Use [`logger`] to get an immutable instance. +pub fn logger_mut() -> Option> { + LOGGER.get()?.write().ok() +} + +/// Log a fatal error message. +/// +/// Used for logging errors dangerous enough to crash the program instantly. This macro does not +/// return (i.e. it returns `!`). A logger needs to be set up initially by [`set_logger`]. +/// +/// # Usage +/// This macro requires an [`ExitCause`] which provides the appropriate exit codes for shutting +/// down the program. This is provided using a key called `cause` which comes before the `;`. After +/// `;`, the arguments work the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let actual = 2; +/// # let required = 1; +/// fatal!( +/// cause = ExitCause::TooManyInputFiles; +/// "{} input files were provided but only {} were needed", actual, required +/// ); +/// ``` +#[macro_export] +macro_rules! fatal { + (cause = $exit_cause:expr; $($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_fatal($exit_cause, &format_args!($($args),*)) + }; +} + +/// Log an error message. +/// +/// Used for logging general errors occurring in the program. A logger needs to be set up +/// initially by [`set_logger`]. 
+/// +/// # Usage +/// The arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let missing_blocks = 2; +/// error!("missing {} additional blocks", missing_blocks); +/// ``` +#[macro_export] +macro_rules! error { + ($($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_error(&format_args!($($args),*)) + } +} + +/// Log an informational message. +/// +/// Used for logging extra information about the execution of the program. A logger needs to be +/// setup initially by [`set_logger`]. +/// +/// # Usage +/// The arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// info!("Processing the header section"); +/// ``` +#[macro_export] +macro_rules! info { + ($($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_info(&format_args!($($args),*)) + }; +} + +/// Log a debug message. +/// +/// Used for logging debug messages throughout the program. A logger needs to be setup initially +/// by [`set_logger`]. +/// +/// # Usage +/// This macro requires an [`DebugMessageFlag`] which represents the type of debug message. It is +/// used for filtering the messages. This is provided using a key called `msg_type` which comes +/// before the `;`. After `;`, the arguments works the same as a [`println!`] macro. +/// +/// # Examples +/// ```no_run +/// # use lib_ccxr::util::log::*; +/// # let byte1 = 23u8; +/// # let byte2 = 45u8; +/// debug!( +/// msg_type = DebugMessageFlag::DECODER_708; +/// "Packet Start with contents {} {}", byte1, byte2 +/// ); +/// ``` +#[macro_export] +macro_rules! 
debug { + (msg_type = $msg_flag:expr; $($args:expr),*) => { + $crate::util::log::logger().expect("Logger is not yet initialized") + .log_debug($msg_flag, &format_args!($($args),*)) + }; +} + +pub use debug; +pub use error; +pub use fatal; +pub use info; + +/// Log a hex dump which is helpful for debugging purposes. +/// +/// Setting `clear_high_bit` to true will ignore the highest bit when displaying the +/// characters. This makes visual CC inspection easier since the highest bit is usually used +/// as a parity bit. +/// +/// The byte numbers start from `0` by default. Use [`hex_dump_with_start_idx`] if a +/// different starting index is required. +pub fn hex_dump(message_type: DebugMessageFlag, data: &[u8], clear_high_bit: bool) { + logger() + .expect("Logger is not yet initialized") + .log_hex_dump(message_type, data, clear_high_bit, 0) +} + +/// Log a hex dump which is helpful for debugging purposes. +/// +/// Setting `clear_high_bit` to true will ignore the highest bit when displaying the +/// characters. This makes visual CC inspection easier since the highest bit is usually used +/// as a parity bit. +/// +/// The output will contain byte numbers which can be made to start from any number using +/// `start_idx`. This is usually `0`. +pub fn hex_dump_with_start_idx( + message_type: DebugMessageFlag, + data: &[u8], + clear_high_bit: bool, + start_idx: usize, +) { + logger() + .expect("Logger is not yet initialized") + .log_hex_dump(message_type, data, clear_high_bit, start_idx) +} + +/// Send a message to GUI. +/// +/// Used for sending information related to XDS to the GUI. +pub fn send_gui(message: GuiXdsMessage) { + logger() + .expect("Logger is not yet initialized") + .send_gui(message) +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index daf5935ac..504f17e26 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -1 +1,3 @@ //! Provides basic utilities used throughout the program. 
+ +pub mod log; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 1fec8fa5e..f2d7ba171 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -33,6 +33,7 @@ extern "C" { static mut cb_708: c_int; static mut cb_field1: c_int; static mut cb_field2: c_int; + static mut ccx_options: ccx_s_options; } /// Initialize env logger with custom format, using stdout as target diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index e365e0fb2..b48d76984 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -1 +1,32 @@ //! Provides C-FFI functions that are direct equivalent of functions available in C. + +use crate::ccx_options; +use lib_ccxr::util::log::*; +use std::convert::TryInto; + +/// Initializes the logger at the rust side. +/// +/// # Safety +/// +/// `ccx_options` in C must be initialized properly before calling this function. +#[no_mangle] +pub unsafe extern "C" fn ccxr_init_basic_logger() { + let debug_mask = + DebugMessageFlag::from_bits(ccx_options.debug_mask.try_into().unwrap()).unwrap(); + let debug_mask_on_debug = + DebugMessageFlag::from_bits(ccx_options.debug_mask_on_debug.try_into().unwrap()).unwrap(); + let mask = DebugMessageMask::new(debug_mask, debug_mask_on_debug); + let gui_mode_reports = ccx_options.gui_mode_reports != 0; + let messages_target = match ccx_options.messages_target { + 0 => OutputTarget::Stdout, + 1 => OutputTarget::Stderr, + 2 => OutputTarget::Quiet, + _ => panic!("incorrect value for messages_target"), + }; + set_logger(CCExtractorLogger::new( + messages_target, + mask, + gui_mode_reports, + )) + .unwrap(); +} From 676ef8443e88e041ef2b470cd29bb2a415cefcb2 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 18:25:54 +0530 Subject: [PATCH 04/24] add encoding module --- src/rust/lib_ccxr/src/util/encoding.rs | 760 +++++++++++++++++++++++++ src/rust/lib_ccxr/src/util/mod.rs | 2 + 2 files changed, 762 insertions(+) create mode 100644 
src/rust/lib_ccxr/src/util/encoding.rs diff --git a/src/rust/lib_ccxr/src/util/encoding.rs b/src/rust/lib_ccxr/src/util/encoding.rs new file mode 100644 index 000000000..659744b86 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/encoding.rs @@ -0,0 +1,760 @@ +//! A module for working with different kinds of text encoding formats. +//! +//! Any Text within the entire application can be in one of the following 4 formats which is +//! represented by [`Encoding`]. +//! - [`Line 21`](Encoding::Line21) - Used in 608 captions. +//! - [`Latin-1`](Encoding::Latin1) - ISO/IEC 8859-1. +//! - [`Ucs2`](Encoding::Ucs2) - UCS-2 code points. +//! - [`UTF-8`](Encoding::Utf8) +//! +//! To represent a string in any one of the above encoding, use the following respectively. +//! - [`Line21String`] +//! - [`Latin1String`] +//! - [`Ucs2String`] +//! - [`String`] (same as from rust std) +//! +//! Each of these 4 types can be converted to any other type using [`From::from`] and [`Into::into`]. +//! +//! The above types can be used when the encoding is known at compile-time. If the exact encoding +//! is only known at runtime then [`EncodedString`] can be used. Each of the above 4 types can be +//! converted to [`EncodedString`] using [`From::from`] and [`Into::into`]. An [`EncodedString`] can +//! be converted to any of the 4 types by `to_*` methods. Conversions where the target encoding is +//! only known at runtime can be done using [`EncodedString::encode_to`]. +//! +//! # Conversion Guide +//! +//! | From | To | +//! |-----------------------------------------|----------------------------------| +//! | `CCX_ENC_*`, `ccx_encoding_type` | [`Encoding`] | +//! | any `char` buffer with Line 21 encoding | [`Line21String`] | +//! | any `char` buffer with Latin-1 encoding | [`Latin1String`] | +//! | any `char` buffer with UCS-2 encoding | [`Ucs2String`] | +//! | any `char` buffer with UTF-8 encoding | [`String`] | +//! | any `char` buffer with unknown encoding | [`EncodedString`] | +//! 
| `get_char_in_latin_1` | `line21_to_latin1` | +//! | `get_char_in_unicode` | `line21_to_ucs2` | +//! | `get_char_in_utf_8` | `line21_to_ucs2`, `ucs2_to_char` | +//! | `utf8_to_latin1_map` | `char_to_ucs2`, `ucs2_to_latin1` | + +/// Represents the different kinds of encoding that [`EncodedString`] can take. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Encoding { + Line21, + Latin1, + Ucs2, + Utf8, +} + +/// Represents a character in Line 21 encoding. +pub type Line21Char = u8; + +/// Represents a character in Latin-1 encoding. +pub type Latin1Char = u8; + +/// Represents a character in UCS-2 encoding. +pub type Ucs2Char = u16; + +/// A String-like type containing a sequence of Line 21 encoded characters. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Line21String(Vec); + +/// A String-like type containing a sequence of Latin-1 encoded characters. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Latin1String(Vec); + +/// A String-like type containing a sequence of UCS-2 code points. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Ucs2String(Vec); + +/// A String-like type that stores its characters in one of the [`Encoding`] formats. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum EncodedString { + Line21(Line21String), + Latin1(Latin1String), + Ucs2(Ucs2String), + Utf8(String), +} + +/// A placeholder for missing characters. +/// +/// It is used for interconverting between [`Encoding`] formats if the target +/// format does not support a character in the source format. +pub const UNAVAILABLE_CHAR: u8 = b'?'; + +impl Line21String { + /// Creates a new empty [`Line21String`]. + pub fn new() -> Line21String { + Line21String(Vec::new()) + } + + /// Creates a new [`Line21String`] from the contents of given [`Vec`]. + pub fn from_vec(v: Vec) -> Line21String { + Line21String(v) + } + + /// Returns a reference to the internal [`Vec`]. + pub fn as_vec(&self) -> &Vec { + &self.0 + } + + /// Returns a mutable reference to the internal [`Vec`]. 
+ pub fn as_mut_vec(&mut self) -> &mut Vec { + &mut self.0 + } + + /// Returns the internal [`Vec`], consuming this [`Line21String`]. + pub fn into_vec(self) -> Vec { + self.0 + } + + /// Converts this [`Line21String`] to a format provided by `encoding`, returning a new [`EncodedString`]. + pub fn encode_to(&self, encoding: Encoding) -> EncodedString { + match encoding { + Encoding::Line21 => self.clone().into(), + Encoding::Latin1 => EncodedString::Latin1(self.into()), + Encoding::Ucs2 => EncodedString::Ucs2(self.into()), + Encoding::Utf8 => EncodedString::Utf8(self.into()), + } + } + + /// Converts the [`Line21String`] to lowercase, returning a new [`Line21String`]. + pub fn to_lowercase(&self) -> Line21String { + Line21String::from_vec( + self.as_vec() + .iter() + .map(|&c| line21_to_lowercase(c)) + .collect(), + ) + } + + /// Converts the [`Line21String`] to uppercase, returning a new [`Line21String`]. + pub fn to_uppercase(&self) -> Line21String { + Line21String::from_vec( + self.as_vec() + .iter() + .map(|&c| line21_to_uppercase(c)) + .collect(), + ) + } +} + +impl Latin1String { + /// Creates a new empty [`Latin1String`]. + pub fn new() -> Latin1String { + Latin1String(Vec::new()) + } + + /// Creates a new [`Latin1String`] from the contents of given [`Vec`]. + pub fn from_vec(v: Vec) -> Latin1String { + Latin1String(v) + } + + /// Returns a reference to the internal [`Vec`]. + pub fn as_vec(&self) -> &Vec { + &self.0 + } + + /// Returns a mutable reference to the internal [`Vec`]. + pub fn as_mut_vec(&mut self) -> &mut Vec { + &mut self.0 + } + + /// Returns the internal [`Vec`], consuming this [`Latin1String`]. + pub fn into_vec(self) -> Vec { + self.0 + } + + /// Converts this [`Latin1String`] to a format provided by `encoding`, returning a new [`EncodedString`]. 
+ pub fn encode_to(&self, encoding: Encoding) -> EncodedString { + match encoding { + Encoding::Line21 => todo!(), + Encoding::Latin1 => self.clone().into(), + Encoding::Ucs2 => EncodedString::Ucs2(self.into()), + Encoding::Utf8 => EncodedString::Utf8(self.into()), + } + } +} + +impl Ucs2String { + /// Creates a new empty [`Ucs2String`]. + pub fn new() -> Ucs2String { + Ucs2String(Vec::new()) + } + + /// Creates a new [`Ucs2String`] from the contents of given [`Vec`]. + pub fn from_vec(v: Vec) -> Ucs2String { + Ucs2String(v) + } + + /// Returns a reference to the internal [`Vec`]. + pub fn as_vec(&self) -> &Vec { + &self.0 + } + + /// Returns a mutable reference to the internal [`Vec`]. + pub fn as_mut_vec(&mut self) -> &mut Vec { + &mut self.0 + } + + /// Returns the internal [`Vec`], consuming this [`Ucs2String`]. + pub fn into_vec(self) -> Vec { + self.0 + } + + /// Converts this [`Ucs2String`] to a format provided by `encoding`, returning a new [`EncodedString`]. + pub fn encode_to(&self, encoding: Encoding) -> EncodedString { + match encoding { + Encoding::Line21 => EncodedString::Line21(self.into()), + Encoding::Latin1 => EncodedString::Latin1(self.into()), + Encoding::Ucs2 => self.clone().into(), + Encoding::Utf8 => EncodedString::Utf8(self.into()), + } + } +} + +impl From<&Ucs2String> for Line21String { + fn from(value: &Ucs2String) -> Line21String { + Line21String::from_vec(value.as_vec().iter().map(|&c| ucs2_to_line21(c)).collect()) + } +} + +impl From<&str> for Line21String { + fn from(value: &str) -> Line21String { + Line21String::from_vec( + value + .chars() + .map(char_to_ucs2) + .map(ucs2_to_line21) + .collect(), + ) + } +} + +impl From<&Line21String> for Latin1String { + fn from(value: &Line21String) -> Latin1String { + Latin1String::from_vec( + value + .as_vec() + .iter() + .map(|&x| line21_to_latin1(x)) + .collect(), + ) + } +} + +impl From<&Ucs2String> for Latin1String { + fn from(value: &Ucs2String) -> Latin1String { + 
Latin1String::from_vec(value.as_vec().iter().map(|&c| ucs2_to_latin1(c)).collect()) + } +} + +impl From<&str> for Latin1String { + fn from(value: &str) -> Latin1String { + Latin1String::from_vec( + value + .chars() + .map(char_to_ucs2) + .map(ucs2_to_latin1) + .collect(), + ) + } +} + +impl From<&Line21String> for Ucs2String { + fn from(value: &Line21String) -> Ucs2String { + Ucs2String::from_vec(value.as_vec().iter().map(|&x| line21_to_ucs2(x)).collect()) + } +} + +impl From<&Latin1String> for Ucs2String { + fn from(value: &Latin1String) -> Ucs2String { + Ucs2String::from_vec(value.as_vec().iter().map(|&x| x.into()).collect()) + } +} + +impl From<&str> for Ucs2String { + fn from(value: &str) -> Ucs2String { + Ucs2String::from_vec(value.chars().map(char_to_ucs2).collect()) + } +} + +impl From<&Line21String> for String { + fn from(value: &Line21String) -> String { + value + .as_vec() + .iter() + .map(|&x| line21_to_ucs2(x)) + .map(ucs2_to_char) + .collect() + } +} + +impl From<&Latin1String> for String { + fn from(value: &Latin1String) -> String { + value + .as_vec() + .iter() + .map(|&x| Into::::into(x)) + .collect() + } +} + +impl From<&Ucs2String> for String { + fn from(value: &Ucs2String) -> String { + value.as_vec().iter().map(|&x| ucs2_to_char(x)).collect() + } +} + +impl Default for Line21String { + fn default() -> Self { + Self::new() + } +} + +impl Default for Latin1String { + fn default() -> Self { + Self::new() + } +} + +impl Default for Ucs2String { + fn default() -> Self { + Self::new() + } +} + +impl EncodedString { + /// Creates an [`EncodedString`] with the given `encoding` from string slice. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let s = EncodedString::from_str("è", Encoding::Latin1); + /// assert_eq!(s, Latin1String::from_vec(vec![0xe8]).into()) + /// ``` + pub fn from_str(string: &str, encoding: Encoding) -> EncodedString { + match encoding { + Encoding::Line21 => EncodedString::Line21(string.into()), + Encoding::Latin1 => EncodedString::Latin1(string.into()), + Encoding::Ucs2 => EncodedString::Ucs2(string.into()), + Encoding::Utf8 => EncodedString::Utf8(string.to_string()), + } + } + + /// Returns the [`Encoding`] format of this [`EncodedString`]. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let s: EncodedString = Line21String::from_vec(vec![b'a', b'b']).into(); + /// assert_eq!(s.encoding(), Encoding::Line21); + /// ``` + pub fn encoding(&self) -> Encoding { + match self { + EncodedString::Line21(_) => Encoding::Line21, + EncodedString::Latin1(_) => Encoding::Latin1, + EncodedString::Ucs2(_) => Encoding::Ucs2, + EncodedString::Utf8(_) => Encoding::Utf8, + } + } + + /// Converts the [`EncodedString`] to Line 21 format, returning a new [`Line21String`]. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let s = EncodedString::from_str("Hi 😀", Encoding::Utf8); + /// assert_eq!( + /// s.to_line21(), + /// Line21String::from_vec( + /// vec![0x48, 0x69, 0x20, 0x3f] // "Hi ?" + /// ) + /// ) + /// ``` + pub fn to_line21(&self) -> Line21String { + match self { + EncodedString::Line21(l) => l.clone(), + EncodedString::Latin1(_) => todo!(), + EncodedString::Ucs2(u) => u.into(), + EncodedString::Utf8(s) => s.as_str().into(), + } + } + + /// Converts the [`EncodedString`] to Latin-1 format, returning a new [`Latin1String`]. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let s = EncodedString::from_str("résumé", Encoding::Utf8); + /// assert_eq!( + /// s.to_latin1(), + /// Latin1String::from_vec( + /// vec![0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9] + /// ) + /// ) + /// ``` + pub fn to_latin1(&self) -> Latin1String { + match self { + EncodedString::Line21(l) => l.into(), + EncodedString::Latin1(l) => l.clone(), + EncodedString::Ucs2(u) => u.into(), + EncodedString::Utf8(s) => s.as_str().into(), + } + } + + /// Converts the [`EncodedString`] to UCS-2 format, returning a new [`Ucs2String`]. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding + /// let s: EncodedString = Line21String::from_vec(v).into(); + /// assert_eq!( + /// s.to_ucs2(), + /// Ucs2String::from_vec( + /// vec![0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9] + /// ) + /// ) + /// ``` + pub fn to_ucs2(&self) -> Ucs2String { + match self { + EncodedString::Line21(l) => l.into(), + EncodedString::Latin1(l) => l.into(), + EncodedString::Ucs2(u) => u.clone(), + EncodedString::Utf8(s) => s.as_str().into(), + } + } + + /// Converts the [`EncodedString`] to UTF-8 format, returning a new [`String`]. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding + /// let s: EncodedString = Line21String::from_vec(v).into(); + /// assert_eq!(s.to_utf8(), "résumé".to_string()) + /// ``` + pub fn to_utf8(&self) -> String { + match self { + EncodedString::Line21(l) => l.into(), + EncodedString::Latin1(l) => l.into(), + EncodedString::Ucs2(u) => u.into(), + EncodedString::Utf8(s) => s.clone(), + } + } + + /// Converts this [`EncodedString`] to a format provided by `encoding`, returning a new [`EncodedString`].
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let v = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding + /// let s: EncodedString = Line21String::from_vec(v).into(); + /// assert_eq!(s.encode_to(Encoding::Utf8), "résumé".to_string().into()) + /// ``` + pub fn encode_to(&self, encoding: Encoding) -> EncodedString { + match encoding { + Encoding::Line21 => EncodedString::Line21(self.to_line21()), + Encoding::Latin1 => EncodedString::Latin1(self.to_latin1()), + Encoding::Ucs2 => EncodedString::Ucs2(self.to_ucs2()), + Encoding::Utf8 => EncodedString::Utf8(self.to_utf8()), + } + } + + /// Converts the [`EncodedString`] to lowercase, returning a new [`EncodedString`]. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let a = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding + /// let b = vec![0x52, 0x91, 0x53, 0x55, 0x4d, 0x91]; // RÉSUMÉ in Line 21 encoding + /// let sa: EncodedString = Line21String::from_vec(a).into(); + /// let sb: EncodedString = Line21String::from_vec(b).into(); + /// assert_eq!(sb.to_lowercase(), sa) + /// ``` + pub fn to_lowercase(&self) -> EncodedString { + match self { + EncodedString::Line21(l) => l.to_lowercase().into(), + EncodedString::Latin1(_) => todo!(), + EncodedString::Ucs2(_) => todo!(), + EncodedString::Utf8(s) => s.to_lowercase().into(), + } + } + + /// Converts the [`EncodedString`] to uppercase, returning a new [`EncodedString`].
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::encoding::*; + /// let a = vec![0x72, 0x5c, 0x73, 0x75, 0x6d, 0x5c]; // résumé in Line 21 encoding + /// let b = vec![0x52, 0x91, 0x53, 0x55, 0x4d, 0x91]; // RÉSUMÉ in Line 21 encoding + /// let sa: EncodedString = Line21String::from_vec(a).into(); + /// let sb: EncodedString = Line21String::from_vec(b).into(); + /// assert_eq!(sa.to_uppercase(), sb) + /// ``` + pub fn to_uppercase(&self) -> EncodedString { + match self { + EncodedString::Line21(l) => l.to_uppercase().into(), + EncodedString::Latin1(_) => todo!(), + EncodedString::Ucs2(_) => todo!(), + EncodedString::Utf8(s) => s.to_uppercase().into(), + } + } +} + +impl From<Line21String> for EncodedString { + fn from(value: Line21String) -> Self { + EncodedString::Line21(value) + } +} + +impl From<Latin1String> for EncodedString { + fn from(value: Latin1String) -> Self { + EncodedString::Latin1(value) + } +} + +impl From<Ucs2String> for EncodedString { + fn from(value: Ucs2String) -> Self { + EncodedString::Ucs2(value) + } +} + +impl From<String> for EncodedString { + fn from(value: String) -> Self { + EncodedString::Utf8(value) + } +} + +fn line21_to_latin1(c: Line21Char) -> Latin1Char { + if c < 0x80 { + // Regular line-21 character set, mostly ASCII except these exceptions + match c { + 0x2a => 0xe1, // lowercase a, acute accent + 0x5c => 0xe9, // lowercase e, acute accent + 0x5e => 0xed, // lowercase i, acute accent + 0x5f => 0xf3, // lowercase o, acute accent + 0x60 => 0xfa, // lowercase u, acute accent + 0x7b => 0xe7, // lowercase c with cedilla + 0x7c => 0xf7, // division symbol + 0x7d => 0xd1, // uppercase N tilde + 0x7e => 0xf1, // lowercase n tilde + 0x7f => UNAVAILABLE_CHAR, // Solid block - Does not exist in Latin 1 + _ => c, + } + } else { + match c { + // THIS BLOCK INCLUDES THE 16 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS + // THAT COME FROM HI BYTE=0x11 AND LOW BETWEEN 0x30 AND 0x3F + 0x80 => 0xae, // Registered symbol (R) + 0x81 => 0xb0, // degree sign + 0x82 => 0xbd, // 1/2 symbol
+ 0x83 => 0xbf, // Inverted (open) question mark + 0x84 => UNAVAILABLE_CHAR, // Trademark symbol (TM) - Does not exist in Latin 1 + 0x85 => 0xa2, // Cents symbol + 0x86 => 0xa3, // Pounds sterling + 0x87 => 0xb6, // Music note - Not in latin 1, so we use 'pilcrow' + 0x88 => 0xe0, // lowercase a, grave accent + 0x89 => 0x20, // transparent space, we make it regular + 0x8a => 0xe8, // lowercase e, grave accent + 0x8b => 0xe2, // lowercase a, circumflex accent + 0x8c => 0xea, // lowercase e, circumflex accent + 0x8d => 0xee, // lowercase i, circumflex accent + 0x8e => 0xf4, // lowercase o, circumflex accent + 0x8f => 0xfb, // lowercase u, circumflex accent + // THIS BLOCK INCLUDES THE 32 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS + // THAT COME FROM HI BYTE=0x12 AND LOW BETWEEN 0x20 AND 0x3F + 0x90 => 0xc1, // capital letter A with acute + 0x91 => 0xc9, // capital letter E with acute + 0x92 => 0xd3, // capital letter O with acute + 0x93 => 0xda, // capital letter U with acute + 0x94 => 0xdc, // capital letter U with diaeresis + 0x95 => 0xfc, // lowercase letter U with diaeresis + 0x96 => 0x27, // apostrophe + 0x97 => 0xa1, // inverted exclamation mark + 0x98 => 0x2a, // asterisk + 0x99 => 0x27, // apostrophe (yes, duped). See CCADI source code. + 0x9a => 0x2d, // em dash + 0x9b => 0xa9, // copyright sign + 0x9c => UNAVAILABLE_CHAR, // Service Mark - not available in latin 1 + 0x9d => 0x2e, // Full stop (.) 
+ 0x9e => 0x22, // Quotation mark + 0x9f => 0x22, // Quotation mark + 0xa0 => 0xc0, // uppercase A, grave accent + 0xa1 => 0xc2, // uppercase A, circumflex + 0xa2 => 0xc7, // uppercase C with cedilla + 0xa3 => 0xc8, // uppercase E, grave accent + 0xa4 => 0xca, // uppercase E, circumflex + 0xa5 => 0xcb, // capital letter E with diaeresis + 0xa6 => 0xeb, // lowercase letter e with diaeresis + 0xa7 => 0xce, // uppercase I, circumflex + 0xa8 => 0xcf, // uppercase I, with diaeresis + 0xa9 => 0xef, // lowercase i, with diaeresis + 0xaa => 0xd4, // uppercase O, circumflex + 0xab => 0xd9, // uppercase U, grave accent + 0xac => 0xf9, // lowercase u, grave accent + 0xad => 0xdb, // uppercase U, circumflex + 0xae => 0xab, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0xaf => 0xbb, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + // THIS BLOCK INCLUDES THE 32 EXTENDED (TWO-BYTE) LINE 21 CHARACTERS + // THAT COME FROM HI BYTE=0x13 AND LOW BETWEEN 0x20 AND 0x3F + 0xb0 => 0xc3, // Uppercase A, tilde + 0xb1 => 0xe3, // Lowercase a, tilde + 0xb2 => 0xcd, // Uppercase I, acute accent + 0xb3 => 0xcc, // Uppercase I, grave accent + 0xb4 => 0xec, // Lowercase i, grave accent + 0xb5 => 0xd2, // Uppercase O, grave accent + 0xb6 => 0xf2, // Lowercase o, grave accent + 0xb7 => 0xd5, // Uppercase O, tilde + 0xb8 => 0xf5, // Lowercase o, tilde + 0xb9 => 0x7b, // Open curly brace + 0xba => 0x7d, // Closing curly brace + 0xbb => 0x5c, // Backslash + 0xbc => 0x5e, // Caret + 0xbd => 0x5f, // Underscore + 0xbe => 0xa6, // Pipe (broken bar) + 0xbf => 0x7e, // Tilde + 0xc0 => 0xc4, // Uppercase A, umlaut + 0xc1 => 0xe4, // Lowercase A, umlaut (U+00E4) + 0xc2 => 0xd6, // Uppercase O, umlaut + 0xc3 => 0xf6, // Lowercase o, umlaut + 0xc4 => 0xdf, // Eszett (sharp S) + 0xc5 => 0xa5, // Yen symbol + 0xc6 => 0xa4, // Currency symbol + 0xc7 => 0x7c, // Vertical bar + 0xc8 => 0xc5, // Uppercase A, ring + 0xc9 => 0xe5, // Lowercase A, ring + 0xca => 0xd8, // Uppercase O, slash + 0xcb => 0xf8, // Lowercase o, slash
+ 0xcc => UNAVAILABLE_CHAR, // Upper left corner + 0xcd => UNAVAILABLE_CHAR, // Upper right corner + 0xce => UNAVAILABLE_CHAR, // Lower left corner + 0xcf => UNAVAILABLE_CHAR, // Lower right corner + _ => UNAVAILABLE_CHAR, // For those that don't have representation + // I'll do it eventually, I promise + // These are weird chars anyway + } + } +} + +fn line21_to_ucs2(c: Line21Char) -> Ucs2Char { + match c { + 0x7f => 0x25A0, // Solid block + 0x84 => 0x2122, // Trademark symbol (TM) + 0x87 => 0x266a, // Music note + 0x9c => 0x2120, // Service Mark + 0xcc => 0x231c, // Upper left corner + 0xcd => 0x231d, // Upper right corner + 0xce => 0x231e, // Lower left corner + 0xcf => 0x231f, // Lower right corner + _ => line21_to_latin1(c).into(), // Everything else, same as latin-1 followed by 00 + } +} + +fn ucs2_to_line21(c: Ucs2Char) -> Line21Char { + if c < 0x80 { + c as u8 + } else { + UNAVAILABLE_CHAR + } +} + +fn ucs2_to_latin1(c: Ucs2Char) -> Latin1Char { + // Code points 0 to U+00FF are the same in both.
+ if c <= 0xff { // inclusive: U+00FF is itself a Latin-1 code point + c as u8 + } else { + match c { + 0x0152 => 188, // U+0152 = 0xBC: OE ligature + 0x0153 => 189, // U+0153 = 0xBD: oe ligature + 0x0160 => 166, // U+0160 = 0xA6: S with caron + 0x0161 => 168, // U+0161 = 0xA8: s with caron + 0x0178 => 190, // U+0178 = 0xBE: Y with diaeresis + 0x017D => 180, // U+017D = 0xB4: Z with caron + 0x017E => 184, // U+017E = 0xB8: z with caron + 0x20AC => 164, // U+20AC = 0xA4: Euro + _ => UNAVAILABLE_CHAR, + } + } +} + +fn line21_to_lowercase(c: Line21Char) -> Line21Char { + if c.is_ascii_uppercase() { + c - b'A' + b'a' + } else { + match c { + 0x7d => 0x7e, // uppercase N tilde + 0x90 => 0x2a, // capital letter A with acute + 0x91 => 0x5c, // capital letter E with acute + 0x92 => 0x5f, // capital letter O with acute + 0x93 => 0x60, // capital letter U with acute + 0xa2 => 0x7b, // uppercase C with cedilla + 0xa0 => 0x88, // uppercase A, grave accent + 0xa3 => 0x8a, // uppercase E, grave accent + 0xa1 => 0x8b, // uppercase A, circumflex + 0xa4 => 0x8c, // uppercase E, circumflex + 0xa7 => 0x8d, // uppercase I, circumflex + 0xaa => 0x8e, // uppercase O, circumflex + 0xad => 0x8f, // uppercase U, circumflex + 0x94 => 0x95, // capital letter U with diaeresis + 0xa5 => 0xa6, // capital letter E with diaeresis + 0xa8 => 0xa9, // uppercase I, with diaeresis + 0xab => 0xac, // uppercase U, grave accent + 0xb0 => 0xb1, // Uppercase A, tilde + 0xb2 => 0x5e, // Uppercase I, acute accent + 0xb3 => 0xb4, // Uppercase I, grave accent + 0xb5 => 0xb6, // Uppercase O, grave accent + 0xb7 => 0xb8, // Uppercase O, tilde + 0xc0 => 0xc1, // Uppercase A, umlaut + 0xc2 => 0xc3, // Uppercase O, umlaut + 0xc8 => 0xc9, // Uppercase A, ring + 0xca => 0xcb, // Uppercase O, slash + x => x, + } + } +} + +fn line21_to_uppercase(c: Line21Char) -> Line21Char { + if c.is_ascii_lowercase() { + c - b'a' + b'A' + } else { + match c { + 0x7e => 0x7d, // lowercase n tilde + 0x2a => 0x90, // lowercase a, acute accent + 0x5c => 0x91, // lowercase e, acute accent +
0x5e => 0xb2, // lowercase i, acute accent + 0x5f => 0x92, // lowercase o, acute accent + 0x60 => 0x93, // lowercase u, acute accent + 0x7b => 0xa2, // lowercase c with cedilla + 0x88 => 0xa0, // lowercase a, grave accent + 0x8a => 0xa3, // lowercase e, grave accent + 0x8b => 0xa1, // lowercase a, circumflex accent + 0x8c => 0xa4, // lowercase e, circumflex accent + 0x8d => 0xa7, // lowercase i, circumflex accent + 0x8e => 0xaa, // lowercase o, circumflex accent + 0x8f => 0xad, // lowercase u, circumflex accent + 0x95 => 0x94, // lowercase letter U with diaeresis + 0xa6 => 0xa5, // lowercase letter e with diaeresis + 0xa9 => 0xa8, // lowercase i, with diaeresis + 0xac => 0xab, // lowercase u, grave accent + 0xb1 => 0xb0, // Lowercase a, tilde + 0xb4 => 0xb3, // Lowercase i, grave accent + 0xb6 => 0xb5, // Lowercase o, grave accent + 0xb8 => 0xb7, // Lowercase o, tilde + 0xc1 => 0xc0, // Lowercase A, umlaut + 0xc3 => 0xc2, // Lowercase o, umlaut + 0xc9 => 0xc8, // Lowercase A, ring + 0xcb => 0xca, // Lowercase o, slash + x => x, + } + } +} + +fn ucs2_to_char(c: Ucs2Char) -> char { + let x: u32 = c.into(); + char::from_u32(x).unwrap_or(UNAVAILABLE_CHAR.into()) +} + +fn char_to_ucs2(c: char) -> Ucs2Char { + (c as u32).try_into().unwrap_or(UNAVAILABLE_CHAR.into()) +} diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index daf5935ac..06dce786a 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -1 +1,3 @@ //! Provides basic utilities used throughout the program. 
+ +pub mod encoding; From f4d4d7c9927218d16982d577948fa33e037a9faf Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sat, 26 Aug 2023 12:58:49 +0530 Subject: [PATCH 05/24] add common constants module --- src/rust/lib_ccxr/src/common/constants.rs | 415 ++++++++++++++++++++++ src/rust/lib_ccxr/src/common/mod.rs | 21 ++ src/rust/lib_ccxr/src/lib.rs | 1 + 3 files changed, 437 insertions(+) create mode 100644 src/rust/lib_ccxr/src/common/constants.rs create mode 100644 src/rust/lib_ccxr/src/common/mod.rs diff --git a/src/rust/lib_ccxr/src/common/constants.rs b/src/rust/lib_ccxr/src/common/constants.rs new file mode 100644 index 000000000..b310c95b9 --- /dev/null +++ b/src/rust/lib_ccxr/src/common/constants.rs @@ -0,0 +1,415 @@ +use std::ffi::OsStr; + +pub const DTVCC_MAX_SERVICES: usize = 63; + +/// An enum of all the available formats for the subtitle output. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum OutputFormat { + Raw, + Srt, + Sami, + Transcript, + Rcwt, + Null, + SmpteTt, + SpuPng, + DvdRaw, // See -d at http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_TOOLS.HTML#CCExtract + WebVtt, + SimpleXml, + G608, + Curl, + Ssa, + Mcc, + Scc, + Ccd, +} + +// AVC NAL types +pub enum AvcNalType { + Unspecified0 = 0, + CodedSliceNonIdrPicture1 = 1, + CodedSlicePartitionA = 2, + CodedSlicePartitionB = 3, + CodedSlicePartitionC = 4, + CodedSliceIdrPicture = 5, + Sei = 6, + SequenceParameterSet7 = 7, + PictureParameterSet = 8, + AccessUnitDelimiter9 = 9, + EndOfSequence = 10, + EndOfStream = 11, + FillerData = 12, + SequenceParameterSetExtension = 13, + PrefixNalUnit = 14, + SubsetSequenceParameterSet = 15, + Reserved16 = 16, + Reserved17 = 17, + Reserved18 = 18, + CodedSliceAuxiliaryPicture = 19, + CodedSliceExtension = 20, + Reserved21 = 21, + Reserved22 = 22, + Reserved23 = 23, + Unspecified24 = 24, + Unspecified25 = 25, + Unspecified26 = 26, + Unspecified27 = 27, + Unspecified28 = 28, + Unspecified29 = 29, + Unspecified30 = 30, + Unspecified31 = 31, +} 
+ +// MPEG-2 TS stream types +pub enum StreamType { + Unknownstream = 0, + /* + The later constants are defined by MPEG-TS standard + Explore at: https://exiftool.org/TagNames/M2TS.html + */ + VideoMpeg1 = 0x01, + VideoMpeg2 = 0x02, + AudioMpeg1 = 0x03, + AudioMpeg2 = 0x04, + PrivateTableMpeg2 = 0x05, + PrivateMpeg2 = 0x06, + MhegPackets = 0x07, + Mpeg2AnnexADsmCc = 0x08, + ItuTH222_1 = 0x09, + IsoIec13818_6TypeA = 0x0a, + IsoIec13818_6TypeB = 0x0b, + IsoIec13818_6TypeC = 0x0c, + IsoIec13818_6TypeD = 0x0d, + AudioAac = 0x0f, + VideoMpeg4 = 0x10, + VideoH264 = 0x1b, + PrivateUserMpeg2 = 0x80, + AudioAc3 = 0x81, + AudioHdmvDts = 0x82, + AudioDts = 0x8a, +} + +pub enum MpegDescriptor { + /* + The later constants are defined by ETSI EN 300 468 standard + Explore at: https://www.etsi.org/deliver/etsi_en/300400_300499/300468/01.11.01_60/en_300468v011101p.pdf + */ + Registration = 0x05, + DataStreamAlignment = 0x06, + Iso639Language = 0x0a, + VbiDataDescriptor = 0x45, + VbiTeletextDescriptor = 0x46, + TeletextDescriptor = 0x56, + DvbSubtitle = 0x59, + /* User defined */ + CaptionService = 0x86, + DataComp = 0xfd, +} + +pub enum DataSource { + File, + Stdin, + Network, + Tcp, +} + +pub enum StreamMode { + ElementaryOrNotFound = 0, + Transport = 1, + Program = 2, + Asf = 3, + McpoodlesRaw = 4, + Rcwt = 5, // raw captions with time, not used yet. 
+ Myth = 6, // use the myth loop + Mp4 = 7, // mp4, iso- + #[cfg(feature = "wtv_debug")] + HexDump = 8, // hexadecimal dump generated by wtvccdump + Wtv = 9, + #[cfg(feature = "enable_ffmpeg")] + Ffmpeg = 10, + Gxf = 11, + Mkv = 12, + Mxf = 13, + Autodetect = 16, +} + +pub enum BufferdataType { + Unknown, + Pes, + Raw, + H264, + Hauppage, + Teletext, + PrivateMpeg2Cc, + DvbSubtitle, + IsdbSubtitle, + /* BUffer where cc data contain 3 byte cc_valid ccdata 1 ccdata 2 */ + RawType, + DvdSubtitle, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum FrameType { + ResetOrUnknown, + IFrame, + PFrame, + BFrame, + DFrame, +} + +pub enum Codec { + Teletext, + Dvb, + IsdbCc, + AtscCc, +} + +pub enum SelectCodec { + All, + Some(Codec), + None, +} + +/// Caption Distribution Packet +pub enum CdpSectionType { + /* + The later constants are defined by SMPTE ST 334 + Purchase for 80$ at: https://ieeexplore.ieee.org/document/8255806 + */ + Data = 0x72, + SvcInfo = 0x73, + Footer = 0x74, +} + +pub enum Language { + Und, // Undefined + Eng, + Afr, + Amh, + Ara, + Asm, + Aze, + Bel, + Ben, + Bod, + Bos, + Bul, + Cat, + Ceb, + Ces, + Chs, + Chi, + Chr, + Cym, + Dan, + Deu, + Dzo, + Ell, + Enm, + Epo, + Equ, + Est, + Eus, + Fas, + Fin, + Fra, + Frk, + Frm, + Gle, + Glg, + Grc, + Guj, + Hat, + Heb, + Hin, + Hrv, + Hun, + Iku, + Ind, + Isl, + Ita, + Jav, + Jpn, + Kan, + Kat, + Kaz, + Khm, + Kir, + Kor, + Kur, + Lao, + Lat, + Lav, + Lit, + Mal, + Mar, + Mkd, + Mlt, + Msa, + Mya, + Nep, + Nld, + Nor, + Ori, + Osd, + Pan, + Pol, + Por, + Pus, + Ron, + Rus, + San, + Sin, + Slk, + Slv, + Spa, + Sqi, + Srp, + Swa, + Swe, + Syr, + Tam, + Tel, + Tgk, + Tgl, + Tha, + Tir, + Tur, + Uig, + Ukr, + Urd, + Uzb, + Vie, + Yid, +} + +impl OutputFormat { + /// Returns the file extension for the output format if it is a file based format. 
+ pub fn file_extension(&self) -> Option<&OsStr> { + match self { + OutputFormat::Raw => Some(OsStr::new(".raw")), + OutputFormat::Srt => Some(OsStr::new(".srt")), + OutputFormat::Sami => Some(OsStr::new(".smi")), + OutputFormat::Transcript => Some(OsStr::new(".txt")), + OutputFormat::Rcwt => Some(OsStr::new(".bin")), + OutputFormat::Null => None, + OutputFormat::SmpteTt => Some(OsStr::new(".ttml")), + OutputFormat::SpuPng => Some(OsStr::new(".xml")), + OutputFormat::DvdRaw => Some(OsStr::new(".dvdraw")), + OutputFormat::WebVtt => Some(OsStr::new(".vtt")), + OutputFormat::SimpleXml => Some(OsStr::new(".xml")), + OutputFormat::G608 => Some(OsStr::new(".g608")), + OutputFormat::Curl => None, + OutputFormat::Ssa => Some(OsStr::new(".ass")), + OutputFormat::Mcc => Some(OsStr::new(".mcc")), + OutputFormat::Scc => Some(OsStr::new(".scc")), + OutputFormat::Ccd => Some(OsStr::new(".ccd")), + } + } +} + +impl Language { + pub fn to_str(&self) -> &'static str { + match self { + Language::Und => "und", // Undefined + Language::Eng => "eng", + Language::Afr => "afr", + Language::Amh => "amh", + Language::Ara => "ara", + Language::Asm => "asm", + Language::Aze => "aze", + Language::Bel => "bel", + Language::Ben => "ben", + Language::Bod => "bod", + Language::Bos => "bos", + Language::Bul => "bul", + Language::Cat => "cat", + Language::Ceb => "ceb", + Language::Ces => "ces", + Language::Chs => "chs", + Language::Chi => "chi", + Language::Chr => "chr", + Language::Cym => "cym", + Language::Dan => "dan", + Language::Deu => "deu", + Language::Dzo => "dzo", + Language::Ell => "ell", + Language::Enm => "enm", + Language::Epo => "epo", + Language::Equ => "equ", + Language::Est => "est", + Language::Eus => "eus", + Language::Fas => "fas", + Language::Fin => "fin", + Language::Fra => "fra", + Language::Frk => "frk", + Language::Frm => "frm", + Language::Gle => "gle", + Language::Glg => "glg", + Language::Grc => "grc", + Language::Guj => "guj", + Language::Hat => "hat", + Language::Heb 
=> "heb", + Language::Hin => "hin", + Language::Hrv => "hrv", + Language::Hun => "hun", + Language::Iku => "iku", + Language::Ind => "ind", + Language::Isl => "isl", + Language::Ita => "ita", + Language::Jav => "jav", + Language::Jpn => "jpn", + Language::Kan => "kan", + Language::Kat => "kat", + Language::Kaz => "kaz", + Language::Khm => "khm", + Language::Kir => "kir", + Language::Kor => "kor", + Language::Kur => "kur", + Language::Lao => "lao", + Language::Lat => "lat", + Language::Lav => "lav", + Language::Lit => "lit", + Language::Mal => "mal", + Language::Mar => "mar", + Language::Mkd => "mkd", + Language::Mlt => "mlt", + Language::Msa => "msa", + Language::Mya => "mya", + Language::Nep => "nep", + Language::Nld => "nld", + Language::Nor => "nor", + Language::Ori => "ori", + Language::Osd => "osd", + Language::Pan => "pan", + Language::Pol => "pol", + Language::Por => "por", + Language::Pus => "pus", + Language::Ron => "ron", + Language::Rus => "rus", + Language::San => "san", + Language::Sin => "sin", + Language::Slk => "slk", + Language::Slv => "slv", + Language::Spa => "spa", + Language::Sqi => "sqi", + Language::Srp => "srp", + Language::Swa => "swa", + Language::Swe => "swe", + Language::Syr => "syr", + Language::Tam => "tam", + Language::Tel => "tel", + Language::Tgk => "tgk", + Language::Tgl => "tgl", + Language::Tha => "tha", + Language::Tir => "tir", + Language::Tur => "tur", + Language::Uig => "uig", + Language::Ukr => "ukr", + Language::Urd => "urd", + Language::Uzb => "uzb", + Language::Vie => "vie", + Language::Yid => "yid", + } + } +} diff --git a/src/rust/lib_ccxr/src/common/mod.rs b/src/rust/lib_ccxr/src/common/mod.rs new file mode 100644 index 000000000..502820224 --- /dev/null +++ b/src/rust/lib_ccxr/src/common/mod.rs @@ -0,0 +1,21 @@ +//! Provides common types throughout the codebase. +//! +//! # Conversion Guide +//! +//! | From | To | +//! |-------------------------|----------------------------| +//! 
| `ccx_output_format` | [`OutputFormat`] | +//! | `ccx_avc_nal_types` | [`AvcNalType`] | +//! | `ccx_stream_type` | [`StreamType`] | +//! | `ccx_mpeg_descriptor` | [`MpegDescriptor`] | +//! | `ccx_datasource` | [`DataSource`] | +//! | `ccx_stream_mode_enum` | [`StreamMode`] | +//! | `ccx_bufferdata_type` | [`BufferdataType`] | +//! | `ccx_frame_type` | [`FrameType`] | +//! | `ccx_code_type` | [`Codec`], [`SelectCodec`] | +//! | `cdp_section_type` | [`CdpSectionType`] | +//! | `language[NB_LANGUAGE]` | [`Language`] | + +mod constants; + +pub use constants::*; diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 812d1edf2..45ee8e79c 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1 +1,2 @@ +pub mod common; pub mod util; From 49cdd1fd917f86ac59027139a8ad53eb8559bcc8 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 18:38:05 +0530 Subject: [PATCH 06/24] add time units module --- src/lib_ccx/utility.c | 33 + src/rust/Cargo.lock | 79 +++ src/rust/build.rs | 8 +- src/rust/lib_ccxr/Cargo.lock | 165 +++++ src/rust/lib_ccxr/Cargo.toml | 3 + src/rust/lib_ccxr/src/util/mod.rs | 2 + .../lib_ccxr/src/util/time/c_functions.rs | 35 + src/rust/lib_ccxr/src/util/time/mod.rs | 27 + src/rust/lib_ccxr/src/util/time/units.rs | 634 ++++++++++++++++++ src/rust/src/libccxr_exports/mod.rs | 4 + src/rust/src/libccxr_exports/time.rs | 107 +++ 11 files changed, 1096 insertions(+), 1 deletion(-) create mode 100644 src/rust/lib_ccxr/src/util/time/c_functions.rs create mode 100644 src/rust/lib_ccxr/src/util/time/mod.rs create mode 100644 src/rust/lib_ccxr/src/util/time/units.rs create mode 100644 src/rust/src/libccxr_exports/time.rs diff --git a/src/lib_ccx/utility.c b/src/lib_ccx/utility.c index cb3cb6152..a950008c1 100644 --- a/src/lib_ccx/utility.c +++ b/src/lib_ccx/utility.c @@ -9,6 +9,13 @@ int temp_debug = 0; // This is a convenience variable used to enable/disable debug on variable conditions. 
Find references to understand. volatile sig_atomic_t change_filename_requested = 0; +#ifndef DISABLE_RUST +extern void ccxr_timestamp_to_srttime(uint64_t timestamp, char *buffer); +extern void ccxr_timestamp_to_vtttime(uint64_t timestamp, char *buffer); +extern void ccxr_millis_to_date(uint64_t timestamp, char *buffer, enum ccx_output_date_format date_format, char millis_separator); +extern int ccxr_stringztoms(const char *s, struct ccx_boundary_time *bt); +#endif + static uint32_t crc32_table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, @@ -86,6 +93,10 @@ int verify_crc32(uint8_t *buf, int len) int stringztoms(const char *s, struct ccx_boundary_time *bt) { +#ifndef DISABLE_RUST + return ccxr_stringztoms(s, bt); +#endif + unsigned ss = 0, mm = 0, hh = 0; int value = -1; int colons = 0; @@ -130,6 +141,10 @@ int stringztoms(const char *s, struct ccx_boundary_time *bt) } void timestamp_to_srttime(uint64_t timestamp, char *buffer) { +#ifndef DISABLE_RUST + return ccxr_timestamp_to_srttime(timestamp, buffer); +#endif + uint64_t p = timestamp; uint8_t h = (uint8_t)(p / 3600000); uint8_t m = (uint8_t)(p / 60000 - 60 * h); @@ -139,6 +154,10 @@ void timestamp_to_srttime(uint64_t timestamp, char *buffer) } void timestamp_to_vtttime(uint64_t timestamp, char *buffer) { +#ifndef DISABLE_RUST + return ccxr_timestamp_to_vtttime(timestamp, buffer); +#endif + uint64_t p = timestamp; uint8_t h = (uint8_t)(p / 3600000); uint8_t m = (uint8_t)(p / 60000 - 60 * h); @@ -193,6 +212,20 @@ int levenshtein_dist_char(const char *s1, const char *s2, unsigned s1len, unsign void millis_to_date(uint64_t timestamp, char *buffer, enum ccx_output_date_format date_format, char millis_separator) { +#ifndef DISABLE_RUST + switch (date_format) + { + case ODF_NONE: + case ODF_HHMMSS: + case ODF_HHMMSSMS: + case ODF_SECONDS: + case ODF_DATE: + return ccxr_millis_to_date(timestamp, buffer, date_format, millis_separator); + default: + 
fatal(CCX_COMMON_EXIT_BUG_BUG, "Invalid value for date_format in millis_to_date()\n"); + } +#endif + time_t secs; unsigned int millis; char c_temp[80]; diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d42a787f1..4e870dbb7 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -168,6 +168,31 @@ dependencies = [ "vec_map", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", +] + [[package]] name = "dyn_buf" version = "0.1.0" @@ -233,6 +258,12 @@ dependencies = [ "libc", ] +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "lazy_static" version = "1.4.0" @@ -259,6 +290,11 @@ dependencies = [ [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "derive_more", + "thiserror", + "time", +] [[package]] name = "libc" @@ -493,6 +529,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name 
= "rusty_ffmpeg" version = "0.13.1+ffmpeg.6.0" @@ -507,6 +552,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" + [[package]] name = "serde" version = "1.0.188" @@ -617,6 +668,34 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "time" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +dependencies = [ + "deranged", + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +dependencies = [ + "time-core", +] + [[package]] name = "toml" version = "0.5.11" diff --git a/src/rust/build.rs b/src/rust/build.rs index f8ecc04c8..3885248c2 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -26,6 +26,8 @@ fn main() { "lib_cc_decode", "cc_subtitle", "ccx_output_format", + "ccx_boundary_time", + "gop_time_code", ]); #[cfg(feature = "hardsubx_ocr")] @@ -71,4 +73,8 @@ fn main() { .expect("Couldn't write bindings!"); } -const RUSTIFIED_ENUMS: &[&str] = &["dtvcc_(window|pen)_.*", "ccx_output_format"]; +const RUSTIFIED_ENUMS: &[&str] = &[ + "dtvcc_(window|pen)_.*", + "ccx_output_format", + "ccx_output_date_format", +]; diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 7532d4515..5cea85df6 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -2,6 +2,171 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "deranged" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "lib_ccxr" version = "0.1.0" +dependencies = [ + "derive_more", + "thiserror", + "time", +] + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" + +[[package]] +name = "serde" +version = "1.0.188" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "time" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +dependencies = [ + "deranged", + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" + +[[package]] +name = "time-macros" 
+version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +dependencies = [ + "time-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index ca3612505..35d1ce860 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,6 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +thiserror = "1.0.39" +time = { version = "0.3.27", features = ["macros", "formatting"] } +derive_more = "0.99.17" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index daf5935ac..1f40764f9 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -1 +1,3 @@ //! Provides basic utilities used throughout the program. + +pub mod time; diff --git a/src/rust/lib_ccxr/src/util/time/c_functions.rs b/src/rust/lib_ccxr/src/util/time/c_functions.rs new file mode 100644 index 000000000..65d837e35 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/c_functions.rs @@ -0,0 +1,35 @@ +//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. + +use super::*; + +/// Rust equivalent for `timestamp_to_srttime` function in C. Uses Rust-native types as input and +/// output. +pub fn timestamp_to_srttime( + timestamp: Timestamp, + buffer: &mut String, +) -> Result<(), TimestampError> { + timestamp.write_srt_time(buffer) +} + +/// Rust equivalent for `timestamp_to_vtttime` function in C. Uses Rust-native types as input and +/// output. 
+pub fn timestamp_to_vtttime( + timestamp: Timestamp, + buffer: &mut String, +) -> Result<(), TimestampError> { + timestamp.write_vtt_time(buffer) +} + +/// Rust equivalent for `millis_to_date` function in C. Uses Rust-native types as input and output. +pub fn millis_to_date( + timestamp: Timestamp, + buffer: &mut String, + date_format: TimestampFormat, +) -> Result<(), TimestampError> { + timestamp.write_formatted_time(buffer, date_format) +} + +/// Rust equivalent for `stringztoms` function in C. Uses Rust-native types as input and output. +pub fn stringztoms(s: &str) -> Option { + Timestamp::parse_optional_hhmmss_from_str(s).ok() +} diff --git a/src/rust/lib_ccxr/src/util/time/mod.rs b/src/rust/lib_ccxr/src/util/time/mod.rs new file mode 100644 index 000000000..64c67f4d5 --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/mod.rs @@ -0,0 +1,27 @@ +//! Provide types for storing time in different formats +//! +//! Time can be represented in one of following formats: +//! - [`Timestamp`] as number of milliseconds +//! - [`MpegClockTick`] as number of clock ticks (as defined in the MPEG standard) +//! - [`FrameCount`] as number of frames +//! - [`GopTimeCode`] as a GOP time code (as defined in the MPEG standard) +//! +//! # Conversion Guide +//! +//! | From | To | +//! |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------| +//! | `ccx_boundary_time` | [`Option`](Timestamp) | +//! | any fts | [`Timestamp`] | +//! | `ccx_output_date_format` | [`TimestampFormat`] | +//! | any pts | [`MpegClockTick`] | +//! | any frame count | [`FrameCount`] | +//! | `gop_time_code` | [`GopTimeCode`] | +//! | `print_mstime_static` | [`Timestamp::to_hms_millis_time`] | +//! | `gop_accepted` | [`GopTimeCode::did_rollover`] + some additional logic | +//! 
| `calculate_ms_gop_time` | [`GopTimeCode::new`], [`GopTimeCode::timestamp`] | + +mod units; + +pub mod c_functions; + +pub use units::*; diff --git a/src/rust/lib_ccxr/src/util/time/units.rs b/src/rust/lib_ccxr/src/util/time/units.rs new file mode 100644 index 000000000..dba22d3df --- /dev/null +++ b/src/rust/lib_ccxr/src/util/time/units.rs @@ -0,0 +1,634 @@ +use derive_more::{Add, Neg, Sub}; +use std::convert::TryInto; +use std::fmt::Write; +use std::num::TryFromIntError; +use std::time::{SystemTime, UNIX_EPOCH}; +use thiserror::Error; +use time::macros::{datetime, format_description}; +use time::{error::Format, Duration}; + +/// Represents a timestamp in milliseconds. +/// +/// The number can be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub, Neg)] +pub struct Timestamp { + millis: i64, +} + +/// Represents an error during operations on [`Timestamp`]. +#[derive(Error, Debug)] +pub enum TimestampError { + #[error("input parameter given is out of range")] + InputOutOfRangeError, + #[error("timestamp is out of range")] + OutOfRangeError(#[from] TryFromIntError), + #[error("error ocurred during formatting")] + FormattingError(#[from] std::fmt::Error), + #[error("error ocurred during formatting a date")] + DateFormattingError(#[from] Format), + #[error("error ocurred during parsing")] + ParsingError, +} + +/// Represents the different string formats for [`Timestamp`]. +pub enum TimestampFormat { + /// Format: blank string. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let output = timestamp.to_formatted_time(TimestampFormat::None).unwrap(); + /// assert_eq!(output, ""); + /// ``` + None, + + /// Format: `{hour:02}:{minute:02}:{second:02}`. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let output = timestamp.to_formatted_time(TimestampFormat::HHMMSS).unwrap(); + /// assert_eq!(output, "01:48:44"); + /// ``` + HHMMSS, + + /// Format: `{second:02}{millis_separator}{millis:03}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let output = timestamp.to_formatted_time( + /// TimestampFormat::Seconds { + /// millis_separator: ',', + /// }, + /// ).unwrap(); + /// assert_eq!(output, "6524,365"); + /// ``` + Seconds { millis_separator: char }, + + /// Format: + /// `{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}{millis_separator}{millis:03}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// // 11 March 2023 14:53:36.749 in UNIX timestamp. + /// let timestamp = Timestamp::from_millis(1678546416749); + /// let output = timestamp.to_formatted_time( + /// TimestampFormat::Date { + /// millis_separator: ',', + /// }, + /// ).unwrap(); + /// assert_eq!(output, "20230311145336,749"); + /// ``` + Date { millis_separator: char }, + + /// Format: `{hour:02}:{minute:02}:{second:02},{millis:03}`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; + /// let timestamp = Timestamp::from_millis(6524365); + /// let output = timestamp.to_formatted_time(TimestampFormat::HHMMSSFFF).unwrap(); + /// assert_eq!(output, "01:48:44,365"); + /// ``` + HHMMSSFFF, +} + +impl Timestamp { + /// Create a new [`Timestamp`] based on the number of milliseconds since the Unix Epoch. 
+ pub fn now() -> Timestamp { + let duration = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System Time cannot be behind the Unix Epoch"); + + Timestamp { + millis: duration.as_millis() as i64, + } + } + + /// Create a new [`Timestamp`] from number of milliseconds. + pub const fn from_millis(millis: i64) -> Timestamp { + Timestamp { millis } + } + + /// Create a new [`Timestamp`] from hours, minutes, seconds and milliseconds. + /// + /// It will fail if any parameter doesn't follow their respective ranges: + /// + /// | Parameter | Range | + /// |-----------|---------| + /// | minutes | 0 - 59 | + /// | seconds | 0 - 59 | + /// | millis | 0 - 999 | + pub fn from_hms_millis( + hours: u8, + minutes: u8, + seconds: u8, + millis: u16, + ) -> Result { + if minutes < 60 && seconds < 60 && millis < 1000 { + Ok(Timestamp::from_millis( + (hours as i64) * 3_600_000 + + (minutes as i64) * 60_000 + + (seconds as i64) * 1000 + + millis as i64, + )) + } else { + Err(TimestampError::InputOutOfRangeError) + } + } + + /// Returns the number of milliseconds. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.millis(), 6524365); + /// ``` + pub fn millis(&self) -> i64 { + self.millis + } + + /// Returns the number of whole seconds. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.seconds(), 6524); + /// ``` + pub fn seconds(&self) -> i64 { + self.millis / 1000 + } + + /// Returns the number of whole seconds and leftover milliseconds as unsigned integers. + /// + /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.as_sec_millis().unwrap(), (6524, 365)); + /// ``` + pub fn as_sec_millis(&self) -> Result<(u64, u16), TimestampError> { + let millis: u64 = self.millis.try_into()?; + let s = millis / 1000; + let u = millis % 1000; + Ok((s, u as u16)) + } + + /// Returns the time in the form of hours, minutes, seconds and milliseconds as unsigned + /// integers. + /// + /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.as_hms_millis().unwrap(), (1, 48, 44, 365)); + /// ``` + /// ```rust + /// # use lib_ccxr::util::time::{Timestamp, TimestampError}; + /// let timestamp = Timestamp::from_millis(1678546416749); + /// assert!(matches!( + /// timestamp.as_hms_millis().unwrap_err(), + /// TimestampError::OutOfRangeError(_) + /// )); + /// ``` + pub fn as_hms_millis(&self) -> Result<(u8, u8, u8, u16), TimestampError> { + let millis: u64 = self.millis.try_into()?; + let h = millis / 3600000; + let m = millis / 60000 - 60 * h; + let s = millis / 1000 - 3600 * h - 60 * m; + let u = millis - 3600000 * h - 60000 * m - 1000 * s; + if h > 24 { + println!("{}", h) + } + Ok((h.try_into()?, m as u8, s as u8, u as u16)) + } + + /// Fills `output` with the [`Timestamp`] using SRT's timestamp format. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_srt_time(&mut output); + /// assert_eq!(output, "01:48:44,365"); + /// ``` + pub fn write_srt_time(&self, output: &mut String) -> Result<(), TimestampError> { + let (h, m, s, u) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02},{:03}", h, m, s, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using VTT's timestamp format. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_vtt_time(&mut output); + /// assert_eq!(output, "01:48:44.365"); + /// ``` + pub fn write_vtt_time(&self, output: &mut String) -> Result<(), TimestampError> { + let (h, m, s, u) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02}.{:03}", h, m, s, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using + /// "{sign}{hour:02}:{minute:02}:{second:02}{sep}{millis:03}" format, where `sign` can be `-` + /// if time is negetive or blank if it is positive. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_hms_millis_time(&mut output, ':'); + /// assert_eq!(output, "01:48:44:365"); + /// ``` + pub fn write_hms_millis_time( + &self, + output: &mut String, + sep: char, + ) -> Result<(), TimestampError> { + let sign = if self.millis < 0 { "-" } else { "" }; + let timestamp = if self.millis < 0 { -*self } else { *self }; + let (h, m, s, u) = timestamp.as_hms_millis()?; + write!(output, "{}{:02}:{:02}:{:02}{}{:03}", sign, h, m, s, sep, u)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using ctime's format. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// let mut output = String::new(); + /// timestamp.write_ctime(&mut output); + /// assert_eq!(output, "Thu Jan 01 01:48:44 1970"); + /// ``` + pub fn write_ctime(&self, output: &mut String) -> Result<(), TimestampError> { + let (sec, millis) = self.as_sec_millis()?; + let d = datetime!(1970-01-01 0:00) + + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); + let format = format_description!( + "[weekday repr:short] [month repr:short] [day] [hour]:[minute]:[second] [year]" + ); + write!(output, "{}", d.format(&format)?)?; + Ok(()) + } + + /// Fills `output` with the [`Timestamp`] using format specified by [`TimestampFormat`]. + /// + /// See [`TimestampFormat`] for examples. + pub fn write_formatted_time( + &self, + output: &mut String, + format: TimestampFormat, + ) -> Result<(), TimestampError> { + match format { + TimestampFormat::None => Ok(()), + TimestampFormat::HHMMSS => { + let (h, m, s, _) = self.as_hms_millis()?; + write!(output, "{:02}:{:02}:{:02}", h, m, s)?; + Ok(()) + } + TimestampFormat::Seconds { millis_separator } => { + let (sec, millis) = self.as_sec_millis()?; + write!(output, "{}{}{:03}", sec, millis_separator, millis)?; + Ok(()) + } + TimestampFormat::Date { millis_separator } => { + let (sec, millis) = self.as_sec_millis()?; + let d = datetime!(1970-01-01 0:00) + + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); + let format1 = format_description!("[year][month][day][hour][minute][second]"); + let format2 = format_description!("[subsecond digits:3]"); + + write!( + output, + "{}{}{}", + d.format(&format1)?, + millis_separator, + d.format(&format2)? + )?; + Ok(()) + } + TimestampFormat::HHMMSSFFF => self.write_srt_time(output), + } + } + + /// Returns a formatted [`Timestamp`] using SRT's timestamp format. 
+ /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.to_srt_time().unwrap(), "01:48:44,365"); + /// ``` + pub fn to_srt_time(&self) -> Result { + let mut s = String::new(); + self.write_srt_time(&mut s)?; + Ok(s) + } + + /// Returns a formatted [`Timestamp`] using VTT's timestamp format. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.to_vtt_time().unwrap(), "01:48:44.365"); + /// ``` + pub fn to_vtt_time(&self) -> Result { + let mut s = String::new(); + self.write_vtt_time(&mut s)?; + Ok(s) + } + + /// Returns a formatted [`Timestamp`] using + /// "{sign}{hour:02}:{minute:02}:{second:02}{sep}{millis:03}" format, where `sign` can be `-` + /// if time is negetive or blank if it is positive. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.to_hms_millis_time(':').unwrap(), "01:48:44:365"); + /// ``` + pub fn to_hms_millis_time(&self, sep: char) -> Result { + let mut s = String::new(); + self.write_hms_millis_time(&mut s, sep)?; + Ok(s) + } + + /// Returns a formatted [`Timestamp`] using ctime's format. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::from_millis(6524365); + /// assert_eq!(timestamp.to_ctime().unwrap(), "Thu Jan 01 01:48:44 1970"); + /// ``` + pub fn to_ctime(&self) -> Result { + let mut s = String::new(); + self.write_ctime(&mut s)?; + Ok(s) + } + + /// Returns a formatted [`Timestamp`] using format specified by [`TimestampFormat`]. + /// + /// See [`TimestampFormat`] for examples. 
+ pub fn to_formatted_time(&self, format: TimestampFormat) -> Result { + let mut s = String::new(); + self.write_formatted_time(&mut s, format)?; + Ok(s) + } + + /// Creates a [`Timestamp`] by parsing `input` using format `SS` or `MM:SS` or `HH:MM:SS`. + /// + /// # Examples + /// ```rust + /// # use lib_ccxr::util::time::Timestamp; + /// let timestamp = Timestamp::parse_optional_hhmmss_from_str("01:12:45").unwrap(); + /// assert_eq!(timestamp, Timestamp::from_millis(4_365_000)); + /// ``` + pub fn parse_optional_hhmmss_from_str(input: &str) -> Result { + let mut numbers = input + .split(':') + .map(|x| x.parse::().map_err(|_| TimestampError::ParsingError)) + .rev(); + + let mut millis: u64 = 0; + + let seconds: u64 = numbers.next().ok_or(TimestampError::ParsingError)??.into(); + if seconds > 59 { + return Err(TimestampError::InputOutOfRangeError); + } + millis += seconds * 1000; + + if let Some(x) = numbers.next() { + let minutes: u64 = x?.into(); + if minutes > 59 { + return Err(TimestampError::InputOutOfRangeError); + } + millis += 60_000 * minutes; + } + + if let Some(x) = numbers.next() { + let hours: u64 = x?.into(); + millis += 3_600_000 * hours; + } + + if numbers.next().is_some() { + return Err(TimestampError::ParsingError); + } + + Ok(Timestamp::from_millis(millis.try_into()?)) + } +} + +/// Represent the number of clock ticks as defined in Mpeg standard. +/// +/// This number can never be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] +pub struct MpegClockTick(i64); + +impl MpegClockTick { + /// The ratio to convert a clock tick to time duration. + pub const MPEG_CLOCK_FREQ: i64 = 90000; + + /// Create a value representing `ticks` clock ticks. + pub fn new(ticks: i64) -> MpegClockTick { + MpegClockTick(ticks) + } + + /// Returns the number of clock ticks. + pub fn as_i64(&self) -> i64 { + self.0 + } + + /// Converts the clock ticks to its equivalent time duration. 
+ /// + /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`]. + pub fn as_timestamp(&self) -> Timestamp { + Timestamp::from_millis(self.0 / (MpegClockTick::MPEG_CLOCK_FREQ / 1000)) + } +} + +/// Represents the number of frames. +/// +/// This number can never be negetive. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] +pub struct FrameCount(u64); + +impl FrameCount { + /// Create a value representing `frames` number of frames. + pub const fn new(frames: u64) -> FrameCount { + FrameCount(frames) + } + + /// Returns the number of frames. + pub fn as_u64(&self) -> u64 { + self.0 + } + + /// Converts the frames to its equivalent time duration. + /// + /// The conversion ratio used is `fps`. + pub fn as_timestamp(&self, fps: f64) -> Timestamp { + Timestamp::from_millis((self.0 as f64 * 1000.0 / fps) as i64) + } + + /// Converts the frames to its equivalent number of clock ticks. + /// + /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`] and `fps`. + pub fn as_mpeg_clock_tick(&self, fps: f64) -> MpegClockTick { + MpegClockTick::new(((self.0 * MpegClockTick::MPEG_CLOCK_FREQ as u64) as f64 / fps) as i64) + } +} + +/// Represents a GOP Time code as defined in the Mpeg standard. +/// +/// This structure stores its time in the form of hours, minutes, seconds and pictures. This +/// structure also stores its time in the form of a [`Timestamp`] when it is created. This +/// [`Timestamp`] can be modified by [`timestamp_mut`](GopTimeCode::timestamp_mut) and an +/// additional 24 hours may be added on rollover, so it is not necessary that the above two +/// formats refer to the same time. Therefore it is recommended to only rely on the +/// [`Timestamp`] instead of the other format. 
+#[derive(Copy, Clone, Debug)] +pub struct GopTimeCode { + drop_frame: bool, + time_code_hours: u8, + time_code_minutes: u8, + time_code_seconds: u8, + time_code_pictures: u8, + timestamp: Timestamp, +} + +impl GopTimeCode { + /// Create a new [`GopTimeCode`] from the specified parameters. + /// + /// The number of frames or pictures is converted to time duration using `fps`. + /// + /// If `rollover` is true, then an extra of 24 hours will added. + /// + /// It will return [`None`] if any parameter doesn't follow their respective ranges: + /// + /// | Parameter | Range | + /// |-----------|--------| + /// | hours | 0 - 23 | + /// | minutes | 0 - 59 | + /// | seconds | 0 - 59 | + /// | pictures | 0 - 59 | + pub fn new( + drop_frame: bool, + hours: u8, + minutes: u8, + seconds: u8, + pictures: u8, + fps: f64, + rollover: bool, + ) -> Option { + if hours < 24 && minutes < 60 && seconds < 60 && pictures < 60 { + let millis = (1000.0 * (pictures as f64) / fps) as u16; + let extra_hours = if rollover { 24 } else { 0 }; + let timestamp = + Timestamp::from_hms_millis(hours + extra_hours, minutes, seconds, millis) + .expect("The fps given is probably too low"); + + Some(GopTimeCode { + drop_frame, + time_code_hours: hours, + time_code_minutes: minutes, + time_code_seconds: seconds, + time_code_pictures: pictures, + timestamp, + }) + } else { + None + } + } + + /// Returns the GOP time code in its equivalent time duration. + pub fn timestamp(&self) -> Timestamp { + self.timestamp + } + + /// Returns a mutable reference to internal [`Timestamp`]. + pub fn timestamp_mut(&mut self) -> &mut Timestamp { + &mut self.timestamp + } + + /// Check if a rollover has ocurred by comparing the previous [`GopTimeCode`] that is `prev` + /// with the current [`GopTimeCode`]. 
+ pub fn did_rollover(&self, prev: &GopTimeCode) -> bool { + prev.time_code_hours == 23 + && prev.time_code_minutes == 59 + && self.time_code_hours == 0 + && self.time_code_minutes == 0 + } + + /// Constructs a [`GopTimeCode`] from its individual fields. + /// + /// # Safety + /// + /// The fields other than [`Timestamp`] may not be accurate if it is changed using + /// [`timestamp_mut`](GopTimeCode::timestamp_mut). + pub unsafe fn from_raw_parts( + drop_frame: bool, + hours: u8, + minutes: u8, + seconds: u8, + pictures: u8, + timestamp: Timestamp, + ) -> GopTimeCode { + GopTimeCode { + drop_frame, + time_code_hours: hours, + time_code_minutes: minutes, + time_code_seconds: seconds, + time_code_pictures: pictures, + timestamp, + } + } + + /// Returns the individuals field of a [`GopTimeCode`]. + /// + /// # Safety + /// + /// The fields other than [`Timestamp`] may not be accurate if it is changed using + /// [`timestamp_mut`](GopTimeCode::timestamp_mut). + pub unsafe fn as_raw_parts(&self) -> (bool, u8, u8, u8, u8, Timestamp) { + let GopTimeCode { + drop_frame, + time_code_hours, + time_code_minutes, + time_code_seconds, + time_code_pictures, + timestamp, + } = *self; + + ( + drop_frame, + time_code_hours, + time_code_minutes, + time_code_seconds, + time_code_pictures, + timestamp, + ) + } +} diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index e365e0fb2..4b8b7b045 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -1 +1,5 @@ //! Provides C-FFI functions that are direct equivalent of functions available in C. 
+ +mod time; + +pub use time::*; diff --git a/src/rust/src/libccxr_exports/time.rs b/src/rust/src/libccxr_exports/time.rs new file mode 100644 index 000000000..0938e936b --- /dev/null +++ b/src/rust/src/libccxr_exports/time.rs @@ -0,0 +1,107 @@ +#![allow(clippy::useless_conversion)] + +use crate::bindings::*; + +use std::ffi::CStr; +use std::os::raw::{c_char, c_int}; + +use lib_ccxr::util::time::{c_functions as c, *}; + +/// Helper function that converts a Rust-String (`string`) to C-String (`buffer`). +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for `string` to fit. +unsafe fn write_string_into_pointer(buffer: *mut c_char, string: &str) { + let buffer = std::slice::from_raw_parts_mut(buffer as *mut u8, string.len() + 1); + buffer[..string.len()].copy_from_slice(string.as_bytes()); + buffer[string.len()] = b'\0'; +} + +/// Rust equivalent for `timestamp_to_srttime` function in C. Uses C-native types as input and +/// output. +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. +#[no_mangle] +pub unsafe extern "C" fn ccxr_timestamp_to_srttime(timestamp: u64, buffer: *mut c_char) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + + let _ = c::timestamp_to_srttime(timestamp, &mut s); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `timestamp_to_vtttime` function in C. Uses C-native types as input and +/// output. +/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. +#[no_mangle] +pub unsafe extern "C" fn ccxr_timestamp_to_vtttime(timestamp: u64, buffer: *mut c_char) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + + let _ = c::timestamp_to_vtttime(timestamp, &mut s); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `millis_to_date` function in C. Uses C-native types as input and output. 
+/// +/// # Safety +/// +/// `buffer` must have enough allocated space for the formatted `timestamp` to fit. +#[no_mangle] +pub unsafe extern "C" fn ccxr_millis_to_date( + timestamp: u64, + buffer: *mut c_char, + date_format: ccx_output_date_format, + millis_separator: c_char, +) { + let mut s = String::new(); + let timestamp = Timestamp::from_millis(timestamp as i64); + let date_format = match date_format { + ccx_output_date_format::ODF_NONE => TimestampFormat::None, + ccx_output_date_format::ODF_HHMMSS => TimestampFormat::HHMMSS, + ccx_output_date_format::ODF_HHMMSSMS => TimestampFormat::HHMMSSFFF, + ccx_output_date_format::ODF_SECONDS => TimestampFormat::Seconds { + millis_separator: millis_separator as u8 as char, + }, + ccx_output_date_format::ODF_DATE => TimestampFormat::Date { + millis_separator: millis_separator as u8 as char, + }, + }; + + let _ = c::millis_to_date(timestamp, &mut s, date_format); + + write_string_into_pointer(buffer, &s); +} + +/// Rust equivalent for `stringztoms` function in C. Uses C-native types as input and output. +/// +/// # Safety +/// +/// `s` must contain valid utf-8 data and have a nul terminator at the end of the string. 
+#[no_mangle] +pub unsafe extern "C" fn ccxr_stringztoms(s: *const c_char, bt: *mut ccx_boundary_time) -> c_int { + let s = CStr::from_ptr(s).to_str().unwrap(); + + let option_timestamp = c::stringztoms(s); + + if let Some(timestamp) = option_timestamp { + if let Ok((h, m, s, _)) = timestamp.as_hms_millis() { + (*bt).set = 1; + (*bt).hh = h.into(); + (*bt).mm = m.into(); + (*bt).ss = s.into(); + (*bt).time_in_ms = (timestamp.millis() / 1000) * 1000; + return 0; + } + }; + + -1 +} From e60b460171ddc345f83af07065ccb4fd7bb167ed Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 23:23:41 +0530 Subject: [PATCH 07/24] add options module --- src/rust/Cargo.lock | 147 ++++++++++++ src/rust/lib_ccxr/Cargo.lock | 159 ++++++++++++ src/rust/lib_ccxr/Cargo.toml | 2 + src/rust/lib_ccxr/src/common/mod.rs | 2 + src/rust/lib_ccxr/src/common/options.rs | 306 ++++++++++++++++++++++++ src/rust/lib_ccxr/src/hardsubx.rs | 16 ++ src/rust/lib_ccxr/src/lib.rs | 1 + 7 files changed, 633 insertions(+) create mode 100644 src/rust/lib_ccxr/src/common/options.rs create mode 100644 src/rust/lib_ccxr/src/hardsubx.rs diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 22db8f018..30e6139b4 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -224,6 +224,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "find-crate" version = "0.6.3" @@ -233,12 +239,27 @@ dependencies = [ "toml", ] +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + [[package]] name = "glob" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -264,6 +285,26 @@ dependencies = [ "libc", ] +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.9" @@ -299,8 +340,10 @@ version = "0.1.0" dependencies = [ "bitflags 2.4.0", "derive_more", + "num_enum", "thiserror", "time", + "url", ] [[package]] @@ -366,6 +409,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.29", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -408,6 +472,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + [[package]] name = "phf" version = "0.11.2" @@ -456,6 +526,16 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.66" @@ -703,6 +783,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "toml" version = "0.5.11" @@ -712,18 +807,61 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -792,3 +930,12 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "winnow" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +dependencies = [ + "memchr", +] diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 5b3b35d5c..6f9180d03 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -33,6 +33,47 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + 
"percent-encoding", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.9" @@ -45,8 +86,59 @@ version = "0.1.0" dependencies = [ "bitflags", "derive_more", + "num_enum", "thiserror", "time", + "url", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", ] [[package]] @@ -172,8 +264,75 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "winnow" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +dependencies = [ + "memchr", +] diff --git a/src/rust/lib_ccxr/Cargo.toml b/src/rust/lib_ccxr/Cargo.toml index 58b6c91ac..8f46da750 100644 --- a/src/rust/lib_ccxr/Cargo.toml +++ b/src/rust/lib_ccxr/Cargo.toml @@ -6,10 +6,12 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +num_enum = "0.6.1" thiserror = "1.0.39" time = { version = "0.3.27", features = ["macros", "formatting"] } derive_more = "0.99.17" bitflags = "2.3.1" +url = "2.4.0" [features] default = ["enable_sharing", "wtv_debug", "enable_ffmpeg", "debug", "with_libcurl"] diff --git a/src/rust/lib_ccxr/src/common/mod.rs b/src/rust/lib_ccxr/src/common/mod.rs index 502820224..4aaf4bb62 100644 --- a/src/rust/lib_ccxr/src/common/mod.rs +++ b/src/rust/lib_ccxr/src/common/mod.rs @@ -17,5 +17,7 @@ //! 
| `language[NB_LANGUAGE]` | [`Language`] | mod constants; +mod options; pub use constants::*; +pub use options::*; diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs new file mode 100644 index 000000000..162658d5b --- /dev/null +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -0,0 +1,306 @@ +use url::Url; + +use std::path::PathBuf; + +use crate::common::{ + DataSource, Language, OutputFormat, SelectCodec, StreamMode, StreamType, DTVCC_MAX_SERVICES, +}; +use crate::hardsubx::{ColorHue, OcrMode}; +use crate::util::encoding::Encoding; +use crate::util::log::OutputTarget; +use crate::util::time::{Timestamp, TimestampFormat}; + +pub enum DtvccServiceCharset { + Same(String), + Unique(Box<[String; DTVCC_MAX_SERVICES]>), +} + +#[allow(dead_code)] +pub struct DemuxerConfig { + /// Regular TS or M2TS + m2ts: bool, + auto_stream: StreamMode, + + /* subtitle codec type */ + codec: SelectCodec, + nocodec: SelectCodec, + + /// Try to find a stream with captions automatically (no -pn needed) + ts_autoprogram: bool, + ts_allprogram: bool, + /// PID for stream that holds caption information + ts_cappids: Vec, + /// If 1, never mess with the selected PID + ts_forced_cappid: bool, + /// Specific program to process in TS files, if a forced program is given + ts_forced_program: Option, + /// User WANTED stream type (i.e. 
use the stream that has this type) + ts_datastreamtype: StreamType, + /// User selected (forced) stream type + ts_forced_streamtype: StreamType, +} + +#[allow(dead_code)] +pub struct EncoderConfig { + /// Extract 1st (1), 2nd (2) or both fields (12) + extract: u8, + dtvcc_extract: bool, + // If true, output in stderr progress updates so the GUI can grab them + gui_mode_reports: bool, + output_filename: String, + write_format: OutputFormat, + keep_output_closed: bool, + /// Force flush on content write + force_flush: bool, + /// Append mode for output files + append_mode: bool, + /// true if -UCLA used, false if not + ucla: bool, + + encoding: Encoding, + date_format: TimestampFormat, + /// Add dashes (-) before each speaker automatically? + autodash: bool, + /// " Remove spaces at sides? " + trim_subs: bool, + /// FIX CASE? = Fix case? + sentence_cap: bool, + /// Split text into complete sentences and prorate time? + splitbysentence: bool, + + /// If out=curl, where do we send the data to? + #[cfg(feature = "with_libcurl")] + curlposturl: Option, + + /// Censors profane words from subtitles + filter_profanity: bool, + + /// Write a .sem file on file open and delete it on close? + with_semaphore: bool, + /* Credit stuff */ + start_credits_text: String, + end_credits_text: String, + startcreditsnotbefore: Timestamp, // Where to insert start credits, if possible + startcreditsnotafter: Timestamp, + startcreditsforatleast: Timestamp, // How long to display them? + startcreditsforatmost: Timestamp, + endcreditsforatleast: Timestamp, + endcreditsforatmost: Timestamp, + + /// Keeps the settings for generating transcript output files. + /* ccx_encoders_transcript_format transcript_settings; */ + send_to_srv: bool, + /// Set to true when no BOM (Byte Order Mark) should be used for files. + /// Note, this might make files unreadable in windows! 
+ no_bom: bool, + first_input_file: String, + multiple_files: bool, + no_font_color: bool, + no_type_setting: bool, + /// If this is set to true, the stdout will be flushed when data was written to the screen during a process_608 call. + cc_to_stdout: bool, + /// false = CRLF, true = LF + line_terminator_lf: bool, + /// ms to delay (or advance) subs + subs_delay: Timestamp, + program_number: u32, + in_format: u8, + // true if we don't want to OCR bitmaps to add the text as comments in the XML file in spupng + nospupngocr: bool, + + // MCC File + /// true if dropframe frame count should be used. defaults to no drop frame. + force_dropframe: bool, + + // text -> png (text render) + /// The font used to render text if needed (e.g. teletext->spupng) + render_font: PathBuf, + render_font_italics: PathBuf, + + //CEA-708 + services_enabled: [bool; DTVCC_MAX_SERVICES], + services_charsets: DtvccServiceCharset, + // true if only 708 subs extraction is enabled + extract_only_708: bool, +} + +/// Options from user parameters +pub struct Options { + /// Extract 1st, 2nd or both fields. Can be 1, 2 or 12 respectively. + pub extract: u8, + /// Disable roll-up emulation (no duplicate output in generated file) + pub no_rollup: bool, + pub noscte20: bool, + pub webvtt_create_css: bool, + /// Channel we want to dump in srt mode + pub cc_channel: u8, + pub buffer_input: bool, + pub nofontcolor: bool, + pub nohtmlescape: bool, + pub notypesetting: bool, + /// The start of the segment we actually process + pub extraction_start: Timestamp, + /// The end of the segment we actually process + pub extraction_end: Timestamp, + pub print_file_reports: bool, + /// Contains the settings for the 608 decoder. 
+ /* ccx_decoder_608_settings settings_608, */ + /// Same for 708 decoder + /* ccx_decoder_dtvcc_settings settings_dtvcc, */ + /// Is 608 enabled by explicitly using flags(-1,-2,-12) + pub is_608_enabled: bool, + /// Is 708 enabled by explicitly using flags(-svc) + pub is_708_enabled: bool, + + /// Disabled by -ve or --videoedited + pub binary_concat: bool, + /// Use GOP instead of PTS timing (None=do as needed, true=always, false=never) + pub use_gop_as_pts: Option, + /// Replace 0000 with 8080 in HDTV (needed for some cards) + pub fix_padding: bool, + /// If true, output in stderr progress updates so the GUI can grab them + pub gui_mode_reports: bool, + /// If true, suppress the output of the progress to stdout + pub no_progress_bar: bool, + /// Extra capitalization word file + pub sentence_cap_file: PathBuf, + /// 0 -> Not a complete file but a live stream, without timeout + /// + /// None -> A regular file + /// + /// \>0 -> Live stream with a timeout of this value in seconds + pub live_stream: Option, + /// Extra profanity word file + pub filter_profanity_file: PathBuf, + pub messages_target: OutputTarget, + /// If true, add WebVTT X-TIMESTAMP-MAP header + pub timestamp_map: bool, + /* Levenshtein's parameters, for string comparison */ + /// false => don't attempt to correct typos with this algorithm + pub dolevdist: bool, + /// Means 2 fails or less is "the same" + pub levdistmincnt: u8, + /// Means 10% or less is also "the same" + pub levdistmaxpct: u8, + /// Look for captions in all packets when everything else fails + pub investigate_packets: bool, + /// Disable pruning of padding cc blocks + pub fullbin: bool, + /// Disable syncing + pub nosync: bool, + /// If true, use PID=1003, process specially and so on + pub hauppauge_mode: bool, + /// Fix broken Windows 7 conversion + pub wtvconvertfix: bool, + pub wtvmpeg2: bool, + /// Use myth-tv mpeg code? 
false=no, true=yes, None=auto + pub auto_myth: Option, + /* MP4 related stuff */ + /// Process the video track even if a CC dedicated track exists. + pub mp4vidtrack: bool, + /// If true, extracts chapters (if present), from MP4 files. + pub extract_chapters: bool, + /* General settings */ + /// Force the use of pic_order_cnt_lsb in AVC/H.264 data streams + pub usepicorder: bool, + /// 1 = full output. 2 = live output. 3 = both + pub xmltv: u8, + /// interval in seconds between writing xmltv output files in live mode + pub xmltvliveinterval: Timestamp, + /// interval in seconds between writing xmltv full file output + pub xmltvoutputinterval: Timestamp, + pub xmltvonlycurrent: bool, + pub keep_output_closed: bool, + /// Force flush on content write + pub force_flush: bool, + /// Append mode for output files + pub append_mode: bool, + /// true if UCLA used, false if not + pub ucla: bool, + /// true if ticker text style burned in subs, false if not + pub tickertext: bool, + /// true if burned-in subtitles to be extracted + pub hardsubx: bool, + /// true if both burned-in and not burned in need to be extracted + pub hardsubx_and_common: bool, + /// The name of the language stream for DVB + pub dvblang: Option, + /// The name of the .traineddata file to be loaded with tesseract + pub ocrlang: PathBuf, + /// The Tesseract OEM mode, could be 0 (default), 1 or 2 + pub ocr_oem: u8, + /// How to quantize the bitmap before passing it to tesseract + /// (false = no quantization at all, true = CCExtractor's internal) + pub ocr_quantmode: bool, + /// The name of the language stream for MKV + pub mkvlang: Option, + /// If true, the video stream will be processed even if we're using a different one for subtitles. 
+ pub analyze_video_stream: bool, + + /*HardsubX related stuff*/ + pub hardsubx_ocr_mode: OcrMode, + pub hardsubx_min_sub_duration: Timestamp, + pub hardsubx_detect_italics: bool, + pub hardsubx_conf_thresh: f64, + pub hardsubx_hue: ColorHue, + pub hardsubx_lum_thresh: f64, + + /// Keeps the settings for generating transcript output files. + /* ccx_encoders_transcript_format transcript_settings; */ + pub date_format: TimestampFormat, + pub send_to_srv: bool, + pub write_format: OutputFormat, + pub write_format_rewritten: bool, + pub use_ass_instead_of_ssa: bool, + pub use_webvtt_styling: bool, + + /* Networking */ + pub udpsrc: Option, + pub udpaddr: Option, + /// Non-zero => Listen for UDP packets on this port, no files. + pub udpport: u16, + pub tcpport: Option, + pub tcp_password: Option, + pub tcp_desc: Option, + pub srv_addr: Option, + pub srv_port: Option, + /// Do NOT set time automatically? + pub noautotimeref: bool, + /// Files, stdin or network + pub input_source: DataSource, + + pub output_filename: Option, + + /// List of files to process + pub inputfile: Option>, + pub demux_cfg: DemuxerConfig, + pub enc_cfg: EncoderConfig, + /// ms to delay (or advance) subs + pub subs_delay: Timestamp, + /// If true, the stdout will be flushed when data was written to the screen during a process_608 call. + pub cc_to_stdout: bool, + /// If true, the PES Header will be printed to console (debugging purposes) + pub pes_header_to_stdout: bool, + /// If true, the program will ignore PTS jumps. 
+ /// Sometimes this parameter is required for DVB subs with > 30s pause time + pub ignore_pts_jumps: bool, + pub multiprogram: bool, + pub out_interval: i32, + pub segment_on_key_frames_only: bool, + + #[cfg(feature = "with_libcurl")] + pub curlposturl: Option, + + //CC sharing + #[cfg(feature = "enable_sharing")] + pub sharing_enabled: bool, + #[cfg(feature = "enable_sharing")] + pub sharing_url: Option, + #[cfg(feature = "enable_sharing")] + //Translating + pub translate_enabled: bool, + #[cfg(feature = "enable_sharing")] + pub translate_langs: Option, + #[cfg(feature = "enable_sharing")] + pub translate_key: Option, +} diff --git a/src/rust/lib_ccxr/src/hardsubx.rs b/src/rust/lib_ccxr/src/hardsubx.rs new file mode 100644 index 000000000..19352ebed --- /dev/null +++ b/src/rust/lib_ccxr/src/hardsubx.rs @@ -0,0 +1,16 @@ +pub enum OcrMode { + Frame, + Letter, + Word, +} + +pub enum ColorHue { + White, + Yellow, + Green, + Cyan, + Blue, + Magenta, + Red, + Custom(f64), +} diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index 45ee8e79c..bb3785121 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1,2 +1,3 @@ pub mod common; +pub mod hardsubx; pub mod util; From f8eb1a75fd3713cc2180a31ea94fa8d8f15f4b77 Mon Sep 17 00:00:00 2001 From: Elbert Ronnie Date: Sun, 27 Aug 2023 23:44:58 +0530 Subject: [PATCH 08/24] add teletext module --- src/rust/lib_ccxr/src/lib.rs | 2 + src/rust/lib_ccxr/src/subtitle.rs | 97 ++ src/rust/lib_ccxr/src/teletext.rs | 1656 +++++++++++++++++++++++++++++ 3 files changed, 1755 insertions(+) create mode 100644 src/rust/lib_ccxr/src/subtitle.rs create mode 100644 src/rust/lib_ccxr/src/teletext.rs diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index bb3785121..2b6e5db18 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ b/src/rust/lib_ccxr/src/lib.rs @@ -1,3 +1,5 @@ pub mod common; pub mod hardsubx; +pub mod subtitle; +pub mod teletext; pub mod util; diff --git 
a/src/rust/lib_ccxr/src/subtitle.rs b/src/rust/lib_ccxr/src/subtitle.rs new file mode 100644 index 000000000..4f442a87e --- /dev/null +++ b/src/rust/lib_ccxr/src/subtitle.rs @@ -0,0 +1,97 @@ +//! Provides types to represent different kinds of subtitle data in a unified format. +//! +//! NOTE: This module is incomplete and a lot of work is still left. + +use crate::common::Language; +use crate::util::encoding::EncodedString; +use crate::util::time::Timestamp; + +/// Represents the different formats in which subtitle data could be stored. +/// +/// NOTE: Heavy Work in Progress. +pub enum SubtitleData { + Dvb { + /* bitmap: Bitmap, */ + lang: Language, + is_eod: bool, + time_out: Timestamp, + }, + Dvd { + /* bitmap: Bitmap, */ + lang: Language, + }, + Xds(/* XdsScreen */), + Eia608(/* Eia608Screen */), + Text(EncodedString), + Raw(Vec), +} + +/// Represents a single subtitle instance on a screen with timing info. +pub struct Subtitle { + /// The subtitle data. + data: SubtitleData, + + /// The start time for this subtitle. + start_time: Timestamp, + + /// The end time of this subtitle. + end_time: Timestamp, + + /// A flag to tell that decoder has given output. + got_output: bool, + info: Option, + mode: String, +} + +impl Subtitle { + /// Create a new Text Subtitle. + pub fn new_text( + string: EncodedString, + start_time: Timestamp, + end_time: Timestamp, + info: Option, + mode: String, + ) -> Subtitle { + Subtitle { + data: SubtitleData::Text(string), + start_time, + end_time, + got_output: true, + info, + mode, + } + } + + /// Return a reference to the subtitle data. + pub fn data(&self) -> &SubtitleData { + &self.data + } + + /// Return the start time of this subtitle. + pub fn start_time(&self) -> Timestamp { + self.start_time + } + + /// Return the end time of this subtitle. + pub fn end_time(&self) -> Timestamp { + self.end_time + } + + /// Check if decoder has given output. 
+ pub fn got_output(&self) -> bool { + self.got_output + } + + /// Update the state if decoder has given output. + pub fn set_got_output(&mut self, val: bool) { + self.got_output = val; + } + + pub fn info(&self) -> Option<&str> { + self.info.as_deref() + } + + pub fn mode(&self) -> &str { + &self.mode + } +} diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs new file mode 100644 index 000000000..097600e1d --- /dev/null +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -0,0 +1,1656 @@ +//! Provides types to extract subtitles from Teletext streams. +//! +//! # Conversion Guide +//! +//! | From | To | +//! |-----------------------------|-------------------------------------------------------------| +//! | `MAX_TLT_PAGES` | `MAX_TLT_PAGES` | +//! | `teletext_page_t` | [`TeletextPage`] | +//! | `s_states` | `TeletextState` | +//! | `transmission_mode_t` | `TransmissionMode` | +//! | `data_unit_t` | [`DataUnit`] | +//! | `TeletextCtx` | [`TeletextContext`] | +//! | `TTXT_COLOURS` | `TELETEXT_COLORS` | +//! | `teletext_packet_payload_t` | [`TeletextPacketPayload`] | +//! | `ccx_s_teletext_config` | [`TeletextConfig`] | +//! | `s_primary_charset` | [`G0Charset`] | +//! | `g0_charsets_type` | [`G0CharsetType`] | +//! | `ENTITIES` | `map_entities` | +//! | `LAT_RUS` | `map_latin_to_russian` | +//! | `unham_8_4` | [`decode_hamming_8_4`] | +//! | `unham_24_18` | [`decode_hamming_24_18`] | +//! | `set_g0_charset` | [`G0Charset::set_charset`], [`G0CharsetType::from_triplet`] | +//! | `remap_g0_charset` | `G0Charset::remap_g0_charset` | +//! | `telx_to_ucs2` | [`G0Charset::ucs2_char`] | +//! | `bcd_page_to_int` | [`TeletextPageNumber::bcd_page_to_u16`] | +//! | `telx_case_fix` | `TeletextContext::telx_case_fix` | +//! | `telxcc_dump_prev_page` | `TeletextContext::telxcc_dump_prev_page` | +//! | `process_page` | `TeletextContext::process_page` | +//! | `process_telx_packet` | [`TeletextContext::process_telx_packet`] | +//! 
| `telxcc_init` | [`TeletextContext::new`] | +//! | `telxcc_close` | [`TeletextContext::close`] | +//! | `fuzzy_memcmp` | [`fuzzy_cmp`] | + +use num_enum::{IntoPrimitive, TryFromPrimitive}; +use std::cell::Cell; +use std::fmt; +use std::fmt::Write; +use std::sync::RwLock; + +use crate::common::OutputFormat; +use crate::subtitle::Subtitle; +use crate::util::encoding::{Ucs2Char, Ucs2String}; +use crate::util::log::{debug, info, logger, DebugMessageFlag}; +use crate::util::time::{Timestamp, TimestampFormat}; +use crate::util::{decode_hamming_24_18, decode_hamming_8_4, levenshtein, parity}; + +/// UTC referential value. +/// +/// It has different meanings based on its value: +/// - `u64::MAX` means don't use UNIX +/// - 0 means use current system time as reference +/// - +1 means use a specific reference +pub static UTC_REFVALUE: RwLock = RwLock::new(u64::MAX); + +const MAX_TLT_PAGES: usize = 1000; + +const TELETEXT_COLORS: [&str; 8] = [ + "#000000", // black + "#ff0000", // red + "#00ff00", // green + "#ffff00", // yellow + "#0000ff", // blue + "#ff00ff", // magenta + "#00ffff", // cyan + "#ffffff", // white +]; + +const LATIN_TO_RUSSIAN: [(Ucs2Char, char); 63] = [ + (65, 'А'), + (66, 'Б'), + (87, 'В'), + (71, 'Г'), + (68, 'Д'), + (69, 'Е'), + (86, 'Ж'), + (90, 'З'), + (73, 'И'), + (74, 'Й'), + (75, 'К'), + (76, 'Л'), + (77, 'М'), + (78, 'Н'), + (79, 'О'), + (80, 'П'), + (82, 'Р'), + (83, 'С'), + (84, 'Т'), + (85, 'У'), + (70, 'Ф'), + (72, 'Х'), + (67, 'Ц'), + (238, 'Ч'), + (235, 'Ш'), + (249, 'Щ'), + (35, 'Ы'), + (88, 'Ь'), + (234, 'Э'), + (224, 'Ю'), + (81, 'Я'), + (97, 'а'), + (98, 'б'), + (119, 'в'), + (103, 'г'), + (100, 'д'), + (101, 'е'), + (118, 'ж'), + (122, 'з'), + (105, 'и'), + (106, 'й'), + (107, 'к'), + (108, 'л'), + (109, 'м'), + (110, 'н'), + (111, 'о'), + (112, 'п'), + (114, 'р'), + (115, 'с'), + (116, 'т'), + (117, 'у'), + (102, 'ф'), + (104, 'х'), + (99, 'ц'), + (231, 'ч'), + (226, 'ш'), + (251, 'щ'), + (121, 'ъ'), + (38, 'ы'), + (120, 'ь'), + (244, 
'э'), + (232, 'ю'), + (113, 'я'), +]; + +const ENTITIES: [(u8, &str); 3] = [(b'<', "<"), (b'>', ">"), (b'&', "&")]; + +/// Represents a Teletext Packet. +pub struct TeletextPacketPayload { + _clock_in: u8, // clock run in + _framing_code: u8, // framing code, not needed, ETSI 300 706: const 0xe4 + address: [u8; 2], + data: [u8; 40], +} + +/// Represents the possible kinds of G0 character set. +#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +#[repr(u8)] +pub enum G0CharsetType { + Latin = 0, + Cyrillic1 = 1, + Cyrillic2 = 2, + Cyrillic3 = 3, + Greek = 4, + Arabic = 5, + Hebrew = 6, +} + +impl G0CharsetType { + /// Create a [`G0CharsetType`] from a Teletext triplet. + pub fn from_triplet(value: u32) -> G0CharsetType { + // ETS 300 706, Table 32 + if (value & 0x3c00) == 0x1000 { + match value & 0x0380 { + 0x0000 => G0CharsetType::Cyrillic1, + 0x0200 => G0CharsetType::Cyrillic2, + 0x0280 => G0CharsetType::Cyrillic3, + _ => G0CharsetType::Latin, + } + } else { + G0CharsetType::Latin + } + } +} + +/// Represents the bitcode representation of a [`G0LatinNationalSubset`]. +/// +/// It can be easily constructed from a [`u8`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct G0LatinNationalSubsetId(u8); + +impl From for G0LatinNationalSubsetId { + fn from(value: u8) -> G0LatinNationalSubsetId { + G0LatinNationalSubsetId(value) + } +} + +impl fmt::Display for G0LatinNationalSubsetId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "0x{:1x}.{:1x}", self.0 >> 3, self.0 & 0x07) + } +} + +/// Represents the possible kinds of National Option Subset for G0 Latin character set. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +#[repr(u8)] +pub enum G0LatinNationalSubset { + English = 0x0, + French = 0x1, + SwedishFinnishHungarian = 0x2, + CzechSlovak = 0x3, + German = 0x4, + PortugueseSpanish = 0x5, + Italian = 0x6, + Rumanian = 0x7, + Polish = 0x8, + Turkish = 0x9, + SerbianCroatianSlovenian = 0xa, + Estonian = 0xb, + LettishLithuanian = 0xc, +} + +impl fmt::Display for G0LatinNationalSubset { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + G0LatinNationalSubset::English => "English", + G0LatinNationalSubset::French => "French", + G0LatinNationalSubset::SwedishFinnishHungarian => "Swedish, Finnish, Hungarian", + G0LatinNationalSubset::CzechSlovak => "Czech, Slovak", + G0LatinNationalSubset::German => "German", + G0LatinNationalSubset::PortugueseSpanish => "Portuguese, Spanish", + G0LatinNationalSubset::Italian => "Italian", + G0LatinNationalSubset::Rumanian => "Rumanian", + G0LatinNationalSubset::Polish => "Polish", + G0LatinNationalSubset::Turkish => "Turkish", + G0LatinNationalSubset::SerbianCroatianSlovenian => "Serbian, Croatian, Slovenian", + G0LatinNationalSubset::Estonian => "Estonian", + G0LatinNationalSubset::LettishLithuanian => "Lettish, Lithuanian", + } + ) + } +} + +impl G0LatinNationalSubset { + // array positions where chars from G0_LATIN_NATIONAL_SUBSETS are injected into G0[LATIN] + const G0_LATIN_NATIONAL_SUBSETS_POSITIONS: [usize; 13] = [ + 0x03, 0x04, 0x20, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x5b, 0x5c, 0x5d, 0x5e, + ]; + + // ETS 300 706, chapter 15.2, table 32: Function of Default G0 and G2 Character Set Designation + // and National Option Selection bits in packets X/28/0 Format 1, X/28/4, M/29/0 and M/29/4 + + // Latin National Option Sub-sets + const G0_LATIN_NATIONAL_SUBSETS: [[Ucs2Char; 13]; 13] = [ + // English + [ + 0x00a3, 0x0024, 0x0040, 0x00ab, 0x00bd, 0x00bb, 0x005e, 0x0023, 0x002d, 0x00bc, 0x00a6, + 0x00be, 0x00f7, + ], + // 
French + [ + 0x00e9, 0x00ef, 0x00e0, 0x00eb, 0x00ea, 0x00f9, 0x00ee, 0x0023, 0x00e8, 0x00e2, 0x00f4, + 0x00fb, 0x00e7, + ], + // Swedish, Finnish, Hungarian + [ + 0x0023, 0x00a4, 0x00c9, 0x00c4, 0x00d6, 0x00c5, 0x00dc, 0x005f, 0x00e9, 0x00e4, 0x00f6, + 0x00e5, 0x00fc, + ], + // Czech, Slovak + [ + 0x0023, 0x016f, 0x010d, 0x0165, 0x017e, 0x00fd, 0x00ed, 0x0159, 0x00e9, 0x00e1, 0x011b, + 0x00fa, 0x0161, + ], + // German + [ + 0x0023, 0x0024, 0x00a7, 0x00c4, 0x00d6, 0x00dc, 0x005e, 0x005f, 0x00b0, 0x00e4, 0x00f6, + 0x00fc, 0x00df, + ], + // Portuguese, Spanish + [ + 0x00e7, 0x0024, 0x00a1, 0x00e1, 0x00e9, 0x00ed, 0x00f3, 0x00fa, 0x00bf, 0x00fc, 0x00f1, + 0x00e8, 0x00e0, + ], + // Italian + [ + 0x00a3, 0x0024, 0x00e9, 0x00b0, 0x00e7, 0x00bb, 0x005e, 0x0023, 0x00f9, 0x00e0, 0x00f2, + 0x00e8, 0x00ec, + ], + // Rumanian + [ + 0x0023, 0x00a4, 0x0162, 0x00c2, 0x015e, 0x0102, 0x00ce, 0x0131, 0x0163, 0x00e2, 0x015f, + 0x0103, 0x00ee, + ], + // Polish + [ + 0x0023, 0x0144, 0x0105, 0x017b, 0x015a, 0x0141, 0x0107, 0x00f3, 0x0119, 0x017c, 0x015b, + 0x0142, 0x017a, + ], + // Turkish + [ + 0x0054, 0x011f, 0x0130, 0x015e, 0x00d6, 0x00c7, 0x00dc, 0x011e, 0x0131, 0x015f, 0x00f6, + 0x00e7, 0x00fc, + ], + // Serbian, Croatian, Slovenian + [ + 0x0023, 0x00cb, 0x010c, 0x0106, 0x017d, 0x0110, 0x0160, 0x00eb, 0x010d, 0x0107, 0x017e, + 0x0111, 0x0161, + ], + // Estonian + [ + 0x0023, 0x00f5, 0x0160, 0x00c4, 0x00d6, 0x017e, 0x00dc, 0x00d5, 0x0161, 0x00e4, 0x00f6, + 0x017e, 0x00fc, + ], + // Lettish, Lithuanian + [ + 0x0023, 0x0024, 0x0160, 0x0117, 0x0119, 0x017d, 0x010d, 0x016b, 0x0161, 0x0105, 0x0173, + 0x017e, 0x012f, + ], + ]; + + // References to the G0_LATIN_NATIONAL_SUBSETS array + const G0_LATIN_NATIONAL_SUBSETS_MAP: [u8; 56] = [ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x01, 0x02, 0x03, 0x04, 0xff, 0x06, + 0xff, 0x00, 0x01, 0x02, 0x09, 0x04, 0x05, 0x06, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, + 0xff, 0x07, 0xff, 0xff, 0x0b, 0x03, 0x04, 0xff, 0x0c, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x09, 0xff, 0xff, 0xff, 0xff, + ]; + + /// Create a [`G0LatinNationalSubset`] from its bitcode representation stored as a [`G0LatinNationalSubsetId`]. + pub fn from_subset_id(c: G0LatinNationalSubsetId) -> Option { + let p = *Self::G0_LATIN_NATIONAL_SUBSETS_MAP.get(c.0 as usize)?; + if p == 0xff { + None + } else { + Some(p.try_into().ok()?) + } + } + + /// Return an Iterator containing the position of replacement and the character to replace when + /// changing the National Option Subset for G0 Latin character set. + fn replacement_pos_and_char(&self) -> impl Iterator { + let lang_index: u8 = (*self).into(); + Self::G0_LATIN_NATIONAL_SUBSETS_POSITIONS + .into_iter() + .zip(Self::G0_LATIN_NATIONAL_SUBSETS[lang_index as usize].into_iter()) + } +} + +fn map_latin_to_russian(latin_char: Ucs2Char) -> Option { + LATIN_TO_RUSSIAN + .iter() + .find(|&&(latin, _)| latin == latin_char) + .map(|&(_, russian)| russian) +} + +fn map_entities(c: Ucs2Char) -> Option<&'static str> { + let c: u8 = if c >= 0x80 { + return None; + } else { + c as u8 + }; + match ENTITIES.iter().find(|&&(symbol, _)| symbol == c) { + Some(&(_, entity)) => Some(entity), + None => None, + } +} + +/// A collective type to manage the entire G0 character set. +/// +/// This type is used to change the G0 charecter set and its Latin National Option Subset. This +/// type also manages the subset priority between M/29 and X/28 packets. 
+pub struct G0Charset { + g0_charset: Box<[[Ucs2Char; 96]; 5]>, + charset_type: G0CharsetType, + primary_charset_current: G0LatinNationalSubsetId, + primary_charset_g0_m29: Option, + primary_charset_g0_x28: Option, + verbose_debug: bool, +} + +impl G0Charset { + fn new(verbose_debug: bool) -> G0Charset { + let charset = Box::new([ + [ + // Latin G0 Primary Set + 0x0020, 0x0021, 0x0022, 0x00a3, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, + 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x00ab, + 0x00bd, 0x00bb, 0x005e, 0x0023, 0x002d, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, + 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, + 0x007a, 0x00bc, 0x00a6, 0x00be, 0x00f7, 0x007f, + ], + [ + // Cyrillic G0 Primary Set - Option 1 - Serbian/Croatian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x3200, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x0427, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0408, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x040c, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0412, 0x0403, 0x0409, 0x040a, 0x0417, 0x040b, + 0x0416, 0x0402, 0x0428, 0x040f, 0x0447, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0428, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x042c, 0x0440, 0x0441, 0x0442, 0x0443, 0x0432, 0x0423, 0x0429, 0x042a, + 0x0437, 0x042b, 0x0436, 0x0422, 0x0448, 
0x042f, + ], + [ + // Cyrillic G0 Primary Set - Option 2 - Russian/Bulgarian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x044b, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x042f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x042a, 0x0417, 0x0428, + 0x042d, 0x0429, 0x0427, 0x042b, 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x044a, + 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044b, + ], + [ + // Cyrillic G0 Primary Set - Option 3 - Ukrainian + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x00ef, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 0x003e, 0x003f, 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x042f, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, 0x042c, 0x0049, 0x0417, 0x0428, + 0x042d, 0x0429, 0x0427, 0x00cf, 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, + 0x0444, 0x0433, 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, 0x044c, 0x0069, + 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x00ff, + ], + [ + // Greek G0 Primary Set + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, + 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, + 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, + 
0x003e, 0x003f, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, + 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, + 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, + 0x03ac, 0x03ad, 0x03ae, 0x03af, 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, + 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, + 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, + 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x03cf, + ], + ]); + + G0Charset { + g0_charset: charset, + charset_type: G0CharsetType::Latin, + primary_charset_current: G0LatinNationalSubsetId(0), + primary_charset_g0_m29: None, + primary_charset_g0_x28: None, + verbose_debug, + } + } + + /// Return the equivalent UCS-2 character for the given teletext character based on the current + /// character set. + pub fn ucs2_char(&self, telx_char: u8) -> Ucs2Char { + if parity(telx_char) { + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Unrecoverable data error; PARITY({:02x})\n", telx_char); + return 0x20; + } + + let r: Ucs2Char = (telx_char & 0x7f).into(); + if r >= 0x20 { + self.g0_charset[self.charset_type as usize][r as usize - 0x20] + } else { + r + } + } + + /// Change the G0 character set. + pub fn set_charset(&mut self, charset: G0CharsetType) { + self.charset_type = charset; + } + + /// Set the G0 Latin National Option Subset for M/29 packets. + /// + /// It will change the mapping only if a Subset for X/28 is not set since X/28 has a higher + /// priority than M/29. This method will do nothing if the G0 charset is not + /// [`G0CharsetType::Latin`]. + pub fn set_g0_m29_latin_subset(&mut self, subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_m29 = Some(subset); + if self.primary_charset_g0_x28.is_none() { + self.remap_g0_charset(subset); + } + } + } + + /// Set the G0 Latin National Option Subset for X/28 packets. 
+ /// + /// This method will do nothing if the G0 charset is not [`G0CharsetType::Latin`]. + pub fn set_g0_x28_latin_subset(&mut self, subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_x28 = Some(subset); + self.remap_g0_charset(subset); + } + } + + /// Remove the G0 Latin National Option Subset for X/28 packets. + /// + /// It will change the mapping back to the one set for M/29. If the subset for M/29 is not set + /// then `extra_subset` will be used in place of it. This method will do nothing if the G0 + /// charset is not [`G0CharsetType::Latin`]. + pub fn remove_g0_x28_latin_subset(&mut self, extra_subset: G0LatinNationalSubsetId) { + if self.charset_type == G0CharsetType::Latin { + self.primary_charset_g0_x28 = None; + let subset = self.primary_charset_g0_m29.unwrap_or(extra_subset); + self.remap_g0_charset(subset); + } + } + + /// Replace the characters in `g0_charset` based on the given G0 National Option Subset in + /// `subset`. + fn remap_g0_charset(&mut self, subset: G0LatinNationalSubsetId) { + if self.primary_charset_current != subset { + if let Some(s) = G0LatinNationalSubset::from_subset_id(subset) { + for (pos, ch) in s.replacement_pos_and_char() { + self.g0_charset[0x00][pos] = ch; + } + if self.verbose_debug { + eprintln!("- Using G0 Latin National Subset ID {} ({})", subset, s); + } + self.primary_charset_current = subset; + } else { + eprintln!( + "- G0 Latin National Subset ID {} is not implemented", + subset + ); + } + } + } +} + +/// A collective type to manage the entire G0 character set. 
+pub struct G2Charset;
+
+impl G2Charset {
+    // One 96-entry table per supported G2 set; only the Latin one is filled in so far.
+    // Cells holding 0x0000 have no character assigned in this table.
+    // NOTE(review): presumably indexed by teletext code minus 0x20, like the G0 tables - confirm
+    // against the caller.
+    const G2_CHARSET: [[Ucs2Char; 96]; 1] = [
+        [
+            // Latin G2 Supplementary Set
+            0x0020, 0x00a1, 0x00a2, 0x00a3, 0x0024, 0x00a5, 0x0023, 0x00a7, 0x00a4, 0x2018, 0x201c,
+            0x00ab, 0x2190, 0x2191, 0x2192, 0x2193, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00d7, 0x00b5,
+            0x00b6, 0x00b7, 0x00f7, 0x2019, 0x201d, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x0020,
+            0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0306, 0x0307, 0x0308, 0x0000, 0x030a, 0x0327,
+            0x005f, 0x030b, 0x0328, 0x030c, 0x2015, 0x00b9, 0x00ae, 0x00a9, 0x2122, 0x266a, 0x20ac,
+            0x2030, 0x03B1, 0x0000, 0x0000, 0x0000, 0x215b, 0x215c, 0x215d, 0x215e, 0x03a9, 0x00c6,
+            0x0110, 0x00aa, 0x0126, 0x0000, 0x0132, 0x013f, 0x0141, 0x00d8, 0x0152, 0x00ba, 0x00de,
+            0x0166, 0x014a, 0x0149, 0x0138, 0x00e6, 0x0111, 0x00f0, 0x0127, 0x0131, 0x0133, 0x0140,
+            0x0142, 0x00f8, 0x0153, 0x00df, 0x00fe, 0x0167, 0x014b, 0x0020,
+        ],
+        // [ // Cyrillic G2 Supplementary Set
+        // ],
+        // [ // Greek G2 Supplementary Set
+        // ],
+        // [ // Arabic G2 Supplementary Set
+        // ]
+    ];
+
+    // Pre-composed accented letters: one row per diacritic (named in the row comments), one
+    // column per base letter in the order given by the header comment below. 0x0000 means no
+    // pre-composed character is listed for that combination; the two unnamed rows are all zero.
+    // NOTE(review): the row order looks like it follows the diacritic cells of G2_CHARSET -
+    // confirm against the code that indexes this table.
+    const G2_ACCENTS: [[Ucs2Char; 52]; 15] = [
+        // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z
+        [
+            // grave
+            0x00c0, 0x0000, 0x0000, 0x0000, 0x00c8, 0x0000, 0x0000, 0x0000, 0x00cc, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x00d2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d9, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x00e0, 0x0000, 0x0000, 0x0000, 0x00e8, 0x0000, 0x0000,
+            0x0000, 0x00ec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f2, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x00f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // acute
+            0x00c1, 0x0000, 0x0106, 0x0000, 0x00c9, 0x0000, 0x0000, 0x0000, 0x00cd, 0x0000, 0x0000,
+            0x0139, 0x0000, 0x0143, 0x00d3, 0x0000, 0x0000, 0x0154, 0x015a, 0x0000, 0x00da, 0x0000,
+            0x0000, 0x0000, 0x00dd, 0x0179, 0x00e1, 0x0000, 0x0107, 0x0000, 0x00e9, 0x0000, 0x0123,
+            0x0000, 0x00ed, 0x0000, 0x0000, 0x013a, 0x0000, 0x0144, 0x00f3, 0x0000, 0x0000, 0x0155,
+            0x015b, 0x0000, 0x00fa, 0x0000, 0x0000, 0x0000, 0x00fd, 0x017a,
+        ],
+        [
+            // circumflex
+            0x00c2, 0x0000, 0x0108, 0x0000, 0x00ca, 0x0000, 0x011c, 0x0124, 0x00ce, 0x0134, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x00d4, 0x0000, 0x0000, 0x0000, 0x015c, 0x0000, 0x00db, 0x0000,
+            0x0174, 0x0000, 0x0176, 0x0000, 0x00e2, 0x0000, 0x0109, 0x0000, 0x00ea, 0x0000, 0x011d,
+            0x0125, 0x00ee, 0x0135, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f4, 0x0000, 0x0000, 0x0000,
+            0x015d, 0x0000, 0x00fb, 0x0000, 0x0175, 0x0000, 0x0177, 0x0000,
+        ],
+        [
+            // tilde
+            0x00c3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0128, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x00d1, 0x00d5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0168, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x00e3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0129, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f1, 0x00f5, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // macron
+            0x0100, 0x0000, 0x0000, 0x0000, 0x0112, 0x0000, 0x0000, 0x0000, 0x012a, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x014c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016a, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0101, 0x0000, 0x0000, 0x0000, 0x0113, 0x0000, 0x0000,
+            0x0000, 0x012b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x014d, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x016b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // breve
+            0x0102, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011e, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016c, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0103, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x011f,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x016d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // dot
+            0x0000, 0x0000, 0x010a, 0x0000, 0x0116, 0x0000, 0x0120, 0x0000, 0x0130, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x017b, 0x0000, 0x0000, 0x010b, 0x0000, 0x0117, 0x0000, 0x0121,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x017c,
+        ],
+        [
+            // umlaut
+            0x00c4, 0x0000, 0x0000, 0x0000, 0x00cb, 0x0000, 0x0000, 0x0000, 0x00cf, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x00d6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00dc, 0x0000,
+            0x0000, 0x0000, 0x0178, 0x0000, 0x00e4, 0x0000, 0x0000, 0x0000, 0x00eb, 0x0000, 0x0000,
+            0x0000, 0x00ef, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00f6, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x00fc, 0x0000, 0x0000, 0x0000, 0x00ff, 0x0000,
+        ],
+        [
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // ring
+            0x00c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x016e, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x00e5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x016f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // cedilla
+            0x0000, 0x0000, 0x00c7, 0x0000, 0x0000, 0x0000, 0x0122, 0x0000, 0x0000, 0x0000, 0x0136,
+            0x013b, 0x0000, 0x0145, 0x0000, 0x0000, 0x0000, 0x0156, 0x015e, 0x0162, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00e7, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0137, 0x013c, 0x0000, 0x0146, 0x0000, 0x0000, 0x0000, 0x0157,
+            0x015f, 0x0163, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // double acute
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0150, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0170, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0151, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0171, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // ogonek
+            0x0104, 0x0000, 0x0000, 0x0000, 0x0118, 0x0000, 0x0000, 0x0000, 0x012e, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0172, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x0105, 0x0000, 0x0000, 0x0000, 0x0119, 0x0000, 0x0000,
+            0x0000, 0x012f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0173, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        ],
+        [
+            // caron
+            0x0000, 0x0000, 0x010c, 0x010e, 0x011a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+            0x013d, 0x0000, 0x0147, 0x0000, 0x0000, 0x0000, 0x0158, 0x0160, 0x0164, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x017d, 0x0000, 0x0000, 0x010d, 0x010f, 0x011b, 0x0000, 0x0000,
+            0x0000, 0x0000, 0x0000, 0x0000, 0x013e, 0x0000, 0x0148, 0x0000, 0x0000, 0x0000, 0x0159,
+            0x0161, 0x0165, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x017e,
+        ],
+    ];
+}
+
+/// Represents a Teletext Page Number in its bitcode representation.
+/// +/// It can be easily contructed from a [`u16`]. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct TeletextPageNumber(u16); + +impl From for TeletextPageNumber { + fn from(value: u16) -> TeletextPageNumber { + TeletextPageNumber(value) + } +} + +impl fmt::Display for TeletextPageNumber { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:03x}", self.0) + } +} + +impl TeletextPageNumber { + /// Return the magazine and packet bits. + pub fn magazine(&self) -> u8 { + ((self.0 >> 8) & 0x0f) as u8 + } + + /// Return the page bits. + pub fn page(&self) -> u8 { + (self.0 & 0xff) as u8 + } + + /// Return the page number after converting the page bits in bcd format to normal integer. + pub fn bcd_page_to_u16(&self) -> u16 { + ((self.0 & 0xf00) >> 8) * 100 + ((self.0 & 0xf0) >> 4) * 10 + (self.0 & 0xf) + } +} + +/// Represents a teletext page along with timing information. +pub struct TeletextPage { + show_timestamp: Timestamp, // show at timestamp (in ms) + hide_timestamp: Timestamp, // hide at timestamp (in ms) + text: [[Ucs2Char; 40]; 25], // 25 lines x 40 cols (1 screen/page) of wide chars + g2_char_present: [[bool; 40]; 25], // false-Supplementary G2 character set NOT used at this position, true-Supplementary G2 character set used at this position + tainted: bool, // true = text variable contains any data +} + +/// Settings required to contruct a [`TeletextContext`]. +#[allow(dead_code)] +pub struct TeletextConfig { + /// should telxcc logging be verbose? 
+ verbose: bool, + /// teletext page containing cc we want to filter + page: Cell, + /// Page selected by user, which MIGHT be different to `page` depending on autodetection stuff + user_page: u16, + /// false = Don't attempt to correct errors + dolevdist: bool, + /// Means 2 fails or less is "the same" + levdistmincnt: u8, + /// Means 10% or less is also "the same" + levdistmaxpct: u8, + /// Segment we actually process + extraction_start: Option, + /// Segment we actually process + extraction_end: Option, + write_format: OutputFormat, + date_format: TimestampFormat, + /// Do NOT set time automatically? + noautotimeref: bool, + nofontcolor: bool, + nohtmlescape: bool, + latrusmap: bool, +} + +/// Represents the possible states that [`TeletextContext`] can be in. +struct TeletextState { + programme_info_processed: bool, + pts_initialized: bool, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum TransmissionMode { + Parallel, + Serial, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum DataUnit { + EbuTeletextNonsubtitle = 0x02, + EbuTeletextSubtitle = 0x03, + EbuTeletextInverted = 0x0c, + Vps = 0xc3, + ClosedCaptions = 0xc5, +} + +/// A type used for decoding Teletext subtitles. +#[allow(dead_code)] +pub struct TeletextContext<'a> { + config: &'a TeletextConfig, + seen_sub_page: [bool; MAX_TLT_PAGES], + global_timestamp: Timestamp, + + // Current and previous page buffers. This is the output written to file when + // the time comes. + page_buffer: TeletextPage, + page_buffer_prev: Option, + page_buffer_cur: Option, + // Current and previous page compare strings. This is plain text (no colors, + // tags, etc) in UCS2 (fixed length), so we can compare easily. + ucs2_buffer_prev: Option, + ucs2_buffer_cur: Option, + // Buffer timestamp + prev_hide_timestamp: Timestamp, + prev_show_timestamp: Timestamp, + // subtitle type pages bitmap, 2048 bits = 2048 possible pages in teletext (excl. 
subpages) + cc_map: [u8; 256], + // last timestamp computed + last_timestamp: Timestamp, + states: TeletextState, + // FYI, packet counter + tlt_packet_counter: u32, + // teletext transmission mode + transmission_mode: TransmissionMode, + // flag indicating if incoming data should be processed or ignored + receiving_data: bool, + + using_pts: Option, + delta: Timestamp, + t0: Timestamp, + + sentence_cap: bool, //Set to 1 if -sc is passed + new_sentence: bool, + + g0_charset: G0Charset, + + de_ctr: i32, // a keeps count of packets with flag subtitle ON and data packets +} + +impl<'a> TeletextContext<'a> { + /// Create a new [`TeletextContext`] from parameters in [`TeletextConfig`]. + pub fn new(config: &'a TeletextConfig) -> TeletextContext<'a> { + TeletextContext { + config, + seen_sub_page: [false; MAX_TLT_PAGES], + global_timestamp: Timestamp::from_millis(0), + page_buffer: TeletextPage { + show_timestamp: Timestamp::from_millis(0), + hide_timestamp: Timestamp::from_millis(0), + text: [[0; 40]; 25], + g2_char_present: [[false; 40]; 25], + tainted: false, + }, + page_buffer_prev: None, + page_buffer_cur: None, + ucs2_buffer_prev: None, + ucs2_buffer_cur: None, + prev_hide_timestamp: Timestamp::from_millis(0), + prev_show_timestamp: Timestamp::from_millis(0), + cc_map: [0; 256], + last_timestamp: Timestamp::from_millis(0), + states: TeletextState { + programme_info_processed: false, + pts_initialized: false, + }, + tlt_packet_counter: 0, + transmission_mode: TransmissionMode::Serial, + receiving_data: false, + using_pts: None, + delta: Timestamp::from_millis(0), + t0: Timestamp::from_millis(0), + sentence_cap: false, + new_sentence: false, + g0_charset: G0Charset::new(config.verbose), + de_ctr: 0, + } + } + + /// Fix the case for the sentences stored in `page_buffer_cur`. + /// + /// This method will convert the first character of a sentence to uppercase and the rest of the + /// characters to lowercase. 
+ fn telx_case_fix(&mut self) { + let page_buffer_cur = match self.page_buffer_cur.as_mut() { + None => return, + Some(p) => p, + }; + + let mut fixed_string = String::with_capacity(page_buffer_cur.len()); + + let mut prev_newline = false; + + fixed_string.extend(page_buffer_cur.chars().enumerate().map(|(index, c)| { + let r = match c { + ' ' | '-' => c, // case 0x89: // This is a transparent space + '.' | '?' | '!' | ':' => { + self.new_sentence = true; + c + } + _ => { + let result = if self.new_sentence && index != 0 && !prev_newline { + c.to_ascii_uppercase() + } else if !self.new_sentence && index != 0 && !prev_newline { + c.to_ascii_lowercase() + } else { + c + }; + + self.new_sentence = false; + result + } + }; + + prev_newline = c == '\n'; + r + })); + + *page_buffer_cur = fixed_string; + + todo!() // TODO: telx_correct_case(page_buffer_cur); + } + + /// Reset the page buffers and return its contents in the form of a [`Subtitle`]. + /// + /// It moves `page_buffer_cur` to `page_buffer_prev` and `ucs2_buffer_cur` to + /// `ucs2_buffer_prev`. 
+ fn telxcc_dump_prev_page(&mut self) -> Option { + let page_buffer_prev = self.page_buffer_prev.take()?; + + self.page_buffer_prev = self.page_buffer_cur.take(); + self.ucs2_buffer_prev = self.ucs2_buffer_cur.take(); + + Some(Subtitle::new_text( + page_buffer_prev.into(), + self.prev_show_timestamp, + self.prev_hide_timestamp, + Some(format!("{:03}", self.config.page.get().bcd_page_to_u16())), + "TLT".into(), + )) + } + + fn process_page(&mut self) -> Option { + let mut ans = None; + + if self + .config + .extraction_start + .map(|start| self.page_buffer.hide_timestamp < start) + .unwrap_or(false) + || self + .config + .extraction_end + .map(|end| self.page_buffer.show_timestamp > end) + .unwrap_or(false) + || self.page_buffer.hide_timestamp.millis() == 0 + { + return None; + } + + #[cfg(feature = "debug")] + { + for (index, row) in self.page_buffer.text.iter().enumerate().skip(1) { + print!("DEUBG[{:02}]: ", index); + for c in row { + print!("{:3x} ", c) + } + println!(); + } + println!(); + } + + // optimization: slicing column by column -- higher probability we could find boxed area start mark sooner + let mut page_is_empty = true; + for col in 0..40 { + for row in 1..25 { + if self.page_buffer.text[row][col] == 0x0b { + page_is_empty = false; + break; + } + } + + if !page_is_empty { + break; + } + } + + if page_is_empty { + return None; + } + + if self.page_buffer.show_timestamp > self.page_buffer.hide_timestamp { + self.page_buffer.hide_timestamp = self.page_buffer.show_timestamp; + } + + let mut line_count: u8 = 0; + let mut time_reported = false; + let timecode_show = self + .page_buffer + .show_timestamp + .to_srt_time() + .expect("could not format to SRT time"); + let timecode_hide = self + .page_buffer + .hide_timestamp + .to_srt_time() + .expect("could not format to SRT time"); + + // process data + for row in 1..25 { + let mut col_start: usize = 40; + let col_stop: usize = 40; + + let mut box_open: bool = false; + for col in 0..40 { + // replace all 
0/B and 0/A characters with 0/20, as specified in ETS 300 706: + // Unless operating in "Hold Mosaics" mode, each character space occupied by a + // spacing attribute is displayed as a SPACE + if self.page_buffer.text[row][col] == 0x0b { + // open the box + if col_start == 40 { + col_start = col; + line_count += 1; + } else { + self.page_buffer.text[row][col] = 0x20; + } + box_open = true; + } else if self.page_buffer.text[row][col] == 0xa { + // close the box + self.page_buffer.text[row][col] = 0x20; + box_open = false; + } + // characters between 0xA and 0xB shouldn't be displayed + // page->text[row][col] > 0x20 added to preserve color information + else if !box_open && col_start < 40 && self.page_buffer.text[row][col] > 0x20 { + self.page_buffer.text[row][col] = 0x20; + } + } + // line is empty + if col_start > 39 { + continue; + } + + // ETS 300 706, chapter 12.2: Alpha White ("Set-After") - Start-of-row default condition. + // used for colour changes _before_ start box mark + // white is default as stated in ETS 300 706, chapter 12.2 + // black(0), red(1), green(2), yellow(3), blue(4), magenta(5), cyan(6), white(7) + let mut foreground_color: u8 = 0x7; + let mut font_tag_opened = false; + + if line_count > 1 { + match self.config.write_format { + OutputFormat::Transcript => { + self.page_buffer_cur.get_or_insert("".into()).push(' ') + } + OutputFormat::SmpteTt => self + .page_buffer_cur + .get_or_insert("".into()) + .push_str("
"), + _ => self + .page_buffer_cur + .get_or_insert("".into()) + .push_str("\r\n"), + } + } + + if logger().expect("could not access logger").is_gui_mode() { + if !time_reported { + let timecode_show_mmss = &timecode_show[3..8]; + let timecode_hide_mmss = &timecode_hide[3..8]; + // Note, only MM:SS here as we need to save space in the preview window + eprint!( + "###TIME###{}-{}\n###SUBTITLES###", + timecode_show_mmss, timecode_hide_mmss + ); + time_reported = true; + } else { + eprint!("###SUBTITLE###"); + } + } + + for col in 0..=col_stop { + // v is just a shortcut + let mut v = self.page_buffer.text[row][col]; + + if col < col_start && v <= 0x7 { + foreground_color = v as u8; + } + + if col == col_start && (foreground_color != 0x7) && !self.config.nofontcolor { + let buffer = self.page_buffer_cur.get_or_insert("".into()); + let _ = write!( + buffer, + "", + TELETEXT_COLORS[foreground_color as usize] + ); + font_tag_opened = true; + } + + if col >= col_start { + if v <= 0x7 { + // ETS 300 706, chapter 12.2: Unless operating in "Hold Mosaics" mode, + // each character space occupied by a spacing attribute is displayed as a SPACE. 
+ if !self.config.nofontcolor { + if font_tag_opened { + self.page_buffer_cur + .get_or_insert("".into()) + .push_str(""); + font_tag_opened = false; + } + + self.page_buffer_cur.get_or_insert("".into()).push(' '); + // black is considered as white for telxcc purpose + // telxcc writes tags only when needed + if (v > 0x0) && (v < 0x7) { + let buffer = self.page_buffer_cur.get_or_insert("".into()); + let _ = write!( + buffer, + "", + TELETEXT_COLORS[v as usize] + ); + font_tag_opened = true; + } + } else { + v = 0x20; + } + } + + if v >= 0x20 { + self.ucs2_buffer_cur + .get_or_insert(Default::default()) + .as_mut_vec() + .push(v); + + if !font_tag_opened && self.config.latrusmap { + if let Some(ch) = map_latin_to_russian(v) { + v = 0; + self.page_buffer_cur.get_or_insert("".into()).push(ch); + } + } + + // translate some chars into entities, if in colour mode + if !self.config.nofontcolor && !self.config.nohtmlescape { + if let Some(s) = map_entities(v) { + v = 0; + self.page_buffer_cur.get_or_insert("".into()).push_str(s); + } + } + } + + if v >= 0x20 { + let u = char::from_u32(v as u32).unwrap(); + self.page_buffer_cur.get_or_insert("".into()).push(u); + if logger().expect("could not access logger").is_gui_mode() { + // For now we just handle the easy stuff + eprint!("{}", u); + } + } + } + } + + // no tag will left opened! 
+ if !self.config.nofontcolor && font_tag_opened { + self.page_buffer_cur + .get_or_insert("".into()) + .push_str(""); + } + + if logger().expect("could not access logger").is_gui_mode() { + eprintln!(); + } + } + + if self.sentence_cap { + self.telx_case_fix() + } + + match self.config.write_format { + OutputFormat::Transcript | OutputFormat::SmpteTt => { + let page_buffer_prev_len = + self.page_buffer_prev.as_ref().map(|s| s.len()).unwrap_or(0); + if page_buffer_prev_len == 0 { + self.prev_show_timestamp = self.page_buffer.show_timestamp; + } + + let page_buffer_prev = self.page_buffer_prev.as_deref().unwrap_or(""); + let page_buffer_cur = self.page_buffer_cur.as_deref().unwrap_or(""); + let ucs2_buffer_prev = self + .ucs2_buffer_prev + .as_ref() + .map(|x| &x.as_vec()[..]) + .unwrap_or(&[]); + let ucs2_buffer_cur = self + .ucs2_buffer_cur + .as_ref() + .map(|x| &x.as_vec()[..]) + .unwrap_or(&[]); + + if page_buffer_prev_len == 0 + || (self.config.dolevdist + && fuzzy_cmp( + page_buffer_prev, + page_buffer_cur, + ucs2_buffer_prev, + ucs2_buffer_cur, + self.config.levdistmaxpct, + self.config.levdistmincnt, + )) + { + // If empty previous buffer, we just start one with the + // current page and do nothing. Wait until we see more. + self.page_buffer_prev = self.page_buffer_cur.take(); + self.ucs2_buffer_prev = self.ucs2_buffer_cur.take(); + self.prev_hide_timestamp = self.page_buffer.hide_timestamp; + } else { + // OK, the old and new buffer don't match. So write the old + ans = self.telxcc_dump_prev_page(); + self.prev_hide_timestamp = self.page_buffer.hide_timestamp; + self.prev_show_timestamp = self.page_buffer.show_timestamp; + } + } + _ => { + ans = Some(Subtitle::new_text( + self.page_buffer_cur.take().unwrap().into(), + self.page_buffer.show_timestamp, + self.page_buffer.hide_timestamp + Timestamp::from_millis(1), + None, + "TLT".into(), + )); + } + } + + // Also update GUI... 
+ + self.page_buffer_cur = None; + ans + } + + /// Process the teletext `packet` and append the extracted subtitles in `subtitles`. + pub fn process_telx_packet( + &mut self, + data_unit: DataUnit, + packet: &TeletextPacketPayload, + timestamp: Timestamp, + subtitles: &mut Vec, + ) { + // variable names conform to ETS 300 706, chapter 7.1.2 + let address = (decode_hamming_8_4(packet.address[1]).unwrap() << 4) + | decode_hamming_8_4(packet.address[0]).unwrap(); + let mut m = address & 0x7; + if m == 0 { + m = 8; + } + let y = (address >> 3) & 0x1f; + let designation_code = if y > 25 { + decode_hamming_8_4(packet.data[0]).unwrap() + } else { + 0x00 + }; + + if y == 0 { + // CC map + let i = (decode_hamming_8_4(packet.data[1]).unwrap() << 4) + | decode_hamming_8_4(packet.data[0]).unwrap(); + let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap() & 0x08) >> 3; + self.cc_map[i as usize] |= flag_subtitle << (m - 1); + + let flag_subtitle = flag_subtitle != 0; + + if flag_subtitle && (i < 0xff) { + let mut thisp = ((m as u32) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u32) << 4) + | (decode_hamming_8_4(packet.data[0]).unwrap() as u32); + let t1 = format!("{:x}", thisp); // Example: 1928 -> 788 + thisp = t1.parse().unwrap(); + if !self.seen_sub_page[thisp as usize] { + self.seen_sub_page[thisp as usize] = true; + info!( + "\rNotice: Teletext page with possible subtitles detected: {:03}\n", + thisp + ); + } + } + if (self.config.page.get() == 0.into()) && flag_subtitle && (i < 0xff) { + self.config.page.replace( + (((m as u16) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4) + | (decode_hamming_8_4(packet.data[0]).unwrap() as u16)) + .into(), + ); + info!("- No teletext page specified, first received suitable page is {}, not guaranteed\n", self.config.page.get()); + } + + // Page number and control bits + let page_number: TeletextPageNumber = (((m as u16) << 8) + | ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4) + | 
(decode_hamming_8_4(packet.data[0]).unwrap() as u16)) + .into(); + let charset = ((decode_hamming_8_4(packet.data[7]).unwrap() & 0x08) + | (decode_hamming_8_4(packet.data[7]).unwrap() & 0x04) + | (decode_hamming_8_4(packet.data[7]).unwrap() & 0x02)) + >> 1; + // let flag_suppress_header = decode_hamming_8_4(packet.data[6]).unwrap() & 0x01; + // let flag_inhibit_display = (decode_hamming_8_4(packet.data[6]).unwrap() & 0x08) >> 3; + + // ETS 300 706, chapter 9.3.1.3: + // When set to '1' the service is designated to be in Serial mode and the transmission of a page is terminated + // by the next page header with a different page number. + // When set to '0' the service is designated to be in Parallel mode and the transmission of a page is terminated + // by the next page header with a different page number but the same magazine number. + // The same setting shall be used for all page headers in the service. + // ETS 300 706, chapter 7.2.1: Page is terminated by and excludes the next page header packet + // having the same magazine address in parallel transmission mode, or any magazine address in serial transmission mode. 
+ self.transmission_mode = if decode_hamming_8_4(packet.data[7]).unwrap() & 0x01 == 0 { + TransmissionMode::Parallel + } else { + TransmissionMode::Serial + }; + + // FIXME: Well, this is not ETS 300 706 kosher, however we are interested in EBU_TELETEXT_SUBTITLE only + if (self.transmission_mode == TransmissionMode::Parallel) + && (data_unit != DataUnit::EbuTeletextSubtitle) + && !(self.de_ctr != 0 && flag_subtitle && self.receiving_data) + { + return; + } + + if self.receiving_data + && (((self.transmission_mode == TransmissionMode::Serial) + && (page_number.page() != self.config.page.get().page())) + || ((self.transmission_mode == TransmissionMode::Parallel) + && (page_number.page() != self.config.page.get().page()) + && (m == self.config.page.get().magazine()))) + { + self.receiving_data = false; + if !(self.de_ctr != 0 && flag_subtitle) { + return; + } + } + + // Page transmission is terminated, however now we are waiting for our new page + if page_number != self.config.page.get() + && !(self.de_ctr != 0 && flag_subtitle && self.receiving_data) + { + return; + } + + // Now we have the begining of page transmission; if there is page_buffer pending, process it + if self.page_buffer.tainted { + // Convert telx to UCS-2 before processing + for yt in 1..=23 { + for it in 0..40 { + if self.page_buffer.text[yt][it] != 0x00 + && !self.page_buffer.g2_char_present[yt][it] + { + self.page_buffer.text[yt][it] = self + .g0_charset + .ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap()); + } + } + } + // it would be nice, if subtitle hides on previous video frame, so we contract 40 ms (1 frame @25 fps) + self.page_buffer.hide_timestamp = timestamp - Timestamp::from_millis(40); + if self.page_buffer.hide_timestamp > timestamp { + self.page_buffer.hide_timestamp = Timestamp::from_millis(0); + } + if let Some(sub) = self.process_page() { + subtitles.push(sub); + } + self.de_ctr = 0; + } + + self.page_buffer.show_timestamp = timestamp; + self.page_buffer.hide_timestamp = 
Timestamp::from_millis(0); + self.page_buffer.text = [[0; 40]; 25]; + self.page_buffer.g2_char_present = [[false; 40]; 25]; + self.page_buffer.tainted = false; + self.receiving_data = false; + if self.g0_charset.charset_type == G0CharsetType::Latin { + // G0 Character National Option Sub-sets selection required only for Latin Character Sets + self.g0_charset.remove_g0_x28_latin_subset(charset.into()) + } + /* + // I know -- not needed; in subtitles we will never need disturbing teletext page status bar + // displaying tv station name, current time etc. + if (flag_suppress_header == NO) { + for (uint8_t i = 14; i < 40; i++) page_buffer.text[y][i] = telx_to_ucs2(packet->data[i]); + //page_buffer.tainted = YES; + } + */ + } else if (m == self.config.page.get().magazine()) + && (1..=23).contains(&y) + && self.receiving_data + { + // ETS 300 706, chapter 9.4.1: Packets X/26 at presentation Levels 1.5, 2.5, 3.5 are used for addressing + // a character location and overwriting the existing character defined on the Level 1 page + // ETS 300 706, annex B.2.2: Packets with Y = 26 shall be transmitted before any packets with Y = 1 to Y = 25; + // so page_buffer.text[y][i] may already contain any character received + // in frame number 26, skip original G0 character + for i in 0..40 { + if self.page_buffer.text[y as usize][i] == 0x00 { + self.page_buffer.text[y as usize][i] = packet.data[i] as Ucs2Char; + } + } + self.page_buffer.tainted = true; + self.de_ctr -= 1; + } else if (m == self.config.page.get().magazine()) && (y == 26) && self.receiving_data { + // ETS 300 706, chapter 12.3.2: X/26 definition + let mut x26_row: u8 = 0; + + let mut triplets: [u32; 13] = [0; 13]; + for (j, triplet) in triplets.iter_mut().enumerate() { + *triplet = decode_hamming_24_18( + ((packet.data[j * 3 + 3] as u32) << 16) + | ((packet.data[j * 3 + 2] as u32) << 8) + | (packet.data[j * 3 + 1] as u32), + ) + .unwrap_or(0xffffffff); + } + + for triplet in triplets { + // invalid data (HAM24/18 
uncorrectable error detected), skip group + if triplet == 0xffffffff { + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Unrecoverable data error; UNHAM24/18()={:04x}\n", triplet); + continue; + } + + let data = ((triplet & 0x3f800) >> 11) as u8; + let mode = ((triplet & 0x7c0) >> 6) as u8; + let address = (triplet & 0x3f) as u8; + let row_address_group = (40..=63).contains(&address); + + // ETS 300 706, chapter 12.3.1, table 27: set active position + if (mode == 0x04) && row_address_group { + x26_row = address - 40; + if x26_row == 0 { + x26_row = 24; + } + } + + // ETS 300 706, chapter 12.3.1, table 27: termination marker + if (0x11..=0x1f).contains(&mode) && row_address_group { + break; + } + + // ETS 300 706, chapter 12.3.1, table 27: character from G2 set + if (mode == 0x0f) && !row_address_group && data > 31 { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_CHARSET[0][data as usize - 0x20]; + self.page_buffer.g2_char_present[x26_row as usize][address as usize] = true; + } + + // ETS 300 706 v1.2.1, chapter 12.3.4, Table 29: G0 character without diacritical mark (display '@' instead of '*') + if (mode == 0x10) && !row_address_group && data == 64 { + // check for @ symbol + self.g0_charset.remap_g0_charset(0.into()); + self.page_buffer.text[x26_row as usize][address as usize] = 0x40; + } + + // ETS 300 706, chapter 12.3.1, table 27: G0 character with diacritical mark + if (0x11..=0x1f).contains(&mode) && !row_address_group { + // A - Z + if (65..=90).contains(&data) { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_ACCENTS[mode as usize - 0x11][data as usize - 65]; + } + // a - z + else if (97..=122).contains(&data) { + self.page_buffer.text[x26_row as usize][address as usize] = + G2Charset::G2_ACCENTS[mode as usize - 0x11][data as usize - 71]; + // other + } else { + self.page_buffer.text[x26_row as usize][address as usize] = + self.g0_charset.ucs2_char(data); + } + 
self.page_buffer.g2_char_present[x26_row as usize][address as usize] = true; + } + } + } else if (m == self.config.page.get().magazine()) && (y == 28) && self.receiving_data { + // TODO: + // ETS 300 706, chapter 9.4.7: Packet X/28/4 + // Where packets 28/0 and 28/4 are both transmitted as part of a page, packet 28/0 takes precedence over 28/4 for all but the colour map entry coding. + if (designation_code == 0) || (designation_code == 4) { + // ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1 + // ETS 300 706, chapter 9.4.7: Packet X/28/4 + if let Some(triplet0) = decode_hamming_24_18( + ((packet.data[3] as u32) << 16) + | ((packet.data[2] as u32) << 8) + | packet.data[1] as u32, + ) { + // ETS 300 706, chapter 9.4.2: Packet X/28/0 Format 1 only + if (triplet0 & 0x0f) == 0x00 { + // ETS 300 706, Table 32 + self.g0_charset + .set_charset(G0CharsetType::from_triplet(triplet0)); // Deciding G0 Character Set + self.g0_charset + .set_g0_x28_latin_subset((((triplet0 & 0x3f80) >> 7) as u8).into()) + } + } else { + // invalid data (HAM24/18 uncorrectable error detected), skip group + debug!(msg_type = DebugMessageFlag::TELETEXT; "! Unrecoverable data error; UNHAM24/18()={:04x}\n", 0xffffffffu32); + } + } + } else if (m == self.config.page.get().magazine()) && (y == 29) { + // TODO: + // ETS 300 706, chapter 9.5.1 Packet M/29/0 + // Where M/29/0 and M/29/4 are transmitted for the same magazine, M/29/0 takes precedence over M/29/4. 
+ if (designation_code == 0) || (designation_code == 4) { + // ETS 300 706, chapter 9.5.1: Packet M/29/0 + // ETS 300 706, chapter 9.5.3: Packet M/29/4 + if let Some(triplet0) = decode_hamming_24_18( + ((packet.data[3] as u32) << 16) + | ((packet.data[2] as u32) << 8) + | packet.data[1] as u32, + ) { + // ETS 300 706, table 11: Coding of Packet M/29/0 + // ETS 300 706, table 13: Coding of Packet M/29/4 + if (triplet0 & 0xff) == 0x00 { + self.g0_charset + .set_charset(G0CharsetType::from_triplet(triplet0)); + self.g0_charset + .set_g0_m29_latin_subset((((triplet0 & 0x3f80) >> 7) as u8).into()) + } + } else { + // invalid data (HAM24/18 uncorrectable error detected), skip group + debug!(msg_type = DebugMessageFlag::TELETEXT; "! Unrecoverable data error; UNHAM24/18()={:04x}\n", 0xffffffffu32); + } + } + } else if (m == 8) && (y == 30) { + // ETS 300 706, chapter 9.8: Broadcast Service Data Packets + if !self.states.programme_info_processed { + // ETS 300 706, chapter 9.8.1: Packet 8/30 Format 1 + if decode_hamming_8_4(packet.data[0]) + .map(|x| x < 2) + .unwrap_or(false) + { + let mut t: u32 = 0; + info!("- Programme Identification Data = "); + for i in 20..40 { + let c = self.g0_charset.ucs2_char(packet.data[i]); + // strip any control codes from PID, eg. TVP station + if c < 0x20 { + continue; + } + + info!("{}", char::from_u32(c as u32).unwrap()); + } + info!("\n"); + + // OMG! ETS 300 706 stores timestamp in 7 bytes in Modified Julian Day in BCD format + HH:MM:SS in BCD format + // + timezone as 5-bit count of half-hours from GMT with 1-bit sign + // In addition all decimals are incremented by 1 before transmission. 
+ // 1st step: BCD to Modified Julian Day + t += ((packet.data[10] & 0x0f) as u32) * 10000; + t += (((packet.data[11] & 0xf0) >> 4) as u32) * 1000; + t += ((packet.data[11] & 0x0f) as u32) * 100; + t += (((packet.data[12] & 0xf0) >> 4) as u32) * 10; + t += (packet.data[12] & 0x0f) as u32; + t -= 11111; + // 2nd step: conversion Modified Julian Day to unix timestamp + t = (t - 40587) * 86400; + // 3rd step: add time + t += 3600 + * (((packet.data[13] & 0xf0) >> 4) as u32 * 10 + + (packet.data[13] & 0x0f) as u32); + t += 60 + * (((packet.data[14] & 0xf0) >> 4) as u32 * 10 + + (packet.data[14] & 0x0f) as u32); + t += ((packet.data[15] & 0xf0) >> 4) as u32 * 10 + + (packet.data[15] & 0x0f) as u32; + t -= 40271; + // 4th step: conversion to time_t + let t0 = Timestamp::from_millis((t as i64) * 1000); + + info!( + "- Universal Time Co-ordinated = {}\n", + t0.to_ctime().unwrap() + ); + + debug!(msg_type = DebugMessageFlag::TELETEXT; "- Transmission mode = {:?}\n", self.transmission_mode); + + if self.config.write_format == OutputFormat::Transcript + && matches!(self.config.date_format, TimestampFormat::Date { .. }) + && !self.config.noautotimeref + { + info!("- Broadcast Service Data Packet received, resetting UTC referential value to {}\n", t0.to_ctime().unwrap()); + *UTC_REFVALUE.write().unwrap() = t as u64; + self.states.pts_initialized = false; + } + + self.states.programme_info_processed = true; + } + } + } + } + + /// Consumes the [`TeletextContext`] and appends the pending extracted subtitles in `subtitles`. 
+ pub fn close(mut self, subtitles: Option<&mut Vec>) { + info!( + "\nTeletext decoder: {} packets processed \n", + self.tlt_packet_counter + ); + if self.config.write_format != OutputFormat::Rcwt { + if let Some(subtitles) = subtitles { + // output any pending close caption + if self.page_buffer.tainted { + // Convert telx to UCS-2 before processing + for yt in 1..=23 { + for it in 0..40 { + if self.page_buffer.text[yt][it] != 0x00 + && !self.page_buffer.g2_char_present[yt][it] + { + self.page_buffer.text[yt][it] = self + .g0_charset + .ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap()); + } + } + } + // this time we do not subtract any frames, there will be no more frames + self.page_buffer.hide_timestamp = self.last_timestamp; + if let Some(sub) = self.process_page() { + subtitles.push(sub); + } + } + + self.telxcc_dump_prev_page(); + } + } + } +} + +/// Check the given two lines can be considered similar using levenshtein +/// distance. +/// +/// If the levenshtein distance between `ucs2_buf1` and `ucs2_buf2` is less than either +/// `levdistmincnt` or `levdistmaxpct`% of the length of the shorter line, then the lines are +/// considered to be similar. `c1` and `c2` are used for displaying a debug message only. +/// +/// # Examples +/// ``` +/// # use lib_ccxr::util::fuzzy_cmp; +/// # use lib_ccxr::util::log::*; +/// # let mask = DebugMessageMask::new(DebugMessageFlag::LEVENSHTEIN, DebugMessageFlag::LEVENSHTEIN); +/// # set_logger(CCExtractorLogger::new(OutputTarget::Quiet, mask, false)); +/// let hello_world = [72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]; +/// let hello_Aorld = [72, 101, 108, 108, 111, 32, 65, 111, 114, 108, 100]; +/// let helld_Aorld = [72, 101, 108, 108, 100, 32, 65, 111, 114, 108, 100]; +/// +/// // Returns true if both lines are same +/// assert!(fuzzy_cmp("", "", &hello_world, &hello_world, 10, 2)); +/// +/// // Returns true since the distance is 1 which is less than 2. 
+/// assert!(fuzzy_cmp("", "", &hello_world, &hello_Aorld, 10, 2)); +/// +/// // Returns false since the distance is 2 which is not less than both 2 and 10% of length. +/// assert!(!fuzzy_cmp("", "", &hello_world, &helld_Aorld, 10, 2)); +/// +/// // Returns true since the distance is 1 which is less than 20% of length. +/// assert!(fuzzy_cmp("", "", &hello_world, &hello_Aorld, 20, 2)); +/// ``` +pub fn fuzzy_cmp( + c1: &str, + c2: &str, + ucs2_buf1: &[Ucs2Char], + ucs2_buf2: &[Ucs2Char], + levdistmaxpct: u8, + levdistmincnt: u8, +) -> bool { + let short_len = std::cmp::min(ucs2_buf1.len(), ucs2_buf2.len()); + let max = std::cmp::max( + (short_len * levdistmaxpct as usize) / 100, + levdistmincnt.into(), + ); + + // For the second string, only take the first chars (up to the first string length, that's short_len). + let l = levenshtein(ucs2_buf1, &ucs2_buf2[..short_len]); + let is_same = l < max; + debug!(msg_type = DebugMessageFlag::LEVENSHTEIN; "\rLEV | {} | {} | Max: {} | Calc: {} | Match: {}\n", c1, c2, max, l, is_same); + is_same +} From b2bada4b8a2ac8203319a65b354a96557a8144f3 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 22:35:43 +0530 Subject: [PATCH 09/24] chore: remove outdated --- src/rust/lib_ccxr/src/util/c_functions.rs | 19 - src/rust/lib_ccxr/src/util/log.rs | 1 + .../lib_ccxr/src/util/time/c_functions.rs | 35 - src/rust/lib_ccxr/src/util/time/mod.rs | 27 - src/rust/lib_ccxr/src/util/time/units.rs | 634 ------------------ src/rust/src/libccxr_exports/mod.rs | 7 +- 6 files changed, 4 insertions(+), 719 deletions(-) delete mode 100644 src/rust/lib_ccxr/src/util/c_functions.rs delete mode 100644 src/rust/lib_ccxr/src/util/time/c_functions.rs delete mode 100644 src/rust/lib_ccxr/src/util/time/mod.rs delete mode 100644 src/rust/lib_ccxr/src/util/time/units.rs diff --git a/src/rust/lib_ccxr/src/util/c_functions.rs b/src/rust/lib_ccxr/src/util/c_functions.rs deleted file mode 100644 index e3fcc7923..000000000 --- 
a/src/rust/lib_ccxr/src/util/c_functions.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. - -use super::*; -use crc32fast::hash; - -/// Rust equivalent for `verify_crc32` function in C. Uses Rust-native types as input and output. -pub fn verify_crc32(buf: &[u8]) -> bool { - hash(buf) == 0 -} - -/// Rust equivalent for `levenshtein_dist` function in C. Uses Rust-native types as input and output. -pub fn levenshtein_dist(s1: &[u64], s2: &[u64]) -> usize { - levenshtein(s1, s2) -} - -/// Rust equivalent for `levenshtein_dist_char` function in C. Uses Rust-native types as input and output. -pub fn levenshtein_dist_char(s1: &[T], s2: &[T]) -> usize { - levenshtein(s1, s2) -} diff --git a/src/rust/lib_ccxr/src/util/log.rs b/src/rust/lib_ccxr/src/util/log.rs index c6cfc0feb..582aeefcc 100644 --- a/src/rust/lib_ccxr/src/util/log.rs +++ b/src/rust/lib_ccxr/src/util/log.rs @@ -30,6 +30,7 @@ //! | `dbg_print`, `ccx_common_logging.debug_ftn` | [`debug!`] | //! | `activity_library_process`, `ccx_common_logging.gui_ftn` | [`send_gui`] | //! | `ccx_common_logging_gui` | [`GuiXdsMessage`] | +//! | `dump` | [`hex_dump`] | //! | `dump` | [`hex_dump_with_start_idx`] | use bitflags::bitflags; diff --git a/src/rust/lib_ccxr/src/util/time/c_functions.rs b/src/rust/lib_ccxr/src/util/time/c_functions.rs deleted file mode 100644 index 65d837e35..000000000 --- a/src/rust/lib_ccxr/src/util/time/c_functions.rs +++ /dev/null @@ -1,35 +0,0 @@ -//! Provides Rust equivalent for functions in C. Uses Rust-native types as input and output. - -use super::*; - -/// Rust equivalent for `timestamp_to_srttime` function in C. Uses Rust-native types as input and -/// output. -pub fn timestamp_to_srttime( - timestamp: Timestamp, - buffer: &mut String, -) -> Result<(), TimestampError> { - timestamp.write_srt_time(buffer) -} - -/// Rust equivalent for `timestamp_to_vtttime` function in C. 
Uses Rust-native types as input and -/// output. -pub fn timestamp_to_vtttime( - timestamp: Timestamp, - buffer: &mut String, -) -> Result<(), TimestampError> { - timestamp.write_vtt_time(buffer) -} - -/// Rust equivalent for `millis_to_date` function in C. Uses Rust-native types as input and output. -pub fn millis_to_date( - timestamp: Timestamp, - buffer: &mut String, - date_format: TimestampFormat, -) -> Result<(), TimestampError> { - timestamp.write_formatted_time(buffer, date_format) -} - -/// Rust equivalent for `stringztoms` function in C. Uses Rust-native types as input and output. -pub fn stringztoms(s: &str) -> Option { - Timestamp::parse_optional_hhmmss_from_str(s).ok() -} diff --git a/src/rust/lib_ccxr/src/util/time/mod.rs b/src/rust/lib_ccxr/src/util/time/mod.rs deleted file mode 100644 index 64c67f4d5..000000000 --- a/src/rust/lib_ccxr/src/util/time/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Provide types for storing time in different formats -//! -//! Time can be represented in one of following formats: -//! - [`Timestamp`] as number of milliseconds -//! - [`MpegClockTick`] as number of clock ticks (as defined in the MPEG standard) -//! - [`FrameCount`] as number of frames -//! - [`GopTimeCode`] as a GOP time code (as defined in the MPEG standard) -//! -//! # Conversion Guide -//! -//! | From | To | -//! |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------| -//! | `ccx_boundary_time` | [`Option`](Timestamp) | -//! | any fts | [`Timestamp`] | -//! | `ccx_output_date_format` | [`TimestampFormat`] | -//! | any pts | [`MpegClockTick`] | -//! | any frame count | [`FrameCount`] | -//! | `gop_time_code` | [`GopTimeCode`] | -//! | `print_mstime_static` | [`Timestamp::to_hms_millis_time`] | -//! 
| `gop_accepted` | [`GopTimeCode::did_rollover`] + some additional logic | -//! | `calculate_ms_gop_time` | [`GopTimeCode::new`], [`GopTimeCode::timestamp`] | - -mod units; - -pub mod c_functions; - -pub use units::*; diff --git a/src/rust/lib_ccxr/src/util/time/units.rs b/src/rust/lib_ccxr/src/util/time/units.rs deleted file mode 100644 index dba22d3df..000000000 --- a/src/rust/lib_ccxr/src/util/time/units.rs +++ /dev/null @@ -1,634 +0,0 @@ -use derive_more::{Add, Neg, Sub}; -use std::convert::TryInto; -use std::fmt::Write; -use std::num::TryFromIntError; -use std::time::{SystemTime, UNIX_EPOCH}; -use thiserror::Error; -use time::macros::{datetime, format_description}; -use time::{error::Format, Duration}; - -/// Represents a timestamp in milliseconds. -/// -/// The number can be negetive. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub, Neg)] -pub struct Timestamp { - millis: i64, -} - -/// Represents an error during operations on [`Timestamp`]. -#[derive(Error, Debug)] -pub enum TimestampError { - #[error("input parameter given is out of range")] - InputOutOfRangeError, - #[error("timestamp is out of range")] - OutOfRangeError(#[from] TryFromIntError), - #[error("error ocurred during formatting")] - FormattingError(#[from] std::fmt::Error), - #[error("error ocurred during formatting a date")] - DateFormattingError(#[from] Format), - #[error("error ocurred during parsing")] - ParsingError, -} - -/// Represents the different string formats for [`Timestamp`]. -pub enum TimestampFormat { - /// Format: blank string. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; - /// let timestamp = Timestamp::from_millis(6524365); - /// let output = timestamp.to_formatted_time(TimestampFormat::None).unwrap(); - /// assert_eq!(output, ""); - /// ``` - None, - - /// Format: `{hour:02}:{minute:02}:{second:02}`. 
- /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; - /// let timestamp = Timestamp::from_millis(6524365); - /// let output = timestamp.to_formatted_time(TimestampFormat::HHMMSS).unwrap(); - /// assert_eq!(output, "01:48:44"); - /// ``` - HHMMSS, - - /// Format: `{second:02}{millis_separator}{millis:03}`. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; - /// let timestamp = Timestamp::from_millis(6524365); - /// let output = timestamp.to_formatted_time( - /// TimestampFormat::Seconds { - /// millis_separator: ',', - /// }, - /// ).unwrap(); - /// assert_eq!(output, "6524,365"); - /// ``` - Seconds { millis_separator: char }, - - /// Format: - /// `{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}{millis_separator}{millis:03}`. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; - /// // 11 March 2023 14:53:36.749 in UNIX timestamp. - /// let timestamp = Timestamp::from_millis(1678546416749); - /// let output = timestamp.to_formatted_time( - /// TimestampFormat::Date { - /// millis_separator: ',', - /// }, - /// ).unwrap(); - /// assert_eq!(output, "20230311145336,749"); - /// ``` - Date { millis_separator: char }, - - /// Format: `{hour:02}:{minute:02}:{second:02},{millis:03}`. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampFormat}; - /// let timestamp = Timestamp::from_millis(6524365); - /// let output = timestamp.to_formatted_time(TimestampFormat::HHMMSSFFF).unwrap(); - /// assert_eq!(output, "01:48:44,365"); - /// ``` - HHMMSSFFF, -} - -impl Timestamp { - /// Create a new [`Timestamp`] based on the number of milliseconds since the Unix Epoch. 
- pub fn now() -> Timestamp { - let duration = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("System Time cannot be behind the Unix Epoch"); - - Timestamp { - millis: duration.as_millis() as i64, - } - } - - /// Create a new [`Timestamp`] from number of milliseconds. - pub const fn from_millis(millis: i64) -> Timestamp { - Timestamp { millis } - } - - /// Create a new [`Timestamp`] from hours, minutes, seconds and milliseconds. - /// - /// It will fail if any parameter doesn't follow their respective ranges: - /// - /// | Parameter | Range | - /// |-----------|---------| - /// | minutes | 0 - 59 | - /// | seconds | 0 - 59 | - /// | millis | 0 - 999 | - pub fn from_hms_millis( - hours: u8, - minutes: u8, - seconds: u8, - millis: u16, - ) -> Result { - if minutes < 60 && seconds < 60 && millis < 1000 { - Ok(Timestamp::from_millis( - (hours as i64) * 3_600_000 - + (minutes as i64) * 60_000 - + (seconds as i64) * 1000 - + millis as i64, - )) - } else { - Err(TimestampError::InputOutOfRangeError) - } - } - - /// Returns the number of milliseconds. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.millis(), 6524365); - /// ``` - pub fn millis(&self) -> i64 { - self.millis - } - - /// Returns the number of whole seconds. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.seconds(), 6524); - /// ``` - pub fn seconds(&self) -> i64 { - self.millis / 1000 - } - - /// Returns the number of whole seconds and leftover milliseconds as unsigned integers. - /// - /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. 
- /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.as_sec_millis().unwrap(), (6524, 365)); - /// ``` - pub fn as_sec_millis(&self) -> Result<(u64, u16), TimestampError> { - let millis: u64 = self.millis.try_into()?; - let s = millis / 1000; - let u = millis % 1000; - Ok((s, u as u16)) - } - - /// Returns the time in the form of hours, minutes, seconds and milliseconds as unsigned - /// integers. - /// - /// It will return an [`TimestampError::OutOfRangeError`] if the timestamp is negetive. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.as_hms_millis().unwrap(), (1, 48, 44, 365)); - /// ``` - /// ```rust - /// # use lib_ccxr::util::time::{Timestamp, TimestampError}; - /// let timestamp = Timestamp::from_millis(1678546416749); - /// assert!(matches!( - /// timestamp.as_hms_millis().unwrap_err(), - /// TimestampError::OutOfRangeError(_) - /// )); - /// ``` - pub fn as_hms_millis(&self) -> Result<(u8, u8, u8, u16), TimestampError> { - let millis: u64 = self.millis.try_into()?; - let h = millis / 3600000; - let m = millis / 60000 - 60 * h; - let s = millis / 1000 - 3600 * h - 60 * m; - let u = millis - 3600000 * h - 60000 * m - 1000 * s; - if h > 24 { - println!("{}", h) - } - Ok((h.try_into()?, m as u8, s as u8, u as u16)) - } - - /// Fills `output` with the [`Timestamp`] using SRT's timestamp format. 
- /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// let mut output = String::new(); - /// timestamp.write_srt_time(&mut output); - /// assert_eq!(output, "01:48:44,365"); - /// ``` - pub fn write_srt_time(&self, output: &mut String) -> Result<(), TimestampError> { - let (h, m, s, u) = self.as_hms_millis()?; - write!(output, "{:02}:{:02}:{:02},{:03}", h, m, s, u)?; - Ok(()) - } - - /// Fills `output` with the [`Timestamp`] using VTT's timestamp format. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// let mut output = String::new(); - /// timestamp.write_vtt_time(&mut output); - /// assert_eq!(output, "01:48:44.365"); - /// ``` - pub fn write_vtt_time(&self, output: &mut String) -> Result<(), TimestampError> { - let (h, m, s, u) = self.as_hms_millis()?; - write!(output, "{:02}:{:02}:{:02}.{:03}", h, m, s, u)?; - Ok(()) - } - - /// Fills `output` with the [`Timestamp`] using - /// "{sign}{hour:02}:{minute:02}:{second:02}{sep}{millis:03}" format, where `sign` can be `-` - /// if time is negetive or blank if it is positive. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// let mut output = String::new(); - /// timestamp.write_hms_millis_time(&mut output, ':'); - /// assert_eq!(output, "01:48:44:365"); - /// ``` - pub fn write_hms_millis_time( - &self, - output: &mut String, - sep: char, - ) -> Result<(), TimestampError> { - let sign = if self.millis < 0 { "-" } else { "" }; - let timestamp = if self.millis < 0 { -*self } else { *self }; - let (h, m, s, u) = timestamp.as_hms_millis()?; - write!(output, "{}{:02}:{:02}:{:02}{}{:03}", sign, h, m, s, sep, u)?; - Ok(()) - } - - /// Fills `output` with the [`Timestamp`] using ctime's format. 
- /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// let mut output = String::new(); - /// timestamp.write_ctime(&mut output); - /// assert_eq!(output, "Thu Jan 01 01:48:44 1970"); - /// ``` - pub fn write_ctime(&self, output: &mut String) -> Result<(), TimestampError> { - let (sec, millis) = self.as_sec_millis()?; - let d = datetime!(1970-01-01 0:00) - + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); - let format = format_description!( - "[weekday repr:short] [month repr:short] [day] [hour]:[minute]:[second] [year]" - ); - write!(output, "{}", d.format(&format)?)?; - Ok(()) - } - - /// Fills `output` with the [`Timestamp`] using format specified by [`TimestampFormat`]. - /// - /// See [`TimestampFormat`] for examples. - pub fn write_formatted_time( - &self, - output: &mut String, - format: TimestampFormat, - ) -> Result<(), TimestampError> { - match format { - TimestampFormat::None => Ok(()), - TimestampFormat::HHMMSS => { - let (h, m, s, _) = self.as_hms_millis()?; - write!(output, "{:02}:{:02}:{:02}", h, m, s)?; - Ok(()) - } - TimestampFormat::Seconds { millis_separator } => { - let (sec, millis) = self.as_sec_millis()?; - write!(output, "{}{}{:03}", sec, millis_separator, millis)?; - Ok(()) - } - TimestampFormat::Date { millis_separator } => { - let (sec, millis) = self.as_sec_millis()?; - let d = datetime!(1970-01-01 0:00) - + Duration::new(sec.try_into()?, (millis as i32) * 1_000_000); - let format1 = format_description!("[year][month][day][hour][minute][second]"); - let format2 = format_description!("[subsecond digits:3]"); - - write!( - output, - "{}{}{}", - d.format(&format1)?, - millis_separator, - d.format(&format2)? - )?; - Ok(()) - } - TimestampFormat::HHMMSSFFF => self.write_srt_time(output), - } - } - - /// Returns a formatted [`Timestamp`] using SRT's timestamp format. 
- /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.to_srt_time().unwrap(), "01:48:44,365"); - /// ``` - pub fn to_srt_time(&self) -> Result { - let mut s = String::new(); - self.write_srt_time(&mut s)?; - Ok(s) - } - - /// Returns a formatted [`Timestamp`] using VTT's timestamp format. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.to_vtt_time().unwrap(), "01:48:44.365"); - /// ``` - pub fn to_vtt_time(&self) -> Result { - let mut s = String::new(); - self.write_vtt_time(&mut s)?; - Ok(s) - } - - /// Returns a formatted [`Timestamp`] using - /// "{sign}{hour:02}:{minute:02}:{second:02}{sep}{millis:03}" format, where `sign` can be `-` - /// if time is negetive or blank if it is positive. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.to_hms_millis_time(':').unwrap(), "01:48:44:365"); - /// ``` - pub fn to_hms_millis_time(&self, sep: char) -> Result { - let mut s = String::new(); - self.write_hms_millis_time(&mut s, sep)?; - Ok(s) - } - - /// Returns a formatted [`Timestamp`] using ctime's format. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::from_millis(6524365); - /// assert_eq!(timestamp.to_ctime().unwrap(), "Thu Jan 01 01:48:44 1970"); - /// ``` - pub fn to_ctime(&self) -> Result { - let mut s = String::new(); - self.write_ctime(&mut s)?; - Ok(s) - } - - /// Returns a formatted [`Timestamp`] using format specified by [`TimestampFormat`]. - /// - /// See [`TimestampFormat`] for examples. 
- pub fn to_formatted_time(&self, format: TimestampFormat) -> Result { - let mut s = String::new(); - self.write_formatted_time(&mut s, format)?; - Ok(s) - } - - /// Creates a [`Timestamp`] by parsing `input` using format `SS` or `MM:SS` or `HH:MM:SS`. - /// - /// # Examples - /// ```rust - /// # use lib_ccxr::util::time::Timestamp; - /// let timestamp = Timestamp::parse_optional_hhmmss_from_str("01:12:45").unwrap(); - /// assert_eq!(timestamp, Timestamp::from_millis(4_365_000)); - /// ``` - pub fn parse_optional_hhmmss_from_str(input: &str) -> Result { - let mut numbers = input - .split(':') - .map(|x| x.parse::().map_err(|_| TimestampError::ParsingError)) - .rev(); - - let mut millis: u64 = 0; - - let seconds: u64 = numbers.next().ok_or(TimestampError::ParsingError)??.into(); - if seconds > 59 { - return Err(TimestampError::InputOutOfRangeError); - } - millis += seconds * 1000; - - if let Some(x) = numbers.next() { - let minutes: u64 = x?.into(); - if minutes > 59 { - return Err(TimestampError::InputOutOfRangeError); - } - millis += 60_000 * minutes; - } - - if let Some(x) = numbers.next() { - let hours: u64 = x?.into(); - millis += 3_600_000 * hours; - } - - if numbers.next().is_some() { - return Err(TimestampError::ParsingError); - } - - Ok(Timestamp::from_millis(millis.try_into()?)) - } -} - -/// Represent the number of clock ticks as defined in Mpeg standard. -/// -/// This number can never be negetive. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] -pub struct MpegClockTick(i64); - -impl MpegClockTick { - /// The ratio to convert a clock tick to time duration. - pub const MPEG_CLOCK_FREQ: i64 = 90000; - - /// Create a value representing `ticks` clock ticks. - pub fn new(ticks: i64) -> MpegClockTick { - MpegClockTick(ticks) - } - - /// Returns the number of clock ticks. - pub fn as_i64(&self) -> i64 { - self.0 - } - - /// Converts the clock ticks to its equivalent time duration. 
- /// - /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`]. - pub fn as_timestamp(&self) -> Timestamp { - Timestamp::from_millis(self.0 / (MpegClockTick::MPEG_CLOCK_FREQ / 1000)) - } -} - -/// Represents the number of frames. -/// -/// This number can never be negetive. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Add, Sub)] -pub struct FrameCount(u64); - -impl FrameCount { - /// Create a value representing `frames` number of frames. - pub const fn new(frames: u64) -> FrameCount { - FrameCount(frames) - } - - /// Returns the number of frames. - pub fn as_u64(&self) -> u64 { - self.0 - } - - /// Converts the frames to its equivalent time duration. - /// - /// The conversion ratio used is `fps`. - pub fn as_timestamp(&self, fps: f64) -> Timestamp { - Timestamp::from_millis((self.0 as f64 * 1000.0 / fps) as i64) - } - - /// Converts the frames to its equivalent number of clock ticks. - /// - /// The conversion ratio used is [`MpegClockTick::MPEG_CLOCK_FREQ`] and `fps`. - pub fn as_mpeg_clock_tick(&self, fps: f64) -> MpegClockTick { - MpegClockTick::new(((self.0 * MpegClockTick::MPEG_CLOCK_FREQ as u64) as f64 / fps) as i64) - } -} - -/// Represents a GOP Time code as defined in the Mpeg standard. -/// -/// This structure stores its time in the form of hours, minutes, seconds and pictures. This -/// structure also stores its time in the form of a [`Timestamp`] when it is created. This -/// [`Timestamp`] can be modified by [`timestamp_mut`](GopTimeCode::timestamp_mut) and an -/// additional 24 hours may be added on rollover, so it is not necessary that the above two -/// formats refer to the same time. Therefore it is recommended to only rely on the -/// [`Timestamp`] instead of the other format. 
-#[derive(Copy, Clone, Debug)] -pub struct GopTimeCode { - drop_frame: bool, - time_code_hours: u8, - time_code_minutes: u8, - time_code_seconds: u8, - time_code_pictures: u8, - timestamp: Timestamp, -} - -impl GopTimeCode { - /// Create a new [`GopTimeCode`] from the specified parameters. - /// - /// The number of frames or pictures is converted to time duration using `fps`. - /// - /// If `rollover` is true, then an extra of 24 hours will added. - /// - /// It will return [`None`] if any parameter doesn't follow their respective ranges: - /// - /// | Parameter | Range | - /// |-----------|--------| - /// | hours | 0 - 23 | - /// | minutes | 0 - 59 | - /// | seconds | 0 - 59 | - /// | pictures | 0 - 59 | - pub fn new( - drop_frame: bool, - hours: u8, - minutes: u8, - seconds: u8, - pictures: u8, - fps: f64, - rollover: bool, - ) -> Option { - if hours < 24 && minutes < 60 && seconds < 60 && pictures < 60 { - let millis = (1000.0 * (pictures as f64) / fps) as u16; - let extra_hours = if rollover { 24 } else { 0 }; - let timestamp = - Timestamp::from_hms_millis(hours + extra_hours, minutes, seconds, millis) - .expect("The fps given is probably too low"); - - Some(GopTimeCode { - drop_frame, - time_code_hours: hours, - time_code_minutes: minutes, - time_code_seconds: seconds, - time_code_pictures: pictures, - timestamp, - }) - } else { - None - } - } - - /// Returns the GOP time code in its equivalent time duration. - pub fn timestamp(&self) -> Timestamp { - self.timestamp - } - - /// Returns a mutable reference to internal [`Timestamp`]. - pub fn timestamp_mut(&mut self) -> &mut Timestamp { - &mut self.timestamp - } - - /// Check if a rollover has ocurred by comparing the previous [`GopTimeCode`] that is `prev` - /// with the current [`GopTimeCode`]. 
- pub fn did_rollover(&self, prev: &GopTimeCode) -> bool { - prev.time_code_hours == 23 - && prev.time_code_minutes == 59 - && self.time_code_hours == 0 - && self.time_code_minutes == 0 - } - - /// Constructs a [`GopTimeCode`] from its individual fields. - /// - /// # Safety - /// - /// The fields other than [`Timestamp`] may not be accurate if it is changed using - /// [`timestamp_mut`](GopTimeCode::timestamp_mut). - pub unsafe fn from_raw_parts( - drop_frame: bool, - hours: u8, - minutes: u8, - seconds: u8, - pictures: u8, - timestamp: Timestamp, - ) -> GopTimeCode { - GopTimeCode { - drop_frame, - time_code_hours: hours, - time_code_minutes: minutes, - time_code_seconds: seconds, - time_code_pictures: pictures, - timestamp, - } - } - - /// Returns the individuals field of a [`GopTimeCode`]. - /// - /// # Safety - /// - /// The fields other than [`Timestamp`] may not be accurate if it is changed using - /// [`timestamp_mut`](GopTimeCode::timestamp_mut). - pub unsafe fn as_raw_parts(&self) -> (bool, u8, u8, u8, u8, Timestamp) { - let GopTimeCode { - drop_frame, - time_code_hours, - time_code_minutes, - time_code_seconds, - time_code_pictures, - timestamp, - } = *self; - - ( - drop_frame, - time_code_hours, - time_code_minutes, - time_code_seconds, - time_code_pictures, - timestamp, - ) - } -} diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index 855d593e0..58ede2e5c 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -1,15 +1,14 @@ //! Provides C-FFI functions that are direct equivalent of functions available in C. +pub mod time; use crate::ccx_options; use lib_ccxr::util::c_functions::*; use lib_ccxr::util::log::*; +use lib_ccxr::util::{bits::*, levenshtein::*}; + use std::convert::TryInto; use std::os::raw::{c_char, c_int, c_uint}; -mod time; - -pub use time::*; - /// Initializes the logger at the rust side. 
/// /// # Safety From 201233e7979fb2df05899707bb2521c69650dc45 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 22:39:17 +0530 Subject: [PATCH 10/24] chore: update lock files --- src/rust/Cargo.lock | 322 ++++++++++++++++++++--------------- src/rust/lib_ccxr/Cargo.lock | 120 ++++++++++++- 2 files changed, 302 insertions(+), 140 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 17ccaa076..053d5d436 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -28,36 +28,36 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.3" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.3" +version = "3.0.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -127,7 +127,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.66", + "syn 2.0.75", "which", ] @@ -145,9 +145,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "camino" -version = "1.1.7" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" [[package]] name = "ccx_rust" @@ -189,9 +189,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clang-sys" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.4" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -210,9 +210,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.2" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstream", "anstyle", @@ 
-222,55 +222,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.4" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" - -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" - -[[package]] -name = "deranged" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "derive_more" -version = "0.99.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "rustc_version", - "syn 2.0.66", -] +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "convert_case" @@ -280,30 +252,33 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] [[package]] name = "derive_more" -version = "0.99.17" +version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ "convert_case", "proc-macro2", "quote", "rustc_version", - "syn 1.0.109", + "syn 2.0.75", ] [[package]] @@ -314,9 +289,9 @@ checksum = "74c57ab96715773d9cb9789b38eb7cbf04b3c6f5624a9d98f51761603376767c" [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "env_logger" @@ -331,6 +306,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.9" @@ -338,7 +319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -365,6 +346,12 @@ version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "heck" version = "0.4.1" @@ -392,7 +379,7 @@ version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -421,11 +408,21 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indexmap" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" @@ -470,7 +467,9 @@ name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags 2.6.0", + "crc32fast", "derive_more", + "num_enum", "strum 0.26.3", "strum_macros 0.26.4", "thiserror", @@ -480,15 +479,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", "windows-targets", @@ -502,15 +501,15 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "minimal-lexical" @@ -552,6 +551,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.75", +] + [[package]] name = "once_cell" version = "1.19.0" @@ -630,7 +650,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -661,14 +681,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.66", + "syn 2.0.75", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -699,9 +729,9 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" [[package]] name = "regex" -version = "1.10.4" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -711,9 +741,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -722,9 +752,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rsmpeg" @@ -763,7 +793,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -794,22 +824,22 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.202" +version = "1.0.208" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -852,7 +882,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -865,7 +895,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.66", + "syn 2.0.75", ] [[package]] @@ -881,9 +911,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -913,22 +943,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.75", ] 
[[package]] @@ -986,6 +1016,23 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-bidi" version = "0.3.15" @@ -1020,20 +1067,9 @@ dependencies = [ [[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "url" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "vcpkg" @@ -1075,7 +1111,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1093,11 +1129,20 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -1111,48 +1156,57 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] diff --git a/src/rust/lib_ccxr/Cargo.lock b/src/rust/lib_ccxr/Cargo.lock index 4fd82ee25..886ba28db 100644 --- a/src/rust/lib_ccxr/Cargo.lock +++ b/src/rust/lib_ccxr/Cargo.lock @@ -8,12 +8,27 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "convert_case" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "deranged" version = "0.3.11" @@ -36,6 +51,12 @@ dependencies = [ "syn", ] +[[package]] +name 
= "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -45,6 +66,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "heck" version = "0.5.0" @@ -61,6 +88,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indexmap" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.11" @@ -72,7 +109,9 @@ name = "lib_ccxr" version = "0.1.0" dependencies = [ "bitflags", + "crc32fast", "derive_more", + "num_enum", "strum", "strum_macros", "thiserror", @@ -80,12 +119,45 @@ dependencies = [ "url", ] +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -98,6 +170,16 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -139,18 +221,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.205" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.205" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -178,9 +260,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -253,6 +335,23 @@ version = "0.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "unicode-bidi" version = "0.3.15" @@ -284,3 +383,12 @@ dependencies = [ "idna", "percent-encoding", ] + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] From 23d4b4ed5ea57b6dc3f579b12fa334cbf8567932 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 22:47:17 +0530 Subject: [PATCH 11/24] chore: fix naming --- src/rust/lib_ccxr/src/subtitle.rs | 2 +- src/rust/lib_ccxr/src/teletext.rs | 7 ++++--- src/rust/src/lib.rs | 2 +- src/rust/src/libccxr_exports/mod.rs | 1 - 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rust/lib_ccxr/src/subtitle.rs b/src/rust/lib_ccxr/src/subtitle.rs index 4f442a87e..9ee3cfd90 100644 --- a/src/rust/lib_ccxr/src/subtitle.rs +++ b/src/rust/lib_ccxr/src/subtitle.rs @@ -3,8 +3,8 @@ //! NOTE: This module is incomplete and a lot of work is still left. use crate::common::Language; +use crate::time::units::Timestamp; use crate::util::encoding::EncodedString; -use crate::util::time::Timestamp; /// Represents the different formats in which subtitle data could be stored. 
/// diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index 097600e1d..2b3a75699 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -39,10 +39,11 @@ use std::sync::RwLock; use crate::common::OutputFormat; use crate::subtitle::Subtitle; +use crate::time::units::{Timestamp, TimestampFormat}; +use crate::util::bits::{decode_hamming_24_18, decode_hamming_8_4, get_parity}; use crate::util::encoding::{Ucs2Char, Ucs2String}; +use crate::util::levenshtein::levenshtein; use crate::util::log::{debug, info, logger, DebugMessageFlag}; -use crate::util::time::{Timestamp, TimestampFormat}; -use crate::util::{decode_hamming_24_18, decode_hamming_8_4, levenshtein, parity}; /// UTC referential value. /// @@ -453,7 +454,7 @@ impl G0Charset { /// Return the equivalent UCS-2 character for the given teletext character based on the current /// character set. pub fn ucs2_char(&self, telx_char: u8) -> Ucs2Char { - if parity(telx_char) { + if get_parity(telx_char) { debug!(msg_type = DebugMessageFlag::TELETEXT; "- Unrecoverable data error; PARITY({:02x})\n", telx_char); return 0x20; } diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 97fc785ae..87a2f24fa 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -55,13 +55,13 @@ extern "C" { static mut cb_708: c_int; static mut cb_field1: c_int; static mut cb_field2: c_int; - static mut ccx_options: ccx_s_options; } #[allow(dead_code)] extern "C" { static mut MPEG_CLOCK_FREQ: c_int; static mut tlt_config: ccx_s_teletext_config; + static mut ccx_options: ccx_s_options; } /// Initialize env logger with custom format, using stdout as target diff --git a/src/rust/src/libccxr_exports/mod.rs b/src/rust/src/libccxr_exports/mod.rs index 58ede2e5c..62a0d1d42 100644 --- a/src/rust/src/libccxr_exports/mod.rs +++ b/src/rust/src/libccxr_exports/mod.rs @@ -2,7 +2,6 @@ pub mod time; use crate::ccx_options; -use lib_ccxr::util::c_functions::*; use lib_ccxr::util::log::*; 
use lib_ccxr::util::{bits::*, levenshtein::*}; From e89dd7456c1d4491374aac68414a43627a861d83 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 22:50:31 +0530 Subject: [PATCH 12/24] fix: reference to TeletextConfig --- src/rust/lib_ccxr/src/common/mod.rs | 2 - src/rust/lib_ccxr/src/common/teletext.rs | 87 ------------------------ src/rust/lib_ccxr/src/teletext.rs | 51 ++++++++++---- src/rust/src/common.rs | 2 +- src/rust/src/lib.rs | 5 +- src/rust/src/parser.rs | 1 + 6 files changed, 39 insertions(+), 109 deletions(-) delete mode 100644 src/rust/lib_ccxr/src/common/teletext.rs diff --git a/src/rust/lib_ccxr/src/common/mod.rs b/src/rust/lib_ccxr/src/common/mod.rs index 61a0a8b59..4aaf4bb62 100644 --- a/src/rust/lib_ccxr/src/common/mod.rs +++ b/src/rust/lib_ccxr/src/common/mod.rs @@ -18,8 +18,6 @@ mod constants; mod options; -mod teletext; pub use constants::*; pub use options::*; -pub use teletext::*; diff --git a/src/rust/lib_ccxr/src/common/teletext.rs b/src/rust/lib_ccxr/src/common/teletext.rs deleted file mode 100644 index 54bac7889..000000000 --- a/src/rust/lib_ccxr/src/common/teletext.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::time::units::{Timestamp, TimestampFormat}; -use std::{cell::Cell, fmt}; - -use super::OutputFormat; - -#[derive(Debug)] -pub struct TeletextConfig { - /// should telxcc logging be verbose? 
- pub verbose: bool, - /// teletext page containing cc we want to filter - pub page: Cell, - /// Page selected by user, which MIGHT be different to `page` depending on autodetection stuff - pub user_page: u16, - /// false = Don't attempt to correct errors - pub dolevdist: bool, - /// Means 2 fails or less is "the same" - pub levdistmincnt: u8, - /// Means 10% or less is also "the same" - pub levdistmaxpct: u8, - /// Segment we actually process - pub extraction_start: Option, - /// Segment we actually process - pub extraction_end: Option, - pub write_format: OutputFormat, - pub date_format: TimestampFormat, - /// Do NOT set time automatically? - pub noautotimeref: bool, - pub nofontcolor: bool, - pub nohtmlescape: bool, - pub latrusmap: bool, -} - -impl Default for TeletextConfig { - fn default() -> Self { - Self { - verbose: true, - page: TeletextPageNumber(0).into(), - user_page: 0, - dolevdist: false, - levdistmincnt: 0, - levdistmaxpct: 0, - extraction_start: None, - extraction_end: None, - write_format: OutputFormat::default(), - date_format: TimestampFormat::default(), - noautotimeref: false, - nofontcolor: false, - nohtmlescape: false, - latrusmap: false, - } - } -} - -/// Represents a Teletext Page Number in its bitcode representation. -/// -/// It can be easily contructed from a [`u16`]. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct TeletextPageNumber(u16); - -impl From for TeletextPageNumber { - fn from(value: u16) -> TeletextPageNumber { - TeletextPageNumber(value) - } -} - -impl fmt::Display for TeletextPageNumber { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:03x}", self.0) - } -} - -impl TeletextPageNumber { - /// Return the magazine and packet bits. - pub fn magazine(&self) -> u8 { - ((self.0 >> 8) & 0x0f) as u8 - } - - /// Return the page bits. - pub fn page(&self) -> u8 { - (self.0 & 0xff) as u8 - } - - /// Return the page number after converting the page bits in bcd format to normal integer. 
- pub fn bcd_page_to_u16(&self) -> u16 { - ((self.0 & 0xf00) >> 8) * 100 + ((self.0 & 0xf0) >> 4) * 10 + (self.0 & 0xf) - } -} diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index 2b3a75699..c7289e586 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -724,31 +724,52 @@ pub struct TeletextPage { } /// Settings required to contruct a [`TeletextContext`]. -#[allow(dead_code)] +#[derive(Debug)] pub struct TeletextConfig { /// should telxcc logging be verbose? - verbose: bool, + pub verbose: bool, /// teletext page containing cc we want to filter - page: Cell, + pub page: Cell, /// Page selected by user, which MIGHT be different to `page` depending on autodetection stuff - user_page: u16, + pub user_page: u16, /// false = Don't attempt to correct errors - dolevdist: bool, + pub dolevdist: bool, /// Means 2 fails or less is "the same" - levdistmincnt: u8, + pub levdistmincnt: u8, /// Means 10% or less is also "the same" - levdistmaxpct: u8, + pub levdistmaxpct: u8, /// Segment we actually process - extraction_start: Option, + pub extraction_start: Option, /// Segment we actually process - extraction_end: Option, - write_format: OutputFormat, - date_format: TimestampFormat, + pub extraction_end: Option, + pub write_format: OutputFormat, + pub date_format: TimestampFormat, /// Do NOT set time automatically? 
- noautotimeref: bool, - nofontcolor: bool, - nohtmlescape: bool, - latrusmap: bool, + pub noautotimeref: bool, + pub nofontcolor: bool, + pub nohtmlescape: bool, + pub latrusmap: bool, +} + +impl Default for TeletextConfig { + fn default() -> Self { + Self { + verbose: true, + page: TeletextPageNumber(0).into(), + user_page: 0, + dolevdist: false, + levdistmincnt: 0, + levdistmaxpct: 0, + extraction_start: None, + extraction_end: None, + write_format: OutputFormat::default(), + date_format: TimestampFormat::default(), + noautotimeref: false, + nofontcolor: false, + nohtmlescape: false, + latrusmap: false, + } + } } /// Represents the possible states that [`TeletextContext`] can be in. diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index ee4adea4c..608761696 100644 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -14,9 +14,9 @@ use lib_ccxr::common::OutputFormat; use lib_ccxr::common::SelectCodec; use lib_ccxr::common::StreamMode; use lib_ccxr::common::StreamType; -use lib_ccxr::common::TeletextConfig; use lib_ccxr::hardsubx::ColorHue; use lib_ccxr::hardsubx::OcrMode; +use lib_ccxr::teletext::TeletextConfig; use lib_ccxr::time::units::Timestamp; use lib_ccxr::time::units::TimestampFormat; use lib_ccxr::util::encoding::Encoding; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 87a2f24fa..bd94fd526 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -32,10 +32,7 @@ use bindings::*; use clap::{error::ErrorKind, Parser}; use common::{CType2, FromRust}; use decoder::Dtvcc; -use lib_ccxr::{ - common::{Options, TeletextConfig}, - util::log::ExitCause, -}; +use lib_ccxr::{common::Options, teletext::TeletextConfig, util::log::ExitCause}; use parser::OptionsExt; use utils::is_true; diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 47341ec84..1c381bf88 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1,4 +1,5 @@ use args::{Args, OutFormat}; +use lib_ccxr::teletext::{TeletextConfig, 
TeletextPageNumber}; use lib_ccxr::time::units::{Timestamp, TimestampFormat}; use lib_ccxr::util::encoding::Encoding; use lib_ccxr::util::log::{DebugMessageFlag, DebugMessageMask, ExitCause, OutputTarget}; From ba026e24e2eff821e518b195bc01b8a5095afb8b Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 23:00:04 +0530 Subject: [PATCH 13/24] fix: issue with ts_forced_program default value --- src/rust/lib_ccxr/src/common/options.rs | 22 ++++++++++++++++++++-- src/rust/src/common.rs | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs index 330f692d6..dabe0019c 100644 --- a/src/rust/lib_ccxr/src/common/options.rs +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -157,7 +157,7 @@ pub enum DtvccServiceCharset { } #[allow(dead_code)] -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone)] pub struct DemuxerConfig { /// Regular TS or M2TS pub m2ts: bool, @@ -175,13 +175,31 @@ pub struct DemuxerConfig { /// If 1, never mess with the selected PID pub ts_forced_cappid: bool, /// Specific program to process in TS files, if a forced program is given - pub ts_forced_program: Option, + pub ts_forced_program: Option, /// User WANTED stream type (i.e. 
use the stream that has this type) pub ts_datastreamtype: StreamType, /// User selected (forced) stream type pub ts_forced_streamtype: StreamType, } +impl Default for DemuxerConfig { + fn default() -> Self { + Self { + m2ts: false, + auto_stream: StreamMode::default(), + codec: SelectCodec::Some(super::Codec::Any), + nocodec: SelectCodec::None, + ts_autoprogram: false, + ts_allprogram: false, + ts_cappids: Vec::new(), + ts_forced_cappid: false, + ts_forced_program: None, + ts_datastreamtype: StreamType::default(), + ts_forced_streamtype: StreamType::default(), + } + } +} + impl Default for EncoderConfig { fn default() -> Self { Self { diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index 608761696..adbc6b481 100644 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -486,7 +486,7 @@ impl CType for DemuxerConfig { ts_cappids: self.ts_cappids.to_ctype(), nb_ts_cappid: self.ts_cappids.len() as _, ts_forced_cappid: self.ts_forced_cappid as _, - ts_forced_program: self.ts_forced_program.unwrap_or_default() as _, + ts_forced_program: self.ts_forced_program.unwrap_or(-1) as _, ts_forced_program_selected: self.ts_forced_program.is_some() as _, ts_datastreamtype: self.ts_datastreamtype.to_ctype() as _, ts_forced_streamtype: self.ts_forced_streamtype.to_ctype() as _, From cb6bb540e2d3ef4503eabe8ee6753661f14d3388 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 23:01:49 +0530 Subject: [PATCH 14/24] fix: use correct definition --- src/lib_ccx/lib_ccx.c | 4 ---- src/lib_ccx/lib_ccx.h | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/lib_ccx/lib_ccx.c b/src/lib_ccx/lib_ccx.c index cf408ee08..d0a69b469 100644 --- a/src/lib_ccx/lib_ccx.c +++ b/src/lib_ccx/lib_ccx.c @@ -6,10 +6,6 @@ #include "ccx_decoders_708.h" #include "ccx_decoders_isdb.h" -#ifndef DISABLE_RUST -extern void ccxr_init_basic_logger(); -#endif - struct ccx_common_logging_t ccx_common_logging; static struct ccx_decoders_common_settings_t 
*init_decoder_setting( struct ccx_s_options *opt) diff --git a/src/lib_ccx/lib_ccx.h b/src/lib_ccx/lib_ccx.h index f3defaa9a..2e7b44145 100644 --- a/src/lib_ccx/lib_ccx.h +++ b/src/lib_ccx/lib_ccx.h @@ -154,7 +154,7 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt); void dinit_libraries( struct lib_ccx_ctx **ctx); #ifndef DISABLE_RUST -extern void ccxr_init_basic_logger(struct ccx_s_options *opts); +extern void ccxr_init_basic_logger(); #endif //ccextractor.c From 4ab91819d732fd8dc6d88010f264b464222e50f8 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 23:05:39 +0530 Subject: [PATCH 15/24] chore: lint warnings --- src/rust/lib_ccxr/src/teletext.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index c7289e586..b9ba1021c 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -335,7 +335,7 @@ impl G0LatinNationalSubset { let lang_index: u8 = (*self).into(); Self::G0_LATIN_NATIONAL_SUBSETS_POSITIONS .into_iter() - .zip(Self::G0_LATIN_NATIONAL_SUBSETS[lang_index as usize].into_iter()) + .zip(Self::G0_LATIN_NATIONAL_SUBSETS[lang_index as usize]) } } From e6ff9630dce54af17c1d9e6a7c49e353e55cc1ae Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Mon, 19 Aug 2024 23:08:06 +0530 Subject: [PATCH 16/24] fix: example code --- src/rust/lib_ccxr/src/teletext.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index b9ba1021c..cb4f4a1af 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -1636,7 +1636,7 @@ impl<'a> TeletextContext<'a> { /// /// # Examples /// ``` -/// # use lib_ccxr::util::fuzzy_cmp; +/// # use lib_ccxr::teletext::fuzzy_cmp; /// # use lib_ccxr::util::log::*; /// # let mask = DebugMessageMask::new(DebugMessageFlag::LEVENSHTEIN, DebugMessageFlag::LEVENSHTEIN); /// # 
set_logger(CCExtractorLogger::new(OutputTarget::Quiet, mask, false)); From ac832a60f5fb547bdf3763d3263a6f693bf2ce4d Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Thu, 22 Aug 2024 00:11:15 +0530 Subject: [PATCH 17/24] fix(rust): adjust defaults, more accurate logging, use safe functions, add encoders_helper module --- src/ccextractor.c | 4 +- src/lib_ccx/lib_ccx.h | 2 +- src/lib_ccx/params.c | 2 +- src/rust/{ => lib_ccxr}/src/activity.rs | 2 +- src/rust/lib_ccxr/src/common/constants.rs | 1 + src/rust/lib_ccxr/src/common/options.rs | 121 ++++- src/rust/lib_ccxr/src/lib.rs | 1 + src/rust/lib_ccxr/src/teletext.rs | 16 +- src/rust/lib_ccxr/src/util/encoders_helper.rs | 157 +++++++ src/rust/lib_ccxr/src/util/log.rs | 10 +- src/rust/lib_ccxr/src/util/mod.rs | 1 + src/rust/src/args.rs | 2 +- src/rust/src/ccx_encoders_helpers.rs | 65 --- src/rust/src/common.rs | 223 ++++----- src/rust/src/decoder/mod.rs | 28 +- src/rust/src/lib.rs | 30 +- src/rust/src/parser.rs | 422 +++++++++++------- src/rust/src/utils.rs | 28 +- 18 files changed, 710 insertions(+), 405 deletions(-) rename src/rust/{ => lib_ccxr}/src/activity.rs (93%) create mode 100644 src/rust/lib_ccxr/src/util/encoders_helper.rs delete mode 100644 src/rust/src/ccx_encoders_helpers.rs diff --git a/src/ccextractor.c b/src/ccextractor.c index 3f159f716..c63e6d0ab 100644 --- a/src/ccextractor.c +++ b/src/ccextractor.c @@ -187,6 +187,7 @@ int api_start(struct ccx_s_options api_options) if (!api_options.use_gop_as_pts) // If !0 then the user selected something api_options.use_gop_as_pts = 1; // Force GOP timing for ES ccx_common_timing_settings.is_elementary_stream = 1; + break; case CCX_SM_TRANSPORT: case CCX_SM_PROGRAM: case CCX_SM_ASF: @@ -425,6 +426,7 @@ int api_start(struct ccx_s_options api_options) mprint("code in the MythTV's branch. Please report results to the address above. If\n"); mprint("something is broken it will be fixed. Thanks\n"); } + return ret ? 
EXIT_OK : EXIT_NO_CAPTIONS; } @@ -447,7 +449,7 @@ int main(int argc, char *argv[]) // See docs/ccextractor.cnf.sample for more info. #ifndef DISABLE_RUST - int compile_ret = ccxr_parse_parameters(api_options, argc, argv); + int compile_ret = ccxr_parse_parameters(argc, argv); #else int compile_ret = parse_parameters(api_options, argc, argv); #endif diff --git a/src/lib_ccx/lib_ccx.h b/src/lib_ccx/lib_ccx.h index 2e7b44145..a765ae8f9 100644 --- a/src/lib_ccx/lib_ccx.h +++ b/src/lib_ccx/lib_ccx.h @@ -162,7 +162,7 @@ void print_end_msg(void); //params.c #ifndef DISABLE_RUST -extern int ccxr_parse_parameters(struct ccx_s_options *opt, int argc, char *argv[]); +extern int ccxr_parse_parameters(int argc, char *argv[]); #endif int parse_parameters (struct ccx_s_options *opt, int argc, char *argv[]); void print_usage (void); diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index 5f96fc732..d52ce2360 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -601,7 +601,7 @@ void print_usage(void) mprint(" white). This causes all output in .srt/.smi/.vtt\n"); mprint(" files to have a font tag, which makes the files\n"); mprint(" larger. Add the color you want in RGB, such as\n"); - mprint(" --dc #FF0000 for red.\n"); + mprint(" --defaultcolor #FF0000 for red.\n"); mprint(" --sentencecap: Sentence capitalization. 
Use if you hate\n"); mprint(" ALL CAPS in subtitles.\n"); mprint(" --capfile file: Add the contents of 'file' to the list of words\n"); diff --git a/src/rust/src/activity.rs b/src/rust/lib_ccxr/src/activity.rs similarity index 93% rename from src/rust/src/activity.rs rename to src/rust/lib_ccxr/src/activity.rs index 73f9e4d8d..58cc106ee 100644 --- a/src/rust/src/activity.rs +++ b/src/rust/lib_ccxr/src/activity.rs @@ -1,7 +1,7 @@ use std::io; use std::io::Write; -use lib_ccxr::common::Options; +use crate::common::Options; pub trait ActivityExt { fn activity_report_version(&mut self); diff --git a/src/rust/lib_ccxr/src/common/constants.rs b/src/rust/lib_ccxr/src/common/constants.rs index 979be426b..f6d865cbe 100644 --- a/src/rust/lib_ccxr/src/common/constants.rs +++ b/src/rust/lib_ccxr/src/common/constants.rs @@ -143,6 +143,7 @@ pub const SLICE_TYPES: [&str; 10] = [ "0 - P", "1 - B", "2 - I", "3 - SP", "4 - SI", "5 - P", "6 - B", "7 - I", "8 - SP", "9 - SI", ]; +pub const CCX_DECODER_608_SCREEN_WIDTH: usize = 32; pub const ONEPASS: usize = 120; // Bytes we can always look ahead without going out of limits pub const BUFSIZE: usize = 2048 * 1024 + ONEPASS; // 2 Mb plus the safety pass pub const MAX_CLOSED_CAPTION_DATA_PER_PICTURE: usize = 32; diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs index dabe0019c..ef1fe26f9 100644 --- a/src/rust/lib_ccxr/src/common/options.rs +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -8,7 +8,7 @@ use crate::common::{ use crate::hardsubx::{ColorHue, OcrMode}; use crate::time::units::{Timestamp, TimestampFormat}; use crate::util::encoding::Encoding; -use crate::util::log::{DebugMessageMask, OutputTarget}; +use crate::util::log::{DebugMessageFlag, DebugMessageMask, OutputTarget}; use crate::util::time::stringztoms; #[derive(Debug, Clone)] @@ -186,7 +186,7 @@ impl Default for DemuxerConfig { fn default() -> Self { Self { m2ts: false, - auto_stream: StreamMode::default(), + auto_stream: 
StreamMode::Autodetect, codec: SelectCodec::Some(super::Codec::Any), nocodec: SelectCodec::None, ts_autoprogram: false, @@ -194,8 +194,8 @@ impl Default for DemuxerConfig { ts_cappids: Vec::new(), ts_forced_cappid: false, ts_forced_program: None, - ts_datastreamtype: StreamType::default(), - ts_forced_streamtype: StreamType::default(), + ts_datastreamtype: StreamType::Unknownstream, + ts_forced_streamtype: StreamType::Unknownstream, } } } @@ -338,7 +338,7 @@ pub struct EncoderConfig { } /// Options from user parameters -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct Options { /// Extract 1st, 2nd or both fields. Can be 1, 2 or 12 respectively. pub extract: u8, @@ -353,9 +353,9 @@ pub struct Options { pub nohtmlescape: bool, pub notypesetting: bool, /// The start of the segment we actually process - pub extraction_start: Timestamp, + pub extraction_start: Option, /// The end of the segment we actually process - pub extraction_end: Timestamp, + pub extraction_end: Option, pub print_file_reports: bool, /// Contains the settings for the 608 decoder. 
pub settings_608: Decoder608Settings, @@ -520,3 +520,110 @@ pub struct Options { #[cfg(feature = "enable_sharing")] pub translate_key: Option, } + +impl Default for Options { + fn default() -> Self { + Self { + extract: 1, + no_rollup: Default::default(), + noscte20: Default::default(), + webvtt_create_css: Default::default(), + cc_channel: Default::default(), + buffer_input: Default::default(), + nofontcolor: Default::default(), + nohtmlescape: Default::default(), + notypesetting: Default::default(), + extraction_start: Default::default(), + extraction_end: Default::default(), + print_file_reports: Default::default(), + settings_608: Default::default(), + settings_dtvcc: Default::default(), + is_608_enabled: Default::default(), + is_708_enabled: Default::default(), + binary_concat: true, + use_gop_as_pts: Default::default(), + fix_padding: Default::default(), + gui_mode_reports: Default::default(), + no_progress_bar: Default::default(), + sentence_cap_file: Default::default(), + live_stream: Some(Timestamp::default()), + filter_profanity_file: Default::default(), + messages_target: Default::default(), + timestamp_map: Default::default(), + dolevdist: Default::default(), + levdistmincnt: Default::default(), + levdistmaxpct: Default::default(), + investigate_packets: Default::default(), + fullbin: Default::default(), + nosync: Default::default(), + hauppauge_mode: Default::default(), + wtvconvertfix: Default::default(), + wtvmpeg2: Default::default(), + auto_myth: Default::default(), + mp4vidtrack: Default::default(), + extract_chapters: Default::default(), + usepicorder: Default::default(), + xmltv: Default::default(), + xmltvliveinterval: Default::default(), + xmltvoutputinterval: Default::default(), + xmltvonlycurrent: Default::default(), + keep_output_closed: Default::default(), + force_flush: Default::default(), + append_mode: Default::default(), + ucla: Default::default(), + tickertext: Default::default(), + hardsubx: Default::default(), + hardsubx_and_common: 
Default::default(), + dvblang: Default::default(), + ocrlang: Default::default(), + ocr_oem: Default::default(), + ocr_quantmode: 1, + mkvlang: Default::default(), + analyze_video_stream: Default::default(), + hardsubx_ocr_mode: Default::default(), + hardsubx_min_sub_duration: Default::default(), + hardsubx_detect_italics: Default::default(), + hardsubx_conf_thresh: Default::default(), + hardsubx_hue: Default::default(), + hardsubx_lum_thresh: Default::default(), + transcript_settings: Default::default(), + date_format: Default::default(), + send_to_srv: Default::default(), + write_format: OutputFormat::Srt, + write_format_rewritten: Default::default(), + use_ass_instead_of_ssa: Default::default(), + use_webvtt_styling: Default::default(), + udpsrc: Default::default(), + udpaddr: Default::default(), + udpport: Default::default(), + tcpport: Default::default(), + tcp_password: Default::default(), + tcp_desc: Default::default(), + srv_addr: Default::default(), + srv_port: Default::default(), + noautotimeref: Default::default(), + input_source: Default::default(), + output_filename: Default::default(), + inputfile: Default::default(), + demux_cfg: Default::default(), + enc_cfg: Default::default(), + subs_delay: Default::default(), + cc_to_stdout: Default::default(), + pes_header_to_stdout: Default::default(), + ignore_pts_jumps: Default::default(), + multiprogram: Default::default(), + out_interval: Default::default(), + segment_on_key_frames_only: Default::default(), + debug_mask: DebugMessageMask::new( + DebugMessageFlag::GENERIC_NOTICE, + DebugMessageFlag::VERBOSE, + ), + curlposturl: Default::default(), + sharing_enabled: Default::default(), + sharing_url: Default::default(), + translate_enabled: Default::default(), + translate_langs: Default::default(), + translate_key: Default::default(), + } + } +} diff --git a/src/rust/lib_ccxr/src/lib.rs b/src/rust/lib_ccxr/src/lib.rs index d202eb521..9f32678db 100644 --- a/src/rust/lib_ccxr/src/lib.rs +++ 
b/src/rust/lib_ccxr/src/lib.rs @@ -1,3 +1,4 @@ +pub mod activity; pub mod common; pub mod hardsubx; pub mod subtitle; diff --git a/src/rust/lib_ccxr/src/teletext.rs b/src/rust/lib_ccxr/src/teletext.rs index cb4f4a1af..966010200 100644 --- a/src/rust/lib_ccxr/src/teletext.rs +++ b/src/rust/lib_ccxr/src/teletext.rs @@ -41,6 +41,7 @@ use crate::common::OutputFormat; use crate::subtitle::Subtitle; use crate::time::units::{Timestamp, TimestampFormat}; use crate::util::bits::{decode_hamming_24_18, decode_hamming_8_4, get_parity}; +use crate::util::encoders_helper::telx_correct_case; use crate::util::encoding::{Ucs2Char, Ucs2String}; use crate::util::levenshtein::levenshtein; use crate::util::log::{debug, info, logger, DebugMessageFlag}; @@ -879,7 +880,7 @@ impl<'a> TeletextContext<'a> { /// /// This method will convert the first character of a sentence to uppercase and the rest of the /// characters to lowercase. - fn telx_case_fix(&mut self) { + fn telx_case_fix(&mut self, capitalization_list: &[String]) { let page_buffer_cur = match self.page_buffer_cur.as_mut() { None => return, Some(p) => p, @@ -916,7 +917,7 @@ impl<'a> TeletextContext<'a> { *page_buffer_cur = fixed_string; - todo!() // TODO: telx_correct_case(page_buffer_cur); + telx_correct_case(page_buffer_cur, capitalization_list); } /// Reset the page buffers and return its contents in the form of a [`Subtitle`]. 
@@ -938,7 +939,7 @@ impl<'a> TeletextContext<'a> { )) } - fn process_page(&mut self) -> Option { + fn process_page(&mut self, capitalization_list: &[String]) -> Option { let mut ans = None; if self @@ -1170,7 +1171,7 @@ impl<'a> TeletextContext<'a> { } if self.sentence_cap { - self.telx_case_fix() + self.telx_case_fix(capitalization_list) } match self.config.write_format { @@ -1241,6 +1242,7 @@ impl<'a> TeletextContext<'a> { packet: &TeletextPacketPayload, timestamp: Timestamp, subtitles: &mut Vec, + capitalization_list: &[String], ) { // variable names conform to ETS 300 706, chapter 7.1.2 let address = (decode_hamming_8_4(packet.address[1]).unwrap() << 4) @@ -1362,7 +1364,7 @@ impl<'a> TeletextContext<'a> { if self.page_buffer.hide_timestamp > timestamp { self.page_buffer.hide_timestamp = Timestamp::from_millis(0); } - if let Some(sub) = self.process_page() { + if let Some(sub) = self.process_page(capitalization_list) { subtitles.push(sub); } self.de_ctr = 0; @@ -1593,7 +1595,7 @@ impl<'a> TeletextContext<'a> { } /// Consumes the [`TeletextContext`] and appends the pending extracted subtitles in `subtitles`. 
- pub fn close(mut self, subtitles: Option<&mut Vec>) { + pub fn close(mut self, subtitles: Option<&mut Vec>, capitalization_list: &[String]) { info!( "\nTeletext decoder: {} packets processed \n", self.tlt_packet_counter @@ -1616,7 +1618,7 @@ impl<'a> TeletextContext<'a> { } // this time we do not subtract any frames, there will be no more frames self.page_buffer.hide_timestamp = self.last_timestamp; - if let Some(sub) = self.process_page() { + if let Some(sub) = self.process_page(capitalization_list) { subtitles.push(sub); } } diff --git a/src/rust/lib_ccxr/src/util/encoders_helper.rs b/src/rust/lib_ccxr/src/util/encoders_helper.rs new file mode 100644 index 000000000..410b7836d --- /dev/null +++ b/src/rust/lib_ccxr/src/util/encoders_helper.rs @@ -0,0 +1,157 @@ +// Some basic English words, so user-defined doesn't have to +// include the common stuff + +pub const CAPITALIZED_BUILTIN: [&str; 29] = [ + "I", + "I'd", + "I've", + "I'd", + "I'll", + "January", + "February", + "March", + "April", // May skipped intentionally + "June", + "July", + "August", + "September", + "October", + "November", + "December", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + "Halloween", + "United States", + "Spain", + "France", + "Italy", + "England", +]; + +pub const PROFANE_BUILTIN: [&str; 25] = [ + "arse", + "ass", + "asshole", + "bastard", + "bitch", + "bollocks", + "child-fucker", + "crap", + "cunt", + "damn", + "frigger", + "fuck", + "goddamn", + "godsdamn", + "hell", + "holy", + "horseshit", + "motherfucker", + "nigga", + "nigger", + "prick", + "shit", + "shitass", + "slut", + "twat", +]; + +pub fn capitalize_word(index: usize, word: &mut String, capitalization_list: &[String]) { + word.replace_range( + index..capitalization_list[index].len(), + capitalization_list[index].as_str(), + ); +} + +pub fn censor_word(index: usize, word: &mut String, profane: &[String]) { + word.replace_range( + index..profane[index].len(), + format!("{}", 
(0x98 as char)).as_str(), + ); // 0x98 is the asterisk in EIA-608 +} + +pub fn telx_correct_case(sub_line: &mut String, capitalization_list: &[String]) { + let delim: [char; 35] = [ + ' ', + '\n', + '\r', + 0x89 as char, + 0x99 as char, + '!', + '"', + '#', + '%', + '&', + '\'', + '(', + ')', + ';', + '<', + '=', + '>', + '?', + '[', + '\\', + ']', + '*', + '+', + ',', + '-', + '.', + '/', + ':', + '^', + '_', + '{', + '|', + '}', + '~', + '\0', + ]; + + let line = sub_line.clone(); + + let mut start = 0; + let splitted: Vec<(usize, &str)> = line + .split(|c| delim.contains(&c)) + .map(|part| { + let end = start + part.len(); + let result = (start, part); + start = end + 1; + result + }) + .collect(); + + for (index, c) in splitted { + // check if c is in capitalization_list + if capitalization_list.contains(&(c as &str).to_string()) { + // get the correct_c from capitalization_list + let correct_c: &String = capitalization_list + .iter() + .find(|&x| x == &c.to_string()) + .unwrap(); + + // get the length of correct_c + let len = correct_c.len(); + // replace c with correct_c in sub_line + sub_line.replace_range(index..index + len, correct_c); + } + } +} + +pub fn add_builtin_capitalization(list: &mut Vec) { + for word in CAPITALIZED_BUILTIN.iter() { + list.push(word.to_string()); + } +} + +pub fn add_builtin_profane(list: &mut Vec) { + for word in PROFANE_BUILTIN.iter() { + list.push(word.to_string()); + } +} diff --git a/src/rust/lib_ccxr/src/util/log.rs b/src/rust/lib_ccxr/src/util/log.rs index 582aeefcc..36b192a03 100644 --- a/src/rust/lib_ccxr/src/util/log.rs +++ b/src/rust/lib_ccxr/src/util/log.rs @@ -42,8 +42,8 @@ static LOGGER: OnceLock> = OnceLock::new(); /// The possible targets for logging messages. #[derive(Default, Clone, Copy, Debug, PartialEq, Eq)] pub enum OutputTarget { - #[default] Quiet = 0, + #[default] Stdout = 1, Stderr = 2, } @@ -190,13 +190,13 @@ impl DebugMessageMask { self.mask_on_normal } } - /// Return the mask according to its mode. 
+ /// Return the mask on normal pub fn normal_mask(&self) -> DebugMessageFlag { - self.mask_on_debug + self.mask_on_normal } - /// Return the mask according to its mode. + /// Return the mask on debug pub fn debug_mask(&self) -> DebugMessageFlag { - self.mask_on_normal + self.mask_on_debug } } diff --git a/src/rust/lib_ccxr/src/util/mod.rs b/src/rust/lib_ccxr/src/util/mod.rs index ee33ba5b4..2d05f1221 100644 --- a/src/rust/lib_ccxr/src/util/mod.rs +++ b/src/rust/lib_ccxr/src/util/mod.rs @@ -11,6 +11,7 @@ //! | `levenshtein_dist`, levenshtein_dist_char` | [`levenshtein`](levenshtein()) | pub mod bits; +pub mod encoders_helper; pub mod encoding; pub mod levenshtein; pub mod log; diff --git a/src/rust/src/args.rs b/src/rust/src/args.rs index 95336dfef..7e3a79fef 100644 --- a/src/rust/src/args.rs +++ b/src/rust/src/args.rs @@ -472,7 +472,7 @@ pub struct Args { /// white). This causes all output in .srt/.smi/.vtt /// files to have a font tag, which makes the files /// larger. Add the color you want in RGB, such as - /// --dc #FF0000 for red. + /// --defaultcolor #FF0000 for red. #[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)] pub defaultcolor: Option, /// Sentence capitalization. 
Use if you hate diff --git a/src/rust/src/ccx_encoders_helpers.rs b/src/rust/src/ccx_encoders_helpers.rs deleted file mode 100644 index 9aca063c2..000000000 --- a/src/rust/src/ccx_encoders_helpers.rs +++ /dev/null @@ -1,65 +0,0 @@ -// Some basic English words, so user-defined doesn't have to -// include the common stuff - -pub static mut PROFANE: Vec = Vec::new(); -pub static mut CAPITALIZATION_LIST: Vec = Vec::new(); - -pub const CAPITALIZED_BUILTIN: [&str; 29] = [ - "I", - "I'd", - "I've", - "I'd", - "I'll", - "January", - "February", - "March", - "April", // May skipped intentionally - "June", - "July", - "August", - "September", - "October", - "November", - "December", - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", - "Sunday", - "Halloween", - "United States", - "Spain", - "France", - "Italy", - "England", -]; - -pub const PROFANE_BUILTIN: [&str; 25] = [ - "arse", - "ass", - "asshole", - "bastard", - "bitch", - "bollocks", - "child-fucker", - "crap", - "cunt", - "damn", - "frigger", - "fuck", - "goddamn", - "godsdamn", - "hell", - "holy", - "horseshit", - "motherfucker", - "nigga", - "nigger", - "prick", - "shit", - "shitass", - "slut", - "twat", -]; diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index adbc6b481..e3fd99015 100644 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -43,45 +43,49 @@ pub trait CType2 { pub trait FromRust { /// # Safety /// This function is unsafe because it dereferences the pointer passed to it. - unsafe fn copy_from_rust(&self, options: T); + unsafe fn copy_from_rust(&mut self, options: T); } -impl FromRust for *mut ccx_s_options { +impl FromRust for ccx_s_options { /// # Safety /// /// This function is unsafe because it dereferences the pointer passed to it. 
- unsafe fn copy_from_rust(self: &*mut ccx_s_options, options: Options) { - (**self).extract = options.extract as _; - (**self).no_rollup = options.no_rollup as _; - (**self).noscte20 = options.noscte20 as _; - (**self).webvtt_create_css = options.webvtt_create_css as _; - (**self).cc_channel = options.cc_channel as _; - (**self).buffer_input = options.buffer_input as _; - (**self).nofontcolor = options.nofontcolor as _; - (**self).write_format = options.write_format.to_ctype(); - (**self).send_to_srv = options.send_to_srv as _; - (**self).nohtmlescape = options.nohtmlescape as _; - (**self).notypesetting = options.notypesetting as _; - (**self).extraction_start = options.extraction_start.to_ctype(); - (**self).extraction_end = options.extraction_end.to_ctype(); - (**self).print_file_reports = options.print_file_reports as _; - (**self).settings_608 = options.settings_608.to_ctype(); - (**self).settings_dtvcc = options.settings_dtvcc.to_ctype(); - (**self).is_608_enabled = options.is_608_enabled as _; - (**self).is_708_enabled = options.is_708_enabled as _; - (**self).millis_separator = options.date_format.millis_separator() as _; - (**self).binary_concat = options.binary_concat as _; - (**self).use_gop_as_pts = if let Some(usegops) = options.use_gop_as_pts { - usegops as _ + unsafe fn copy_from_rust(self: &mut ccx_s_options, options: Options) { + (*self).extract = options.extract as _; + (*self).no_rollup = options.no_rollup as _; + (*self).noscte20 = options.noscte20 as _; + (*self).webvtt_create_css = options.webvtt_create_css as _; + (*self).cc_channel = options.cc_channel as _; + (*self).buffer_input = options.buffer_input as _; + (*self).nofontcolor = options.nofontcolor as _; + (*self).write_format = options.write_format.to_ctype(); + (*self).send_to_srv = options.send_to_srv as _; + (*self).nohtmlescape = options.nohtmlescape as _; + (*self).notypesetting = options.notypesetting as _; + (*self).extraction_start = options.extraction_start.to_ctype(); + 
(*self).extraction_end = options.extraction_end.to_ctype(); + (*self).print_file_reports = options.print_file_reports as _; + (*self).settings_608 = options.settings_608.to_ctype(); + (*self).settings_dtvcc = options.settings_dtvcc.to_ctype(); + (*self).is_608_enabled = options.is_608_enabled as _; + (*self).is_708_enabled = options.is_708_enabled as _; + (*self).millis_separator = options.date_format.millis_separator() as _; + (*self).binary_concat = options.binary_concat as _; + (*self).use_gop_as_pts = if let Some(usegops) = options.use_gop_as_pts { + if usegops { + 1 + } else { + -1 + } } else { - -1 + 0 }; - (**self).fix_padding = options.fix_padding as _; - (**self).gui_mode_reports = options.gui_mode_reports as _; - (**self).no_progress_bar = options.no_progress_bar as _; + (*self).fix_padding = options.fix_padding as _; + (*self).gui_mode_reports = options.gui_mode_reports as _; + (*self).no_progress_bar = options.no_progress_bar as _; if options.sentence_cap_file.try_exists().unwrap_or_default() { - (**self).sentence_cap_file = string_to_c_char( + (*self).sentence_cap_file = string_to_c_char( options .sentence_cap_file .clone() @@ -90,7 +94,7 @@ impl FromRust for *mut ccx_s_options { ); } - (**self).live_stream = if let Some(live_stream) = options.live_stream { + (*self).live_stream = if let Some(live_stream) = options.live_stream { live_stream.seconds() as _ } else { -1 @@ -100,7 +104,7 @@ impl FromRust for *mut ccx_s_options { .try_exists() .unwrap_or_default() { - (**self).filter_profanity_file = string_to_c_char( + (*self).filter_profanity_file = string_to_c_char( options .filter_profanity_file .clone() @@ -108,122 +112,123 @@ impl FromRust for *mut ccx_s_options { .unwrap_or_default(), ); } - (**self).messages_target = options.messages_target as _; - (**self).timestamp_map = options.timestamp_map as _; - (**self).dolevdist = options.dolevdist.into(); - (**self).levdistmincnt = options.levdistmincnt as _; - (**self).levdistmaxpct = 
options.levdistmaxpct as _; - (**self).investigate_packets = options.investigate_packets as _; - (**self).fullbin = options.fullbin as _; - (**self).nosync = options.nosync as _; - (**self).hauppauge_mode = options.hauppauge_mode as _; - (**self).wtvconvertfix = options.wtvconvertfix as _; - (**self).wtvmpeg2 = options.wtvmpeg2 as _; - (**self).auto_myth = if let Some(auto_myth) = options.auto_myth { + (*self).messages_target = options.messages_target as _; + (*self).timestamp_map = options.timestamp_map as _; + (*self).dolevdist = options.dolevdist.into(); + (*self).levdistmincnt = options.levdistmincnt as _; + (*self).levdistmaxpct = options.levdistmaxpct as _; + (*self).investigate_packets = options.investigate_packets as _; + (*self).fullbin = options.fullbin as _; + (*self).nosync = options.nosync as _; + (*self).hauppauge_mode = options.hauppauge_mode as _; + (*self).wtvconvertfix = options.wtvconvertfix as _; + (*self).wtvmpeg2 = options.wtvmpeg2 as _; + (*self).auto_myth = if let Some(auto_myth) = options.auto_myth { auto_myth as _ } else { - -1 + 2 }; - (**self).mp4vidtrack = options.mp4vidtrack as _; - (**self).extract_chapters = options.extract_chapters as _; - (**self).usepicorder = options.usepicorder as _; - (**self).xmltv = options.xmltv as _; - (**self).xmltvliveinterval = options.xmltvliveinterval.seconds() as _; - (**self).xmltvoutputinterval = options.xmltvoutputinterval.seconds() as _; - (**self).xmltvonlycurrent = options.xmltvonlycurrent.into(); - (**self).keep_output_closed = options.keep_output_closed as _; - (**self).force_flush = options.force_flush as _; - (**self).append_mode = options.append_mode as _; - (**self).ucla = options.ucla as _; - (**self).tickertext = options.tickertext as _; - (**self).hardsubx = options.hardsubx as _; - (**self).hardsubx_and_common = options.hardsubx_and_common as _; + (*self).mp4vidtrack = options.mp4vidtrack as _; + (*self).extract_chapters = options.extract_chapters as _; + (*self).usepicorder = 
options.usepicorder as _; + (*self).xmltv = options.xmltv as _; + (*self).xmltvliveinterval = options.xmltvliveinterval.seconds() as _; + (*self).xmltvoutputinterval = options.xmltvoutputinterval.seconds() as _; + (*self).xmltvonlycurrent = options.xmltvonlycurrent.into(); + (*self).keep_output_closed = options.keep_output_closed as _; + (*self).force_flush = options.force_flush as _; + (*self).append_mode = options.append_mode as _; + (*self).ucla = options.ucla as _; + (*self).tickertext = options.tickertext as _; + (*self).hardsubx = options.hardsubx as _; + (*self).hardsubx_and_common = options.hardsubx_and_common as _; if let Some(dvblang) = options.dvblang { - (**self).dvblang = string_to_c_char(dvblang.to_ctype().as_str()); + (*self).dvblang = string_to_c_char(dvblang.to_ctype().as_str()); } if options.ocrlang.try_exists().unwrap_or_default() { - (**self).ocrlang = string_to_c_char(options.ocrlang.to_str().unwrap()); + (*self).ocrlang = string_to_c_char(options.ocrlang.to_str().unwrap()); } - (**self).ocr_oem = options.ocr_oem as _; - (**self).ocr_quantmode = options.ocr_quantmode as _; + (*self).ocr_oem = options.ocr_oem as _; + (*self).ocr_quantmode = options.ocr_quantmode as _; if let Some(mkvlang) = options.mkvlang { - (**self).mkvlang = string_to_c_char(mkvlang.to_ctype().as_str()); - } - (**self).analyze_video_stream = options.analyze_video_stream as _; - (**self).hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype(); - (**self).hardsubx_subcolor = options.hardsubx_hue.to_ctype(); - (**self).hardsubx_min_sub_duration = options.hardsubx_min_sub_duration.seconds() as _; - (**self).hardsubx_detect_italics = options.hardsubx_detect_italics as _; - (**self).hardsubx_conf_thresh = options.hardsubx_conf_thresh as _; - (**self).hardsubx_hue = options.hardsubx_hue.get_hue() as _; - (**self).hardsubx_lum_thresh = options.hardsubx_lum_thresh as _; - (**self).transcript_settings = options.transcript_settings.to_ctype(); - (**self).date_format = 
options.date_format.to_ctype(); - (**self).write_format_rewritten = options.write_format_rewritten as _; - (**self).use_ass_instead_of_ssa = options.use_ass_instead_of_ssa as _; - (**self).use_webvtt_styling = options.use_webvtt_styling as _; - (**self).debug_mask = options.debug_mask.normal_mask().bits() as _; - (**self).debug_mask_on_debug = options.debug_mask.debug_mask().bits() as _; + (*self).mkvlang = string_to_c_char(mkvlang.to_ctype().as_str()); + } + (*self).analyze_video_stream = options.analyze_video_stream as _; + (*self).hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype(); + (*self).hardsubx_subcolor = options.hardsubx_hue.to_ctype(); + (*self).hardsubx_min_sub_duration = options.hardsubx_min_sub_duration.seconds() as _; + (*self).hardsubx_detect_italics = options.hardsubx_detect_italics as _; + (*self).hardsubx_conf_thresh = options.hardsubx_conf_thresh as _; + (*self).hardsubx_hue = options.hardsubx_hue.get_hue() as _; + (*self).hardsubx_lum_thresh = options.hardsubx_lum_thresh as _; + (*self).transcript_settings = options.transcript_settings.to_ctype(); + (*self).date_format = options.date_format.to_ctype(); + (*self).write_format_rewritten = options.write_format_rewritten as _; + (*self).use_ass_instead_of_ssa = options.use_ass_instead_of_ssa as _; + (*self).use_webvtt_styling = options.use_webvtt_styling as _; + (*self).debug_mask = options.debug_mask.normal_mask().bits() as _; + (*self).debug_mask_on_debug = options.debug_mask.debug_mask().bits() as _; if options.udpsrc.is_some() { - (**self).udpsrc = string_to_c_char(&options.udpsrc.clone().unwrap()); + (*self).udpsrc = string_to_c_char(&options.udpsrc.clone().unwrap()); } if options.udpaddr.is_some() { - (**self).udpaddr = string_to_c_char(&options.udpaddr.clone().unwrap()); + (*self).udpaddr = string_to_c_char(&options.udpaddr.clone().unwrap()); } - (**self).udpport = options.udpport as _; + (*self).udpport = options.udpport as _; if options.tcpport.is_some() { - (**self).tcpport = 
string_to_c_char(&options.tcpport.unwrap().to_string()); + (*self).tcpport = string_to_c_char(&options.tcpport.unwrap().to_string()); } if options.tcp_password.is_some() { - (**self).tcp_password = string_to_c_char(&options.tcp_password.clone().unwrap()); + (*self).tcp_password = string_to_c_char(&options.tcp_password.clone().unwrap()); } if options.tcp_desc.is_some() { - (**self).tcp_desc = string_to_c_char(&options.tcp_desc.clone().unwrap()); + (*self).tcp_desc = string_to_c_char(&options.tcp_desc.clone().unwrap()); } if options.srv_addr.is_some() { - (**self).srv_addr = string_to_c_char(&options.srv_addr.clone().unwrap()); + (*self).srv_addr = string_to_c_char(&options.srv_addr.clone().unwrap()); } if options.srv_port.is_some() { - (**self).srv_port = string_to_c_char(&options.srv_port.unwrap().to_string()); + (*self).srv_port = string_to_c_char(&options.srv_port.unwrap().to_string()); } - (**self).noautotimeref = options.noautotimeref as _; - (**self).input_source = options.input_source as _; + (*self).noautotimeref = options.noautotimeref as _; + (*self).input_source = options.input_source as _; if options.output_filename.is_some() { - (**self).output_filename = string_to_c_char(&options.output_filename.clone().unwrap()); + (*self).output_filename = string_to_c_char(&options.output_filename.clone().unwrap()); } if options.inputfile.is_some() { - (**self).inputfile = string_to_c_chars(options.inputfile.clone().unwrap()); - (**self).num_input_files = options.inputfile.as_ref().unwrap().len() as _; - } - (**self).demux_cfg = options.demux_cfg.to_ctype(); - (**self).enc_cfg = options.enc_cfg.to_ctype(); - (**self).subs_delay = options.subs_delay.millis(); - (**self).cc_to_stdout = options.cc_to_stdout as _; - (**self).pes_header_to_stdout = options.pes_header_to_stdout as _; - (**self).ignore_pts_jumps = options.ignore_pts_jumps as _; - (**self).multiprogram = options.multiprogram as _; - (**self).out_interval = options.out_interval; - 
(**self).segment_on_key_frames_only = options.segment_on_key_frames_only as _; + (*self).inputfile = string_to_c_chars(options.inputfile.clone().unwrap()); + (*self).num_input_files = + options.inputfile.iter().filter(|s| !s.is_empty()).count() as _; + } + (*self).demux_cfg = options.demux_cfg.to_ctype(); + (*self).enc_cfg = options.enc_cfg.to_ctype(); + (*self).subs_delay = options.subs_delay.millis(); + (*self).cc_to_stdout = options.cc_to_stdout as _; + (*self).pes_header_to_stdout = options.pes_header_to_stdout as _; + (*self).ignore_pts_jumps = options.ignore_pts_jumps as _; + (*self).multiprogram = options.multiprogram as _; + (*self).out_interval = options.out_interval; + (*self).segment_on_key_frames_only = options.segment_on_key_frames_only as _; #[cfg(feature = "with_libcurl")] { if options.curlposturl.is_some() { - (**self).curlposturl = + (*self).curlposturl = string_to_c_char(&options.curlposturl.as_ref().unwrap_or_default().as_str()); } } #[cfg(feature = "enable_sharing")] { - (**self).sharing_enabled = options.sharing_enabled as _; + (*self).sharing_enabled = options.sharing_enabled as _; if options.sharing_url.is_some() { - (**self).sharing_url = + (*self).sharing_url = string_to_c_char(&options.sharing_url.as_ref().unwrap().as_str()); } - (**self).translate_enabled = options.translate_enabled as _; + (*self).translate_enabled = options.translate_enabled as _; if options.translate_langs.is_some() { - (**self).translate_langs = string_to_c_char(&options.translate_langs.unwrap()); + (*self).translate_langs = string_to_c_char(&options.translate_langs.unwrap()); } if options.translate_key.is_some() { - (**self).translate_key = string_to_c_char(&options.translate_key.unwrap()); + (*self).translate_key = string_to_c_char(&options.translate_key.unwrap()); } } } diff --git a/src/rust/src/decoder/mod.rs b/src/rust/src/decoder/mod.rs index 875012fdc..78ff03632 100644 --- a/src/rust/src/decoder/mod.rs +++ b/src/rust/src/decoder/mod.rs @@ -10,9 +10,12 @@ mod 
timing; mod tv_screen; mod window; -use crate::{bindings::*, utils::is_true}; +use lib_ccxr::{ + debug, fatal, + util::log::{DebugMessageFlag, ExitCause}, +}; -use log::{debug, warn}; +use crate::{bindings::*, utils::is_true}; const CCX_DTVCC_MAX_PACKET_LENGTH: u8 = 128; const CCX_DTVCC_NO_LAST_SEQUENCE: i32 = -1; @@ -71,10 +74,10 @@ impl<'a> Dtvcc<'a> { // type 0 and 1 are for CEA 608 data and are handled before calling this function // valid types for CEA 708 data are only 2 and 3 2 => { - debug!("dtvcc_process_data: DTVCC Channel Packet Data"); + debug!( msg_type = DebugMessageFlag::DECODER_708; "dtvcc_process_data: DTVCC Channel Packet Data"); if cc_valid == 1 && self.is_header_parsed { if self.packet_length > 253 { - warn!("dtvcc_process_data: Warning: Legal packet size exceeded (1), data not added."); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_data: Warning: Legal packet size exceeded (1), data not added."); } else { self.add_data_to_packet(data1, data2); @@ -95,13 +98,13 @@ impl<'a> Dtvcc<'a> { } } 3 => { - debug!("dtvcc_process_data: DTVCC Channel Packet Start"); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_data: DTVCC Channel Packet Start"); if cc_valid == 1 { if self.packet_length > (CCX_DTVCC_MAX_PACKET_LENGTH - 1) { - warn!("dtvcc_process_data: Warning: Legal packet size exceeded (2), data not added."); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_data: Warning: Legal packet size exceeded (2), data not added."); } else { if self.is_header_parsed { - warn!("dtvcc_process_data: Warning: Incorrect packet length specified. Packet will be skipped."); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_data: Warning: Incorrect packet length specified. 
Packet will be skipped."); self.clear_packet(); } self.add_data_to_packet(data1, data2); @@ -109,7 +112,7 @@ impl<'a> Dtvcc<'a> { } } } - _ => warn!( + _ => fatal!(cause = ExitCause::Bug; "dtvcc_process_data: shouldn't be here - cc_type: {}", cc_type ), @@ -126,6 +129,7 @@ impl<'a> Dtvcc<'a> { pub fn process_current_packet(&mut self, len: u8) { let seq = (self.packet[0] & 0xC0) >> 6; debug!( + msg_type = DebugMessageFlag::DECODER_708; "dtvcc_process_current_packet: Sequence: {}, packet length: {}", seq, len ); @@ -138,7 +142,7 @@ impl<'a> Dtvcc<'a> { if self.last_sequence != CCX_DTVCC_NO_LAST_SEQUENCE && (self.last_sequence + 1) % 4 != seq as i32 { - warn!("dtvcc_process_current_packet: Unexpected sequence number, it is {} but should be {}", seq, (self.last_sequence +1) % 4); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_current_packet: Unexpected sequence number, it is {} but should be {}", seq, (self.last_sequence +1) % 4); } self.last_sequence = seq as i32; @@ -146,7 +150,7 @@ impl<'a> Dtvcc<'a> { while pos < len { let mut service_number = (self.packet[pos as usize] & 0xE0) >> 5; // 3 more significant bits let block_length = self.packet[pos as usize] & 0x1F; // 5 less significant bits - debug!("dtvcc_process_current_packet: Standard header Service number: {}, Block length: {}", service_number, block_length); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_current_packet: Standard header Service number: {}, Block length: {}", service_number, block_length); if service_number == 7 { // There is an extended header @@ -154,7 +158,7 @@ impl<'a> Dtvcc<'a> { pos += 1; service_number = self.packet[pos as usize] & 0x3F; // 6 more significant bits if service_number > 7 { - warn!("dtvcc_process_current_packet: Illegal service number in extended header: {}", service_number); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_current_packet: Illegal service number in extended header: {}", service_number); } } @@ -187,7 +191,7 
@@ impl<'a> Dtvcc<'a> { if len < 128 && self.packet[pos as usize] != 0 { // Null header is mandatory if there is room - warn!("dtvcc_process_current_packet: Warning: Null header expected but not found."); + debug!(msg_type = DebugMessageFlag::DECODER_708;"dtvcc_process_current_packet: Warning: Null header expected but not found."); } } /// Clear current packet diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index bd94fd526..0db93c614 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -12,9 +12,8 @@ pub mod bindings { include!(concat!(env!("OUT_DIR"), "/bindings.rs")); } -pub mod activity; + pub mod args; -pub mod ccx_encoders_helpers; pub mod common; pub mod decoder; #[cfg(feature = "hardsubx_ocr")] @@ -56,6 +55,8 @@ extern "C" { #[allow(dead_code)] extern "C" { + static mut usercolor_rgb: [c_int; 8]; + static mut FILEBUFFERSIZE: c_int; static mut MPEG_CLOCK_FREQ: c_int; static mut tlt_config: ccx_s_teletext_config; static mut ccx_options: ccx_s_options; @@ -208,11 +209,8 @@ extern "C" { /// /// Parse parameters from argv and argc #[no_mangle] -pub unsafe extern "C" fn ccxr_parse_parameters( - mut _options: *mut ccx_s_options, - argc: c_int, - argv: *mut *mut c_char, -) -> c_int { +pub unsafe extern "C" fn ccxr_parse_parameters(argc: c_int, argv: *mut *mut c_char) -> c_int { + ccxr_init_logger(); // Convert argv to Vec and pass it to parse_parameters let args = std::slice::from_raw_parts(argv, argc as usize) .iter() @@ -246,25 +244,33 @@ pub unsafe extern "C" fn ccxr_parse_parameters( ErrorKind::UnknownArgument => { println!("Unknown Argument"); println!("{}", e); - return 1; + return ExitCause::MalformedParameter.exit_code(); } _ => { println!("{}", e); - return 1; + return ExitCause::Failure.exit_code(); } } } }; + let mut capitalization_list: Vec = Vec::new(); + let mut profane: Vec = Vec::new(); + let mut opt = Options::default(); let mut _tlt_config = TeletextConfig::default(); - opt.parse_parameters(&args, &mut _tlt_config); + 
opt.parse_parameters( + &args, + &mut _tlt_config, + &mut capitalization_list, + &mut profane, + ); tlt_config = _tlt_config.to_ctype(&opt); // Convert the rust struct (CcxOptions) to C struct (ccx_s_options), so that it can be used by the C code - _options.copy_from_rust(opt); + ccx_options.copy_from_rust(opt); - 0 + ExitCause::Ok.exit_code() } #[cfg(test)] diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 1c381bf88..5f1e0b997 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1,6 +1,8 @@ use args::{Args, OutFormat}; -use lib_ccxr::teletext::{TeletextConfig, TeletextPageNumber}; +use lib_ccxr::activity::ActivityExt; +use lib_ccxr::teletext::{TeletextConfig, TeletextPageNumber, UTC_REFVALUE}; use lib_ccxr::time::units::{Timestamp, TimestampFormat}; +use lib_ccxr::util::encoders_helper::{add_builtin_capitalization, add_builtin_profane}; use lib_ccxr::util::encoding::Encoding; use lib_ccxr::util::log::{DebugMessageFlag, DebugMessageMask, ExitCause, OutputTarget}; use lib_ccxr::util::time::stringztoms; @@ -10,21 +12,18 @@ use std::convert::TryInto; use std::fs::File; use std::io::{prelude::*, BufReader}; use std::path::PathBuf; -use std::ptr::addr_of_mut; use std::str::FromStr; use std::string::String; -use lib_ccxr::common::*; +use lib_ccxr::{common::*, fatal}; use cfg_if::cfg_if; use time::OffsetDateTime; +use crate::args::CCXCodec; use crate::args::{self, InFormat}; -use crate::ccx_encoders_helpers::{ - CAPITALIZATION_LIST, CAPITALIZED_BUILTIN, PROFANE, PROFANE_BUILTIN, -}; -use crate::{activity::ActivityExt, args::CCXCodec}; +use crate::{usercolor_rgb, FILEBUFFERSIZE}; cfg_if! { if #[cfg(test)] { @@ -47,22 +46,19 @@ cfg_if! 
{ } } -pub static mut FILEBUFFERSIZE: i64 = 1024 * 1024 * 16; -static mut USERCOLOR_RGB: String = String::new(); -pub static mut UTC_REFVALUE: u64 = 0; -const CCX_DECODER_608_SCREEN_WIDTH: u16 = 32; -static mut inputfile_capacity: i32 = 0; - -fn get_vector_words(string_array: &[&str]) -> Vec { - let mut vector = Vec::new(); - for string in string_array { - vector.push(String::from(*string)); +fn set_usercolor_rgb(color: &str) { + let mut rgb: [i32; 8] = [0; 8]; + for (i, item) in color.chars().enumerate() { + rgb[i] = item as i32; + } + rgb[7] = 0; + unsafe { + usercolor_rgb = rgb; } - vector } -fn atol(bufsize: &str) -> i64 { - let mut val = bufsize[0..bufsize.len() - 1].parse::().unwrap(); +fn atol(bufsize: &str) -> i32 { + let mut val = bufsize[0..bufsize.len() - 1].parse::().unwrap(); let size = bufsize .to_string() .to_uppercase() @@ -97,13 +93,15 @@ where match atoi_hex(s) { Ok(val) => val, Err(_) => { - println!("Malformed parameter: {}", s); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Malformed parameter: {}",s + ); } } } -unsafe fn process_word_file(filename: &str, list: *mut Vec) -> Result<(), std::io::Error> { +fn process_word_file(filename: &str, list: &mut Vec) -> Result<(), std::io::Error> { let file = File::open(filename)?; let reader = BufReader::new(file); let mut num = 0; @@ -116,7 +114,7 @@ unsafe fn process_word_file(filename: &str, list: *mut Vec) -> Result<() } let new_len = line.trim().len(); - if new_len > CCX_DECODER_608_SCREEN_WIDTH as usize { + if new_len > CCX_DECODER_608_SCREEN_WIDTH { println!( "Word in line {} too long, max = {} characters.", num, CCX_DECODER_608_SCREEN_WIDTH @@ -125,7 +123,7 @@ unsafe fn process_word_file(filename: &str, list: *mut Vec) -> Result<() } if new_len > 0 { - (*list).push(line.trim().to_string()); + list.push(line.trim().to_string()); } } Ok(()) @@ -139,15 +137,19 @@ fn mkvlang_params_check(lang: &str) { _present = char_index; if 
_present - initial < 3 || _present - initial > 6 { - println!("language codes should be xxx,xxx,xxx,....\n"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "language codes should be xxx,xxx,xxx,....\n" + ); } if _present - initial == 6 { let sub_slice = &lang[initial.._present]; if !sub_slice.contains('-') { - println!("language code is not of the form xxx-xx\n"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "language codes should be xxx,xxx,xxx,....\n" + ); } } @@ -166,28 +168,48 @@ fn mkvlang_params_check(lang: &str) { } if _present - initial < 2 || _present - initial > 5 { - println!("last language code should be xxx.\n"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "last language code should be xxx.\n" + ); } if _present - initial == 5 { let sub_slice = &lang[initial.._present]; if !sub_slice.contains('-') { - println!("last language code is not of the form xxx-xx\n"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "last language code is not of the form xxx-xx\n" + ); } } } +fn get_file_buffer_size() -> i32 { + unsafe { FILEBUFFERSIZE } +} + +fn set_file_buffer_size(size: i32) { + unsafe { + FILEBUFFERSIZE = size; + } +} + pub trait OptionsExt { fn set_output_format_type(&mut self, out_format: OutFormat); fn set_output_format(&mut self, args: &Args); fn set_input_format_type(&mut self, input_format: InFormat); fn set_input_format(&mut self, args: &Args); fn parse_708_services(&mut self, s: &str); - fn append_file_to_queue(&mut self, filename: &str) -> i32; - fn add_file_sequence(&mut self, filename: &mut String) -> i32; - fn parse_parameters(&mut self, args: &Args, tlt_config: &mut TeletextConfig); + fn append_file_to_queue(&mut self, filename: &str, inputfile_capacity: &mut 
i32) -> i32; + fn add_file_sequence(&mut self, filename: &mut String, inputfile_capacity: &mut i32) -> i32; + fn parse_parameters( + &mut self, + args: &Args, + tlt_config: &mut TeletextConfig, + capitalization_list: &mut Vec, + profane: &mut Vec, + ); fn is_inputfile_empty(&self) -> bool; } @@ -314,8 +336,10 @@ impl OptionsExt for Options { } else if args.wtv { self.set_input_format_type(InFormat::Wtv); } else { - println!("Unknown input file format: {}\n", args.input.unwrap()); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Unknown input file format: {}\n", args.input.unwrap() + ); } } @@ -324,8 +348,17 @@ impl OptionsExt for Options { let charset = if s.len() > 3 { &s[4..s.len() - 1] } else { "" }; self.settings_dtvcc.enabled = true; self.enc_cfg.dtvcc_extract = true; - self.enc_cfg.services_charsets = DtvccServiceCharset::Same(charset.to_string()); + if charset.is_empty() { + self.enc_cfg.services_charsets = DtvccServiceCharset::Unique( + vec![String::new(); DTVCC_MAX_SERVICES] + .into_boxed_slice() + .try_into() + .unwrap(), + ); + } else { + self.enc_cfg.services_charsets = DtvccServiceCharset::Same(charset.to_string()); + } for i in 0..DTVCC_MAX_SERVICES { self.settings_dtvcc.services_enabled[i] = true; self.enc_cfg.services_enabled[i] = true; @@ -370,8 +403,10 @@ impl OptionsExt for Options { for (i, service) in services.iter().enumerate() { let svc = service.parse::().unwrap(); if !(1..=DTVCC_MAX_SERVICES).contains(&svc) { - println!("[CEA-708] Malformed parameter: Invalid service number ({}), valid range is 1-{}.\n", svc, DTVCC_MAX_SERVICES); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "[CEA-708] Malformed parameter: Invalid service number ({}), valid range is 1-{}.\n", svc, DTVCC_MAX_SERVICES + ); } self.settings_dtvcc.services_enabled[svc - 1] = true; self.enc_cfg.services_enabled[svc - 1] = true; @@ -387,40 
+422,38 @@ impl OptionsExt for Options { } if self.settings_dtvcc.active_services_count == 0 { - println!("[CEA-708] Malformed parameter: no services\n"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "[CEA-708] Malformed parameter: no services\n" + ); } } - fn append_file_to_queue(&mut self, filename: &str) -> i32 { + fn append_file_to_queue(&mut self, filename: &str, inputfile_capacity: &mut i32) -> i32 { if filename.is_empty() { return 0; } - let new_size: usize; - - unsafe { - let num_input_files = if let Some(ref inputfile) = self.inputfile { - inputfile.len() - } else { - 0 - }; - if num_input_files >= inputfile_capacity as _ { - inputfile_capacity += 10; - } + let num_input_files = if let Some(ref inputfile) = self.inputfile { + inputfile.len() + } else { + 0 + }; + if num_input_files >= *inputfile_capacity as _ { + *inputfile_capacity += 10; + } - new_size = inputfile_capacity.try_into().unwrap_or(0); + let new_size = (*inputfile_capacity).try_into().unwrap_or(0); - if self.inputfile.is_none() { - self.inputfile = Some(Vec::with_capacity(new_size)); - } + if self.inputfile.is_none() { + self.inputfile = Some(Vec::with_capacity(new_size)); + } - if let Some(ref mut inputfile) = self.inputfile { - inputfile.resize(new_size, String::new()); + if let Some(ref mut inputfile) = self.inputfile { + inputfile.resize(new_size, String::new()); - let index = num_input_files; - inputfile[index] = filename.to_string(); - } + let index = num_input_files; + inputfile[index] = filename.to_string(); } 0 @@ -428,7 +461,7 @@ impl OptionsExt for Options { // Used for adding a sequence of files that are numbered // Ex: filename: video1.mp4 will search for video2.mp4, video3.mp4, ... 
- fn add_file_sequence(&mut self, filename: &mut String) -> i32 { + fn add_file_sequence(&mut self, filename: &mut String, inputfile_capacity: &mut i32) -> i32 { filename.pop(); let mut n: i32 = filename.len() as i32 - 1; let bytes = filename.as_bytes(); @@ -439,7 +472,7 @@ impl OptionsExt for Options { } if n == -1 { // None. No expansion needed - return self.append_file_to_queue(filename); + return self.append_file_to_queue(filename, inputfile_capacity); } let mut m: i32 = n; @@ -457,7 +490,7 @@ impl OptionsExt for Options { loop { if std::path::Path::new(&filename).exists() { - if self.append_file_to_queue(filename.as_str()) != 0 { + if self.append_file_to_queue(filename.as_str(), inputfile_capacity) != 0 { return -1; } temp = format!("{}", i + 1); @@ -483,7 +516,13 @@ impl OptionsExt for Options { 0 } - fn parse_parameters(&mut self, args: &Args, tlt_config: &mut TeletextConfig) { + fn parse_parameters( + &mut self, + args: &Args, + tlt_config: &mut TeletextConfig, + capitalization_list: &mut Vec, + profane: &mut Vec, + ) { if args.stdin { unsafe { set_binary_mode(); @@ -491,26 +530,31 @@ impl OptionsExt for Options { self.input_source = DataSource::Stdin; self.live_stream = None; } + let mut inputfile_capacity = 0; if let Some(ref files) = args.inputfile { for inputfile in files { let plus_sign = '+'; let rc: i32 = if !inputfile.ends_with(plus_sign) { - self.append_file_to_queue(inputfile) + self.append_file_to_queue(inputfile, &mut inputfile_capacity) } else { - self.add_file_sequence(&mut inputfile.clone()) + self.add_file_sequence(&mut inputfile.clone(), &mut inputfile_capacity) }; if rc < 0 { - println!("Fatal: Not enough memory to parse parameters.\n"); - std::process::exit(ExitCause::NotEnoughMemory.exit_code()); + fatal!( + cause = ExitCause::NotEnoughMemory; + "Fatal: Not enough memory to parse parameters.\n" + ); } } } if self.inputfile.is_none() { - println!("No input file specified\n"); - std::process::exit(ExitCause::NoInputFiles.exit_code()); + 
fatal!( + cause = ExitCause::NoInputFiles; + "No input file specified\n" + ); } #[cfg(feature = "hardsubx_ocr")] @@ -532,8 +576,10 @@ impl OptionsExt for Options { }; if ocr_mode.is_none() { - println!("Invalid OCR mode"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid OCR mode" + ); } self.hardsubx_ocr_mode = ocr_mode.unwrap_or_default(); @@ -565,15 +611,19 @@ impl OptionsExt for Options { _ => { let result = subcolor.parse::(); if result.is_err() { - println!("Invalid Hue value"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid Hue value" + ); } let hue: f64 = result.unwrap(); if hue <= 0.0 || hue > 360.0 { - println!("Invalid Hue value"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid Hue value" + ); } self.hardsubx_hue = ColorHue::Custom(hue); } @@ -582,8 +632,10 @@ impl OptionsExt for Options { if let Some(ref value) = args.min_sub_duration { if *value == 0.0 { - println!("Invalid minimum subtitle duration"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid minimum subtitle duration" + ); } self.hardsubx_min_sub_duration = Timestamp::from_millis((1000.0 * *value) as _); } @@ -594,16 +646,20 @@ impl OptionsExt for Options { if let Some(ref value) = args.conf_thresh { if !(0.0..=100.0).contains(value) { - println!("Invalid confidence threshold, valid values are between 0 & 100"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid confidence threshold, valid values are between 0 & 100" + ); } self.hardsubx_conf_thresh = *value as _; } if let Some(ref value) = args.whiteness_thresh { if !(0.0..=100.0).contains(value) { - println!("Invalid whiteness threshold, valid values are 
between 0 & 100"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid whiteness threshold, valid values are between 0 & 100" + ); } self.hardsubx_lum_thresh = *value as _; } @@ -635,12 +691,10 @@ impl OptionsExt for Options { } if let Some(ref buffersize) = args.buffersize { - unsafe { - FILEBUFFERSIZE = atol(buffersize); + set_file_buffer_size(atol(buffersize)); - if FILEBUFFERSIZE < 8 { - FILEBUFFERSIZE = 8; // Otherwise crashes are guaranteed at least in MythTV - } + if get_file_buffer_size() < 8 { + set_file_buffer_size(8); // Otherwise crashes are guaranteed at least in MythTV } } @@ -720,8 +774,10 @@ impl OptionsExt for Options { if let Some(ref quant) = args.quant { if !(0..=2).contains(quant) { - println!("Invalid quant value"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid quant value" + ); } self.ocr_quantmode = *quant; } @@ -732,8 +788,10 @@ impl OptionsExt for Options { if let Some(ref oem) = args.oem { if !(0..=2).contains(oem) { - println!("Invalid oem value"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid oem value" + ); } self.ocr_oem = *oem; } @@ -899,14 +957,14 @@ impl OptionsExt for Options { } if let Some(ref defaultcolor) = args.defaultcolor { - unsafe { - if defaultcolor.len() != 7 || !defaultcolor.starts_with('#') { - println!("Invalid default color"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); - } - USERCOLOR_RGB.clone_from(defaultcolor); - self.settings_608.default_color = Decoder608ColorCode::Userdefined; + if defaultcolor.len() != 7 || !defaultcolor.starts_with('#') { + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid default color" + ); } + set_usercolor_rgb(defaultcolor); + self.settings_608.default_color = Decoder608ColorCode::Userdefined; } if let Some(ref delay) = 
args.delay { @@ -918,10 +976,10 @@ impl OptionsExt for Options { } if let Some(ref startat) = args.startat { - self.extraction_start = stringztoms(startat.clone().as_str()).unwrap(); + self.extraction_start = Some(stringztoms(startat.clone().as_str()).unwrap()); } if let Some(ref endat) = args.endat { - self.extraction_end = stringztoms(endat.clone().as_str()).unwrap(); + self.extraction_end = Some(stringztoms(endat.clone().as_str()).unwrap()); } if args.cc2 { @@ -934,8 +992,10 @@ impl OptionsExt for Options { } else if *extract == "both" { self.extract = 12; } else { - println!("Invalid output field"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid output field" + ); } self.is_608_enabled = true; } @@ -1208,9 +1268,7 @@ impl OptionsExt for Options { if t == 0 { t = OffsetDateTime::now_utc().unix_timestamp() as u64; } - unsafe { - UTC_REFVALUE = t; - } + *UTC_REFVALUE.write().unwrap() = t as u64; self.noautotimeref = true; } @@ -1247,8 +1305,10 @@ impl OptionsExt for Options { self.transcript_settings.use_colors = chars[6] == '1'; } } else { - println!("Invalid customtxt value. It must be 7 digits long"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid customtxt value. 
It must be 7 digits long" + ); } } @@ -1289,8 +1349,10 @@ impl OptionsExt for Options { let result = _addr.find(']'); if result.is_none() { - println!("Wrong address format, for IPv6 use [address]:port\n"); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); + fatal!( + cause = ExitCause::IncompatibleParameters; + "Wrong address format, for IPv6 use [address]:port\n" + ); } let mut br = result.unwrap(); _addr = _addr.replace(']', ""); @@ -1363,8 +1425,10 @@ impl OptionsExt for Options { } if self.demux_cfg.auto_stream == StreamMode::Mp4 && self.input_source == DataSource::Stdin { - println!("MP4 requires an actual file, it's not possible to read from a stream, including stdin."); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); + fatal!( + cause = ExitCause::IncompatibleParameters; + "MP4 requires an actual file, it's not possible to read from a stream, including stdin." + ); } if self.extract_chapters { @@ -1380,32 +1444,28 @@ impl OptionsExt for Options { } if self.enc_cfg.sentence_cap { - unsafe { - CAPITALIZATION_LIST = get_vector_words(&CAPITALIZED_BUILTIN); - if self.sentence_cap_file.exists() { - if let Some(sentence_cap_file) = self.sentence_cap_file.to_str() { - let result = - process_word_file(sentence_cap_file, addr_of_mut!(CAPITALIZATION_LIST)); - - if result.is_err() { - println!("There was an error processing the capitalization file.\n"); - std::process::exit(ExitCause::ErrorInCapitalizationFile.exit_code()); - } + add_builtin_capitalization(capitalization_list); + + if self.sentence_cap_file.exists() { + if let Some(sentence_cap_file) = self.sentence_cap_file.to_str() { + if process_word_file(sentence_cap_file, capitalization_list).is_err() { + fatal!( + cause = ExitCause::ErrorInCapitalizationFile; + "There was an error processing the capitalization file.\n" + ); } } } } if self.enc_cfg.filter_profanity { - unsafe { - PROFANE = get_vector_words(&PROFANE_BUILTIN); - if self.filter_profanity_file.exists() { - if 
let Some(profanityfile) = self.filter_profanity_file.to_str() { - let result = process_word_file(profanityfile, addr_of_mut!(PROFANE)); - - if result.is_err() { - println!("There was an error processing the profanity file.\n"); - std::process::exit(ExitCause::ErrorInCapitalizationFile.exit_code()); - } + add_builtin_profane(profane); + if self.filter_profanity_file.exists() { + if let Some(filter_profanity_file) = self.filter_profanity_file.to_str() { + if process_word_file(filter_profanity_file, profane).is_err() { + fatal!( + cause = ExitCause::ErrorInCapitalizationFile; + "There was an error processing the profanity file.\n" + ); } } } @@ -1415,8 +1475,8 @@ impl OptionsExt for Options { tlt_config.dolevdist = self.dolevdist; tlt_config.levdistmincnt = self.levdistmincnt; tlt_config.levdistmaxpct = self.levdistmaxpct; - tlt_config.extraction_start = Some(self.extraction_start); - tlt_config.extraction_end = Some(self.extraction_end); + tlt_config.extraction_start = self.extraction_start; + tlt_config.extraction_end = self.extraction_end; tlt_config.write_format = self.write_format; tlt_config.date_format = self.date_format; tlt_config.noautotimeref = self.noautotimeref; @@ -1425,22 +1485,31 @@ impl OptionsExt for Options { // teletext page number out of range if tlt_config.user_page != 0 && (tlt_config.user_page < 100 || tlt_config.user_page > 899) { - println!("Teletext page number out of range (100-899)"); - std::process::exit(ExitCause::NotClassified.exit_code()); + fatal!( + cause = ExitCause::NotClassified; + "Teletext page number out of range (100-899)" + ); } if self.is_inputfile_empty() && self.input_source == DataSource::File { - std::process::exit(ExitCause::NoInputFiles.exit_code()); + fatal!( + cause = ExitCause::NoInputFiles; + "No input file specified\n" + ); } if !self.is_inputfile_empty() && self.live_stream.unwrap_or_default().millis() != 0 { - println!("Live stream mode only supports one input file"); - 
std::process::exit(ExitCause::TooManyInputFiles.exit_code()); + fatal!( + cause = ExitCause::TooManyInputFiles; + "Live stream mode only supports one input file" + ); } if !self.is_inputfile_empty() && self.input_source == DataSource::Network { - println!("UDP mode is not compatible with input files"); - std::process::exit(ExitCause::TooManyInputFiles.exit_code()); + fatal!( + cause = ExitCause::TooManyInputFiles; + "UDP mode is not compatible with input files" + ); } if self.input_source == DataSource::Network || self.input_source == DataSource::Tcp { @@ -1448,23 +1517,29 @@ impl OptionsExt for Options { } if !self.is_inputfile_empty() && self.input_source == DataSource::Tcp { - println!("TCP mode is not compatible with input files"); - std::process::exit(ExitCause::TooManyInputFiles.exit_code()); + fatal!( + cause = ExitCause::TooManyInputFiles; + "TCP mode is not compatible with input files" + ); } if self.demux_cfg.auto_stream == StreamMode::McpoodlesRaw && self.write_format == OutputFormat::Raw { - println!("-in=raw can only be used if the output is a subtitle file."); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); + fatal!( + cause = ExitCause::IncompatibleParameters; + "-in=raw can only be used if the output is a subtitle file." 
+ ); } if self.demux_cfg.auto_stream == StreamMode::Rcwt && self.write_format == OutputFormat::Rcwt && self.output_filename.is_none() { - println!("CCExtractor's binary format can only be used simultaneously for input and\noutput if the output file name is specified given with -o.\n"); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); + fatal!( + cause = ExitCause::IncompatibleParameters; + "CCExtractor's binary format can only be used simultaneously for input and\noutput if the output file name is specified given with -o.\n" + ); } if self.write_format != OutputFormat::DvdRaw @@ -1472,33 +1547,38 @@ impl OptionsExt for Options { && self.extract != 0 && self.extract == 12 { - println!( - "You can't extract both fields to stdout at the same time in broadcast mode.\n", + fatal!( + cause = ExitCause::IncompatibleParameters; + "You can't extract both fields to stdout at the same time in broadcast mode.\n" ); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); } if self.write_format == OutputFormat::SpuPng && self.cc_to_stdout { - println!("You cannot use --out=spupng with -stdout.\n"); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); + fatal!( + cause = ExitCause::IncompatibleParameters; + "You cannot use --out=spupng with -stdout.\n" + ); } if self.write_format == OutputFormat::WebVtt && self.enc_cfg.encoding != Encoding::Utf8 { self.enc_cfg.encoding = Encoding::Utf8; println!("Note: Output format is WebVTT, forcing UTF-8"); - std::process::exit(ExitCause::IncompatibleParameters.exit_code()); } // Check WITH_LIBCURL #[cfg(feature = "with_libcurl")] { if self.write_format == OutputFormat::Curl && self.curlposturl.is_none() { - println!("You must pass a URL (--curlposturl) if output format is curl"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "You must pass a URL (--curlposturl) if output format is curl" + ); } if self.write_format != 
OutputFormat::Curl && self.curlposturl.is_some() { - println!("--curlposturl requires that the format is curl"); - std::process::exit(ExitCause::MalformedParameter.exit_code()); + fatal!( + cause = ExitCause::MalformedParameter; + "--curlposturl requires that the format is curl" + ); } } @@ -1589,7 +1669,15 @@ pub mod tests { ..Default::default() }; - options.parse_parameters(&args, &mut tlt_config); + let mut capitalization_list: Vec = vec![]; + let mut profane: Vec = vec![]; + + options.parse_parameters( + &args, + &mut tlt_config, + &mut capitalization_list, + &mut profane, + ); (options, tlt_config) } @@ -1951,9 +2039,7 @@ pub mod tests { fn options_13() { let (options, _) = parse_args(&["--unixts", "5", "--out", "txt"]); - unsafe { - assert_eq!(UTC_REFVALUE, 5); - } + assert_eq!(*(UTC_REFVALUE.read().unwrap()), 5); assert_eq!(options.write_format, OutputFormat::Transcript); } @@ -2018,9 +2104,7 @@ pub mod tests { fn options_20() { let (_, _) = parse_args(&["--buffersize", "1M"]); - unsafe { - assert_eq!(FILEBUFFERSIZE, 1024 * 1024); - } + assert_eq!(get_file_buffer_size(), 1024 * 1024); } #[test] @@ -2055,7 +2139,7 @@ pub mod tests { fn options_25() { let (options, _) = parse_args(&["--startat", "4", "--endat", "7"]); - assert_eq!(options.extraction_start.seconds(), 4); + assert_eq!(options.extraction_start.unwrap_or_default().seconds(), 4); } #[test] diff --git a/src/rust/src/utils.rs b/src/rust/src/utils.rs index 63364d80e..6305d875d 100644 --- a/src/rust/src/utils.rs +++ b/src/rust/src/utils.rs @@ -44,21 +44,21 @@ use std::ffi::CString; use std::os::raw::c_char; pub fn string_to_c_chars(strs: Vec) -> *mut *mut c_char { - let mut cstr_vec: Vec = vec![]; - for s in strs { - let cstr = CString::new(s.as_str()).unwrap(); - cstr_vec.push(cstr); - } - cstr_vec.shrink_to_fit(); + let cstr_vec: Vec = strs + .iter() + .map(|s| CString::new(s.as_str()).unwrap()) + .collect(); + let c_char_vec: Vec<*mut c_char> = cstr_vec + .iter() + .map(|s| { + if 
s.as_bytes().is_empty() { + null_pointer() + } else { + s.as_ptr() as *mut c_char + } + }) + .collect(); - let mut c_char_vec: Vec<*const c_char> = vec![]; - for s in &cstr_vec { - if s.as_bytes().is_empty() { - c_char_vec.push(null_pointer()); - continue; - } - c_char_vec.push(s.as_ptr()); - } let ptr = c_char_vec.as_ptr(); std::mem::forget(cstr_vec); From 8e03558261805a7859cc37b463bce875e27aab5b Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Thu, 22 Aug 2024 00:36:51 +0530 Subject: [PATCH 18/24] fix: tests and formatting --- src/rust/src/common.rs | 207 ++++++++++++++++++------------------ src/rust/src/decoder/mod.rs | 14 +++ src/rust/src/lib.rs | 1 - src/rust/src/parser.rs | 17 +-- 4 files changed, 128 insertions(+), 111 deletions(-) diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index e3fd99015..cb4d7ba0c 100644 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -51,27 +51,27 @@ impl FromRust for ccx_s_options { /// /// This function is unsafe because it dereferences the pointer passed to it. 
unsafe fn copy_from_rust(self: &mut ccx_s_options, options: Options) { - (*self).extract = options.extract as _; - (*self).no_rollup = options.no_rollup as _; - (*self).noscte20 = options.noscte20 as _; - (*self).webvtt_create_css = options.webvtt_create_css as _; - (*self).cc_channel = options.cc_channel as _; - (*self).buffer_input = options.buffer_input as _; - (*self).nofontcolor = options.nofontcolor as _; - (*self).write_format = options.write_format.to_ctype(); - (*self).send_to_srv = options.send_to_srv as _; - (*self).nohtmlescape = options.nohtmlescape as _; - (*self).notypesetting = options.notypesetting as _; - (*self).extraction_start = options.extraction_start.to_ctype(); - (*self).extraction_end = options.extraction_end.to_ctype(); - (*self).print_file_reports = options.print_file_reports as _; - (*self).settings_608 = options.settings_608.to_ctype(); - (*self).settings_dtvcc = options.settings_dtvcc.to_ctype(); - (*self).is_608_enabled = options.is_608_enabled as _; - (*self).is_708_enabled = options.is_708_enabled as _; - (*self).millis_separator = options.date_format.millis_separator() as _; - (*self).binary_concat = options.binary_concat as _; - (*self).use_gop_as_pts = if let Some(usegops) = options.use_gop_as_pts { + self.extract = options.extract as _; + self.no_rollup = options.no_rollup as _; + self.noscte20 = options.noscte20 as _; + self.webvtt_create_css = options.webvtt_create_css as _; + self.cc_channel = options.cc_channel as _; + self.buffer_input = options.buffer_input as _; + self.nofontcolor = options.nofontcolor as _; + self.write_format = options.write_format.to_ctype(); + self.send_to_srv = options.send_to_srv as _; + self.nohtmlescape = options.nohtmlescape as _; + self.notypesetting = options.notypesetting as _; + self.extraction_start = options.extraction_start.to_ctype(); + self.extraction_end = options.extraction_end.to_ctype(); + self.print_file_reports = options.print_file_reports as _; + self.settings_608 = 
options.settings_608.to_ctype(); + self.settings_dtvcc = options.settings_dtvcc.to_ctype(); + self.is_608_enabled = options.is_608_enabled as _; + self.is_708_enabled = options.is_708_enabled as _; + self.millis_separator = options.date_format.millis_separator() as _; + self.binary_concat = options.binary_concat as _; + self.use_gop_as_pts = if let Some(usegops) = options.use_gop_as_pts { if usegops { 1 } else { @@ -80,12 +80,12 @@ impl FromRust for ccx_s_options { } else { 0 }; - (*self).fix_padding = options.fix_padding as _; - (*self).gui_mode_reports = options.gui_mode_reports as _; - (*self).no_progress_bar = options.no_progress_bar as _; + self.fix_padding = options.fix_padding as _; + self.gui_mode_reports = options.gui_mode_reports as _; + self.no_progress_bar = options.no_progress_bar as _; if options.sentence_cap_file.try_exists().unwrap_or_default() { - (*self).sentence_cap_file = string_to_c_char( + self.sentence_cap_file = string_to_c_char( options .sentence_cap_file .clone() @@ -94,7 +94,7 @@ impl FromRust for ccx_s_options { ); } - (*self).live_stream = if let Some(live_stream) = options.live_stream { + self.live_stream = if let Some(live_stream) = options.live_stream { live_stream.seconds() as _ } else { -1 @@ -104,7 +104,7 @@ impl FromRust for ccx_s_options { .try_exists() .unwrap_or_default() { - (*self).filter_profanity_file = string_to_c_char( + self.filter_profanity_file = string_to_c_char( options .filter_profanity_file .clone() @@ -112,123 +112,122 @@ impl FromRust for ccx_s_options { .unwrap_or_default(), ); } - (*self).messages_target = options.messages_target as _; - (*self).timestamp_map = options.timestamp_map as _; - (*self).dolevdist = options.dolevdist.into(); - (*self).levdistmincnt = options.levdistmincnt as _; - (*self).levdistmaxpct = options.levdistmaxpct as _; - (*self).investigate_packets = options.investigate_packets as _; - (*self).fullbin = options.fullbin as _; - (*self).nosync = options.nosync as _; - 
(*self).hauppauge_mode = options.hauppauge_mode as _; - (*self).wtvconvertfix = options.wtvconvertfix as _; - (*self).wtvmpeg2 = options.wtvmpeg2 as _; - (*self).auto_myth = if let Some(auto_myth) = options.auto_myth { + self.messages_target = options.messages_target as _; + self.timestamp_map = options.timestamp_map as _; + self.dolevdist = options.dolevdist.into(); + self.levdistmincnt = options.levdistmincnt as _; + self.levdistmaxpct = options.levdistmaxpct as _; + self.investigate_packets = options.investigate_packets as _; + self.fullbin = options.fullbin as _; + self.nosync = options.nosync as _; + self.hauppauge_mode = options.hauppauge_mode as _; + self.wtvconvertfix = options.wtvconvertfix as _; + self.wtvmpeg2 = options.wtvmpeg2 as _; + self.auto_myth = if let Some(auto_myth) = options.auto_myth { auto_myth as _ } else { 2 }; - (*self).mp4vidtrack = options.mp4vidtrack as _; - (*self).extract_chapters = options.extract_chapters as _; - (*self).usepicorder = options.usepicorder as _; - (*self).xmltv = options.xmltv as _; - (*self).xmltvliveinterval = options.xmltvliveinterval.seconds() as _; - (*self).xmltvoutputinterval = options.xmltvoutputinterval.seconds() as _; - (*self).xmltvonlycurrent = options.xmltvonlycurrent.into(); - (*self).keep_output_closed = options.keep_output_closed as _; - (*self).force_flush = options.force_flush as _; - (*self).append_mode = options.append_mode as _; - (*self).ucla = options.ucla as _; - (*self).tickertext = options.tickertext as _; - (*self).hardsubx = options.hardsubx as _; - (*self).hardsubx_and_common = options.hardsubx_and_common as _; + self.mp4vidtrack = options.mp4vidtrack as _; + self.extract_chapters = options.extract_chapters as _; + self.usepicorder = options.usepicorder as _; + self.xmltv = options.xmltv as _; + self.xmltvliveinterval = options.xmltvliveinterval.seconds() as _; + self.xmltvoutputinterval = options.xmltvoutputinterval.seconds() as _; + self.xmltvonlycurrent = 
options.xmltvonlycurrent.into(); + self.keep_output_closed = options.keep_output_closed as _; + self.force_flush = options.force_flush as _; + self.append_mode = options.append_mode as _; + self.ucla = options.ucla as _; + self.tickertext = options.tickertext as _; + self.hardsubx = options.hardsubx as _; + self.hardsubx_and_common = options.hardsubx_and_common as _; if let Some(dvblang) = options.dvblang { - (*self).dvblang = string_to_c_char(dvblang.to_ctype().as_str()); + self.dvblang = string_to_c_char(dvblang.to_ctype().as_str()); } if options.ocrlang.try_exists().unwrap_or_default() { - (*self).ocrlang = string_to_c_char(options.ocrlang.to_str().unwrap()); + self.ocrlang = string_to_c_char(options.ocrlang.to_str().unwrap()); } - (*self).ocr_oem = options.ocr_oem as _; - (*self).ocr_quantmode = options.ocr_quantmode as _; + self.ocr_oem = options.ocr_oem as _; + self.ocr_quantmode = options.ocr_quantmode as _; if let Some(mkvlang) = options.mkvlang { - (*self).mkvlang = string_to_c_char(mkvlang.to_ctype().as_str()); - } - (*self).analyze_video_stream = options.analyze_video_stream as _; - (*self).hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype(); - (*self).hardsubx_subcolor = options.hardsubx_hue.to_ctype(); - (*self).hardsubx_min_sub_duration = options.hardsubx_min_sub_duration.seconds() as _; - (*self).hardsubx_detect_italics = options.hardsubx_detect_italics as _; - (*self).hardsubx_conf_thresh = options.hardsubx_conf_thresh as _; - (*self).hardsubx_hue = options.hardsubx_hue.get_hue() as _; - (*self).hardsubx_lum_thresh = options.hardsubx_lum_thresh as _; - (*self).transcript_settings = options.transcript_settings.to_ctype(); - (*self).date_format = options.date_format.to_ctype(); - (*self).write_format_rewritten = options.write_format_rewritten as _; - (*self).use_ass_instead_of_ssa = options.use_ass_instead_of_ssa as _; - (*self).use_webvtt_styling = options.use_webvtt_styling as _; - (*self).debug_mask = options.debug_mask.normal_mask().bits() as 
_; - (*self).debug_mask_on_debug = options.debug_mask.debug_mask().bits() as _; + self.mkvlang = string_to_c_char(mkvlang.to_ctype().as_str()); + } + self.analyze_video_stream = options.analyze_video_stream as _; + self.hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype(); + self.hardsubx_subcolor = options.hardsubx_hue.to_ctype(); + self.hardsubx_min_sub_duration = options.hardsubx_min_sub_duration.seconds() as _; + self.hardsubx_detect_italics = options.hardsubx_detect_italics as _; + self.hardsubx_conf_thresh = options.hardsubx_conf_thresh as _; + self.hardsubx_hue = options.hardsubx_hue.get_hue() as _; + self.hardsubx_lum_thresh = options.hardsubx_lum_thresh as _; + self.transcript_settings = options.transcript_settings.to_ctype(); + self.date_format = options.date_format.to_ctype(); + self.write_format_rewritten = options.write_format_rewritten as _; + self.use_ass_instead_of_ssa = options.use_ass_instead_of_ssa as _; + self.use_webvtt_styling = options.use_webvtt_styling as _; + self.debug_mask = options.debug_mask.normal_mask().bits() as _; + self.debug_mask_on_debug = options.debug_mask.debug_mask().bits() as _; if options.udpsrc.is_some() { - (*self).udpsrc = string_to_c_char(&options.udpsrc.clone().unwrap()); + self.udpsrc = string_to_c_char(&options.udpsrc.clone().unwrap()); } if options.udpaddr.is_some() { - (*self).udpaddr = string_to_c_char(&options.udpaddr.clone().unwrap()); + self.udpaddr = string_to_c_char(&options.udpaddr.clone().unwrap()); } - (*self).udpport = options.udpport as _; + self.udpport = options.udpport as _; if options.tcpport.is_some() { - (*self).tcpport = string_to_c_char(&options.tcpport.unwrap().to_string()); + self.tcpport = string_to_c_char(&options.tcpport.unwrap().to_string()); } if options.tcp_password.is_some() { - (*self).tcp_password = string_to_c_char(&options.tcp_password.clone().unwrap()); + self.tcp_password = string_to_c_char(&options.tcp_password.clone().unwrap()); } if options.tcp_desc.is_some() { - 
(*self).tcp_desc = string_to_c_char(&options.tcp_desc.clone().unwrap()); + self.tcp_desc = string_to_c_char(&options.tcp_desc.clone().unwrap()); } if options.srv_addr.is_some() { - (*self).srv_addr = string_to_c_char(&options.srv_addr.clone().unwrap()); + self.srv_addr = string_to_c_char(&options.srv_addr.clone().unwrap()); } if options.srv_port.is_some() { - (*self).srv_port = string_to_c_char(&options.srv_port.unwrap().to_string()); + self.srv_port = string_to_c_char(&options.srv_port.unwrap().to_string()); } - (*self).noautotimeref = options.noautotimeref as _; - (*self).input_source = options.input_source as _; + self.noautotimeref = options.noautotimeref as _; + self.input_source = options.input_source as _; if options.output_filename.is_some() { - (*self).output_filename = string_to_c_char(&options.output_filename.clone().unwrap()); + self.output_filename = string_to_c_char(&options.output_filename.clone().unwrap()); } if options.inputfile.is_some() { - (*self).inputfile = string_to_c_chars(options.inputfile.clone().unwrap()); - (*self).num_input_files = - options.inputfile.iter().filter(|s| !s.is_empty()).count() as _; - } - (*self).demux_cfg = options.demux_cfg.to_ctype(); - (*self).enc_cfg = options.enc_cfg.to_ctype(); - (*self).subs_delay = options.subs_delay.millis(); - (*self).cc_to_stdout = options.cc_to_stdout as _; - (*self).pes_header_to_stdout = options.pes_header_to_stdout as _; - (*self).ignore_pts_jumps = options.ignore_pts_jumps as _; - (*self).multiprogram = options.multiprogram as _; - (*self).out_interval = options.out_interval; - (*self).segment_on_key_frames_only = options.segment_on_key_frames_only as _; + self.inputfile = string_to_c_chars(options.inputfile.clone().unwrap()); + self.num_input_files = options.inputfile.iter().filter(|s| !s.is_empty()).count() as _; + } + self.demux_cfg = options.demux_cfg.to_ctype(); + self.enc_cfg = options.enc_cfg.to_ctype(); + self.subs_delay = options.subs_delay.millis(); + self.cc_to_stdout = 
options.cc_to_stdout as _; + self.pes_header_to_stdout = options.pes_header_to_stdout as _; + self.ignore_pts_jumps = options.ignore_pts_jumps as _; + self.multiprogram = options.multiprogram as _; + self.out_interval = options.out_interval; + self.segment_on_key_frames_only = options.segment_on_key_frames_only as _; #[cfg(feature = "with_libcurl")] { if options.curlposturl.is_some() { - (*self).curlposturl = + self.curlposturl = string_to_c_char(&options.curlposturl.as_ref().unwrap_or_default().as_str()); } } #[cfg(feature = "enable_sharing")] { - (*self).sharing_enabled = options.sharing_enabled as _; + self.sharing_enabled = options.sharing_enabled as _; if options.sharing_url.is_some() { - (*self).sharing_url = + self.sharing_url = string_to_c_char(&options.sharing_url.as_ref().unwrap().as_str()); } - (*self).translate_enabled = options.translate_enabled as _; + self.translate_enabled = options.translate_enabled as _; if options.translate_langs.is_some() { - (*self).translate_langs = string_to_c_char(&options.translate_langs.unwrap()); + self.translate_langs = string_to_c_char(&options.translate_langs.unwrap()); } if options.translate_key.is_some() { - (*self).translate_key = string_to_c_char(&options.translate_key.unwrap()); + self.translate_key = string_to_c_char(&options.translate_key.unwrap()); } } } diff --git a/src/rust/src/decoder/mod.rs b/src/rust/src/decoder/mod.rs index 78ff03632..3f8a37bf5 100644 --- a/src/rust/src/decoder/mod.rs +++ b/src/rust/src/decoder/mod.rs @@ -237,12 +237,20 @@ impl PartialEq for dtvcc_symbol { #[cfg(test)] mod test { + use lib_ccxr::util::log::{set_logger, CCExtractorLogger, DebugMessageMask, OutputTarget}; + use crate::utils::get_zero_allocated_obj; use super::*; #[test] fn test_process_cc_data() { + set_logger(CCExtractorLogger::new( + OutputTarget::Stdout, + DebugMessageMask::new(DebugMessageFlag::VERBOSE, DebugMessageFlag::VERBOSE), + false, + )) + .ok(); let mut dtvcc_ctx = get_zero_allocated_obj::(); let mut decoder = 
Dtvcc::new(&mut dtvcc_ctx); @@ -286,6 +294,12 @@ mod test { #[test] fn test_process_current_packet() { + set_logger(CCExtractorLogger::new( + OutputTarget::Stdout, + DebugMessageMask::new(DebugMessageFlag::VERBOSE, DebugMessageFlag::VERBOSE), + false, + )) + .ok(); let mut dtvcc_ctx = get_zero_allocated_obj::(); let mut decoder = Dtvcc::new(&mut dtvcc_ctx); diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 0db93c614..b88493605 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -210,7 +210,6 @@ extern "C" { /// Parse parameters from argv and argc #[no_mangle] pub unsafe extern "C" fn ccxr_parse_parameters(argc: c_int, argv: *mut *mut c_char) -> c_int { - ccxr_init_logger(); // Convert argv to Vec and pass it to parse_parameters let args = std::slice::from_raw_parts(argv, argc as usize) .iter() diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 5f1e0b997..49449c916 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -23,13 +23,12 @@ use time::OffsetDateTime; use crate::args::CCXCodec; use crate::args::{self, InFormat}; -use crate::{usercolor_rgb, FILEBUFFERSIZE}; cfg_if! 
{ if #[cfg(test)] { - use crate::parser::tests::{set_binary_mode, MPEG_CLOCK_FREQ}; + use crate::parser::tests::{set_binary_mode, MPEG_CLOCK_FREQ, usercolor_rgb, FILEBUFFERSIZE}; } else { - use crate::{set_binary_mode, MPEG_CLOCK_FREQ}; + use crate::{set_binary_mode, MPEG_CLOCK_FREQ, usercolor_rgb, FILEBUFFERSIZE}; } } @@ -57,6 +56,12 @@ fn set_usercolor_rgb(color: &str) { } } +fn set_mpeg_clock_freq(freq: i32) { + unsafe { + MPEG_CLOCK_FREQ = freq as _; + } +} + fn atol(bufsize: &str) -> i32 { let mut val = bufsize[0..bufsize.len() - 1].parse::().unwrap(); let size = bufsize @@ -869,9 +874,7 @@ impl OptionsExt for Options { } if args.mpeg90090 { - unsafe { - MPEG_CLOCK_FREQ = 90090; - } + set_mpeg_clock_freq(90090); } if args.no_scte20 { self.noscte20 = true; @@ -1657,6 +1660,8 @@ pub mod tests { #[no_mangle] pub unsafe extern "C" fn set_binary_mode() {} pub static mut MPEG_CLOCK_FREQ: u64 = 0; + pub static mut FILEBUFFERSIZE: i32 = 0; + pub static mut usercolor_rgb: [i32; 8] = [0; 8]; fn parse_args(args: &[&str]) -> (Options, TeletextConfig) { let mut common_args = vec!["./ccextractor", "input_file"]; From 316a7c9d5b523ab1455a1864e836f3db6a9be3e0 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Thu, 22 Aug 2024 00:54:36 +0530 Subject: [PATCH 19/24] fix: allow hex values for streamtype --- src/rust/src/args.rs | 4 ++-- src/rust/src/parser.rs | 28 +++++++++++++++++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/rust/src/args.rs b/src/rust/src/args.rs index 7e3a79fef..9ea07d5c6 100644 --- a/src/rust/src/args.rs +++ b/src/rust/src/args.rs @@ -361,14 +361,14 @@ pub struct Args { /// by its type (pick the stream that has this type in /// the PMT) #[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)] - pub datastreamtype: Option, + pub datastreamtype: Option, /// Assume the data is of this type, don't autodetect. 
This /// parameter may be needed if --datapid or --datastreamtype /// is used and CCExtractor cannot determine how to process /// the stream. The value will usually be 2 (MPEG video) or /// 6 (MPEG private data). #[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)] - pub streamtype: Option, + pub streamtype: Option, /// If the video was recorder using a Hauppauge card, it /// might need special processing. This parameter will /// force the special treatment. diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 49449c916..e8d33c757 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1192,14 +1192,29 @@ impl OptionsExt for Options { } if let Some(ref datastreamtype) = args.datastreamtype { - self.demux_cfg.ts_datastreamtype = - StreamType::from_repr((*datastreamtype).into()).unwrap_or_default(); - // TODO: Should I panick? + if let Some(streamType) = + StreamType::from_repr(get_atoi_hex::(&*datastreamtype.to_string()).into()) + { + self.demux_cfg.ts_datastreamtype = streamType; + } else { + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid data stream type" + ); + } } if let Some(ref streamtype) = args.streamtype { - self.demux_cfg.ts_forced_streamtype = - StreamType::from_repr((*streamtype).into()).unwrap_or_default(); + if let Some(streamType) = + StreamType::from_repr(get_atoi_hex::(&*streamtype.to_string()).into()) + { + self.demux_cfg.ts_forced_streamtype = streamType; + } else { + fatal!( + cause = ExitCause::MalformedParameter; + "Invalid stream type" + ); + } } if let Some(ref tpage) = args.tpage { @@ -1391,7 +1406,6 @@ impl OptionsExt for Options { if let Some(ref font) = args.font { self.enc_cfg.render_font = PathBuf::from_str(font).unwrap_or_default(); - // TODO: Check if Panic on wrong path } if let Some(ref italics) = args.italics { @@ -2003,7 +2017,7 @@ pub mod tests { fn options_9() { let (options, _) = parse_args(&[ "--datastreamtype", - "2", + "0x2", "--streamtype", "2", 
"--no-autotimeref", From f2e85b668d6cf241fb844ed545309200870d16e1 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Thu, 22 Aug 2024 01:02:54 +0530 Subject: [PATCH 20/24] chore: format files --- src/rust/src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index e8d33c757..85a0717bf 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -1193,7 +1193,7 @@ impl OptionsExt for Options { if let Some(ref datastreamtype) = args.datastreamtype { if let Some(streamType) = - StreamType::from_repr(get_atoi_hex::(&*datastreamtype.to_string()).into()) + StreamType::from_repr(get_atoi_hex::(&datastreamtype.to_string())) { self.demux_cfg.ts_datastreamtype = streamType; } else { @@ -1206,7 +1206,7 @@ impl OptionsExt for Options { if let Some(ref streamtype) = args.streamtype { if let Some(streamType) = - StreamType::from_repr(get_atoi_hex::(&*streamtype.to_string()).into()) + StreamType::from_repr(get_atoi_hex::(&streamtype.to_string())) { self.demux_cfg.ts_forced_streamtype = streamType; } else { From 0d4f3cd1af12ac8e75e3c65d20d8bcc566675103 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Fri, 23 Aug 2024 14:33:58 +0530 Subject: [PATCH 21/24] fix: naming of fields and docs --- src/lib_ccx/ccx_common_option.c | 2 +- src/lib_ccx/params.c | 11 +++++++---- src/rust/lib_ccxr/src/common/options.rs | 4 ++-- src/rust/src/args.rs | 9 ++++++--- src/rust/src/parser.rs | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/lib_ccx/ccx_common_option.c b/src/lib_ccx/ccx_common_option.c index 8d68a6946..4ac0a3fef 100644 --- a/src/lib_ccx/ccx_common_option.c +++ b/src/lib_ccx/ccx_common_option.c @@ -27,7 +27,7 @@ void init_options(struct ccx_s_options *options) options->extract = 1; // Extract 1st field only (primary language) options->cc_channel = 1; // Channel we want to dump in srt mode - options->binary_concat = 1; // Disabled by -ve or --videoedited + options->binary_concat 
= 1; // Disabled by --videoedited options->use_gop_as_pts = 0; // Use GOP instead of PTS timing (0=do as needed, 1=always, -1=never) options->fix_padding = 0; // Replace 0000 with 8080 in HDTV (needed for some cards) options->gui_mode_reports = 0; // If 1, output in stderr progress updates so the GUI can grab them diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index d52ce2360..cd2515806 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -395,8 +395,11 @@ void print_usage(void) mprint(" --tcp-description description: Sends to the server short description about\n"); mprint(" captions e.g. channel name or file name\n"); mprint("Options that affect what will be processed:\n"); - mprint(" --output-field 1 / 2 / both: Output Field 1 data, Field 2 data, or both\n"); - mprint(" (DEFAULT is 1)\n"); + mprint(" --output-field 1 / 2 / both:\n"); + mprint(" Values: 1 = Output Field 1\n"); + mprint(" 2 = Output Field 2\n"); + mprint(" both = Both Output Field 1 and 2\n"); + mprint(" Defaults to 1\n"); mprint("Use --append to prevent overwriting of existing files. The output will be\n"); mprint(" appended instead.\n"); mprint(" --cc2: When in srt/sami mode, process captions in channel 2\n"); @@ -992,7 +995,7 @@ void print_usage(void) mprint(" input.d/sub0001.png\n"); mprint(" ...\n"); mprint(" The command:\n"); - mprint(" ccextractor --out=spupng -o /tmp/output --12 input.mpg\n"); + mprint(" ccextractor --out=spupng -o /tmp/output --output-field both input.mpg\n"); mprint(" will create the files:\n"); mprint(" /tmp/output_1.xml\n"); mprint(" /tmp/output_1.d/sub0000.png\n"); @@ -2111,7 +2114,7 @@ int parse_parameters(struct ccx_s_options *opt, int argc, char *argv[]) opt->extract = strcmp(argv[i], "both") == 0 ? 
12 : atoi_hex(argv[i]); if (opt->extract != 1 && opt->extract != 2 && opt->extract != 12) { - fatal(EXIT_MALFORMED_PARAMETER, "--output-field only accepts 1 or 2 or both.\n"); + fatal(EXIT_MALFORMED_PARAMETER, "--output-field only accepts 1 , 2 , 12 / both.\n"); } opt->is_608_enabled = 1; continue; diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs index ef1fe26f9..b471960a7 100644 --- a/src/rust/lib_ccxr/src/common/options.rs +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -361,9 +361,9 @@ pub struct Options { pub settings_608: Decoder608Settings, /// Same for 708 decoder pub settings_dtvcc: DecoderDtvccSettings, - /// Is 608 enabled by explicitly using flags(-1,-2,-12) + /// Is 608 enabled by explicitly using flags(--output-field 1 / 2 / both) pub is_608_enabled: bool, - /// Is 708 enabled by explicitly using flags(-svc) + /// Is 708 enabled by explicitly using flags(--svc) pub is_708_enabled: bool, /// Disabled by -ve or --videoedited diff --git a/src/rust/src/args.rs b/src/rust/src/args.rs index 9ea07d5c6..d437f232d 100644 --- a/src/rust/src/args.rs +++ b/src/rust/src/args.rs @@ -123,7 +123,7 @@ Notes on spupng output format: input.d/sub0001.png ... The command: - ccextractor --out=spupng -o /tmp/output --12 input.mpg + ccextractor --out=spupng -o /tmp/output --output-field both input.mpg will create the files: /tmp/output_1.xml /tmp/output_1.d/sub0000.png @@ -205,8 +205,11 @@ pub struct Args { /// captions e.g. 
channel name or file name #[arg(long, value_name="port", verbatim_doc_comment, help_heading=NETWORK_SUPPORT)] pub tcp_description: Option, - /// Output field1 data, field2 data, or both - #[arg(long, value_name="1/2/both", verbatim_doc_comment, help_heading=OPTION_AFFECT_PROCESSED)] + /// Values: 1 = Output Field 1 + /// 2 = Output Field 2 + /// both = Both Output Field 1 and 2 + /// Defaults to 1 + #[arg(long, value_name="field", verbatim_doc_comment, help_heading=OPTION_AFFECT_PROCESSED)] pub output_field: Option, /// Use --append to prevent overwriting of existing files. The output will be /// appended instead. diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 85a0717bf..145bcf0e6 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -990,7 +990,7 @@ impl OptionsExt for Options { } if let Some(ref extract) = args.output_field { - if *extract == "1" || *extract == "2" { + if *extract == "1" || *extract == "2" || *extract == "12" { self.extract = get_atoi_hex(extract); } else if *extract == "both" { self.extract = 12; From ca0b250aeee683f1d3eb60b32b9d388067ce226a Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Fri, 23 Aug 2024 16:22:40 +0530 Subject: [PATCH 22/24] fix: defaults for options --- src/rust/lib_ccxr/src/common/options.rs | 38 ++++++++++++++++--------- src/rust/src/parser.rs | 4 +-- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/rust/lib_ccxr/src/common/options.rs b/src/rust/lib_ccxr/src/common/options.rs index b471960a7..b263502b8 100644 --- a/src/rust/lib_ccxr/src/common/options.rs +++ b/src/rust/lib_ccxr/src/common/options.rs @@ -111,7 +111,7 @@ pub struct DecoderDtvccSettings { pub timing: CommonTimingCtx, } -#[derive(Default, Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone)] pub struct Decoder608Settings { pub direct_rollup: i32, pub force_rollup: i32, @@ -120,6 +120,18 @@ pub struct Decoder608Settings { pub screens_to_process: i32, pub report: Option, } +impl Default for Decoder608Settings { 
+ fn default() -> Self { + Self { + direct_rollup: 0, + force_rollup: 0, + no_rollup: false, + default_color: Decoder608ColorCode::Transparent, + screens_to_process: -1, + report: None, + } + } +} #[derive(Debug, Default, Copy, Clone)] pub struct Decoder608Report { @@ -442,7 +454,7 @@ pub struct Options { /// The name of the .traineddata file to be loaded with tesseract pub ocrlang: PathBuf, /// The Tesseract OEM mode, could be 0 (default), 1 or 2 - pub ocr_oem: u8, + pub ocr_oem: i8, /// How to quantize the bitmap before passing to to tesseract /// (0 = no quantization at all, 1 = CCExtractor's internal, /// 2 = reduce distinct color count in image for faster results.) @@ -528,7 +540,7 @@ impl Default for Options { no_rollup: Default::default(), noscte20: Default::default(), webvtt_create_css: Default::default(), - cc_channel: Default::default(), + cc_channel: 1, buffer_input: Default::default(), nofontcolor: Default::default(), nohtmlescape: Default::default(), @@ -550,21 +562,21 @@ impl Default for Options { filter_profanity_file: Default::default(), messages_target: Default::default(), timestamp_map: Default::default(), - dolevdist: Default::default(), - levdistmincnt: Default::default(), - levdistmaxpct: Default::default(), + dolevdist: true, + levdistmincnt: 2, + levdistmaxpct: 10, investigate_packets: Default::default(), fullbin: Default::default(), nosync: Default::default(), hauppauge_mode: Default::default(), wtvconvertfix: Default::default(), wtvmpeg2: Default::default(), - auto_myth: Default::default(), + auto_myth: None, mp4vidtrack: Default::default(), extract_chapters: Default::default(), usepicorder: Default::default(), xmltv: Default::default(), - xmltvliveinterval: Default::default(), + xmltvliveinterval: Timestamp::from_millis(10000), xmltvoutputinterval: Default::default(), xmltvonlycurrent: Default::default(), keep_output_closed: Default::default(), @@ -576,16 +588,16 @@ impl Default for Options { hardsubx_and_common: Default::default(), 
dvblang: Default::default(), ocrlang: Default::default(), - ocr_oem: Default::default(), + ocr_oem: -1, ocr_quantmode: 1, mkvlang: Default::default(), analyze_video_stream: Default::default(), hardsubx_ocr_mode: Default::default(), - hardsubx_min_sub_duration: Default::default(), + hardsubx_min_sub_duration: Timestamp::from_millis(500), hardsubx_detect_italics: Default::default(), hardsubx_conf_thresh: Default::default(), hardsubx_hue: Default::default(), - hardsubx_lum_thresh: Default::default(), + hardsubx_lum_thresh: 95.0, transcript_settings: Default::default(), date_format: Default::default(), send_to_srv: Default::default(), @@ -602,7 +614,7 @@ impl Default for Options { srv_addr: Default::default(), srv_port: Default::default(), noautotimeref: Default::default(), - input_source: Default::default(), + input_source: DataSource::default(), output_filename: Default::default(), inputfile: Default::default(), demux_cfg: Default::default(), @@ -612,7 +624,7 @@ impl Default for Options { pes_header_to_stdout: Default::default(), ignore_pts_jumps: Default::default(), multiprogram: Default::default(), - out_interval: Default::default(), + out_interval: -1, segment_on_key_frames_only: Default::default(), debug_mask: DebugMessageMask::new( DebugMessageFlag::GENERIC_NOTICE, diff --git a/src/rust/src/parser.rs b/src/rust/src/parser.rs index 145bcf0e6..d84444c1b 100644 --- a/src/rust/src/parser.rs +++ b/src/rust/src/parser.rs @@ -795,10 +795,10 @@ impl OptionsExt for Options { if !(0..=2).contains(oem) { fatal!( cause = ExitCause::MalformedParameter; - "Invalid oem value" + "oem value should be between 0 and 2" ); } - self.ocr_oem = *oem; + self.ocr_oem = *oem as _; } if let Some(ref lang) = args.mkvlang { From 8eceabb12c85aee0dcda1b79d08c9878421ad505 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Sun, 25 Aug 2024 23:49:29 +0530 Subject: [PATCH 23/24] fix: memory leak in vector to string --- src/rust/src/common.rs | 2 +- src/rust/src/utils.rs | 31 
++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/rust/src/common.rs b/src/rust/src/common.rs index cb4d7ba0c..aa6eefae7 100644 --- a/src/rust/src/common.rs +++ b/src/rust/src/common.rs @@ -563,7 +563,7 @@ impl CType for EncoderConfig { extract: self.extract as _, dtvcc_extract: self.dtvcc_extract as _, gui_mode_reports: self.gui_mode_reports as _, - output_filename: unsafe { string_to_c_char(&self.output_filename) }, + output_filename: string_to_c_char(&self.output_filename), write_format: self.write_format.to_ctype(), keep_output_closed: self.keep_output_closed as _, force_flush: self.force_flush as _, diff --git a/src/rust/src/utils.rs b/src/rust/src/utils.rs index 6305d875d..90ada59dd 100644 --- a/src/rust/src/utils.rs +++ b/src/rust/src/utils.rs @@ -25,7 +25,7 @@ pub unsafe fn c_char_to_string(c: *const ::std::os::raw::c_char) -> Option *mut ::std::os::raw::c_char { +pub fn string_to_c_char(a: &str) -> *mut ::std::os::raw::c_char { if a.is_empty() { return null_pointer(); } @@ -40,31 +40,16 @@ pub fn null_pointer() -> *mut T { std::ptr::null_mut() } -use std::ffi::CString; use std::os::raw::c_char; pub fn string_to_c_chars(strs: Vec) -> *mut *mut c_char { - let cstr_vec: Vec = strs - .iter() - .map(|s| CString::new(s.as_str()).unwrap()) - .collect(); - let c_char_vec: Vec<*mut c_char> = cstr_vec - .iter() - .map(|s| { - if s.as_bytes().is_empty() { - null_pointer() - } else { - s.as_ptr() as *mut c_char - } - }) - .collect(); - - let ptr = c_char_vec.as_ptr(); - - std::mem::forget(cstr_vec); - std::mem::forget(c_char_vec); - - ptr as *mut *mut c_char + let mut c_strs: Vec<*mut c_char> = Vec::new(); + for s in strs { + c_strs.push(string_to_c_char(&s)); + } + let ptr = c_strs.as_mut_ptr(); + std::mem::forget(c_strs); + ptr } /// This function creates a new object of type `T` and fills it with zeros. 
From fba63f5cdf56111bd088d4513295d439ee18a1d1 Mon Sep 17 00:00:00 2001 From: Prateek Sunal Date: Sun, 25 Aug 2024 23:49:44 +0530 Subject: [PATCH 24/24] fix(c): init logger before running parser --- src/ccextractor.c | 4 ++++ src/lib_ccx/lib_ccx.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ccextractor.c b/src/ccextractor.c index c63e6d0ab..a8676405c 100644 --- a/src/ccextractor.c +++ b/src/ccextractor.c @@ -448,6 +448,10 @@ int main(int argc, char *argv[]) // If "ccextractor.cnf" is present, takes options from it. // See docs/ccextractor.cnf.sample for more info. +#ifndef DISABLE_RUST + ccxr_init_basic_logger(); +#endif + #ifndef DISABLE_RUST int compile_ret = ccxr_parse_parameters(argc, argv); #else diff --git a/src/lib_ccx/lib_ccx.c b/src/lib_ccx/lib_ccx.c index d0a69b469..4b137952e 100644 --- a/src/lib_ccx/lib_ccx.c +++ b/src/lib_ccx/lib_ccx.c @@ -102,10 +102,6 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt) ccx_common_logging.log_ftn = &mprint; ccx_common_logging.gui_ftn = &activity_library_process; -#ifndef DISABLE_RUST - ccxr_init_basic_logger(); -#endif - struct lib_ccx_ctx *ctx = malloc(sizeof(struct lib_ccx_ctx)); if (!ctx) ccx_common_logging.fatal_ftn(EXIT_NOT_ENOUGH_MEMORY, "init_libraries: Not enough memory allocating lib_ccx_ctx context.");