diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 101e2de..a54e061 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,8 +3,8 @@ name: ci env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CARGO_TERM_COLOR: always - CARGO_LOCA: "rimc-cli/Cargo.toml" - CARGO_BIN: "rimc" + CARGO_LOCA: "rim-cli/Cargo.toml" + CARGO_BIN: "rim" permissions: contents: write @@ -15,7 +15,7 @@ on: branches: - "main" paths: - - "rimc-cli/Cargo.toml" + - "rim-cli/Cargo.toml" jobs: pre: diff --git a/.gitignore b/.gitignore index d0b2aa9..c2e9754 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ target/ Cargo.lock **/*.rs.bk *.pdb +config.toml \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 31e4623..5aeb908 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["rimc", "rimc-cli"] +members = ["rim", "rim-cli"] resolver = "2" [profile.release] diff --git a/README.md b/README.md index 9fb114c..7cbf36b 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,28 @@ -# Rimc +# Rim -[![CI Status](https://github.com/AUTOM77/Rimc/workflows/ci/badge.svg)](https://github.com/AUTOM77/Rimc/actions?query=workflow:ci) -[![Code Size](https://img.shields.io/github/languages/code-size/AUTOM77/Rimc)](.) +[![CI Status](https://github.com/AUTOM77/Rim/workflows/ci/badge.svg)](https://github.com/AUTOM77/Rim/actions?query=workflow:ci) +[![Code Size](https://img.shields.io/github/languages/code-size/AUTOM77/Rim)](.) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](./LICENSE) -[![Open Issues](https://img.shields.io/github/issues/AUTOM77/Rimc)](https://github.com/AUTOM77/Rimc/issues) +[![Open Issues](https://img.shields.io/github/issues/AUTOM77/Rim)](https://github.com/AUTOM77/Rim/issues) -> Rimc, a Rust based Multi-Modal Hyper Caption Tool in Parallel +> Rim, a Rust based Multi-Modal Hyper Caption Tool in Parallel ### Usage 1. **Single Image/Video Captioning:** ```bash -rimc -f ${file_path} -c `config.toml` +rim -f ${file_path} -c `config.toml` ``` -Rimc generates a `*.txt` file containing the caption for a single image or video. +Rim generates a `*.txt` file containing the caption for a single image or video. 2. **Batch Image/Video Captioning:** ```bash -rimc -d ${dir_path} -c `config.toml` +rim -d ${dir_path} -c `config.toml` ``` -For a directory of images or videos, `Rimc` generates a corresponding list of `*.txt` caption files. +For a directory of images or videos, `Rim` generates a corresponding list of `*.txt` caption files. 3. Sample `config.toml` can be found in [config.toml](./config.toml) diff --git a/rimc-cli/Cargo.toml b/rim-cli/Cargo.toml similarity index 53% rename from rimc-cli/Cargo.toml rename to rim-cli/Cargo.toml index c8e723a..f29998d 100644 --- a/rimc-cli/Cargo.toml +++ b/rim-cli/Cargo.toml @@ -1,12 +1,13 @@ [package] -name = "rimc-cli" -version = "0.1.0" +name = "rim-cli" +version = "0.1.1" edition = "2021" [dependencies] -librimc = { path = "../rimc" } +librim = { path = "../rim" } clap = { version= "4.5.4", features=["derive"] } +toml = { version = "0.8.12"} [[bin]] -name = "rimc" +name = "rim" path = "src/cli.rs" diff --git a/rim-cli/src/cli.rs b/rim-cli/src/cli.rs new file mode 100644 index 0000000..e88f897 --- /dev/null +++ b/rim-cli/src/cli.rs @@ -0,0 +1,40 @@ +use clap::{Args, Parser}; +use librim::client::RimClient; + +mod conf; + +#[derive(Args)] +#[group(required = true, multiple = false)] +struct Opts { + #[arg(short = 'f', long, name = "FILE")] + file: Option, + + #[arg(short = 'd', long, name = "DIR")] + dir: Option, +} + +#[derive(Parser)] +struct Cli { + #[command(flatten)] + opt: Opts, + + #[arg(short = 'c', long, name = "CONFIG")] + config: String, +} + +fn main() { + let cli = Cli::parse(); + + let (prompt, gemini_keys, _) = conf::load(&cli.config).expect("Failed to decode TOML config"); + + let client = RimClient::build("gemini", prompt, gemini_keys); + + let _ = client.log_prompt(); + + // let opt = &cli.opt; + // if let Some(file_path) = opt.file.as_deref() { + // let _ = single_cap(file_path, cli.config); + // } else if let Some(dir_path) = opt.dir.as_deref() { + // let _ = batch_cap(dir_path, cli.config); + // } +} diff --git a/rim-cli/src/conf.rs b/rim-cli/src/conf.rs new file mode 100644 index 0000000..07fe4b1 --- /dev/null +++ b/rim-cli/src/conf.rs @@ -0,0 +1,40 @@ +use toml::Value; +use std::fs; + +pub fn load(path: &str) -> Result<(String, Vec, Vec), Box> { + let toml_str = fs::read_to_string(path)?; + let toml_value: Value = toml::from_str(&toml_str)?; + + let prompt = toml_value + .get("prompt") + .ok_or("Missing 'prompt' key in TOML")? + .get("value") + .ok_or("Missing 'value' key ")? + .as_str() + .ok_or("Invalid type for 'prompt'")? + .to_string(); + + let gemini_keys = toml_value + .get("gemini") + .ok_or("Missing 'gemini' table in TOML")? + .get("keys") + .ok_or("Missing 'keys' key in 'gemini' table")? + .as_array() + .ok_or("Invalid type for 'gemini.keys'")? + .iter() + .map(|value| value.as_str().unwrap().to_string()) // Assuming keys are strings + .collect(); + + let gpt4v_keys = toml_value + .get("gpt4v") + .ok_or("Missing 'gpt4v' table in TOML")? + .get("keys") + .ok_or("Missing 'keys' key in 'gpt4v' table")? + .as_array() + .ok_or("Invalid type for 'gpt4v.keys'")? + .iter() + .map(|value| value.as_str().unwrap().to_string()) // Assuming keys are strings + .collect(); + + Ok((prompt, gemini_keys, gpt4v_keys)) +} \ No newline at end of file diff --git a/rimc/Cargo.toml b/rim/Cargo.toml similarity index 80% rename from rimc/Cargo.toml rename to rim/Cargo.toml index 8d8817d..488e28b 100644 --- a/rimc/Cargo.toml +++ b/rim/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "librimc" +name = "librim" version = "0.1.0" edition = "2021" @@ -7,5 +7,5 @@ edition = "2021" tokio = { version = "1.37.0", features = ["full"] } [lib] -name = "librimc" +name = "librim" path = "src/lib.rs" diff --git a/rim/src/client.rs b/rim/src/client.rs new file mode 100644 index 0000000..ae7ca44 --- /dev/null +++ b/rim/src/client.rs @@ -0,0 +1,49 @@ +use super::model::google::Gemini; +use super::model::openai::GPT; + +pub trait LLM { + fn generate_caption(&self) -> Result>; + fn log_prompt(&self) -> &String; +} + +impl LLM for Gemini { + fn generate_caption(&self) -> Result> { + Ok("Gemini Caption".to_string()) + } + + fn log_prompt(&self) -> &String{ + &self.get_prompt() + } +} + +impl LLM for GPT { + fn generate_caption(&self) -> Result> { + Ok("GPT4V Caption".to_string()) + } + fn log_prompt(&self) -> &String{ + &self.get_prompt() + } +} + +pub struct RimClient { + client: Box, +} + +impl RimClient { + pub fn build(client_type: &str, prompt: String, keys: Vec) -> Self { + let client: Box = match client_type { + "gemini" => Box::new(Gemini::build(prompt, keys)), + "gpt" => Box::new(GPT::build(prompt, keys)), + _ => panic!("Invalid client type"), + }; + Self { client } + } + + pub fn generate_caption(&self) -> Result> { + self.client.generate_caption() + } + + pub fn log_prompt(&self) { + println!("Prompt: {}", self.client.log_prompt()); + } +} \ No newline at end of file diff --git a/rimc/src/lib.rs b/rim/src/lib.rs similarity index 86% rename from rimc/src/lib.rs rename to rim/src/lib.rs index 871103e..cfae0c9 100644 --- a/rimc/src/lib.rs +++ b/rim/src/lib.rs @@ -1,3 +1,6 @@ +pub mod model; +pub mod client; + pub fn single_cap(f: &str, conf: String) { println!("Processing file: {} with {}", f, conf); } diff --git a/rim/src/model/google.rs b/rim/src/model/google.rs new file mode 100644 index 0000000..c6ce162 --- /dev/null +++ b/rim/src/model/google.rs @@ -0,0 +1,13 @@ +pub struct Gemini { + prompt: String, + keys: Vec, +} + +impl Gemini { + pub fn build(prompt: String, keys: Vec) -> Self { + Self { prompt, keys } + } + pub fn get_prompt(&self) -> &String{ + &self.prompt + } +} \ No newline at end of file diff --git a/rim/src/model/mod.rs b/rim/src/model/mod.rs new file mode 100644 index 0000000..a6f7e38 --- /dev/null +++ b/rim/src/model/mod.rs @@ -0,0 +1,2 @@ +pub mod google; +pub mod openai; \ No newline at end of file diff --git a/rim/src/model/openai.rs b/rim/src/model/openai.rs new file mode 100644 index 0000000..fe44fc9 --- /dev/null +++ b/rim/src/model/openai.rs @@ -0,0 +1,13 @@ +pub struct GPT { + prompt: String, + keys: Vec, +} + +impl GPT { + pub fn build(prompt: String, keys: Vec) -> Self { + Self { prompt, keys } + } + pub fn get_prompt(&self) -> &String{ + &self.prompt + } +} \ No newline at end of file diff --git a/rimc-cli/src/cli.rs b/rimc-cli/src/cli.rs deleted file mode 100644 index 0500e98..0000000 --- a/rimc-cli/src/cli.rs +++ /dev/null @@ -1,33 +0,0 @@ -use clap::{Args, Parser}; - -use librimc::{single_cap, batch_cap}; - -#[derive(Args)] -#[group(required = true, multiple = false)] -struct Opts { - #[arg(short = 'f', long, name = "FILE")] - file: Option, - - #[arg(short = 'd', long, name = "DIR")] - dir: Option, -} - -#[derive(Parser)] -struct Cli { - #[command(flatten)] - opt: Opts, - - #[arg(short = 'c', long, name = "CONFIG")] - config: String, -} - -fn main() { - let cli = Cli::parse(); - - let opt = &cli.opt; - if let Some(file_path) = opt.file.as_deref() { - let _ = single_cap(file_path, cli.config); - } else if let Some(dir_path) = opt.dir.as_deref() { - let _ = batch_cap(dir_path, cli.config); - } -}