diff --git a/README.md b/README.md index f33c37a..261bc2b 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,10 @@ keys = [ "AIza00000000000000000000000000000000001", ] +[vertex] +project = "prjt-001" +key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1" + [gpt4v] keys = [ "sk-00000000000000000000000000000000", @@ -93,4 +97,83 @@ rustup update nightly && rustup default nightly cargo build --release ./target/release/rim "assets/images" -c config.toml +``` + + +# Upload a file using the GenAI File API via curl. + +```bash +api_key="" +input_file="" +display_name="" + +while getopts a:i:d: flag +do +case "${flag}" in +a) api_key=${OPTARG};; +i) input_file=${OPTARG};; +d) display_name=${OPTARG};; +esac +done + +BASE_URL="https://generativelanguage.googleapis.com" + +CHUNK_SIZE=8388608 # 8 MiB +MIME_TYPE=$(file -b --mime-type "${input_file}") +NUM_BYTES=$(wc -c < "${input_file}") + +echo "Starting upload of '${input_file}' to ${BASE_URL}..." +echo " MIME type: '${MIME_TYPE}'" +echo " Size: ${NUM_BYTES} bytes" + +# Initial resumable request defining metadata. + +tmp_header_file=$(mktemp /tmp/upload-header.XXX) +curl "${BASE_URL}/upload/v1beta/files?key=${api_key}" \ +-D "${tmp_header_file}" \ +-H "X-Goog-Upload-Protocol: resumable" \ +-H "X-Goog-Upload-Command: start" \ +-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \ +-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \ +-H "Content-Type: application/json" \ +-d "{'file': {'display_name': '${display_name}'}}" +upload_url=$(grep "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r") +rm "${tmp_header_file}" + +if [[ -z "${upload_url}" ]]; then +echo "Failed initial resumable upload request." +exit 1 +fi + +# Upload the actual bytes. + +NUM_CHUNKS=$(((NUM_BYTES + CHUNK_SIZE - 1) / CHUNK_SIZE)) +tmp_chunk_file=$(mktemp /tmp/upload-chunk.XXX) +for i in $(seq 1 ${NUM_CHUNKS}) +do +offset=$((i - 1)) +byte_offset=$((offset * CHUNK_SIZE)) + +# Read the actual bytes to the tmp file. + +dd skip="${offset}" bs="${CHUNK_SIZE}" count=1 if="${input_file}" of="${tmp_chunk_file}" 2>/dev/null +num_chunk_bytes=$(wc -c < "${tmp_chunk_file}") +upload_command="upload" +if [[ ${i} -eq ${NUM_CHUNKS} ]] ; then + +# For the final chunk, specify "finalize". + +upload_command="${upload_command}, finalize" +fi +echo " Uploading ${byte_offset} - $((byte_offset + num_chunk_bytes)) of ${NUM_BYTES}..." +curl "${upload_url}" \ +-H "Content-Length: ${num_chunk_bytes}" \ +-H "X-Goog-Upload-Offset: ${byte_offset}" \ +-H "X-Goog-Upload-Command: ${upload_command}" \ +--data-binary "@${tmp_chunk_file}" +done + +rm "${tmp_chunk_file}" + +echo "Upload complete!" ``` \ No newline at end of file diff --git a/assets/disc.py b/assets/disc.py new file mode 100644 index 0000000..30fe119 --- /dev/null +++ b/assets/disc.py @@ -0,0 +1,8 @@ +import requests + +GENAI_API_DISCOVERY_URL= "https://generativelanguage.googleapis.com/$discovery/rest?version=v1beta&key=" +GOOGLE_API_KEY = "AIxxxx" +url=f"{GENAI_API_DISCOVERY_URL}{GOOGLE_API_KEY}" + +response = requests.get(url) +print(response.text) \ No newline at end of file diff --git a/config.toml b/config.toml index 4a10a94..418ac30 100644 --- a/config.toml +++ b/config.toml @@ -7,19 +7,14 @@ keys = [ "AIza00000000000000000000000000000000001", ] +[vertex] +project = "prjt-001" +key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1" + [gpt4v] keys = [ "sk-00000000000000000000000000000000", "sk-00000000000000000000000000000001", ] -[vertex] -projects = [ - "prjt-001", - "prjt-002", -] -keys = [ - "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1", - "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx2", -] diff --git a/rim-cli/Cargo.toml b/rim-cli/Cargo.toml index 4c95275..81689b7 100644 --- a/rim-cli/Cargo.toml +++ b/rim-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rim-cli" -version = "0.1.29" +version = "0.2.0" edition = "2021" [dependencies] @@ -10,5 +10,4 @@ librim = { path = "../rim" } [[bin]] name = "rim" -path = "src/cli.rs" - +path = "src/cli.rs" \ No newline at end of file diff --git a/rim-cli/src/cli.rs b/rim-cli/src/cli.rs index c3f9e5d..b835494 100644 --- a/rim-cli/src/cli.rs +++ b/rim-cli/src/cli.rs @@ -23,8 +23,9 @@ fn main() -> Result<(), Box> { let start_time = std::time::Instant::now(); let cli = Cli::parse(); - let (prompt, gemini_keys, _) = rim_cli::parse(&cli.config).expect("Failed to decode TOML config"); - let _ = librim::_rt(&cli._in, gemini_keys, prompt, cli.opt.limit); + let (prompt, vertex_project, vertex_key, gemini_keys, _) = rim_cli::parse(&cli.config).expect("Failed to decode TOML config"); + // let _ = librim::_rt(&cli._in, gemini_keys, prompt, cli.opt.limit); + let _ = librim::_rt2(&cli._in, vertex_project, vertex_key, prompt, cli.opt.limit); println!("Processing time: {:?}", start_time.elapsed()); Ok(()) diff --git a/rim-cli/src/lib.rs b/rim-cli/src/lib.rs index de3f133..e0a9fd0 100644 --- a/rim-cli/src/lib.rs +++ b/rim-cli/src/lib.rs @@ -1,7 +1,7 @@ use toml::Value; use std::fs; -pub fn parse(path: &str) -> Result<(String, Vec, Vec), Box> { +pub fn parse(path: &str) -> Result<(String, String, String, Vec, Vec), Box> { let toml_str = fs::read_to_string(path)?; let toml_value: Value = toml::from_str(&toml_str)?; @@ -14,6 +14,24 @@ pub fn parse(path: &str) -> Result<(String, Vec, Vec), Box Result<(String, Vec, Vec), Box Result> { + let _b64 = img._base64().await?; + let _delay = (idx % 100) * 200; + let mut retries = 0; + let _cap = loop { + tokio::time::sleep(tokio::time::Duration::from_millis(_delay as u64)).await; + match clt.generate_caption(_b64.clone()).await { + Ok(res) => break res, + Err(e) => { + println!("Retry {:#?} with {:?} times", idx, retries); + retries += 1; + tokio::time::sleep(tokio::time::Duration::from_secs(15)).await; + if retries > 10 { + println!("Failed Path: {:#?}", img.local); + return Err(e); + } + } + }; + }; + let _ = img.save(_cap).await?; + clt.log_api(); + img.log_file(); + Ok(idx) +} + +async fn processing( + images: Vec, + clients: Vec, + limit: usize +) -> Result<(), Box> { + let mut tasks = futures::stream::FuturesUnordered::new(); + let mut num = 0; + let total = clients.len(); + + for chunk in images.chunks(limit) { + for img in chunk { + let clt = &clients[num % total]; + tasks.push(caption(img, clt, num)); + num += 1; + } + + while let Some(handle) = tasks.next().await { + let _ = match handle { + Ok(i) => eprintln!("Success: {:?}", i), + Err(e) => eprintln!("Task failed: {:?}", e), + }; + } + tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + tasks.clear(); + } + Ok(()) +} + +pub fn _rt(pth: &str, keys: Vec, prompt: String, limit: Option) -> Result<(), Box> { + let mut clients = Vec::new(); + + for key in keys { + let _prompt = prompt.clone(); + let _key = key.clone(); + let _client = client::RimClient::build(_prompt, _key); + clients.push(_client); + } + + let images: Vec = std::fs::read_dir(pth) + .unwrap() + .filter_map(Result::ok) + .map(|entry| entry.path().display().to_string()) + .map(|f| modality::Image::from(&f).unwrap()) + .filter(|i| !i.existed()) + .collect(); + + println!("Processing Media {:#?}", images.len()); + std::thread::sleep(std::time::Duration::from_secs(1)); + // println!("{:?}", images); + // let mut images = Vec::new(); + // let i = modality::modality::Image::from(pth)?; + // images.push(i); + let rt = tokio::runtime::Builder::new_multi_thread().enable_all().build()?; + match limit { + Some(n) => rt.block_on(processing(images, clients, n)), + None => rt.block_on(processing(images, clients, 1000)) + }; + Ok(()) +} diff --git a/rim/src/client.rs b/rim/src/client.rs index 97e6032..69e39c7 100644 --- a/rim/src/client.rs +++ b/rim/src/client.rs @@ -1,31 +1,40 @@ -use crate::llm::Gemini; -use crate::llm::GPT; +use crate::llm::Vertex; +// use crate::llm::GPT; use reqwest::header::{HeaderMap, AUTHORIZATION}; #[derive(Debug)] pub struct RimClient { - model: Gemini, + model: Vertex, + headers: HeaderMap, } impl RimClient { - pub fn new(model: Gemini) -> Self { - Self { model } + pub fn new(model: Vertex, headers: HeaderMap) -> Self { + Self { model, headers } } - pub fn build(prompt: String, key: String) -> Self { - let model = Gemini::build(prompt, key); - Self::new(model) + pub fn build(prompt: String, project: String) -> Self { + let model = Vertex::build(prompt, project); + let headers = HeaderMap::new(); + Self::new(model, headers) } - pub async fn generate_caption(&self, data: String) -> Result> { + pub fn with_auth(mut self, key: String) -> Self{ + let auth = format!("Bearer {key}"); + self.headers.insert(AUTHORIZATION, auth.parse().unwrap()); + self + } + + pub async fn generate_caption(&self, fileUrl: String, mime: String) -> Result> { let api = self.model.get_api(); - let payload = self.model.payload(data); + let payload = self.model.payload(fileUrl, mime); let client = reqwest::Client::builder().build()?; let response = client .post(api) + .headers(self.headers.clone()) .json(&payload) .send() .await?; @@ -47,6 +56,27 @@ impl RimClient { Ok(raw.to_string()) } + pub async fn upload_asset(&self, data: Vec, mime: &str) -> Result> { + let api = self.model.get_api(); + // let payload = self.model.payload(data, mime); + + // let raw = json + // .get("candidates") + // .and_then(|candidates| candidates.get(0)) + // .and_then(|candidate| candidate.get("content")) + // .and_then(|content| content.get("parts")) + // .and_then(|parts| parts.get(0)) + // .and_then(|part| part.get("text")) + // .and_then(|text| text.as_str()) + // .ok_or_else(|| "Missing or invalid response data".to_string())?; + let url = "https://github.com/AUTOM77/Rim/raw/main/assets/videos/1.mp4".to_string(); + Ok(url) + } + + pub async fn delete_asset(&self, url: String) -> Result<(), Box> { + Ok(()) + } + pub fn log_api(&self) { println!("API: {}", self.model.get_api()); } diff --git a/rim/src/lib.rs b/rim/src/lib.rs index 880aee7..f6ad8b6 100644 --- a/rim/src/lib.rs +++ b/rim/src/lib.rs @@ -5,47 +5,48 @@ pub mod modality; use futures::StreamExt; async fn caption( - img: &modality::Image, + m: &modality::Media, clt: &client::RimClient, idx: usize ) -> Result> { - let _b64 = img._base64().await?; + let _data = m.data().await?; + let mime = m.get_mime(); + let m_url = clt.upload_asset(_data, &mime).await?; let _delay = (idx % 100) * 200; let mut retries = 0; let _cap = loop { tokio::time::sleep(tokio::time::Duration::from_millis(_delay as u64)).await; - match clt.generate_caption(_b64.clone()).await { + match clt.generate_caption(m_url.clone(), mime.clone()).await { Ok(res) => break res, Err(e) => { println!("Retry {:#?} with {:?} times", idx, retries); retries += 1; tokio::time::sleep(tokio::time::Duration::from_secs(15)).await; if retries > 10 { - println!("Failed Path: {:#?}", img.local); + print!("Failed Path: {:#?}", m.log_file()); return Err(e); } } }; }; - let _ = img.save(_cap).await?; + let _ = m.save(_cap).await?; clt.log_api(); - img.log_file(); + print!("Success Path: {:#?}", m.log_file()); Ok(idx) } async fn processing( - images: Vec, - clients: Vec, + media: Vec, + client: client::RimClient, limit: usize ) -> Result<(), Box> { let mut tasks = futures::stream::FuturesUnordered::new(); let mut num = 0; - let total = clients.len(); - for chunk in images.chunks(limit) { - for img in chunk { - let clt = &clients[num % total]; - tasks.push(caption(img, clt, num)); + for chunk in media.chunks(limit) { + for m in chunk { + let clt = &client; + tasks.push(caption(m, clt, num)); num += 1; } @@ -61,34 +62,23 @@ async fn processing( Ok(()) } -pub fn _rt(pth: &str, keys: Vec, prompt: String, limit: Option) -> Result<(), Box> { - let mut clients = Vec::new(); +pub fn _rt2(pth: &str, prj: String, key:String, prompt: String, limit: Option) -> Result<(), Box> { + let client = client::RimClient::build(prompt, prj).with_auth(key); - for key in keys { - let _prompt = prompt.clone(); - let _key = key.clone(); - let _client = client::RimClient::build(_prompt, _key); - clients.push(_client); - } - - let images: Vec = std::fs::read_dir(pth) + let media: Vec = std::fs::read_dir(pth) .unwrap() .filter_map(Result::ok) .map(|entry| entry.path().display().to_string()) - .map(|f| modality::Image::from(&f).unwrap()) + .map(|f| modality::Media::from(&f).unwrap()) .filter(|i| !i.existed()) .collect(); - - println!("Processing Media {:#?}", images.len()); + + println!("Processing Media {:#?}", media.len()); std::thread::sleep(std::time::Duration::from_secs(1)); - // println!("{:?}", images); - // let mut images = Vec::new(); - // let i = modality::modality::Image::from(pth)?; - // images.push(i); let rt = tokio::runtime::Builder::new_multi_thread().enable_all().build()?; match limit { - Some(n) => rt.block_on(processing(images, clients, n)), - None => rt.block_on(processing(images, clients, 1000)) + Some(n) => rt.block_on(processing(media, client, n)), + None => rt.block_on(processing(media, client, 1000)) }; Ok(()) } diff --git a/rim/src/llm/google.rs b/rim/src/llm/google.rs index f4ade24..080a453 100644 --- a/rim/src/llm/google.rs +++ b/rim/src/llm/google.rs @@ -1,9 +1,16 @@ +const GENAI_API_DISCOVERY_URL: &str = "https://generativelanguage.googleapis.com/$discovery/rest?version=v1beta&key="; const GEMINI_0514: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-preview-0514:generateContent?key="; const GEMINI_VISION: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.0-pro-vision-001:generateContent?key="; const GEMINI_FLASH: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key="; const GEMINI_PRO: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?key="; const GEMINI_EXP: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-experimental:generateContent?key="; const GEMINI: &str = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key="; +const GEMINI_FILE: &str = "https://generativelanguage.googleapis.com/upload/v1beta/files?key={}&alt=json&uploadType=media"; +// &alt=json&uploadType=media + +const VERTEX: &str = "https://{ZONE}-aiplatform.googleapis.com/v1/projects/${PROJECT}/locations/{ZONE}/publishers/google/models/${MODEL}:generateContent"; +const VERTEX_PRO: &str = "https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT}/locations/us-central1/publishers/google/models/gemini-1.5-pro-preview-0514:generateContent"; +const VERTEX_FLASH: &str = "https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT}/locations/us-central1/publishers/google/models/gemini-1.5-flash-preview-0514:generateContent"; use serde_json::json; @@ -24,16 +31,17 @@ impl Gemini { Self::new(prompt, api) } - pub fn get_prompt(&self) -> String{ + pub fn get_prompt(&self) -> String { self.prompt.clone() } - pub fn get_api(&self) -> String{ + pub fn get_api(&self) -> String { self.api.clone() } - pub fn payload(&self, data: String) -> serde_json::Value{ - let payload = json!({ + pub fn payload(&self, data: String) -> serde_json::Value { + let payload = + json!({ "contents": [ { "role": "user", @@ -71,4 +79,70 @@ impl Gemini { }); payload } -} \ No newline at end of file +} + +#[derive(Debug)] +pub struct Vertex { + prompt: String, + api: String, +} + +impl Vertex { + pub fn new(prompt: String, api: String) -> Self { + Self { prompt, api} + } + + pub fn build(prompt: String, project: String) -> Self { + let api = VERTEX_PRO.replace("{PROJECT}", &project); + Self::new(prompt, api) + } + + pub fn get_prompt(&self) -> String { + self.prompt.clone() + } + + pub fn get_api(&self) -> String { + self.api.clone() + } + + pub fn payload(&self, fileUrl: String, mime: String) -> serde_json::Value { + let payload = + json!({ + "contents": [ + { + "role": "user", + "parts": [ + {"fileData": { "mimeType": mime, "fileUri": fileUrl } }, + {"text": self.prompt.clone()}, + ] + } + ], + "generationConfig": { + "temperature": 1, + "topK": 64, + "topP": 0.95, + "maxOutputTokens": 8192, + "stopSequences": [] + }, + "safetySettings": [ + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_NONE" + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_NONE" + } + ] + }); + payload + } +} diff --git a/rim/src/llm/mod.rs b/rim/src/llm/mod.rs index fe367b7..e267069 100644 --- a/rim/src/llm/mod.rs +++ b/rim/src/llm/mod.rs @@ -1,5 +1,5 @@ pub mod google; pub mod openai; -pub use google::Gemini; +pub use google::{Gemini, Vertex}; pub use openai::GPT; \ No newline at end of file diff --git a/rim/src/modality/media.rs b/rim/src/modality/media.rs index aae6fe6..95c5dd9 100644 --- a/rim/src/modality/media.rs +++ b/rim/src/modality/media.rs @@ -1,17 +1,18 @@ use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; #[derive(Debug)] pub struct Media { - pub root: PathBuf, - pub local: PathBuf, + root: PathBuf, + local: PathBuf, + mime: String } impl Media { - pub fn new(root: PathBuf, local:PathBuf) -> Self { - Self { root, local } + pub fn new(root: PathBuf, local:PathBuf, mime: String) -> Self { + Self { root, local, mime } } pub fn with_root(mut self, root: PathBuf) -> Self { @@ -25,14 +26,19 @@ impl Media { .parent() .ok_or("No parent directory found")?; let root = PathBuf::from(format!("{}_cap", _root.display())); - Ok(Self::new(root, local)) + let mime = match local.extension().unwrap().to_str() { + Some("png") => "image/png", + Some("mp4") => "video/mp4", + _ => "media/unkown", + }; + Ok(Self::new(root, local, mime.to_string())) } - pub async fn _base64(&self) -> Result> { + pub async fn data(&self) -> Result, Box> { let mut f = tokio::fs::File::open(&self.local).await?; let mut buffer = Vec::new(); f.read_to_end(&mut buffer).await?; - Ok(BASE64.encode(buffer)) + Ok(buffer) } pub async fn save(&self, cap: String) -> Result<(), Box> { @@ -55,7 +61,11 @@ impl Media { std::fs::metadata(&caption_path).is_ok() } - pub fn log_file(&self) { - println!("File : {:#?}", self.local); + pub fn get_mime(&self) -> String{ + self.mime.clone() + } + + pub fn log_file(&self) -> String { + self.local.display().to_string() } } \ No newline at end of file diff --git a/rim/src/modality/mod.rs b/rim/src/modality/mod.rs index a2eccd2..7039ab5 100644 --- a/rim/src/modality/mod.rs +++ b/rim/src/modality/mod.rs @@ -1,2 +1,4 @@ pub mod image; -pub use image::Image; \ No newline at end of file +pub mod media; +pub use image::Image; +pub use media::Media; \ No newline at end of file