Skip to content

Commit

Permalink
v0.2.0 use vertex API
Browse files Browse the repository at this point in the history
  • Loading branch information
Mon-ius committed May 24, 2024
1 parent b763f23 commit 58d0aa5
Show file tree
Hide file tree
Showing 13 changed files with 379 additions and 75 deletions.
83 changes: 83 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ keys = [
"AIza00000000000000000000000000000000001",
]
[vertex]
project = "prjt-001"
key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1"
[gpt4v]
keys = [
"sk-00000000000000000000000000000000",
Expand Down Expand Up @@ -93,4 +97,83 @@ rustup update nightly && rustup default nightly

cargo build --release
./target/release/rim "assets/images" -c config.toml
```


# Upload a file using the GenAI File API via curl.

```bash
api_key=""
input_file=""
display_name=""

while getopts a:i:d: flag
do
case "${flag}" in
a) api_key=${OPTARG};;
i) input_file=${OPTARG};;
d) display_name=${OPTARG};;
esac
done

BASE_URL="https://generativelanguage.googleapis.com"

CHUNK_SIZE=8388608 # 8 MiB
MIME_TYPE=$(file -b --mime-type "${input_file}")
NUM_BYTES=$(wc -c < "${input_file}")

echo "Starting upload of '${input_file}' to ${BASE_URL}..."
echo " MIME type: '${MIME_TYPE}'"
echo " Size: ${NUM_BYTES} bytes"

# Initial resumable request defining metadata.

tmp_header_file=$(mktemp /tmp/upload-header.XXX)
curl "${BASE_URL}/upload/v1beta/files?key=${api_key}" \
-D "${tmp_header_file}" \
-H "X-Goog-Upload-Protocol: resumable" \
-H "X-Goog-Upload-Command: start" \
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
-H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
-H "Content-Type: application/json" \
-d "{'file': {'display_name': '${display_name}'}}"
upload_url=$(grep "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm "${tmp_header_file}"

if [[ -z "${upload_url}" ]]; then
echo "Failed initial resumable upload request."
exit 1
fi

# Upload the actual bytes.

NUM_CHUNKS=$(((NUM_BYTES + CHUNK_SIZE - 1) / CHUNK_SIZE))
tmp_chunk_file=$(mktemp /tmp/upload-chunk.XXX)
for i in $(seq 1 ${NUM_CHUNKS})
do
offset=$((i - 1))
byte_offset=$((offset * CHUNK_SIZE))

# Read the actual bytes to the tmp file.

dd skip="${offset}" bs="${CHUNK_SIZE}" count=1 if="${input_file}" of="${tmp_chunk_file}" 2>/dev/null
num_chunk_bytes=$(wc -c < "${tmp_chunk_file}")
upload_command="upload"
if [[ ${i} -eq ${NUM_CHUNKS} ]] ; then

# For the final chunk, specify "finalize".

upload_command="${upload_command}, finalize"
fi
echo " Uploading ${byte_offset} - $((byte_offset + num_chunk_bytes)) of ${NUM_BYTES}..."
curl "${upload_url}" \
-H "Content-Length: ${num_chunk_bytes}" \
-H "X-Goog-Upload-Offset: ${byte_offset}" \
-H "X-Goog-Upload-Command: ${upload_command}" \
--data-binary "@${tmp_chunk_file}"
done

rm "${tmp_chunk_file}"

echo "Upload complete!"
```
8 changes: 8 additions & 0 deletions assets/disc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import requests

GENAI_API_DISCOVERY_URL= "https://generativelanguage.googleapis.com/$discovery/rest?version=v1beta&key="
GOOGLE_API_KEY = "AIxxxx"
url=f"{GENAI_API_DISCOVERY_URL}{GOOGLE_API_KEY}"

response = requests.get(url)
print(response.text)
13 changes: 4 additions & 9 deletions config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,14 @@ keys = [
"AIza00000000000000000000000000000000001",
]

[vertex]
project = "prjt-001"
key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1"

[gpt4v]
keys = [
"sk-00000000000000000000000000000000",
"sk-00000000000000000000000000000001",
]

[vertex]
projects = [
"prjt-001",
"prjt-002",
]

keys = [
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1",
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx2",
]
5 changes: 2 additions & 3 deletions rim-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rim-cli"
version = "0.1.29"
version = "0.2.0"
edition = "2021"

[dependencies]
Expand All @@ -10,5 +10,4 @@ librim = { path = "../rim" }

[[bin]]
name = "rim"
path = "src/cli.rs"

path = "src/cli.rs"
5 changes: 3 additions & 2 deletions rim-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let start_time = std::time::Instant::now();
let cli = Cli::parse();

let (prompt, gemini_keys, _) = rim_cli::parse(&cli.config).expect("Failed to decode TOML config");
let _ = librim::_rt(&cli._in, gemini_keys, prompt, cli.opt.limit);
let (prompt, vertex_project, vertex_key, gemini_keys, _) = rim_cli::parse(&cli.config).expect("Failed to decode TOML config");
// let _ = librim::_rt(&cli._in, gemini_keys, prompt, cli.opt.limit);
let _ = librim::_rt2(&cli._in, vertex_project, vertex_key, prompt, cli.opt.limit);

println!("Processing time: {:?}", start_time.elapsed());
Ok(())
Expand Down
22 changes: 20 additions & 2 deletions rim-cli/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use toml::Value;
use std::fs;

pub fn parse(path: &str) -> Result<(String, Vec<String>, Vec<String>), Box<dyn std::error::Error>> {
pub fn parse(path: &str) -> Result<(String, String, String, Vec<String>, Vec<String>), Box<dyn std::error::Error>> {
let toml_str = fs::read_to_string(path)?;
let toml_value: Value = toml::from_str(&toml_str)?;

Expand All @@ -14,6 +14,24 @@ pub fn parse(path: &str) -> Result<(String, Vec<String>, Vec<String>), Box<dyn s
.ok_or("Invalid type for 'prompt'")?
.to_string();

let vertex_project = toml_value
.get("vertex")
.ok_or("Missing 'vertex' key in TOML")?
.get("project")
.ok_or("Missing 'project'")?
.as_str()
.ok_or("Invalid type for 'vertex_project'")?
.to_string();

let vertex_key = toml_value
.get("vertex")
.ok_or("Missing 'vertex' key in TOML")?
.get("key")
.ok_or("Missing 'key'")?
.as_str()
.ok_or("Invalid type for 'vertex_key'")?
.to_string();

// let prompts = toml_value
// .get("prompt")
// .ok_or("Missing 'prompt' table in TOML")?
Expand Down Expand Up @@ -47,5 +65,5 @@ pub fn parse(path: &str) -> Result<(String, Vec<String>, Vec<String>), Box<dyn s
.map(|value| value.as_str().unwrap().to_string())
.collect();

Ok((prompt, gemini_keys, gpt4v_keys))
Ok((prompt, vertex_project, vertex_key, gemini_keys, gpt4v_keys))
}
94 changes: 94 additions & 0 deletions rim/src/_lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
pub mod llm;
pub mod client;
pub mod modality;

use futures::StreamExt;

async fn caption(
img: &modality::Image,
clt: &client::RimClient,
idx: usize
) -> Result<usize, Box<dyn std::error::Error + Send + Sync>> {
let _b64 = img._base64().await?;
let _delay = (idx % 100) * 200;
let mut retries = 0;
let _cap = loop {
tokio::time::sleep(tokio::time::Duration::from_millis(_delay as u64)).await;
match clt.generate_caption(_b64.clone()).await {
Ok(res) => break res,
Err(e) => {
println!("Retry {:#?} with {:?} times", idx, retries);
retries += 1;
tokio::time::sleep(tokio::time::Duration::from_secs(15)).await;
if retries > 10 {
println!("Failed Path: {:#?}", img.local);
return Err(e);
}
}
};
};
let _ = img.save(_cap).await?;
clt.log_api();
img.log_file();
Ok(idx)
}

async fn processing(
images: Vec<modality::Image>,
clients: Vec<client::RimClient>,
limit: usize
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let mut tasks = futures::stream::FuturesUnordered::new();
let mut num = 0;
let total = clients.len();

for chunk in images.chunks(limit) {
for img in chunk {
let clt = &clients[num % total];
tasks.push(caption(img, clt, num));
num += 1;
}

while let Some(handle) = tasks.next().await {
let _ = match handle {
Ok(i) => eprintln!("Success: {:?}", i),
Err(e) => eprintln!("Task failed: {:?}", e),
};
}
tokio::time::sleep(tokio::time::Duration::from_secs(10)).await;
tasks.clear();
}
Ok(())
}

pub fn _rt(pth: &str, keys: Vec<String>, prompt: String, limit: Option<usize>) -> Result<(), Box<dyn std::error::Error>> {
let mut clients = Vec::new();

for key in keys {
let _prompt = prompt.clone();
let _key = key.clone();
let _client = client::RimClient::build(_prompt, _key);
clients.push(_client);
}

let images: Vec<modality::Image> = std::fs::read_dir(pth)
.unwrap()
.filter_map(Result::ok)
.map(|entry| entry.path().display().to_string())
.map(|f| modality::Image::from(&f).unwrap())
.filter(|i| !i.existed())
.collect();

println!("Processing Media {:#?}", images.len());
std::thread::sleep(std::time::Duration::from_secs(1));
// println!("{:?}", images);
// let mut images = Vec::new();
// let i = modality::modality::Image::from(pth)?;
// images.push(i);
let rt = tokio::runtime::Builder::new_multi_thread().enable_all().build()?;
match limit {
Some(n) => rt.block_on(processing(images, clients, n)),
None => rt.block_on(processing(images, clients, 1000))
};
Ok(())
}
50 changes: 40 additions & 10 deletions rim/src/client.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,40 @@
use crate::llm::Gemini;
use crate::llm::GPT;
use crate::llm::Vertex;
// use crate::llm::GPT;

use reqwest::header::{HeaderMap, AUTHORIZATION};

#[derive(Debug)]
pub struct RimClient {
model: Gemini,
model: Vertex,
headers: HeaderMap,
}

impl RimClient {
pub fn new(model: Gemini) -> Self {
Self { model }
pub fn new(model: Vertex, headers: HeaderMap) -> Self {
Self { model, headers }
}

pub fn build(prompt: String, key: String) -> Self {
let model = Gemini::build(prompt, key);
Self::new(model)
pub fn build(prompt: String, project: String) -> Self {
let model = Vertex::build(prompt, project);
let headers = HeaderMap::new();
Self::new(model, headers)
}

pub async fn generate_caption(&self, data: String) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
pub fn with_auth(mut self, key: String) -> Self{
let auth = format!("Bearer {key}");
self.headers.insert(AUTHORIZATION, auth.parse().unwrap());
self
}

pub async fn generate_caption(&self, fileUrl: String, mime: String) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
let api = self.model.get_api();
let payload = self.model.payload(data);
let payload = self.model.payload(fileUrl, mime);

let client = reqwest::Client::builder().build()?;

let response = client
.post(api)
.headers(self.headers.clone())
.json(&payload)
.send()
.await?;
Expand All @@ -47,6 +56,27 @@ impl RimClient {
Ok(raw.to_string())
}

pub async fn upload_asset(&self, data: Vec<u8>, mime: &str) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
let api = self.model.get_api();
// let payload = self.model.payload(data, mime);

// let raw = json
// .get("candidates")
// .and_then(|candidates| candidates.get(0))
// .and_then(|candidate| candidate.get("content"))
// .and_then(|content| content.get("parts"))
// .and_then(|parts| parts.get(0))
// .and_then(|part| part.get("text"))
// .and_then(|text| text.as_str())
// .ok_or_else(|| "Missing or invalid response data".to_string())?;
let url = "https://github.com/AUTOM77/Rim/raw/main/assets/videos/1.mp4".to_string();
Ok(url)
}

pub async fn delete_asset(&self, url: String) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
Ok(())
}

pub fn log_api(&self) {
println!("API: {}", self.model.get_api());
}
Expand Down
Loading

0 comments on commit 58d0aa5

Please sign in to comment.