Skip to content

Commit

Permalink
File format correct
Browse files Browse the repository at this point in the history
  • Loading branch information
sbasia committed Oct 28, 2024
1 parent 12e9e4b commit 5bd918a
Showing 1 changed file with 26 additions and 15 deletions.
41 changes: 26 additions & 15 deletions examples/Audio-Spectrogram-Transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,50 @@
# Audio classification example: run MIT's Audio Spectrogram Transformer (AST)
# on a LibriSpeech demo sample, with the model compiled for an Intel NPU.
import subprocess
import pkg_resources

# Self-install any missing third-party dependencies so the example runs
# standalone. NOTE(review): `sys` is used below and must be imported earlier
# in the file (above this hunk) — confirm.
required = {"librosa", "soundfile", "datasets", "intel-npu-acceleration-library"}
installed = {pkg.key for pkg in pkg_resources.working_set}
missing = required - installed

if missing:
    # Invoke pip as a subprocess of the current interpreter so packages
    # land in the same environment this script runs in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])

from transformers import AutoFeatureExtractor, ASTForAudioClassification
from datasets import load_dataset
import torch
import intel_npu_acceleration_library

# Small LibriSpeech validation split used for HF demos; sorted by "id" so the
# sample picked below (index 0) is deterministic across runs.
dataset = load_dataset(
    "hf-internal-testing/librispeech_asr_demo",
    "clean",
    split="validation",
    trust_remote_code=True,
)
dataset = dataset.sort("id")
sampling_rate = dataset.features["audio"].sampling_rate

# AST checkpoint fine-tuned on AudioSet; the feature extractor turns raw
# waveforms into the spectrogram inputs the model expects.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
model = ASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593"
)
print("Compile model for the NPU")
model = intel_npu_acceleration_library.compile(model)

# audio file is decoded on the fly
inputs = feature_extractor(
    dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt"
)

# Inference only — no gradients needed.
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_ids = torch.argmax(logits, dim=-1).item()
predicted_label = model.config.id2label[predicted_class_ids]
# Was a bare expression (a notebook-style no-op in a script); print the
# prediction so the example actually shows its result.
print(predicted_label)

# compute loss - target_label is e.g. "down"
target_label = model.config.id2label[0]
inputs["labels"] = torch.tensor([model.config.label2id[target_label]])
loss = model(**inputs).loss
print(round(loss.item(), 2))

0 comments on commit 5bd918a

Please sign in to comment.