---
license: bsl-1.0
datasets:
  - JDhruv14/Bhagavad-Gita_Dataset
metrics:
  - character
base_model:
  - ibm-granite/granite-docling-258M
new_version: ibm-granite/granite-docling-258M
pipeline_tag: summarization
library_name: fastai
tags:
  - art
---

Example of tool-calling inference with `transformers`:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"
model_path = "ibm-granite/granite-4.0-micro"
tokenizer = AutoTokenizer.from_pretrained(model_path)
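# A sketch, not in the original card: pick the device automatically instead
# of hardcoding "cuda":
# device = "cuda" if torch.cuda.is_available() else "cpu"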

# drop device_map if running on CPU
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
model.eval()

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather for a specified city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "Name of the city"
                    }
                },
                "required": ["city"]
            }
        }
    }
]
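# The schema above follows the OpenAI-style function-calling format that
# tokenizer.apply_chat_template() accepts via its `tools` argument.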

# change input text as desired
chat = [
    { "role": "user", "content": "What's the weather like in Boston right now?" },
]
chat = tokenizer.apply_chat_template(chat,
                                     tokenize=False,
                                     tools=tools,
                                     add_generation_prompt=True)
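# `chat` is now a single prompt string with the tool schema and generation
# prompt rendered in; tokenize=True would return token ids directly instead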

# tokenize the text
input_tokens = tokenizer(chat, return_tensors="pt").to(device)
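# .to(device) moves the input ids and attention mask onto the model's device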

# generate output tokens
output = model.generate(**input_tokens, max_new_tokens=100)
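# max_new_tokens caps the reply length; generation can stop earlier if the
# model emits its end-of-sequence token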

# decode output tokens into text
output = tokenizer.batch_decode(output)

# print output
print(output[0])
```
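
`batch_decode` returns the rendered prompt together with the model's reply. A minimal sketch, not from the original card, that prints only the newly generated text (assuming a single-sequence batch):

```python
# generate as above, then slice off the prompt tokens before decoding
generated = model.generate(**input_tokens, max_new_tokens=100)
prompt_len = input_tokens["input_ids"].shape[1]
print(tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True))
```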