domain_mapping/zero_shot/bloom.py


# %%
from transformers import AutoModelForCausalLM, AutoTokenizer
# %%
# Load model and tokenizer
# model_name = "bigscience/bloom-7b1" # Replace with your model
model_name = "bigscience/bloomz-1b1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Automatically map model layers to available GPUs
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # Automatically split layers across available GPUs
    torch_dtype="auto",  # Load the checkpoint's native dtype (e.g. FP16) when available
)
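# %%
# Sanity check (not in the original script): models loaded with
# device_map="auto" record which device each module landed on in an
# hf_device_map attribute, which is handy for debugging multi-GPU splits.
print(getattr(model, "hf_device_map", "no device map recorded"))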
# %%
# Prepare input
text = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to("cuda")
# Generate output (pass the full encoding so the attention mask travels
# along with the input ids)
outputs = model.generate(**inputs, max_length=50)
# Decode and print the result
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
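# %%
# A small variant (an assumption, not part of the original script):
# max_length counts the prompt tokens too, so max_new_tokens is often
# clearer when you only care about how much new text is produced.
outputs = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))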
# %%
# Acronym-mapping helper
def generate(text):
    # Define the prompt
    prompt = (
        "Answer concisely: give me a mapping between the acronym and its "
        f"descriptor in the format '(acronym: description)': '{text}'"
    )
    # Generate the mapping
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = inputs.to("cuda")
    outputs = model.generate(
        inputs["input_ids"],
        max_length=100,
        no_repeat_ngram_size=3,  # block verbatim repetition of any 3-gram
    )
    # Decode only the newly generated tokens (BLOOM is decoder-only, so the
    # raw output would otherwise echo the prompt)
    return tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
# Example usage
# text = "Advanced Data Analytics Platform"
text = "ColdFusion Markup Language (CFML)"
acronym = generate(text)
print(f"Acronym: {acronym}")
# %%
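# A minimal batch sketch (an assumption, not part of the original script):
# reuse the same helper over a few more acronym strings.
for t in ["Hypertext Markup Language (HTML)", "Cascading Style Sheets (CSS)"]:
    print(generate(t))  # hypothetical example inputs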