# %%
from transformers import AutoModelForCausalLM, AutoTokenizer

# %%
# Load model and tokenizer
model_name = "bigscience/bloom-7b1"  # Replace with your model
# model_name = "bigscience/bloomz-1b1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Automatically map model layers to available GPUs
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # Automatically split across multiple GPUs
    torch_dtype="auto",  # Use FP16 if available
)

# %%
# Prepare input
text = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to("cuda")

# Generate output
outputs = model.generate(inputs["input_ids"], max_length=50)

# Decode and print result
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# %%
# Generation helper: wraps the user text in a prompt, tokenizes it,
# generates a completion, and decodes the result
def generate(text):
    # Define prompt
    prompt = f"Give me past product names relating to: '{text}'"

    # Tokenize the prompt and generate a completion
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = inputs.to("cuda")
    outputs = model.generate(
        inputs["input_ids"],
        max_length=100,
        no_repeat_ngram_size=3,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage
# text = "Advanced Data Analytics Platform"
text = "windows server"
result = generate(text)
print(f"Generation: {result}")

# %%
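# %%
# Optional: a minimal sketch of sampling-based generation for more varied
# outputs, as an alternative to the greedy decoding used above. The parameter
# values below (max_new_tokens, temperature, top_p) are illustrative
# assumptions, not tuned settings for this model.
sample_prompt = "Give me past product names relating to: 'windows server'"
sample_inputs = tokenizer(sample_prompt, return_tensors="pt").to("cuda")
sample_outputs = model.generate(
    sample_inputs["input_ids"],
    max_new_tokens=60,       # cap on newly generated tokens (assumed value)
    do_sample=True,          # sample instead of greedy decoding
    temperature=0.7,         # softens the token distribution (assumed value)
    top_p=0.9,               # nucleus sampling threshold (assumed value)
    no_repeat_ngram_size=3,  # same repetition guard as in generate() above
)
print(tokenizer.decode(sample_outputs[0], skip_special_tokens=True))

# %%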