Python: fine-tune a DistilGPT-2 LLM using attention-matrix pruning, low-rank approximation, and low-rank adaptation (LoRA)
# fine tune a model with attention matrices pruned and low-rank approximation/adaptation
# https://pythonprogrammingsnippets.tumblr.com
import os
import random

import torch

# Where the fine-tuned model/tokenizer are stored and reloaded from.
MODEL_DIR = "_MODELS/lora_attention"
# Training corpus: plain text, one sentence per line.
DATA_PATH = "_DATASETS/data.txt"
# Hugging Face checkpoint used when no local model exists yet.
BASE_MODEL = "distilgpt2"


def _attention_weights(model):
    """Yield every 2-D attention weight parameter of *model*.

    GPT-2 style checkpoints (including distilgpt2) name their attention
    modules ``attn`` (e.g. ``transformer.h.0.attn.c_attn.weight``), so a
    filter on the substring ``"attention"`` alone never matches anything.
    Both spellings are accepted here. 1-D parameters are skipped because
    the callers operate on matrices only.
    """
    for name, param in model.named_parameters():
        is_attention = "attn" in name or "attention" in name
        if is_attention and "weight" in name and param.dim() == 2:
            yield param


def prune_attention_matrices(model, threshold):
    """Zero, in place, every attention weight with magnitude below *threshold*.

    Args:
        model: a ``torch.nn.Module`` whose attention parameters have
            ``attn`` or ``attention`` in their names.
        threshold: entries with ``abs(value) < threshold`` are set to 0.
    """
    with torch.no_grad():
        for param in _attention_weights(model):
            param.masked_fill_(param.abs() < threshold, 0.0)


def low_rank_approximation(model, rank):
    """Replace, in place, each attention matrix by its rank-*rank* SVD truncation.

    Args:
        model: a ``torch.nn.Module`` (see ``prune_attention_matrices``).
        rank: number of singular values to keep; values larger than the
            matrix's smaller dimension leave the matrix unchanged up to
            floating-point error.
    """
    with torch.no_grad():
        for param in _attention_weights(model):
            # torch.svd is deprecated; torch.linalg.svd returns V^H directly.
            u, s, vh = torch.linalg.svd(param, full_matrices=False)
            k = min(rank, s.numel())
            # (u * s) scales the columns of u, i.e. u @ diag(s).
            param.copy_((u[:, :k] * s[:k]) @ vh[:k, :])


def _fine_tune(model, tokenizer, train_data, num_epochs, lr, rank=None):
    """Run *num_epochs* full-batch Adam steps of causal-LM training.

    When *rank* is given, the attention matrices are truncated back to that
    rank after every optimizer step.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    batch = tokenizer(train_data, padding=True, truncation=True, return_tensors="pt")
    input_ids = batch["input_ids"]
    attention_mask = batch["attention_mask"]
    # The pad token is the EOS token, so padding must be located through the
    # attention mask; label -100 is ignored by the model's loss.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    # from_pretrained() returns the model in eval mode; enable training mode.
    model.train()
    # NOTE: the whole dataset is fed as a single batch, as in the original
    # script; a large data file will need mini-batching.
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        print("Epoch: {}, Loss: {}".format(epoch, loss.item()))
        if rank is not None:
            # NOTE(review): the original formatting was lost; re-projecting
            # after every step is the assumed placement of this call.
            low_rank_approximation(model, rank)


def low_rank_adaptation(model, train_data, tokenizer, rank, num_epochs, lr):
    """Fine-tune *model* while keeping its attention matrices at rank *rank*.

    Args:
        model: causal language model returning ``.loss`` when given labels.
        train_data: list of non-empty training sentences.
        tokenizer: tokenizer matching *model*, with a pad token set.
        rank: rank the attention matrices are truncated to after each step.
        num_epochs: number of full-batch optimizer steps.
        lr: Adam learning rate.
    """
    _fine_tune(model, tokenizer, train_data, num_epochs, lr, rank=rank)


def generate_text(model, tokenizer, prompt, max_length):
    """Sample a continuation of *prompt*, print it, and return it.

    Args:
        model: causal language model exposing ``generate``.
        tokenizer: tokenizer matching *model*.
        prompt: text to continue.
        max_length: maximum total length (in tokens) passed to ``generate``.

    Returns:
        The decoded text of the first returned sequence.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    output_ids = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        pad_token_id=tokenizer.eos_token_id,
        max_length=max_length,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.5,
        num_return_sequences=1,
    )
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print(output_text)
    return output_text


def main():
    """Load (or download) the model, compress and fine-tune it, save, and sample."""
    # Imported here so the tensor helpers above can be imported without
    # transformers being installed.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Resume from the local checkpoint when present, else start from the hub.
    if os.path.exists(MODEL_DIR):
        print("loading trained model")
        source = MODEL_DIR
    else:
        print("Downloading pretrained model from huggingface")
        source = BASE_MODEL
    tokenizer = AutoTokenizer.from_pretrained(source)
    model = AutoModelForCausalLM.from_pretrained(source)

    # GPT-2 has no pad token; reuse EOS so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token

    # One sentence per line; blank lines are dropped so they are not trained on.
    with open(DATA_PATH, encoding="utf-8") as f:
        train_data = [line.strip() for line in f if line.strip()]
    if not train_data:
        raise ValueError("no training sentences found in " + DATA_PATH)
    random.shuffle(train_data)

    prune_attention_matrices(model, 0.1)
    low_rank_approximation(model, 32)
    low_rank_adaptation(model, train_data, tokenizer, 32, 5, 5e-5)

    # Final unconstrained fine-tuning pass (no rank re-projection).
    _fine_tune(model, tokenizer, train_data, num_epochs=5, lr=5e-5)

    model.save_pretrained(MODEL_DIR)
    tokenizer.save_pretrained(MODEL_DIR)

    # Reload from disk to check that the saved artefacts are usable.
    model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

    generate_text(model, tokenizer, "quick brown", 125)


if __name__ == "__main__":
    main()
Watching VGHS at a young age ruined my standards for romance actually. Like everything Brian does in Season 1 is so awkward and adorable and endearing as hell to both the audience and Jenny.
We get so much of Brian learning how to take his feelings and Jenny seriously that by the time we get to that line, that scene in the final episode of season 1, the one where Brian finally overcomes everything he's been fighting this season, it's also the most romantic thing he's done the entire season.
"Almost nothing."
Followed by killing an entire team, capping your love interest's shitty ex, and then walking away from a narratively satisfying explosion??
Call me Jenny Matrix because I too would have been all over Brian trying to style his do
the essential Smith character arc is from "cat who is hissing & yowling & flailing & knocking everything off your countertop & clawing your arms to shreds. because he fell into the bathtub as a consequence of his own actions and is So So Sopping Wet" -> "smug anime catboy"
millennials 5 years after telling themselves 'we aren't going to bitch about how people these days are lazy and how people were smarter back in my day like boomers did'
[ID: cropped and blue tinted tumblr text that says "the problem isn't just that media literacy is slowly becoming a dying art. it's that people straight up do not pay attention when they watch tv/film anymore." end ID.]
just had a weird experience where I was in a crowd of people waiting for an event to start and then the minute it hit the commencement time, everyone collectively hushed to silence at the same moment, completely unprompted
NUREMBERG 2.0 - GENOCIDE WAR CRIMINALS - NOTHING CAN STOP WHAT IS COMING - Bill Gates, Anthony Fauci, Tedros Ghebreyesus, Alex Azar, Ralph Baric, Peter Daszak, Drosten, Albert Bourla, Stéphane Bancel, Klaus Schwab, Rockefellers, Rothschilds, the DOD are charged with Bioweapon Injection Genocide War Crimes.
If you do your research you'd realize all the above fμ¢%tards are not real. F. I. T. F. O. (Figure It The Fμ¢% Out) 🤔
This video was released in January 2023, who knows when it was actually done?
"Everything You See isn't Fake, it's controlled." - The Truman Show