A Vision-Language Model fine-tuned to classify SEC proxy statement tables as Summary Compensation Tables (SCT) vs non-SCT tables.
This model adds a classification head on top of Qwen/Qwen2.5-VL-3B-Instruct for binary table classification. The base model is frozen and only the classifier head is trained.
Task: Given an image of a table from an SEC DEF 14A filing, classify whether it is a Summary Compensation Table or not.
```python
import torch
import torch.nn as nn
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from safetensors.torch import load_file
from PIL import Image

# Define classifier: a small MLP head on top of the frozen VLM backbone
class VLMClassifier(nn.Module):
    def __init__(self, base_model, num_labels=2):
        super().__init__()
        self.base_model = base_model
        hidden_size = base_model.config.hidden_size
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(512, num_labels),
        )

    def forward(self, input_ids, attention_mask, pixel_values, image_grid_thw):
        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            pixel_values=pixel_values,
            image_grid_thw=image_grid_thw,
            output_hidden_states=True,
            return_dict=True,
        )
        # Mean-pool the last hidden layer over the sequence, then classify
        hidden_states = outputs.hidden_states[-1]
        pooled = hidden_states.mean(dim=1)
        return self.classifier(pooled.float())

# Load the base model, attach the head, and restore its trained weights
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="cuda:0",
)
model = VLMClassifier(base_model, num_labels=2).to("cuda")
model.classifier.load_state_dict(load_file("classifier_head.safetensors"))
model.eval()

# Inference on a single table image
img = Image.open("table.png").convert("RGB")
messages = [[{
    "role": "user",
    "content": [
        {"type": "image", "image": img},
        {"type": "text", "text": "Classify this table."},
    ],
}]]
texts = [processor.apply_chat_template(m, tokenize=False, add_generation_prompt=True) for m in messages]
inputs = processor(text=texts, images=[img], padding=True, return_tensors="pt")

with torch.no_grad():
    logits = model(
        inputs["input_ids"].to("cuda"),
        inputs["attention_mask"].to("cuda"),
        inputs["pixel_values"].to("cuda", dtype=torch.bfloat16),
        inputs["image_grid_thw"].to("cuda"),
    )

prob_sct = torch.softmax(logits, dim=-1)[0, 1].item()
print(f"P(SCT) = {prob_sct:.3f}")

# Use threshold 0.3 for fewer false negatives
is_sct = prob_sct >= 0.3
```
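The 0.3 cutoff is deliberately below the neutral 0.5: in an extraction pipeline, missing a real Summary Compensation Table (a false negative) is usually costlier than passing an extra candidate downstream, so the threshold trades some precision for recall. Reusing the `model` and `processor` loaded above, the same procedure extends to screening a whole directory of table images. This is a minimal sketch; the `classify_table` helper and the `tables/` directory are illustrative, not part of this repo:

```python
from pathlib import Path

def classify_table(path, threshold=0.3):
    """Return (P(SCT), decision) for one table image, reusing the loaded model."""
    img = Image.open(path).convert("RGB")
    conversation = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": "Classify this table."},
        ],
    }]
    text = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[img], padding=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(
            inputs["input_ids"].to("cuda"),
            inputs["attention_mask"].to("cuda"),
            inputs["pixel_values"].to("cuda", dtype=torch.bfloat16),
            inputs["image_grid_thw"].to("cuda"),
        )
    p = torch.softmax(logits, dim=-1)[0, 1].item()
    return p, p >= threshold

# "tables/" is a hypothetical directory of per-table PNGs
for png in sorted(Path("tables").glob("*.png")):
    p, is_sct = classify_table(png)
    print(f"{png.name}: P(SCT)={p:.3f} -> {'SCT' if is_sct else 'non-SCT'}")
```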
Files:
- `classifier_head.safetensors` - Classifier head weights
- `classifier_config.json` - Model configuration
- `config.json` - Base model config
- `notebooks/` - Training and testing notebooks

Part of the SEC executive compensation extraction pipeline.
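The snippet above reads `classifier_head.safetensors` from the current directory. If you are loading straight from the Hub instead, `huggingface_hub` can fetch the file first. A minimal sketch; the `repo_id` below is a placeholder for this model's actual repository:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id -- replace with this model's actual Hub repo
weights_path = hf_hub_download(
    repo_id="your-username/sct-table-classifier",
    filename="classifier_head.safetensors",
)
model.classifier.load_state_dict(load_file(weights_path))
```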
License: Apache 2.0 (same as the base model)
Base model: Qwen/Qwen2.5-VL-3B-Instruct
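For reference, the frozen-backbone setup described above (base model frozen, only the head updated) follows a standard pattern. The actual training code lives in `notebooks/` and may differ; this is a minimal sketch that reuses the `model` from the usage snippet, with an assumed learning rate and a hypothetical `train_loader` yielding processor outputs plus integer labels:

```python
# Freeze the backbone; only the head receives gradients
for p in model.base_model.parameters():
    p.requires_grad_(False)

optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=1e-4)  # lr is an assumption
loss_fn = nn.CrossEntropyLoss()

model.train()
for batch in train_loader:  # hypothetical DataLoader of processor outputs + labels
    logits = model(
        batch["input_ids"].to("cuda"),
        batch["attention_mask"].to("cuda"),
        batch["pixel_values"].to("cuda", dtype=torch.bfloat16),
        batch["image_grid_thw"].to("cuda"),
    )
    loss = loss_fn(logits, batch["labels"].to("cuda"))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```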