evalstate's picture
evalstate HF Staff
Deploy Diffusers PR API
dbf7313 verified
from __future__ import annotations
from typing import Any
from urllib.parse import urlparse
def _user_fields(user: dict[str, Any] | None) -> dict[str, Any]:
user = user or {}
return {
"author_login": user.get("login"),
"author_id": user.get("id"),
"author_node_id": user.get("node_id"),
"author_type": user.get("type"),
"author_site_admin": user.get("site_admin"),
}
def _labels(labels: list[dict[str, Any]] | None) -> list[str]:
return [
name
for label in labels or []
if isinstance(label, dict) and isinstance((name := label.get("name")), str) and name
]
def _assignees(users: list[dict[str, Any]] | None) -> list[str]:
return [
login
for user in users or []
if isinstance(user, dict) and isinstance((login := user.get("login")), str) and login
]
def issue_url_to_number(issue_url: str | None) -> int | None:
    """Extract the trailing numeric path segment of a URL as an issue number.

    Args:
        issue_url: URL whose last path segment is expected to be the number,
            e.g. ``.../issues/123``. Trailing slashes are ignored.

    Returns:
        The number as an ``int``, or ``None`` when ``issue_url`` is empty,
        cannot be parsed as a URL, or does not end in a plain run of ASCII
        digits.
    """
    if not issue_url:
        return None
    try:
        path = urlparse(issue_url).path
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. unbalanced IPv6
        # brackets); report those as "no number" like any other bad input.
        return None
    tail = path.rstrip("/").rsplit("/", 1)[-1]
    # int() alone would also accept signs, underscores, padding and non-ASCII
    # digits ("-5", "1_000", ...), none of which are valid issue numbers.
    if not (tail.isascii() and tail.isdigit()):
        return None
    try:
        return int(tail)
    except ValueError:
        # Guards against the interpreter's limit on very long digit strings.
        return None
def normalize_issue(
    repo: str, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Build a flat issue record from a raw issue payload.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        item: Raw issue mapping (presumably a GitHub REST API issue object —
            confirm against the caller). Missing keys become ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict of scalar issue fields plus ``labels`` and ``assignees`` lists,
        followed by the ``author_*`` fields derived from ``item["user"]``.
    """
    get = item.get
    milestone = get("milestone") or {}
    record: dict[str, Any] = {
        "repo": repo,
        "github_id": get("id"),
        "github_node_id": get("node_id"),
        "number": get("number"),
        "html_url": get("html_url"),
        "api_url": get("url"),
        "title": get("title"),
        "body": get("body"),
        "state": get("state"),
        "state_reason": get("state_reason"),
        "locked": get("locked"),
        "comments_count": get("comments"),
        "labels": _labels(get("labels")),
        "assignees": _assignees(get("assignees")),
        "created_at": get("created_at"),
        "updated_at": get("updated_at"),
        "closed_at": get("closed_at"),
        "author_association": get("author_association"),
        "milestone_title": milestone.get("title"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are appended last so the key order stays the same.
    record.update(_user_fields(get("user")))
    return record
def normalize_pull_request(
    repo: str,
    issue_stub: dict[str, Any],
    pr_detail: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Build a flat pull-request record from an issue stub and PR detail.

    Issue-level fields (number, title, state, labels, timestamps, author) are
    read from ``issue_stub``. Merge, diff-size and branch fields are read from
    ``pr_detail``. The ids prefer ``pr_detail`` and fall back to
    ``issue_stub``; ``author_association`` prefers ``issue_stub`` and falls
    back to ``pr_detail``.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        issue_stub: Issue-shaped mapping for the pull request.
        pr_detail: Pull-request detail mapping; ``head``/``base`` and their
            nested ``repo`` may be missing or ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict of pull-request fields followed by the ``author_*`` fields
        derived from ``issue_stub["user"]``.
    """
    from_stub = issue_stub.get
    from_detail = pr_detail.get

    head = from_detail("head") or {}
    base = from_detail("base") or {}
    head_repo = head.get("repo") or {}
    base_repo = base.get("repo") or {}

    record: dict[str, Any] = {
        "repo": repo,
        "github_id": from_detail("id") or from_stub("id"),
        "github_node_id": from_detail("node_id") or from_stub("node_id"),
        "number": from_stub("number"),
        "html_url": from_stub("html_url"),
        "api_url": from_stub("url"),
        "title": from_stub("title"),
        "body": from_stub("body"),
        "state": from_stub("state"),
        "state_reason": from_stub("state_reason"),
        "locked": from_stub("locked"),
        "comments_count": from_stub("comments"),
        "labels": _labels(from_stub("labels")),
        "assignees": _assignees(from_stub("assignees")),
        "created_at": from_stub("created_at"),
        "updated_at": from_stub("updated_at"),
        "closed_at": from_stub("closed_at"),
        "author_association": (
            from_stub("author_association") or from_detail("author_association")
        ),
        "merged_at": from_detail("merged_at"),
        "merge_commit_sha": from_detail("merge_commit_sha"),
        "merged": from_detail("merged"),
        "mergeable": from_detail("mergeable"),
        "mergeable_state": from_detail("mergeable_state"),
        "draft": from_detail("draft"),
        "additions": from_detail("additions"),
        "deletions": from_detail("deletions"),
        "changed_files": from_detail("changed_files"),
        "commits": from_detail("commits"),
        "review_comments_count": from_detail("review_comments"),
        "maintainer_can_modify": from_detail("maintainer_can_modify"),
        "head_ref": head.get("ref"),
        "head_sha": head.get("sha"),
        "head_repo_full_name": head_repo.get("full_name"),
        "base_ref": base.get("ref"),
        "base_sha": base.get("sha"),
        "base_repo_full_name": base_repo.get("full_name"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are appended last so the key order stays the same.
    record.update(_user_fields(from_stub("user")))
    return record
def normalize_comment(
    repo: str,
    item: dict[str, Any],
    parent_kind: str,
    parent_number: int | None,
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Build a flat comment record from a raw comment payload.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        item: Raw comment mapping; missing keys become ``None``.
        parent_kind: Kind of the parent object, stored verbatim.
        parent_number: Number of the parent object, or ``None`` if unknown.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict of comment fields followed by the ``author_*`` fields derived
        from ``item["user"]``.
    """
    get = item.get
    record: dict[str, Any] = {
        "repo": repo,
        "github_id": get("id"),
        "github_node_id": get("node_id"),
        "parent_kind": parent_kind,
        "parent_number": parent_number,
        "html_url": get("html_url"),
        "api_url": get("url"),
        "issue_api_url": get("issue_url"),
        "body": get("body"),
        "created_at": get("created_at"),
        "updated_at": get("updated_at"),
        "author_association": get("author_association"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are appended last so the key order stays the same.
    record.update(_user_fields(get("user")))
    return record
def normalize_review(
    repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Build a flat pull-request review record from a raw review payload.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        pr_number: Number of the pull request the review belongs to.
        item: Raw review mapping; missing keys become ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict of review fields followed by the ``author_*`` fields derived
        from ``item["user"]``.
    """
    get = item.get
    record: dict[str, Any] = {
        "repo": repo,
        "github_id": get("id"),
        "github_node_id": get("node_id"),
        "pull_request_number": pr_number,
        "html_url": get("html_url"),
        "api_url": get("url"),
        "body": get("body"),
        "state": get("state"),
        "submitted_at": get("submitted_at"),
        "commit_id": get("commit_id"),
        "author_association": get("author_association"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are appended last so the key order stays the same.
    record.update(_user_fields(get("user")))
    return record
def normalize_review_comment(
    repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Build a flat review-comment record from a raw review-comment payload.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        pr_number: Number of the pull request the comment belongs to.
        item: Raw review-comment mapping; missing keys become ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict of comment, location (path/line/side) and commit fields followed
        by the ``author_*`` fields derived from ``item["user"]``.
    """
    get = item.get
    record: dict[str, Any] = {
        "repo": repo,
        "github_id": get("id"),
        "github_node_id": get("node_id"),
        "pull_request_number": pr_number,
        "review_id": get("pull_request_review_id"),
        "html_url": get("html_url"),
        "api_url": get("url"),
        "pull_request_api_url": get("pull_request_url"),
        "body": get("body"),
        "path": get("path"),
        "commit_id": get("commit_id"),
        "original_commit_id": get("original_commit_id"),
        "position": get("position"),
        "original_position": get("original_position"),
        "line": get("line"),
        "start_line": get("start_line"),
        "side": get("side"),
        "start_side": get("start_side"),
        "subject_type": get("subject_type"),
        "created_at": get("created_at"),
        "updated_at": get("updated_at"),
        "author_association": get("author_association"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are appended last so the key order stays the same.
    record.update(_user_fields(get("user")))
    return record
def normalize_pr_file(
    repo: str,
    pr_number: int,
    item: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Build a flat record for one file changed by a pull request.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        pr_number: Number of the pull request the file belongs to.
        item: Raw file mapping; missing keys become ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict with ``repo`` and ``pull_request_number``, then the file fields
        copied under their original names, then the snapshot metadata.
    """
    # Every file field is copied through under the same name it has in `item`.
    passthrough = (
        "sha",
        "filename",
        "status",
        "additions",
        "deletions",
        "changes",
        "blob_url",
        "raw_url",
        "contents_url",
        "previous_filename",
        "patch",
    )
    record: dict[str, Any] = {"repo": repo, "pull_request_number": pr_number}
    for field in passthrough:
        record[field] = item.get(field)
    record["snapshot_id"] = snapshot_id
    record["extracted_at"] = extracted_at
    return record
def normalize_pr_diff(
    repo: str,
    pr_number: int,
    html_url: str | None,
    api_url: str | None,
    diff: str,
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Bundle a pull request's diff text and identifying fields into a record.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        pr_number: Number of the pull request the diff belongs to.
        html_url: Web URL stored verbatim, or ``None``.
        api_url: API URL stored verbatim, or ``None``.
        diff: Diff text stored verbatim.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict holding exactly the arguments, keyed by column name.
    """
    columns = (
        "repo",
        "pull_request_number",
        "html_url",
        "api_url",
        "diff",
        "snapshot_id",
        "extracted_at",
    )
    values = (repo, pr_number, html_url, api_url, diff, snapshot_id, extracted_at)
    return dict(zip(columns, values))
def normalize_timeline_event(
    repo: str,
    number: int,
    parent_kind: str,
    item: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Build a flat record from a raw timeline event payload.

    Args:
        repo: Repository identifier stored under ``"repo"``.
        number: Number of the parent object, stored as ``parent_number``.
        parent_kind: Kind of the parent object, stored verbatim.
        item: Raw event mapping; ``source``, ``source["issue"]``, ``actor``
            and ``label`` may each be missing or ``None``.
        snapshot_id: Snapshot identifier copied onto the record.
        extracted_at: Extraction timestamp copied onto the record.

    Returns:
        A dict with the event name and timestamp, the actor login, the
        number/title/URL of ``source["issue"]``, the commit id and the label
        name. Any value whose source is absent is ``None``.
    """
    get = item.get
    # Nested objects fall back to empty dicts so the lookups below yield None.
    linked_issue = (get("source") or {}).get("issue") or {}
    actor = get("actor") or {}
    label = get("label") or {}

    record: dict[str, Any] = {
        "repo": repo,
        "parent_kind": parent_kind,
        "parent_number": number,
        "event": get("event"),
        "created_at": get("created_at"),
        "actor_login": actor.get("login"),
        "source_issue_number": linked_issue.get("number"),
        "source_issue_title": linked_issue.get("title"),
        "source_issue_url": linked_issue.get("html_url"),
        "commit_id": get("commit_id"),
        "label_name": label.get("name"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    return record