Spaces:
Running
Running
| from __future__ import annotations | |
| from typing import Any | |
| from urllib.parse import urlparse | |
| def _user_fields(user: dict[str, Any] | None) -> dict[str, Any]: | |
| user = user or {} | |
| return { | |
| "author_login": user.get("login"), | |
| "author_id": user.get("id"), | |
| "author_node_id": user.get("node_id"), | |
| "author_type": user.get("type"), | |
| "author_site_admin": user.get("site_admin"), | |
| } | |
| def _labels(labels: list[dict[str, Any]] | None) -> list[str]: | |
| return [ | |
| name | |
| for label in labels or [] | |
| if isinstance(label, dict) and isinstance((name := label.get("name")), str) and name | |
| ] | |
| def _assignees(users: list[dict[str, Any]] | None) -> list[str]: | |
| return [ | |
| login | |
| for user in users or [] | |
| if isinstance(user, dict) and isinstance((login := user.get("login")), str) and login | |
| ] | |
| def issue_url_to_number(issue_url: str | None) -> int | None: | |
| if not issue_url: | |
| return None | |
| path = urlparse(issue_url).path.rstrip("/") | |
| tail = path.rsplit("/", 1)[-1] | |
| try: | |
| return int(tail) | |
| except ValueError: | |
| return None | |
def normalize_issue(
    repo: str, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Flatten one GitHub issue API object into a single normalized row."""
    get = item.get
    milestone = get("milestone") or {}
    row = {
        "repo": repo,
        "github_id": get("id"),
        "github_node_id": get("node_id"),
        "number": get("number"),
        "html_url": get("html_url"),
        "api_url": get("url"),
        "title": get("title"),
        "body": get("body"),
        "state": get("state"),
        "state_reason": get("state_reason"),
        "locked": get("locked"),
        "comments_count": get("comments"),
        "labels": _labels(get("labels")),
        "assignees": _assignees(get("assignees")),
        "created_at": get("created_at"),
        "updated_at": get("updated_at"),
        "closed_at": get("closed_at"),
        "author_association": get("author_association"),
        "milestone_title": milestone.get("title"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns come from the flattened user object.
    row.update(_user_fields(get("user")))
    return row
def normalize_pull_request(
    repo: str,
    issue_stub: dict[str, Any],
    pr_detail: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Merge an issue-list stub and a full PR detail payload into one row.

    Issue-level fields are read from the stub; id/node_id/author_association
    prefer the PR detail but fall back to the stub. Merge status, diff stats,
    and branch info come only from the detail payload.
    """
    stub = issue_stub.get
    detail = pr_detail.get
    head = detail("head") or {}
    base = detail("base") or {}
    row = {
        "repo": repo,
        "github_id": detail("id") or stub("id"),
        "github_node_id": detail("node_id") or stub("node_id"),
        "number": stub("number"),
        "html_url": stub("html_url"),
        "api_url": stub("url"),
        "title": stub("title"),
        "body": stub("body"),
        "state": stub("state"),
        "state_reason": stub("state_reason"),
        "locked": stub("locked"),
        "comments_count": stub("comments"),
        "labels": _labels(stub("labels")),
        "assignees": _assignees(stub("assignees")),
        "created_at": stub("created_at"),
        "updated_at": stub("updated_at"),
        "closed_at": stub("closed_at"),
        "author_association": stub("author_association") or detail("author_association"),
        "merged_at": detail("merged_at"),
        "merge_commit_sha": detail("merge_commit_sha"),
        "merged": detail("merged"),
        "mergeable": detail("mergeable"),
        "mergeable_state": detail("mergeable_state"),
        "draft": detail("draft"),
        "additions": detail("additions"),
        "deletions": detail("deletions"),
        "changed_files": detail("changed_files"),
        "commits": detail("commits"),
        "review_comments_count": detail("review_comments"),
        "maintainer_can_modify": detail("maintainer_can_modify"),
        "head_ref": head.get("ref"),
        "head_sha": head.get("sha"),
        "head_repo_full_name": (head.get("repo") or {}).get("full_name"),
        "base_ref": base.get("ref"),
        "base_sha": base.get("sha"),
        "base_repo_full_name": (base.get("repo") or {}).get("full_name"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }
    # Author columns are taken from the stub's user, matching normalize_issue.
    row.update(_user_fields(stub("user")))
    return row
def normalize_comment(
    repo: str,
    item: dict[str, Any],
    parent_kind: str,
    parent_number: int | None,
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Flatten an issue/PR comment payload into a normalized row."""
    row: dict[str, Any] = {
        "repo": repo,
        "github_id": item.get("id"),
        "github_node_id": item.get("node_id"),
        "parent_kind": parent_kind,
        "parent_number": parent_number,
    }
    # Columns copied straight from the API object (output key, source key).
    for dest, src in (
        ("html_url", "html_url"),
        ("api_url", "url"),
        ("issue_api_url", "issue_url"),
        ("body", "body"),
        ("created_at", "created_at"),
        ("updated_at", "updated_at"),
        ("author_association", "author_association"),
    ):
        row[dest] = item.get(src)
    row["snapshot_id"] = snapshot_id
    row["extracted_at"] = extracted_at
    row.update(_user_fields(item.get("user")))
    return row
def normalize_review(
    repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Flatten a pull-request review payload into a normalized row."""
    row: dict[str, Any] = {
        "repo": repo,
        "github_id": item.get("id"),
        "github_node_id": item.get("node_id"),
        "pull_request_number": pr_number,
    }
    # Columns copied straight from the API object (output key, source key).
    for dest, src in (
        ("html_url", "html_url"),
        ("api_url", "url"),
        ("body", "body"),
        ("state", "state"),
        ("submitted_at", "submitted_at"),
        ("commit_id", "commit_id"),
        ("author_association", "author_association"),
    ):
        row[dest] = item.get(src)
    row["snapshot_id"] = snapshot_id
    row["extracted_at"] = extracted_at
    row.update(_user_fields(item.get("user")))
    return row
def normalize_review_comment(
    repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str
) -> dict[str, Any]:
    """Flatten a PR review (inline diff) comment payload into a normalized row."""
    row: dict[str, Any] = {
        "repo": repo,
        "github_id": item.get("id"),
        "github_node_id": item.get("node_id"),
        "pull_request_number": pr_number,
        "review_id": item.get("pull_request_review_id"),
    }
    # Columns copied straight from the API object (output key, source key).
    for dest, src in (
        ("html_url", "html_url"),
        ("api_url", "url"),
        ("pull_request_api_url", "pull_request_url"),
        ("body", "body"),
        ("path", "path"),
        ("commit_id", "commit_id"),
        ("original_commit_id", "original_commit_id"),
        ("position", "position"),
        ("original_position", "original_position"),
        ("line", "line"),
        ("start_line", "start_line"),
        ("side", "side"),
        ("start_side", "start_side"),
        ("subject_type", "subject_type"),
        ("created_at", "created_at"),
        ("updated_at", "updated_at"),
        ("author_association", "author_association"),
    ):
        row[dest] = item.get(src)
    row["snapshot_id"] = snapshot_id
    row["extracted_at"] = extracted_at
    row.update(_user_fields(item.get("user")))
    return row
def normalize_pr_file(
    repo: str,
    pr_number: int,
    item: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Normalize one changed-file entry from a PR's files listing."""
    row: dict[str, Any] = {"repo": repo, "pull_request_number": pr_number}
    # These keys pass through unchanged from the API object.
    for field in (
        "sha",
        "filename",
        "status",
        "additions",
        "deletions",
        "changes",
        "blob_url",
        "raw_url",
        "contents_url",
        "previous_filename",
        "patch",
    ):
        row[field] = item.get(field)
    row["snapshot_id"] = snapshot_id
    row["extracted_at"] = extracted_at
    return row
| def normalize_pr_diff( | |
| repo: str, | |
| pr_number: int, | |
| html_url: str | None, | |
| api_url: str | None, | |
| diff: str, | |
| snapshot_id: str, | |
| extracted_at: str, | |
| ) -> dict[str, Any]: | |
| return { | |
| "repo": repo, | |
| "pull_request_number": pr_number, | |
| "html_url": html_url, | |
| "api_url": api_url, | |
| "diff": diff, | |
| "snapshot_id": snapshot_id, | |
| "extracted_at": extracted_at, | |
| } | |
def normalize_timeline_event(
    repo: str,
    number: int,
    parent_kind: str,
    item: dict[str, Any],
    snapshot_id: str,
    extracted_at: str,
) -> dict[str, Any]:
    """Flatten a timeline event for an issue or PR into a normalized row.

    Nested objects (``actor``, ``label``, ``source.issue``) are optional and
    degrade to ``None`` columns when absent.
    """
    actor = item.get("actor") or {}
    label = item.get("label") or {}
    src_issue = (item.get("source") or {}).get("issue") or {}
    return {
        "repo": repo,
        "parent_kind": parent_kind,
        "parent_number": number,
        "event": item.get("event"),
        "created_at": item.get("created_at"),
        "actor_login": actor.get("login"),
        "source_issue_number": src_issue.get("number"),
        "source_issue_title": src_issue.get("title"),
        "source_issue_url": src_issue.get("html_url"),
        "commit_id": item.get("commit_id"),
        "label_name": label.get("name"),
        "snapshot_id": snapshot_id,
        "extracted_at": extracted_at,
    }