diff --git a/src/patchman/utils/git_patch_parser/models.py b/src/patchman/utils/git_patch_parser/models.py
new file mode 100644
index 0000000..82c98fa
--- /dev/null
+++ b/src/patchman/utils/git_patch_parser/models.py
@@ -0,0 +1,42 @@
+from pydantic import BaseModel, Field
+from typing import Optional
+
+
+class GitPatch(BaseModel):
+    """Represents a parsed Git patch with structured fields.
+
+    Attributes:
+        commit_hash (str): 40-character SHA-1 commit hash from the 'From' header.
+        author (str): Author line from the patch header (e.g., 'Max <max@example.com>').
+        date (str): Commit date in RFC-2822 format (e.g., 'Wed, 23 Jul 2025 12:34:56 +0200').
+        subject (str): Commit subject line from the Subject header.
+        description (Optional[str]): Optional description text from the commit body (between header and diff).
+        diff (str): Complete patch diff (starting with 'diff --git' or '--- a/...').
+    """
+    commit_hash: str = Field(
+        ...,
+        min_length=40,
+        max_length=40,
+        pattern="^[0-9a-f]{40}$",
+        description="40-character SHA-1 commit hash from the 'From' header"
+    )
+    author: str = Field(
+        ...,
+        description="Author line from the patch header (e.g. 'Max <max@example.com>')"
+    )
+    date: str = Field(
+        ...,
+        description="Commit date in RFC-2822 format, e.g. 'Wed, 23 Jul 2025 12:34:56 +0200'"
+    )
+    subject: str = Field(
+        ...,
+        description="Commit subject line from the Subject header"
+    )
+    description: Optional[str] = Field(
+        default=None,
+        description="Optional description text from the commit body (between header and diff)"
+    )
+    diff: str = Field(
+        ...,
+        description="Complete patch diff (starting with 'diff --git' or '--- a/...')"
+    )
diff --git a/src/patchman/utils/git_patch_parser/parser.py b/src/patchman/utils/git_patch_parser/parser.py
new file mode 100644
index 0000000..a3d8f6b
--- /dev/null
+++ b/src/patchman/utils/git_patch_parser/parser.py
@@ -0,0 +1,87 @@
+import re
+from .models import GitPatch
+
+# Match the commit hash from the 'From' line at the beginning of the patch
+RE_COMMIT_HASH = re.compile(
+    r'^From\s+(?P<hash>[0-9a-f]{40})\s',
+    flags=re.MULTILINE
+)
+
+# Extract author information from the 'From:' header line
+RE_AUTHOR = re.compile(r'^From:\s+(?P<author>.+)$', re.MULTILINE)
+
+# Extract commit date from the 'Date:' header line
+RE_DATE = re.compile(r'^Date:\s+(?P<date>.+)$', re.MULTILINE)
+
+# Extract commit subject from the 'Subject:' header line
+RE_SUBJECT = re.compile(r'^Subject:\s+(?P<subject>.+)$', re.MULTILINE)
+
+# Extract commit description: everything between the first empty line after the
+# headers and the first line that starts the patch section. That is either the
+# '---' separator git format-patch puts before the diffstat, or, when no
+# separator is present, the first "diff --git" / "--- a/" line.
+RE_DESCRIPTION = re.compile(
+    r'\n\n(?P<desc>.*?)(?=^(?:---$|diff --git|---\s[a-b]/))',
+    flags=re.DOTALL | re.MULTILINE
+)
+
+# Extract the diff content: from the first "diff --git" or "--- a/" to the end of file
+RE_DIFF = re.compile(
+    r'\n(?P<diff>(?:diff --git|---\s[a-b]/).*?)\Z',
+    flags=re.DOTALL
+)
+
+
+def parse_git_patch(patch_text: str) -> GitPatch:
+    """Parse the text of a git format-patch file into a GitPatch model.
+
+    The commit hash, author, date and subject are required. The description
+    and the diff are optional and fall back to empty strings when absent.
+
+    Args:
+        patch_text (str): The raw text content of a git format-patch file.
+
+    Returns:
+        GitPatch: A structured representation of the patch.
+
+    Raises:
+        ValueError: If required fields (commit hash, author, date, subject) are missing.
+    """
+    # Search for required fields with null checks
+    commit_match = RE_COMMIT_HASH.search(patch_text)
+    if not commit_match:
+        raise ValueError("Commit hash not found in patch")
+    commit_hash = commit_match.group('hash')
+
+    author_match = RE_AUTHOR.search(patch_text)
+    if not author_match:
+        raise ValueError("Author not found in patch")
+    author = author_match.group('author')
+
+    date_match = RE_DATE.search(patch_text)
+    if not date_match:
+        raise ValueError("Date not found in patch")
+    date = date_match.group('date')
+
+    subject_match = RE_SUBJECT.search(patch_text)
+    if not subject_match:
+        raise ValueError("Subject not found in patch")
+    subject = subject_match.group('subject')
+
+    # Extract optional fields
+    desc_match = RE_DESCRIPTION.search(patch_text)
+    diff_match = RE_DIFF.search(patch_text)
+
+    # Process optional fields with fallback to empty strings; rstrip drops the
+    # blank lines that separate the description from the patch section
+    description = desc_match.group('desc').rstrip() if desc_match else ''
+    diff = diff_match.group('diff') if diff_match else ''
+
+    return GitPatch(
+        commit_hash=commit_hash,
+        author=author,
+        date=date,
+        subject=subject,
+        description=description,
+        diff=diff,
+    )
diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 0000000..4e32d2c
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,100 @@
+import unittest
+from patchman.utils.git_patch_parser.parser import parse_git_patch
+from patchman.utils.git_patch_parser.models import GitPatch
+
+class TestGitPatchParser(unittest.TestCase):
+    def test_parse_valid_patch(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+From: Max Mustermann <max@example.com>
+Date: Wed, 23 Jul 2025 12:34:56 +0200
+Subject: [PATCH] Dein Patch-Titel
+
+Dies ist eine Beschreibung des Patches.
+
+diff --git a/file.txt b/file.txt
+index 83db48f..f735c3b 100644
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-Hello World
++Hello Patchman
+"""
+        expected_patch = GitPatch(
+            commit_hash="0123456789abcdef1234567890abcdef12345678",
+            author="Max Mustermann <max@example.com>",
+            date="Wed, 23 Jul 2025 12:34:56 +0200",
+            subject="[PATCH] Dein Patch-Titel",
+            description="Dies ist eine Beschreibung des Patches.",
+            diff="diff --git a/file.txt b/file.txt\nindex 83db48f..f735c3b 100644\n--- a/file.txt\n+++ b/file.txt\n@@ -1 +1 @@\n-Hello World\n+Hello Patchman\n"
+        )
+        parsed_patch = parse_git_patch(patch_text)
+        self.assertEqual(parsed_patch, expected_patch)
+
+    def test_parse_patch_with_diffstat(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+From: Test Author <test@example.com>
+Date: Wed, 23 Jul 2025 12:34:56 +0200
+Subject: [PATCH] With diffstat
+
+Body text.
+---
+ file.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -1 +1 @@
+-Hello World
++Hello Patchman
+"""
+        parsed_patch = parse_git_patch(patch_text)
+        self.assertEqual(parsed_patch.description, "Body text.")
+        self.assertTrue(parsed_patch.diff.startswith("diff --git a/file.txt b/file.txt\n"))
+
+    def test_parse_invalid_patch(self):
+        patch_text = "Invalid patch content"
+        with self.assertRaises(ValueError) as context:
+            parse_git_patch(patch_text)
+        self.assertIn("Commit hash not found", str(context.exception))
+
+    def test_parse_patch_missing_author(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+Date: Wed, 23 Jul 2025 12:34:56 +0200
+Subject: [PATCH] Test"""
+        with self.assertRaises(ValueError) as context:
+            parse_git_patch(patch_text)
+        self.assertIn("Author not found", str(context.exception))
+
+    def test_parse_patch_missing_date(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+From: Test Author <test@example.com>
+Subject: [PATCH] Test"""
+        with self.assertRaises(ValueError) as context:
+            parse_git_patch(patch_text)
+        self.assertIn("Date not found", str(context.exception))
+
+    def test_parse_patch_missing_subject(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+From: Test Author <test@example.com>
+Date: Wed, 23 Jul 2025 12:34:56 +0200"""
+        with self.assertRaises(ValueError) as context:
+            parse_git_patch(patch_text)
+        self.assertIn("Subject not found", str(context.exception))
+
+    def test_parse_patch_without_description_and_diff(self):
+        patch_text = """From 0123456789abcdef1234567890abcdef12345678 Mon Sep 17 00:00:00 2001
+From: Test Author <test@example.com>
+Date: Wed, 23 Jul 2025 12:34:56 +0200
+Subject: [PATCH] Minimal patch"""
+
+        parsed_patch = parse_git_patch(patch_text)
+        self.assertEqual(parsed_patch.commit_hash, "0123456789abcdef1234567890abcdef12345678")
+        self.assertEqual(parsed_patch.author, "Test Author <test@example.com>")
+        self.assertEqual(parsed_patch.date, "Wed, 23 Jul 2025 12:34:56 +0200")
+        self.assertEqual(parsed_patch.subject, "[PATCH] Minimal patch")
+        self.assertEqual(parsed_patch.description, "")
+        self.assertEqual(parsed_patch.diff, "")
+
+if __name__ == '__main__':
+    unittest.main()