Skip to content

Commit 4e434f1

Browse files
authored
fix duplicate authors (#14)
* fix duplicate authors
* fix lint
* run format
1 parent e2d433b commit 4e434f1

File tree

7 files changed: 183 additions (+183) and 84 deletions (-84)

7 files changed

+183
-84
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"changeset": patch
3+
---
4+
5+
fix duplicate authors in changelog

changeset/changelog.py

Lines changed: 97 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
114114
commit_hash = result.stdout.strip().split("\n")[0]
115115
metadata["commit_hash"] = commit_hash
116116

117-
118117
# Get the commit message to extract PR number and co-authors
119118
msg_result = subprocess.run(
120119
["git", "log", "-1", "--format=%B", commit_hash],
@@ -136,8 +135,8 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
136135
# Try to get PR author using GitHub CLI if available
137136
try:
138137
# Check if we're in GitHub Actions and have a token
139-
gh_token = (
140-
os.environ.get('GITHUB_TOKEN') or os.environ.get('GH_TOKEN')
138+
gh_token = os.environ.get("GITHUB_TOKEN") or os.environ.get(
139+
"GH_TOKEN"
141140
)
142141

143142
cmd = [
@@ -151,7 +150,7 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
151150

152151
env = os.environ.copy()
153152
if gh_token:
154-
env['GH_TOKEN'] = gh_token
153+
env["GH_TOKEN"] = gh_token
155154

156155
gh_result = subprocess.run(
157156
cmd,
@@ -170,14 +169,12 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
170169

171170
# Also try to get co-authors from PR commits
172171
try:
173-
# Get all commits in the PR
172+
# Get all commits in the PR with full author info
174173
cmd = [
175174
"gh",
176175
"api",
177176
f"repos/{git_info.get('owner', '')}/"
178177
f"{git_info.get('repo', '')}/pulls/{pr_number}/commits",
179-
"--jq",
180-
".[].author.login",
181178
]
182179

183180
commits_result = subprocess.run(
@@ -188,15 +185,31 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
188185
env=env,
189186
)
190187
if commits_result.stdout.strip():
191-
# Get unique commit authors (excluding the PR author)
192-
commit_authors = set(
193-
commits_result.stdout.strip().split('\n')
194-
)
195-
commit_authors.discard(metadata.get("pr_author"))
196-
commit_authors.discard('') # Remove empty strings
197-
if commit_authors:
198-
metadata["co_authors"] = list(commit_authors)
188+
import json
189+
190+
commits_data = json.loads(commits_result.stdout)
191+
192+
# Build a map of GitHub usernames to their info
193+
github_users = {}
194+
for commit in commits_data:
195+
author = commit.get("author")
196+
if author and author.get("login"):
197+
username = author["login"]
198+
pr_author = metadata.get("pr_author")
199+
if username and username != pr_author:
200+
commit_data = commit.get("commit", {})
201+
commit_author = commit_data.get("author", {})
202+
github_users[username] = {
203+
"login": username,
204+
"name": commit_author.get("name", ""),
205+
"email": commit_author.get("email", ""),
206+
}
207+
208+
if github_users:
209+
metadata["co_authors"] = list(github_users.keys())
199210
metadata["co_authors_are_usernames"] = True
211+
# Store the full user info for deduplication later
212+
metadata["github_user_info"] = github_users
200213
except Exception:
201214
pass
202215

@@ -226,25 +239,53 @@ def get_changeset_metadata(changeset_path: Path) -> dict:
226239
metadata["pr_author"] = author_result.stdout.strip()
227240
metadata["pr_author_is_username"] = False
228241

229-
# Extract co-authors from commit message if we don't already have
230-
# them from GitHub API
231-
if "co_authors" not in metadata:
232-
co_authors = []
233-
for line in commit_msg.split('\n'):
234-
co_author_match = re.match(
235-
r'^Co-authored-by:\s*(.+?)\s*<.*>$', line.strip()
236-
)
237-
if co_author_match:
238-
co_author_name = co_author_match.group(1).strip()
239-
if (
240-
co_author_name
241-
and co_author_name != metadata.get("pr_author")
242-
):
243-
co_authors.append(co_author_name)
244-
metadata["co_authors_are_usernames"] = False
245-
246-
if co_authors:
247-
metadata["co_authors"] = co_authors
242+
# Extract co-authors from commit message
243+
co_authors_from_commits = []
244+
for line in commit_msg.split("\n"):
245+
co_author_match = re.match(
246+
r"^Co-authored-by:\s*(.+?)\s*<(.+?)>$", line.strip()
247+
)
248+
if co_author_match:
249+
co_author_name = co_author_match.group(1).strip()
250+
co_author_email = co_author_match.group(2).strip()
251+
if co_author_name and co_author_name != metadata.get("pr_author"):
252+
co_authors_from_commits.append(
253+
{"name": co_author_name, "email": co_author_email}
254+
)
255+
256+
# Deduplicate co-authors using GitHub user info
257+
if "co_authors" in metadata and metadata.get("github_user_info"):
258+
# We have GitHub users - check if commit co-authors match
259+
github_users = metadata.get("github_user_info", {})
260+
final_co_authors = []
261+
262+
# Add all GitHub users
263+
for username in metadata["co_authors"]:
264+
final_co_authors.append((username, True))
265+
266+
# Check commit co-authors against GitHub users
267+
for commit_author in co_authors_from_commits:
268+
is_duplicate = False
269+
for username, user_info in github_users.items():
270+
# Check by email (most reliable)
271+
if commit_author["email"] == user_info.get("email", ""):
272+
is_duplicate = True
273+
break
274+
# Check by name
275+
if commit_author["name"] == user_info.get("name", ""):
276+
is_duplicate = True
277+
break
278+
279+
if not is_duplicate:
280+
# This is a unique co-author not in GitHub commits
281+
final_co_authors.append((commit_author["name"], False))
282+
283+
metadata["co_authors"] = final_co_authors
284+
elif co_authors_from_commits:
285+
# No GitHub API data - just use commit co-authors
286+
metadata["co_authors"] = [
287+
(author["name"], False) for author in co_authors_from_commits
288+
]
248289

249290
except subprocess.CalledProcessError:
250291
# If git commands fail, return empty metadata
@@ -273,7 +314,11 @@ def format_changelog_entry(entry: dict, config: dict, pr_metadata: dict) -> str:
273314
pr_author = pr_metadata.get("pr_author")
274315
pr_author_is_username = pr_metadata.get("pr_author_is_username", False)
275316
co_authors = pr_metadata.get("co_authors", [])
276-
co_authors_are_usernames = pr_metadata.get("co_authors_are_usernames", False)
317+
# Support legacy format where co_authors might be simple strings
318+
if co_authors and isinstance(co_authors[0], str):
319+
# Convert legacy format to new tuple format
320+
co_authors_are_usernames = pr_metadata.get("co_authors_are_usernames", False)
321+
co_authors = [(author, co_authors_are_usernames) for author in co_authors]
277322
commit_hash = pr_metadata.get("commit_hash", "")[:7]
278323
repo_url = pr_metadata.get("repo_url", "")
279324

@@ -301,14 +346,24 @@ def format_changelog_entry(entry: dict, config: dict, pr_metadata: dict) -> str:
301346
authors_to_thank.append(pr_author)
302347

303348
# Add co-authors
304-
for co_author in co_authors:
305-
if co_author.startswith("@"):
306-
authors_to_thank.append(co_author)
307-
elif co_authors_are_usernames:
308-
authors_to_thank.append(f"@{co_author}")
349+
for co_author_entry in co_authors:
350+
# Handle both new tuple format and legacy string format
351+
if isinstance(co_author_entry, tuple):
352+
co_author, is_username = co_author_entry
353+
if co_author.startswith("@"):
354+
authors_to_thank.append(co_author)
355+
elif is_username:
356+
authors_to_thank.append(f"@{co_author}")
357+
else:
358+
# Display name from git - don't add @
359+
authors_to_thank.append(co_author)
309360
else:
310-
# Display names from git - don't add @
311-
authors_to_thank.append(co_author)
361+
# Legacy format - just a string
362+
if co_author_entry.startswith("@"):
363+
authors_to_thank.append(co_author_entry)
364+
else:
365+
# Assume it's a display name without context
366+
authors_to_thank.append(co_author_entry)
312367

313368
if authors_to_thank:
314369
if len(authors_to_thank) == 1:

changeset/changeset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,12 @@ def version(dry_run: bool, skip_changelog: bool):
680680
package_changes[package] = {"changes": [], "descriptions": []}
681681
package_changes[package]["changes"].append(change_type)
682682
package_changes[package]["descriptions"].append(
683-
{"type": change_type, "description": desc, "changeset": filepath.name, "filepath": filepath}
683+
{
684+
"type": change_type,
685+
"description": desc,
686+
"changeset": filepath.name,
687+
"filepath": filepath,
688+
}
684689
)
685690

686691
# Show changesets

tests/conftest.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,25 @@ def temp_repo(tmp_path: Path) -> Generator[Path]:
2323
subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True)
2424
subprocess.run(
2525
["git", "config", "user.email", "test@example.com"],
26-
cwd=tmp_path, check=True, capture_output=True
26+
cwd=tmp_path,
27+
check=True,
28+
capture_output=True,
2729
)
2830
subprocess.run(
2931
["git", "config", "user.name", "Test User"],
30-
cwd=tmp_path, check=True, capture_output=True
32+
cwd=tmp_path,
33+
check=True,
34+
capture_output=True,
3135
)
3236

3337
# Create initial commit
3438
(tmp_path / "README.md").write_text("# Test Project")
3539
subprocess.run(["git", "add", "."], cwd=tmp_path, check=True, capture_output=True)
3640
subprocess.run(
3741
["git", "commit", "-m", "Initial commit"],
38-
cwd=tmp_path, check=True, capture_output=True
42+
cwd=tmp_path,
43+
check=True,
44+
capture_output=True,
3945
)
4046

4147
yield tmp_path
@@ -54,6 +60,7 @@ def sample_project(temp_repo: Path) -> Path:
5460
}
5561

5662
import toml
63+
5764
with open(temp_repo / "pyproject.toml", "w") as f:
5865
toml.dump(pyproject_content, f)
5966

@@ -80,6 +87,7 @@ def multi_package_project(temp_repo: Path) -> Path:
8087
}
8188

8289
import toml
90+
8391
with open(pkg1_dir / "pyproject.toml", "w") as f:
8492
toml.dump(pyproject1, f)
8593

@@ -114,7 +122,7 @@ def initialized_changeset_project(sample_project: Path) -> Path:
114122
"major": {"description": "Breaking changes", "emoji": "💥"},
115123
"minor": {"description": "New features", "emoji": "✨"},
116124
"patch": {"description": "Bug fixes and improvements", "emoji": "🐛"},
117-
}
125+
},
118126
}
119127

120128
with open(changeset_dir / "config.json", "w") as f:

0 commit comments

Comments
 (0)