Skip to content

Commit 7989700

Browse files
committed
fix de-duplication of column names
1 parent ee8035d commit 7989700

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

Preparation/Join/Interval Join/script.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,24 @@
4444

4545
df = sqldf(q, globals())
4646

47+
def dedup_names(names):
    """Return a copy of *names* in which every entry is unique.

    The first occurrence of a name is kept unchanged. Each later
    occurrence is renamed to ``"<name>.<k>"``, where ``k`` starts at the
    number of earlier occurrences and is increased until the candidate
    does not clash with any name already present in the input or already
    generated by this call.

    Args:
        names: Iterable of hashable column labels (e.g. ``df.columns``).
            It is not modified.

    Returns:
        A new list of the same length with no duplicates. Renamed entries
        are strings; untouched entries keep their original value.
    """
    names = list(names)
    # Every label that is already in use. Generated names must avoid these,
    # otherwise ['a', 'a', 'a.1'] would turn into ['a', 'a.1', 'a.1'].
    taken = set(names)
    counts = {}

    for i, col in enumerate(names):
        cur_count = counts.get(col, 0)

        if cur_count > 0:
            candidate = '%s.%d' % (col, cur_count)
            # Skip suffixes that would collide with an existing label.
            while candidate in taken:
                cur_count += 1
                candidate = '%s.%d' % (col, cur_count)
            names[i] = candidate
            taken.add(candidate)

        # Next duplicate of this label continues after the suffix just used.
        counts[col] = cur_count + 1

    return names
60+
61+
62+
# Re-apply the renaming for as long as any column label is still flagged
# as a duplicate (keep=False marks every member of a duplicated group).
while df.columns.duplicated(keep=False).any():
    df.columns = dedup_names(df.columns)
4964

50-
cols=pd.Series(df.columns)
51-
for dup in df.columns[df.columns.duplicated(keep=False)]:
52-
cols[df.columns.get_loc(dup)] = ([dup + '.' + str(d_idx)
53-
if d_idx != 0
54-
else dup
55-
for d_idx in range(df.columns.get_loc(dup).sum())]
56-
)
57-
df.columns=cols
5865

5966
output_data = df
6067

0 commit comments

Comments
 (0)