Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions tests/integration/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def engine():
remove=True,
network="dask-sql",
environment={"POSTGRES_HOST_AUTH_METHOD": "trust"},
ports={"5432/tcp": "5432"},
)

try:
Expand All @@ -32,6 +33,7 @@ def engine():
# get the address and create the connection
postgres.reload()
address = postgres.attrs["NetworkSettings"]["Networks"]["dask-sql"]["IPAddress"]
address = "localhost"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @flcong, apologies for not letting you know about this earlier. Once you have tested with a custom PostgreSQL address, please replace that address with the original Docker container host address; otherwise, the GitHub workflow will fail.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. I see.

port = 5432

engine = sqlalchemy.create_engine(
Expand Down Expand Up @@ -126,6 +128,92 @@ def test_join(assert_query_gives_same_result):
)


def test_join_lricomplex(
assert_query_gives_same_result,
engine,
user_table_ts,
user_table_pn,
user_table_lk,
user_table_lk2,
c,
):
# ---------- Panel data
# Left Join
assert_query_gives_same_result(
"""
select a.*, b.startdate, b.enddate, b.lk_nullint, b.lk_int, b.lk_str,
b.lk_float, b.lk_date
from user_table_pn a left join user_table_lk b
on a.ids=b.id and b.startdate<=a.dates and a.dates<=b.enddate
""",
["ids", "dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
)
# Right Join
assert_query_gives_same_result(
"""
select b.*, a.startdate, a.enddate, a.lk_nullint, a.lk_int, a.lk_str,
a.lk_float, a.lk_date
from user_table_lk a right join user_table_pn b
on b.ids=a.id and a.startdate<=b.dates and b.dates<=a.enddate
""",
["ids", "dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
)
# Inner Join
assert_query_gives_same_result(
"""
select a.*, b.startdate, b.enddate, b.lk_nullint, b.lk_int, b.lk_str,
b.lk_float, b.lk_date
from user_table_pn a inner join user_table_lk b
on a.ids=b.id and b.startdate<=a.dates and a.dates<=b.enddate
""",
["ids", "dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
)

# ---------- Time-series data
# Left Join
assert_query_gives_same_result(
"""
select a.*, b.startdate, b.enddate, b.lk_nullint, b.lk_int, b.lk_str,
b.lk_float, b.lk_date
from user_table_ts a left join user_table_lk2 b
on b.startdate<=a.dates and a.dates<=b.enddate
""",
["dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
)
# Right Join
assert_query_gives_same_result(
"""
select b.*, a.startdate, a.enddate, a.lk_nullint, a.lk_int, a.lk_str,
a.lk_float, a.lk_date
from user_table_lk2 a right join user_table_ts b
on a.startdate<=b.dates and b.dates<=a.enddate
""",
["dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
)
# Inner Join
assert_query_gives_same_result(
"""
select a.*, b.startdate, b.enddate, b.lk_nullint, b.lk_int, b.lk_str,
b.lk_float, b.lk_date
from user_table_ts a inner join user_table_lk2 b
on b.startdate<=a.dates and a.dates<=b.enddate
""",
["dates", "startdate", "enddate"],
force_dtype="dask",
check_dtype=True,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious here: was specifying check_dtype=False not working? Is there any other reason for introducing this new argument (force_dtype)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how assert_frame_equal determines whether, for example, 2.0 and 2 are identical when check_dtype=False. So here I try to make it more explicit that a type cast to the dask dataframe makes the two dataframes identical even when check_dtype=True. I guess it conveys more information to developers? (Maybe it's just my OCD.)

)


def test_sort(assert_query_gives_same_result):
assert_query_gives_same_result(
"""
Expand Down