@@ -396,28 +396,28 @@ def test_groupby_fill():
396
396
)
397
397
mdf = md .DataFrame (df1 , chunk_size = 3 )
398
398
399
- r = tile (getattr ( mdf .groupby (["one" , "two" ]), " ffill" ) ())
399
+ r = tile (mdf .groupby (["one" , "two" ]). ffill ())
400
400
assert r .op .output_types [0 ] == OutputType .dataframe
401
401
assert r .shape == (len (df1 ), 1 )
402
402
assert len (r .chunks ) == 3
403
403
assert r .chunks [0 ].shape == (np .nan , 1 )
404
404
assert r .dtypes .index .tolist () == ["three" ]
405
405
406
- r = tile (getattr ( mdf .groupby (["two" ]), " bfill" ) ())
406
+ r = tile (mdf .groupby (["two" ]). bfill ())
407
407
assert r .op .output_types [0 ] == OutputType .dataframe
408
408
assert r .shape == (len (df1 ), 2 )
409
409
assert len (r .chunks ) == 3
410
410
assert r .chunks [0 ].shape == (np .nan , 2 )
411
411
assert r .dtypes .index .tolist () == ["one" , "three" ]
412
412
413
- r = tile (getattr ( mdf .groupby (["two" ]), " backfill" ) ())
413
+ r = tile (mdf .groupby (["two" ]). backfill ())
414
414
assert r .op .output_types [0 ] == OutputType .dataframe
415
415
assert r .shape == (len (df1 ), 2 )
416
416
assert len (r .chunks ) == 3
417
417
assert r .chunks [0 ].shape == (np .nan , 2 )
418
418
assert r .dtypes .index .tolist () == ["one" , "three" ]
419
419
420
- r = tile (getattr ( mdf .groupby (["one" ]), " fillna" ) (5 ))
420
+ r = tile (mdf .groupby (["one" ]). fillna (5 ))
421
421
assert r .op .output_types [0 ] == OutputType .dataframe
422
422
assert r .shape == (len (df1 ), 2 )
423
423
assert len (r .chunks ) == 3
@@ -426,25 +426,25 @@ def test_groupby_fill():
426
426
427
427
s1 = pd .Series ([4 , 3 , 9 , np .nan , np .nan , 7 , 10 , 8 , 1 , 6 ])
428
428
ms1 = md .Series (s1 , chunk_size = 3 )
429
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " ffill" ) ())
429
+ r = tile (ms1 .groupby (lambda x : x % 2 ). ffill ())
430
430
assert r .op .output_types [0 ] == OutputType .series
431
431
assert len (r .chunks ) == 4
432
432
assert r .shape == (len (s1 ),)
433
433
assert r .chunks [0 ].shape == (np .nan ,)
434
434
435
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " bfill" ) ())
435
+ r = tile (ms1 .groupby (lambda x : x % 2 ). bfill ())
436
436
assert r .op .output_types [0 ] == OutputType .series
437
437
assert len (r .chunks ) == 4
438
438
assert r .shape == (len (s1 ),)
439
439
assert r .chunks [0 ].shape == (np .nan ,)
440
440
441
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " backfill" ) ())
441
+ r = tile (ms1 .groupby (lambda x : x % 2 ). backfill ())
442
442
assert r .op .output_types [0 ] == OutputType .series
443
443
assert len (r .chunks ) == 4
444
444
assert r .shape == (len (s1 ),)
445
445
assert r .chunks [0 ].shape == (np .nan ,)
446
446
447
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " fillna" ) (5 ))
447
+ r = tile (ms1 .groupby (lambda x : x % 2 ). fillna (5 ))
448
448
assert r .op .output_types [0 ] == OutputType .series
449
449
assert len (r .chunks ) == 4
450
450
assert r .shape == (len (s1 ),)
@@ -453,26 +453,47 @@ def test_groupby_fill():
453
453
s1 = pd .Series ([4 , 3 , 9 , np .nan , np .nan , 7 , 10 , 8 , 1 , 6 ])
454
454
ms1 = md .Series (s1 , chunk_size = 3 )
455
455
456
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " ffill" ) ())
456
+ r = tile (ms1 .groupby (lambda x : x % 2 ). ffill ())
457
457
assert r .op .output_types [0 ] == OutputType .series
458
458
assert len (r .chunks ) == 4
459
459
assert r .shape == (len (s1 ),)
460
460
assert r .chunks [0 ].shape == (np .nan ,)
461
461
462
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " bfill" ) ())
462
+ r = tile (ms1 .groupby (lambda x : x % 2 ). bfill ())
463
463
assert r .op .output_types [0 ] == OutputType .series
464
464
assert len (r .chunks ) == 4
465
465
assert r .shape == (len (s1 ),)
466
466
assert r .chunks [0 ].shape == (np .nan ,)
467
467
468
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " backfill" ) ())
468
+ r = tile (ms1 .groupby (lambda x : x % 2 ). backfill ())
469
469
assert r .op .output_types [0 ] == OutputType .series
470
470
assert len (r .chunks ) == 4
471
471
assert r .shape == (len (s1 ),)
472
472
assert r .chunks [0 ].shape == (np .nan ,)
473
473
474
- r = tile (getattr ( ms1 .groupby (lambda x : x % 2 ), " fillna" ) (5 ))
474
+ r = tile (ms1 .groupby (lambda x : x % 2 ). fillna (5 ))
475
475
assert r .op .output_types [0 ] == OutputType .series
476
476
assert len (r .chunks ) == 4
477
477
assert r .shape == (len (s1 ),)
478
478
assert r .chunks [0 ].shape == (np .nan ,)
479
+
480
+
481
def test_groupby_nunique():
    """Tile a two-key groupby ``nunique`` and check the resulting chunks.

    The frame is split with ``chunk_size=3``; after tiling, the result is
    expected to consist of exactly one chunk whose operand is a
    ``DataFrameGroupByAgg``.
    """
    # Column "three" is mostly NaN: group (1, 1) holds a single 10,
    # group (1, 2) a single 20, and group (1, 3) only NaN.
    rows = [
        [1, 1, 10],
        [1, 1, np.nan],
        [1, 1, np.nan],
        [1, 2, np.nan],
        [1, 2, 20],
        [1, 2, np.nan],
        [1, 3, np.nan],
        [1, 3, np.nan],
    ]
    raw = pd.DataFrame(rows, columns=["one", "two", "three"])
    mdf = md.DataFrame(raw, chunk_size=3)

    grouped = mdf.groupby(["one", "two"])
    tiled = tile(grouped.nunique())

    assert len(tiled.chunks) == 1
    assert isinstance(tiled.chunks[0].op, DataFrameGroupByAgg)
0 commit comments