Skip to content

Commit cc63ef9

Browse files
committed
Merge pull request #28 from vchollati/master
A few README.rst and setup.py corrections
2 parents 803d2c0 + b19400d commit cc63ef9

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

README.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Sparkit-learn introduces two important distributed data formats:
3636
rdd = sc.parallelize(data, 2) # each partition with 10 elements
3737
# ArrayRDD
3838
# each partition will contain blocks with 5 elements
39-
X = ArrayRDD(rdd, block_size=5) # 4 blocks, 2 in each partition
39+
X = ArrayRDD(rdd, bsize=5) # 4 blocks, 2 in each partition
4040
4141
Basic operations:
4242

@@ -71,7 +71,7 @@ Sparkit-learn introduces two important distributed data formats:
7171
# array([ 0, 1, 2, ... 17, 18, 19])
7272
7373
# pyspark.rdd operations will still work
74-
X.numPartitions() # 2 - number of partitions
74+
X.getNumPartitions() # 2 - number of partitions
7575
7676
- **DictRDD:**
7777

@@ -84,19 +84,19 @@ Sparkit-learn introduces two important distributed data formats:
8484
X = range(20)
8585
y = range(2) * 10
8686
# PySpark RDD with 2 partitions
87-
X_rdd = sc.parallelize(data_X, 2) # each partition with 10 elements
88-
y_rdd = sc.parallelize(data_y, 2) # each partition with 10 elements
87+
X_rdd = sc.parallelize(X, 2) # each partition with 10 elements
88+
y_rdd = sc.parallelize(y, 2) # each partition with 10 elements
8989
zipped_rdd = X_rdd.zip(y_rdd) # zip the two rdd's together
9090
# DictRDD
9191
# each partition will contain blocks with 5 elements
92-
Z = DictRDD(zipped_rdd, columns=('X', 'y'), block_size=5) # 4 blocks, 2/partition
92+
Z = DictRDD(zipped_rdd, columns=('X', 'y'), bsize=5) # 4 blocks, 2/partition
9393
9494
# or:
9595
import numpy as np
9696
9797
data = np.array([range(20), range(2)*10]).T
9898
rdd = sc.parallelize(data, 2)
99-
Z = DictRDD(rdd, columns=('X', 'y'), block_size=5)
99+
Z = DictRDD(rdd, columns=('X', 'y'), bsize=5)
100100
101101
Basic operations:
102102

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ def setup_package():
2424
license='Apache License, Version 2.0',
2525
url='https://github.com/lensacom/sparkit-learn',
2626
packages=['splearn',
27-
'splearn.rdd',
2827
'splearn.cluster',
2928
'splearn.decomposition',
3029
'splearn.feature_extraction',

0 commit comments

Comments
 (0)