Avi Haiat
04/18/2021, 4:44 PM range(0,count,LIMIT)
pseudo code:
@task
def count_collection_mongo(collectionName):
    """Return the number of rows in the collection named ``collectionName``.

    Pseudo code: the counting step is only described by the comment below,
    so ``count`` is never assigned in this block and must be filled in
    before the task can run.
    """
    # calculate number of rows in the collection and return it
    # NOTE(review): presumably a MongoDB count query on `collectionName`
    # belongs here -- confirm against the real implementation.
    return count
@task
def load_data(collectionName, skip, limit):
    """Return one page of rows from the collection named ``collectionName``.

    Pseudo code: the query is only sketched in the comment below, so
    ``data`` is never assigned in this block.

    Args:
        collectionName: name of the collection to read from.
        skip: offset passed to ``.skip()`` in the sketched query.
        limit: page size passed to ``.limit()`` in the sketched query.
    """
    # data = db.collectionName.find().skip(skip).limit(limit)
    # NOTE(review): as written, `db.collectionName` would address a
    # collection literally named "collectionName"; `db[collectionName]` is
    # presumably what is meant -- confirm. Materialising the result as a
    # list is probably needed for the downstream `flatten` -- TODO confirm.
    return data
@task
def transform(data):
    """Print the ``'name'`` entry of ``data`` and pass ``data`` through.

    ``data`` must provide a ``.get`` method; a missing ``'name'`` key makes
    ``.get`` return ``None``, which is printed as-is.
    """
    name = data.get('name')
    print(name)
    return data
@task
def calculate_iterations_data(count_result):
    """Return the offsets ``0, LIMIT, 2*LIMIT, ...`` below ``count_result``.

    The result is a ``range`` object with step ``LIMIT``; ``LIMIT`` is a
    module-level name that this block does not define.
    """
    return range(0, count_result, LIMIT)
# `unmapped` marks arguments that stay constant across mapped task runs.
# NOTE(review): imported here because no import block is visible in this
# snippet; fold it into the existing `from prefect import ...` line if any.
from prefect import unmapped

# ETL flow: count the rows, derive one offset per page, load every page in
# a mapped task, then transform each row individually.
with Flow("ETL flow for mycollection") as flow:
    result_count = count_collection_mongo("mycollection")
    iterations = calculate_iterations_data(result_count)
    # load_data is declared as (collectionName, skip, limit). Only `skip`
    # varies per page, so it is the single mapped argument; the collection
    # name and page size are constants and are wrapped in `unmapped`.
    # The original `load_data.map(iterations)` bound the offsets to
    # `collectionName` and left `skip` and `limit` unset.
    data = load_data.map(
        collectionName=unmapped("mycollection"),
        skip=iterations,
        limit=unmapped(LIMIT),
    )
    # `flatten` turns the per-page results into one flat sequence, so
    # transform runs once per row rather than once per page.
    transform.map(flatten(data))

# Run the flow after its definition context has been closed.
flow.run()
Kevin Kho
Kevin Kho
Kevin Kho
Avi Haiat
04/18/2021, 5:36 PM Kevin Kho