Bob Colner
07/03/2020, 4:51 PMclass ParquetSerializer(Serializer):
def serialize(self, value: pd.DataFrame) -> bytes:
# transform a Python object into bytes
tmp_filename = str(time_ns()) + '.parquet'
value.to_parquet(
path=tmp_filename,
index=False
)
with open(tmp_filename, 'rb') as in_file:
df_bytes = in_file.read()
Path(tmp_filename).unlink()
return df_bytes
def deserialize(self, value:bytes) -> pd.DataFrame:
# recover a Python object from bytes
df_bytes_io = BytesIO(value)
df = pd.read_parquet(df_bytes_io)
return df
Does anyone have thoughts about the above approach? (saving as a local file then reading the bytes from the file?)Jeremiah
07/03/2020, 4:58 PMBob Colner
07/03/2020, 5:01 PMJeremiah
07/03/2020, 5:02 PMBob Colner
07/03/2020, 5:04 PMclass ParquetSerializer(Serializer):
def serialize(self, value: pd.DataFrame) -> bytes:
# transform a Python object into bytes
with NamedTemporaryFile(mode='w+b') as tmp_ref1, open(tmp_ref1.name, 'rb') as tmp_ref2:
value.to_parquet(
path=tmp_ref1,
index=False
)
df_bytes = tmp_ref2.read()
return df_bytes
def deserialize(self, value:bytes) -> pd.DataFrame:
# recover a Python object from bytes
df_bytes_io = BytesIO(value)
df = pd.read_parquet(df_bytes_io)
return df
Brett Naul
07/03/2020, 11:08 PMBob Colner
07/08/2020, 4:57 PMBrett Naul
07/08/2020, 5:03 PMBob Colner
07/08/2020, 5:06 PMBrett Naul
07/08/2020, 5:14 PMBob Colner
07/08/2020, 5:19 PM