Top 10 Examples of "pyarrow in functional component" in Python

Dive into secure and efficient coding practices with our curated list of the top 10 examples showcasing 'pyarrow' in Python. Our advanced machine learning engine meticulously scans each line of code, cross-referencing millions of open source libraries to ensure your implementation is not just functional, but also robust and secure. Elevate your Python data pipelines by mastering schema definitions, Arrow tables, and Parquet I/O with confidence and precision.
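Before the examples themselves, here is a minimal, self-contained sketch of the pattern most of the snippets below build on: defining an explicit schema, constructing a pyarrow Table, and round-tripping it through Parquet. The column names and the file name "example.parquet" are illustrative placeholders only, not taken from any of the examples.

import pyarrow as pa
import pyarrow.parquet as pq

# Define an explicit schema so column types are not left to inference.
schema = pa.schema([
    pa.field("foo", pa.int64()),
    pa.field("bar", pa.string()),
])

# Build a Table directly from Python data using that schema.
table = pa.Table.from_pydict({"foo": [1, 2, 3], "bar": ["a", "b", "c"]}, schema=schema)

# Write the Table to Parquet and read it back, verifying the round trip.
pq.write_table(table, "example.parquet")
result = pq.read_table("example.parquet")
assert result.equals(table)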

return datetime.time(hour, minute, second, milisec)

    def expected_data_transform_int32(data):
        # The int32 value packs a time of day as whole seconds followed by
        # four fractional digits; extract the fraction and scale it to the
        # microseconds expected by datetime.time.
        milisec = data % (10**4)
        milisec *= 10**2
        data //= 10**4
        second = data % 60
        data //= 60
        minute = data % 60
        hour = data // 60
        return datetime.time(hour, minute, second, milisec)

    iterate_over_test_chunk([pyarrow.int64(), pyarrow.int64()],
                            column_meta_int64, time_generator_int64, expected_data_transform_int64)

    iterate_over_test_chunk([pyarrow.int32(), pyarrow.int32()],
                            column_meta_int32, time_generator_int32, expected_data_transform_int32)
def test_parse_schema(self):
    buf = pa.py_buffer(schema_data)
    result = _load_schema(buf)
    expected = pa.schema([
        pa.field("depdelay", pa.int16()),
        pa.field("arrdelay", pa.int16())
    ])
    assert result.equals(expected)
def test_ingest_with_datetime():
    """
    Test ingesting datetime data with a given schema
    """
    schema = pa.schema([
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns"))
    ])

    data = [{"foo": 1, "bar": 2, "baz": "2018-01-01 01:02:03"}, {"foo": 10, "bar": 20, "baz": "2018-01-02 01:02:03"}]

    converted_data = client.ingest_data(data, schema)
    timestamp_values = [pd.to_datetime("2018-01-01 01:02:03"), pd.to_datetime("2018-01-02 01:02:03")]
    assert converted_data.to_pydict() == {'foo': [1, 10], 'bar': [2, 20], 'baz': timestamp_values}
def generate_data(pyarrow_type, column_meta, source_data_generator, batch_count, batch_row_count):
    stream = BytesIO()

    assert len(pyarrow_type) == len(column_meta)

    column_size = len(pyarrow_type)
    fields = []
    for i in range(column_size):
        fields.append(pyarrow.field("column_{}".format(i), pyarrow_type[i], True, column_meta[i]))
    schema = pyarrow.schema(fields)

    expected_data = []
    writer = RecordBatchStreamWriter(stream, schema)

    for i in range(batch_count):
        column_arrays = []
        py_arrays = []
        for j in range(column_size):
            column_data = []
            not_none_cnt = 0
            # Regenerate the column until it contains at least one non-None value.
            while not_none_cnt == 0:
                column_data = []
                for _ in range(batch_row_count):
                    data = None if bool(random.getrandbits(1)) else source_data_generator()
                    if data is not None:
                        not_none_cnt += 1
                    column_data.append(data)
def write_table(table, where, filesystem, **kwargs):  # pylint: disable=unused-argument
    path = str(filesystem.tmp_path / FILENAME)
    filesystem.files[str(where)] = path
    pq.write_table(table, path)
def test_index_metadata(store):
    key = "test.parquet"
    df = pd.DataFrame({"a": [1]})
    table = pa.Table.from_pandas(df)
    meta = b"""{
        "pandas_version": "0.20.3",
        "index_columns": ["__index_level_0__"],
        "columns": [
            {"metadata": null, "name": "a", "numpy_type": "int64", "pandas_type": "int64"}
        ]
    }"""
    table = table.replace_schema_metadata({b"pandas": meta})
    buf = pa.BufferOutputStream()
    pq.write_table(table, buf)
    store.put(key, buf.getvalue().to_pybytes())
    pdt.assert_frame_equal(DataFrameSerializer.restore_dataframe(store, key), df)
def test_leak3():
    import pyarrow.parquet as pq

    df = pd.DataFrame({'a{0}'.format(i): [1, 2, 3, 4]
                       for i in range(50)})
    table = pa.Table.from_pandas(df, preserve_index=False)

    writer = pq.ParquetWriter('leak_test_' + tm.rands(5) + '.parquet',
                              table.schema)

    def func():
        writer.write_table(table, row_group_size=len(table))

    # This does not "leak" per se but we do want to have this use as little
    # memory as possible
    assert_does_not_leak(func, iterations=500,
                         check_interval=50, tolerance=20)
def test_load_empty_table_arrow(self, con):

    con.execute("drop table if exists baz;")
    con.execute("create table baz (a int, b float, c text);")

    data = [(1, 1.1, 'a'),
            (2, 2.2, '2'),
            (3, 3.3, '3')]

    df = pd.DataFrame(data, columns=list('abc')).astype({
        'a': 'int32',
        'b': 'float32'
    })

    table = pa.Table.from_pandas(df, preserve_index=False)
    con.load_table("baz", table, method='arrow')
    result = sorted(con.execute("select * from baz"))
    self.check_empty_insert(result, data)
    con.execute("drop table if exists baz;")
def test_fastparquet_read_with_hdfs():
    fs = hdfs_test_client()

    df = tm.makeDataFrame()
    table = pa.Table.from_pandas(df)

    path = '/tmp/testing.parquet'
    with fs.open(path, 'wb') as f:
        pq.write_table(table, f)

    parquet_file = fastparquet.ParquetFile(path, open_with=fs.open)

    result = parquet_file.to_pandas()
    tm.assert_frame_equal(result, df)
def test_iterate_over_string_chunk():
    random.seed(datetime.datetime.now())
    column_meta = [
            {"logicalType": "TEXT"},
            {"logicalType": "TEXT"}
    ]
    field_foo = pyarrow.field("column_foo", pyarrow.string(), True, column_meta[0])
    field_bar = pyarrow.field("column_bar", pyarrow.string(), True, column_meta[1])
    pyarrow.schema([field_foo, field_bar])

    def str_generator():
        return str(random.randint(-100, 100))

    iterate_over_test_chunk([pyarrow.string(), pyarrow.string()],
                            column_meta, str_generator)
