Skip to content

Memory usage in Flurry Hashmap #115

@ss025

Description

@ss025

Hi, I am trying to use the flurry hashmap in one of my projects and want to understand its memory usage and garbage collection.

I am trying a simple actix-web application with a flurry hashmap, inserting the same keys/values again and again. A few observations:

  1. On the first add, memory usage is ~15GB, while it is 6.7GB with a std hashmap behind a lock.
  2. Calling add -> clear -> add repeatedly with the same keys/values eventually leads to an OOM error, and the application is killed.

Output of program

curl --location --request GET '127.0.0.1:8080/add'
count 50331648! from thread ThreadId(18) - stats Length: 50331648, Capacity: 50331648, Memory: 15.7 GiB, Virtual: 17.3 GiB

curl --location --request GET '127.0.0.1:8080/clear'
count 0! from thread ThreadId(19) - stats Length: 0, Capacity: 0, Memory: 15.7 GiB, Virtual: 17.3 GiB

curl --location --request GET '127.0.0.1:8080/add'
count 50331648! from thread ThreadId(20) - stats Length: 50331648, Capacity: 50331648, Memory: 29.1 GiB, Virtual: 30.6 GiB

curl --location --request GET '127.0.0.1:8080/clear'
count 0! from thread ThreadId(21) - stats Length: 0, Capacity: 0, Memory: 29.1 GiB, Virtual: 30.6 GiB

curl --location --request GET '127.0.0.1:8080/add'
curl: (52) Empty reply from server

main.rs

use actix_web::web::Data;
use actix_web::{get, web, App, HttpServer};
use std::sync::Mutex;

use bytesize::ByteSize;
use sysinfo::{get_current_pid, ProcessExt, ProcessRefreshKind, RefreshKind, System, SystemExt};

/// Entry point: builds the shared [`AppState`] and serves the `/add`,
/// `/clear` and `/stats` handlers on `127.0.0.1:8080`.
///
/// # Errors
///
/// Returns the I/O error if binding the address fails or if the running
/// server finishes with an error.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Only per-process information is requested from `sysinfo`.
    let process_only = RefreshKind::new().with_processes(ProcessRefreshKind::new());

    let shared = web::Data::new(AppState {
        data: flurry::HashMap::<String, String>::new(),
        sys: Mutex::new(System::new_with_specifics(process_only)),
    });

    // Each invocation of the factory closure gets its own clone of the
    // `web::Data` handle to the same state.
    let server = HttpServer::new(move || {
        App::new()
            .app_data(shared.clone())
            .service(add)
            .service(clear)
            .service(stats)
    })
    .bind(("127.0.0.1", 8080))?;

    server.run().await
}

/// State shared by every request handler through `web::Data`.
struct AppState {
    /// Concurrent map that `/add` fills and `/clear` empties.
    data: flurry::HashMap<String, String>,
    /// `sysinfo` handle; `stats_2` locks it to refresh and read this
    /// process's memory figures.
    sys: Mutex<System>,
}

/// `GET /stats`: returns the report string produced by `stats_2` for the
/// shared state.
#[get("/stats")]
async fn stats(data: web::Data<AppState>) -> String {
    stats_2(data)
}

/// Builds a one-line, newline-terminated report with the map length and the
/// current process's memory and virtual-memory sizes.
///
/// Note that the `Capacity` field is also populated from `len()`, so it
/// reports the number of entries rather than an allocated capacity.
///
/// # Panics
///
/// Panics if the current PID cannot be determined, if the `sys` mutex is
/// poisoned, or if the process is not found after the refresh.
fn stats_2(data: Data<AppState>) -> String {
    let current = get_current_pid().unwrap();

    // The guard is held until the function returns because `process`
    // borrows from it.
    let mut system = data.sys.lock().unwrap();
    system.refresh_process(current);
    let process = system.process(current).unwrap();

    let entries = &data.data;
    let length = entries.len();
    let capacity = entries.len();
    let resident = ByteSize::b(process.memory()).to_string_as(true);
    let virtual_mem = ByteSize::b(process.virtual_memory()).to_string_as(true);

    format!(
        "Length: {}, Capacity: {}, Memory: {}, Virtual: {}\n",
        length, capacity, resident, virtual_mem
    )
}

/// `GET /add`: inserts `MAX_ENTRIES / 2` generated key/value pairs
/// (`str-{i}` -> `str-{i}-{i}`) into the shared map, then returns the
/// resulting entry count, the handling thread's id and the `stats_2` report.
#[get("/add")]
async fn add(data: web::Data<AppState>) -> String {
    const MAX_ENTRIES: u64 = 100_663_296;

    let size = {
        let map = &data.data;
        // The map is pinned anew for every insert, so each guard is released
        // at the end of its own statement.
        (0..MAX_ENTRIES / 2).for_each(|i| {
            map.pin().insert(format!("str-{i}"), format!("str-{i}-{i}"));
        });
        map.len()
    };

    let report = stats_2(data);
    let thread_id = std::thread::current().id();
    format!(
        "count {size}! from thread {:?} - stats {stats1}\n",
        thread_id,
        stats1 = report
    )
}

/// `GET /clear`: removes every entry from the shared map, then returns the
/// remaining entry count, the handling thread's id and the `stats_2` report.
#[get("/clear")]
async fn clear(data: web::Data<AppState>) -> String {
    let size = {
        let map = &data.data;
        map.pin().clear();
        // A `malloc_trim(0)` call was tried at this point and is left disabled:
        // unsafe { malloc_trim(0) };
        map.len()
    };

    let report = stats_2(data);
    let thread_id = std::thread::current().id();
    format!(
        "count {size}! from thread {:?} - stats {stats1}\n",
        thread_id,
        stats1 = report
    )
}

Cargo.toml

[package]
name = "skiptest"
version = "0.1.0"
edition = "2021"


[dependencies]
actix-web = "4"

flurry = "0.4.0"

sysinfo = "0.28.4"
bytesize = "1.2.0"

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions