aboutsummaryrefslogtreecommitdiff
path: root/src/batch/mod.rs
blob: ea231c7c14ded9f2bcee2ad7b9f18ad8da32a360 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
use itertools::Itertools;
use std::{sync::RwLockWriteGuard, thread};
use tracing::debug;

use crate::repo::{Repo, Repos};

/// Runs `f` on every repository in `repos`, spreading the work over a pool
/// of named OS threads (`batch_0`, `batch_1`, ...).
///
/// The number of worker threads is taken from the `jobs` argument stored in
/// `crate::GTREE`; a value of `0` means "use the number of CPUs reported by
/// `num_cpus`". The repositories are split into contiguous batches and each
/// batch is processed sequentially on its own thread. For every repository
/// the write lock is acquired and the guard is handed to `f`.
///
/// The call blocks until all worker threads have finished.
///
/// # Panics
///
/// Panics if `crate::GTREE.get()` yields no value, if a thread cannot be
/// spawned, if a repository lock is poisoned, or if any worker thread
/// panicked.
pub fn batch(repos: Repos, f: fn(RwLockWriteGuard<Repo>)) {
    let cpus = num_cpus::get();
    let len = repos.len();

    let jobs = crate::GTREE.get().unwrap().args.jobs;

    // An explicit `jobs` value wins; `0` falls back to the CPU count.
    let workers = if jobs != 0 { jobs } else { cpus };
    let batch_size = batch_size_for(len, workers);

    // Log the worker count that is actually in effect, not always the CPU count.
    debug!(
        "got {} repos and {} threads, batch size is {}",
        len, workers, batch_size
    );

    let chunks = repos.into_iter().chunks(batch_size);
    // At most one thread per worker, and never more threads than repos.
    let mut handles = Vec::with_capacity(workers.min(len));

    // Spawn eagerly with a `for` loop: a lazy `map` would not start any
    // thread until the iterator is consumed.
    // https://stackoverflow.com/q/34765967
    for (i, chunk) in chunks.into_iter().enumerate() {
        let repos: Repos = chunk.collect();

        let span = tracing::debug_span!("spawn_batch_thread", number = i);
        let t_span = tracing::debug_span!("batch_thread", number = i);
        let _enter = span.enter();
        let handle = thread::Builder::new()
            .name(format!("batch_{}", i))
            .spawn(move || {
                let _enter = t_span.enter();
                for (_name, repo) in repos {
                    let repo = repo.write().unwrap();

                    f(repo)
                }
            })
            .unwrap();

        handles.push(handle);
    }

    // Wait for every worker; a panic in any of them is propagated here.
    handles
        .into_iter()
        .for_each(|handle| handle.join().unwrap());
}

/// Returns how many items each batch must hold so that `len` items fit into
/// at most `workers` batches. The result is never `0`, because the chunking
/// adaptor rejects a zero chunk size.
fn batch_size_for(len: usize, workers: usize) -> usize {
    let workers = workers.max(1);
    // Ceiling division: rounding down would leave a remainder that spills
    // into extra batches and therefore extra threads beyond `workers`.
    ((len + workers - 1) / workers).max(1)
}