diff --git a/envconfig/config.go b/envconfig/config.go index fc702198fa3..09dce62d9d2 100644 --- a/envconfig/config.go +++ b/envconfig/config.go @@ -164,6 +164,8 @@ var ( SchedSpread = Bool("OLLAMA_SCHED_SPREAD") // IntelGPU enables experimental Intel GPU detection. IntelGPU = Bool("OLLAMA_INTEL_GPU") + // NoMMap disables memory mapping of the model file. + NoMMap = Bool("OLLAMA_NO_MMAP") // MultiUserCache optimizes prompt caching for multi-user scenarios MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE") // Enable the new Ollama engine @@ -251,6 +253,7 @@ func AsMap() map[string]EnvVar { "OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"}, "OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"}, "OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"}, + "OLLAMA_NO_MMAP": {"OLLAMA_NO_MMAP", NoMMap(), "Disable memory mapping of the model file"}, "OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"}, "OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"}, "OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"}, diff --git a/llm/server.go b/llm/server.go index e6046db608e..a7b8bda9f71 100644 --- a/llm/server.go +++ b/llm/server.go @@ -209,7 +209,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a // Windows CUDA should not use mmap for best performance // Linux with a model larger than free space, mmap leads to thrashing // For CPU loads we want the memory to be allocated, not FS cache - if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) || + if envconfig.NoMMap() || + (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) || (runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) || (gpus[0].Library == "cpu" && opts.UseMMap == nil) || (opts.UseMMap != nil && !*opts.UseMMap) {