# Targets that name commands rather than files and must run whenever requested.
# stage0, stage1 and stage2 are deliberately not listed: their recipes end with
# `touch $@`, leaving stamp files that record a completed stage.
.PHONY: top clean clean-profiles install

# Per-stage build directories; each one is set up by the configure rule below.
STAGE0_BUILD := $(CURDIR)/stage0.build
STAGE1_BUILD := $(CURDIR)/stage1.build
STAGE2_BUILD := $(CURDIR)/stage2.build

# Directory holding the stage0 toolchain binaries. The trailing slash is
# intentional: tool names are appended to this value without a separator.
STAGE0_TOOLS := $(STAGE0_BUILD)/usr/tools/

# Directory two levels above this one; the configure step runs make there.
JULIA_ROOT := $(CURDIR)/../..

# Raw profiles (*.profraw) are looked up in PROFILE_DIR and merged into
# PROFILE_FILE. PROFRAW_FILES is globbed once, while this makefile is parsed,
# so files that appear later are only seen by the next make invocation.
PROFILE_DIR := $(CURDIR)/profiles
PROFILE_FILE := $(PROFILE_DIR)/merged.prof
PROFRAW_FILES := $(wildcard $(PROFILE_DIR)/*.profraw)

# LLVM utilities taken from the stage0 toolchain directory.
LLVM_CXXFILT := $(STAGE0_TOOLS)llvm-cxxfilt
LLVM_PROFDATA := $(STAGE0_TOOLS)llvm-profdata
LLVM_OBJCOPY := $(STAGE0_TOOLS)llvm-objcopy

# Value passed as -vp-counters-per-site to the instrumented (stage1) compile.
# It has to be raised significantly when building a single libLLVM.so.
COUNTERS_PER_SITE := 6

# Hint echoed after stage1 has been built. The value is a single shell word in
# single quotes ('\'' embeds a literal single quote), so the backticks inside it
# are printed rather than executed. A trailing `$\` joins the next line without
# adding whitespace: make folds the backslash-newline and the following indent
# into one space, and the resulting `$ ` reference then expands to nothing.
AFTER_STAGE1_MESSAGE := 'Run `make clean-profiles` to start with a clean slate. $\
    Then run Julia to collect realistic profile data, for example: `$(STAGE1_BUILD)/julia -O3 -e $\
    '\''using Pkg; Pkg.add("LoopVectorization"); Pkg.test("LoopVectorization")'\''`. This $\
    should produce about 15MB of data in $(PROFILE_DIR). Note that running extensive $\
    scripts may result in counter overflows, which can be detected by running $\
    `make top`. Afterwards run `make stage2`.'

# Command-line variable assignments handed to the stage1 and stage2 sub-makes:
# compiler, linker and archiver from the stage0 tools directory, plus the PGO
# flags appended to CFLAGS/CXXFLAGS/LDFLAGS.
# This is intentionally a recursive (=) variable: PGO_CFLAGS, PGO_CXXFLAGS and
# PGO_LDFLAGS are target-specific, so they must be looked up when a recipe
# expands TOOLCHAIN_FLAGS, not when this line is parsed.
TOOLCHAIN_FLAGS = \
    "CC=$(STAGE0_TOOLS)clang" \
    "CXX=$(STAGE0_TOOLS)clang++" \
    "LD=$(STAGE0_TOOLS)ld.lld" \
    "AR=$(STAGE0_TOOLS)llvm-ar" \
    "RANLIB=$(STAGE0_TOOLS)llvm-ranlib" \
    "CFLAGS+=$(PGO_CFLAGS)" \
    "CXXFLAGS+=$(PGO_CXXFLAGS)" \
    "LDFLAGS+=$(PGO_LDFLAGS)"

# Configure a build directory. The same recipe applies independently to each of
# the three stage directories; $@ is the directory currently being configured
# and is passed to the `configure` goal of the make in JULIA_ROOT as O=.
$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
	$(MAKE) -C $(JULIA_ROOT) configure O=$@

# stage0: install clang, llvm, lld and the llvm tools into the stage0 build
# directory, patch the profiling runtime archives, then leave a `stage0` stamp
# file behind.
#
# USE_BINARYBUILDER_LLVM=1 is exported into the environment of the sub-make
# (presumably selecting a prebuilt LLVM rather than a source build -- confirm
# against the deps makefiles).
#
# The directory handed to `find` is spelled out as $(STAGE0_BUILD) on purpose.
# This rule has only an order-only prerequisite, and make leaves $< empty in
# that case; an empty start path makes GNU find silently search the current
# directory (every stage build tree) and makes BSD find fail outright.
stage0: export USE_BINARYBUILDER_LLVM=1
stage0: | $(STAGE0_BUILD)
	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
	# doesn't do that, and otherwise the profile constructor is not executed
	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \
	find $(STAGE0_BUILD) -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
	touch $@

# stage1: build in the stage1 directory with the stage0 toolchain and
# -fprofile-generate instrumentation that writes into PROFILE_DIR, then leave a
# `stage1` stamp file and print the hint on how to collect profile data.

# Configuring the stage1 build directory has to wait for stage0.
$(STAGE1_BUILD): stage0

# Target-specific settings; the PGO_* values are consumed via TOOLCHAIN_FLAGS.
stage1: export USE_BINARYBUILDER_LLVM=0
stage1: PGO_CFLAGS := -fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
stage1: PGO_CXXFLAGS := -fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
stage1: PGO_LDFLAGS := -fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR)

# The build directory is order-only: it must exist, but its timestamp never
# forces this stage to be redone.
stage1: | $(STAGE1_BUILD)
	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && \
	touch $@
	@echo $(AFTER_STAGE1_MESSAGE)

# stage2: build in the stage2 directory with the stage0 toolchain, feeding the
# merged profile back in through -fprofile-use, then leave a `stage2` stamp file.

# Target-specific settings; the PGO_* values are consumed via TOOLCHAIN_FLAGS.
stage2: export USE_BINARYBUILDER_LLVM=0
stage2: PGO_CFLAGS := -fprofile-use=$(PROFILE_FILE)
stage2: PGO_CXXFLAGS := -fprofile-use=$(PROFILE_FILE)
stage2: PGO_LDFLAGS := -fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe

# A newer merged profile triggers a rebuild; the build directory is order-only
# and merely has to exist.
stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && \
	touch $@

# install: run the `install` goal inside the stage2 build directory once stage2
# is up to date. Only USE_BINARYBUILDER_LLVM=0 is passed on the command line;
# TOOLCHAIN_FLAGS is not forwarded here.
install: stage2
	$(MAKE) -C $(STAGE2_BUILD) install USE_BINARYBUILDER_LLVM=0

# Merge all raw profiles into the single profile file used by stage2 and `top`.
#
# PROFRAW_FILES is the list of *.profraw files that existed when make parsed
# this makefile. If none existed, the merge would be invoked without any input,
# so the recipe checks for that first and stops with an explanation instead.
$(PROFILE_FILE): stage1 $(PROFRAW_FILES)
	@if [ -z "$(firstword $(PROFRAW_FILES))" ]; then \
		echo "error: no .profraw files were found in $(PROFILE_DIR) when make started." >&2; \
		echo "Run the stage1 julia to collect profile data and then re-run make." >&2; \
		exit 1; \
	fi
	$(LLVM_PROFDATA) merge -output=$@ $(PROFRAW_FILES)

# top: print the top 50 functions recorded in the merged profile, with the
# output piped through llvm-cxxfilt.
top: $(PROFILE_FILE)
	$(LLVM_PROFDATA) show --topn=50 $(PROFILE_FILE) | $(LLVM_CXXFILT)

# clean-profiles: delete the whole profile directory. The merged profile lives
# inside it, so it is removed together with the raw profiles.
clean-profiles:
	rm -rf $(PROFILE_DIR)

# clean: remove the merged profile and the three stage stamp files from the
# current directory. The stage build directories and raw profiles are kept.
clean:
	rm -f $(PROFILE_FILE) stage2 stage1 stage0
