# chromium/third_party/sentencepiece/src/src/CMakeLists.txt

# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abseil selection: either compile the flags implementation kept under
# ../third_party/absl, or link against the absl:: imported targets.
if (NOT SPM_USE_EXTERNAL_ABSL)
  # Bundled copy: flag.cc is compiled as part of the library sources.
  set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc)
else()
  # External Abseil: no local sources, only link dependencies plus a
  # preprocessor define announcing the choice to the C++ code.
  set(ABSL_STRINGS_SRCS "")
  set(ABSL_FLAGS_SRCS "")
  list(APPEND SPM_LIBS absl::strings absl::flags absl::flags_parse)
  if (NOT MSVC)
    add_definitions("-D_USE_EXTERNAL_ABSL")
  else()
    add_definitions("/D_USE_EXTERNAL_ABSL")
  endif()
endif()

# Protobuf selection: either a protobuf found with find_package(), with the
# *.pb.{h,cc} files generated from the .proto definitions, or the
# pre-generated files in builtin_pb/ together with the protobuf-lite sources
# kept under ../third_party/protobuf-lite.
if (NOT SPM_USE_BUILTIN_PROTOBUF)
  find_package(Protobuf REQUIRED)
  include_directories(${Protobuf_INCLUDE_DIRS})
  protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
  protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)
  # Nothing from the bundled runtime is compiled in this configuration.
  set(PROTOBUF_LITE_SRCS "")
  include_directories(${PROTOBUF_INCLUDE_DIR})
  if (NOT MSVC)
    add_definitions("-D_USE_EXTERNAL_PROTOBUF")
  else()
    add_definitions("/D_USE_EXTERNAL_PROTOBUF")
  endif()
else()
  # Pre-generated message classes.
  set(SPM_PROTO_HDRS builtin_pb/sentencepiece.pb.h)
  set(SPM_PROTO_SRCS builtin_pb/sentencepiece.pb.cc)
  set(SPM_MODEL_PROTO_HDRS builtin_pb/sentencepiece_model.pb.h)
  set(SPM_MODEL_PROTO_SRCS builtin_pb/sentencepiece_model.pb.cc)

  # The runtime is compiled from source, so there is no library to link.
  set(PROTOBUF_LITE_LIBRARY "")
  set(spm_protobuf_lite_dir ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite)
  set(PROTOBUF_LITE_SRCS "")
  foreach (spm_protobuf_lite_src
      arena.cc
      arenastring.cc
      bytestream.cc
      coded_stream.cc
      common.cc
      extension_set.cc
      generated_enum_util.cc
      generated_message_table_driven_lite.cc
      generated_message_util.cc
      implicit_weak_message.cc
      int128.cc
      io_win32.cc
      message_lite.cc
      parse_context.cc
      repeated_field.cc
      status.cc
      statusor.cc
      stringpiece.cc
      stringprintf.cc
      structurally_valid.cc
      strutil.cc
      time.cc
      wire_format_lite.cc
      zero_copy_stream.cc
      zero_copy_stream_impl.cc
      zero_copy_stream_impl_lite.cc)
    list(APPEND PROTOBUF_LITE_SRCS ${spm_protobuf_lite_dir}/${spm_protobuf_lite_src})
  endforeach()

  # Each flag set is passed as one quoted string, exactly as before.
  if (NOT MSVC)
    add_definitions("-pthread -DHAVE_PTHREAD=1 -Wno-sign-compare -Wno-deprecated-declarations")
  else()
    add_definitions("/DHAVE_PTHREAD /wd4018 /wd4514")
  endif()
  include_directories(${spm_protobuf_lite_dir})
  include_directories(builtin_pb)
endif()

# Header search path shared by every target in this directory: the build
# directory first, then the ../third_party tree.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../third_party)

# _USE_INTERNAL_STRING_VIEW is always defined; only the flag prefix differs
# between MSVC and other compilers.
if (NOT MSVC)
  add_definitions("-D_USE_INTERNAL_STRING_VIEW")
else()
  add_definitions("/D_USE_INTERNAL_STRING_VIEW")
endif()

# Sources of the core `sentencepiece` library: the protobuf-lite runtime
# (empty when an external protobuf is used), the protobuf message classes,
# the library's own headers and implementation files, and the Abseil sources
# selected earlier in this file.
set(SPM_SRCS
  ${PROTOBUF_LITE_SRCS}
  ${SPM_PROTO_HDRS}
  ${SPM_PROTO_SRCS}
  ${SPM_MODEL_PROTO_HDRS}
  ${SPM_MODEL_PROTO_SRCS}
  bpe_model.h
  common.h
  normalizer.h
  util.h
  freelist.h
  filesystem.h
  init.h
  sentencepiece_processor.h
  word_model.h
  model_factory.h
  char_model.h
  model_interface.h
  testharness.h
  unigram_model.h
  bpe_model.cc
  char_model.cc
  error.cc
  filesystem.cc
  model_factory.cc
  model_interface.cc
  normalizer.cc
  sentencepiece_processor.cc
  unigram_model.cc
  util.cc
  word_model.cc
  ${ABSL_STRINGS_SRCS}
  ${ABSL_FLAGS_SRCS})

# Sources of the `sentencepiece_train` library. Only the protobuf headers
# are listed here; the protobuf .cc files are part of SPM_SRCS.
set(SPM_TRAIN_SRCS
  ${SPM_PROTO_HDRS}
  ${SPM_MODEL_PROTO_HDRS}
  builder.h
  normalization_rule.h
  unicode_script.h
  unicode_script_map.h
  trainer_factory.h
  trainer_interface.h
  unigram_model_trainer.h
  word_model_trainer.h
  char_model_trainer.h
  bpe_model_trainer.h
  sentencepiece_trainer.h
  pretokenizer_for_training.h
  builder.cc
  unicode_script.cc
  trainer_factory.cc
  trainer_interface.cc
  unigram_model_trainer.cc
  word_model_trainer.cc
  char_model_trainer.cc
  bpe_model_trainer.cc
  sentencepiece_trainer.cc
  pretokenizer_for_training.cc)

# Sources of the `spm_test` executable: the test harness, the test entry
# point (test_main.cc) and one *_test.cc file per unit under test.
set(SPM_TEST_SRCS
  ${SPM_PROTO_HDRS}
  ${SPM_MODEL_PROTO_HDRS}
  testharness.h
  bpe_model_test.cc
  bpe_model_trainer_test.cc
  builder_test.cc
  char_model_test.cc
  char_model_trainer_test.cc
  filesystem_test.cc
  init_test.cc
  model_factory_test.cc
  model_interface_test.cc
  normalizer_test.cc
  sentencepiece_processor_test.cc
  sentencepiece_trainer_test.cc
  test_main.cc
  testharness.cc
  trainer_factory_test.cc
  trainer_interface_test.cc
  unicode_script_test.cc
  unigram_model_test.cc
  unigram_model_trainer_test.cc
  util_test.cc
  word_model_test.cc
  word_model_trainer_test.cc
  pretokenizer_for_training_test.cc)

# Threads are mandatory. PROTOBUF_LITE_LIBRARY is empty when the bundled
# protobuf-lite sources are compiled in, so it then contributes nothing.
find_package(Threads REQUIRED)
list(APPEND SPM_LIBS ${PROTOBUF_LITE_LIBRARY} Threads::Threads)

# Optional ICU support, enabled together with the ENABLE_NFKC_COMPILE define.
if (SPM_ENABLE_NFKC_COMPILE)
  find_package(ICU 4.4 REQUIRED COMPONENTS i18n data uc)
  add_definitions(-DENABLE_NFKC_COMPILE)
  include_directories(${ICU_INCLUDE_DIRS})
  list(APPEND SPM_LIBS ICU::i18n)
  list(APPEND SPM_LIBS ICU::data)
  list(APPEND SPM_LIBS ICU::uc)
endif()

# Optional TCMalloc: look for the minimal variant (the static archive when
# SPM_TCMALLOC_STATIC is set). When it is found, link it and disable the
# compiler's builtin malloc/calloc/realloc/free; otherwise only report it.
if (SPM_ENABLE_TCMALLOC)
  if (SPM_TCMALLOC_STATIC)
    set(spm_tcmalloc_name libtcmalloc_minimal.a)
  else()
    set(spm_tcmalloc_name tcmalloc_minimal)
  endif()
  find_library(TCMALLOC_LIB NAMES ${spm_tcmalloc_name})

  if (NOT TCMALLOC_LIB)
    message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}")
  else()
    message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}")
    list(APPEND SPM_LIBS ${TCMALLOC_LIB})
    add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free)
  endif()
endif()

# On the processor families listed below, link libatomic when it can be
# found.
#
# The processor name is expanded inside quotes so that an empty
# CMAKE_SYSTEM_PROCESSOR (e.g. a toolchain file that does not set it) is
# compared as an empty string rather than vanishing from the condition and
# turning it into a configure-time syntax error. One regex alternation is
# equivalent to OR-ing the individual MATCHES tests.
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "arm|mips|m68k|ppc|powerpc|aarch|sh4")
  find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
  if (ATOMIC_LIB)
    message(STATUS "Found atomic: ${ATOMIC_LIB}")
    # Linked by name; the path found above is only used as an existence check.
    list(APPEND SPM_LIBS "atomic")
  endif()
endif()


# The static archives are always built. Their dependencies are declared
# INTERFACE, i.e. as link requirements of whatever links the archives.
add_library(sentencepiece-static STATIC ${SPM_SRCS})
add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})

# The archives are written out under the plain library names.
set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece")
set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train")

# `sentencepiece` / `sentencepiece_train` are real shared libraries when
# SPM_ENABLE_SHARED is set, and aliases of the static archives otherwise.
if (NOT SPM_ENABLE_SHARED)
  add_library(sentencepiece ALIAS sentencepiece-static)
  add_library(sentencepiece_train ALIAS sentencepiece_train-static)
  set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
else()
  add_library(sentencepiece SHARED ${SPM_SRCS})
  add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
  target_link_libraries(sentencepiece ${SPM_LIBS})
  target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
  set(SPM_INSTALLTARGETS
    sentencepiece
    sentencepiece_train
    sentencepiece-static
    sentencepiece_train-static)
  set_target_properties(sentencepiece sentencepiece_train PROPERTIES
    SOVERSION 0
    VERSION 0.0.0
    WINDOWS_EXPORT_ALL_SYMBOLS YES)
  # The static archives share the shared libraries' output names, so the
  # import libraries get a distinct suffix on Windows toolchains.
  if (MSVC)
    set_target_properties(sentencepiece sentencepiece_train PROPERTIES
      IMPORT_SUFFIX "_import.lib")
  elseif (MINGW)
    set_target_properties(sentencepiece sentencepiece_train PROPERTIES
      IMPORT_SUFFIX ".dll.a")
  endif()
endif()

# Compiler settings for every toolchain except MSVC.
if (NOT MSVC)
  # Coverage builds turn optimisation off and add instrumentation; regular
  # builds use -O3. The flags are prepended, so anything already present in
  # CMAKE_CXX_FLAGS comes later on the command line.
  if (SPM_COVERAGE)
    set(spm_leading_cxx_flags "-O0 -Wall -fPIC -coverage")
  else()
    set(spm_leading_cxx_flags "-O3 -Wall -fPIC")
  endif()
  set(CMAKE_CXX_FLAGS "${spm_leading_cxx_flags} ${CMAKE_CXX_FLAGS}")

  if (SPM_ENABLE_TENSORFLOW_SHARED)
    add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
  endif()

  if (SPM_NO_THREADLOCAL)
    add_definitions(-DSPM_NO_THREADLOCAL=1 -DGOOGLE_PROTOBUF_NO_THREADLOCAL=1)
  endif()

  # Per-file warning suppressions for the protobuf sources and the tests.
  set_source_files_properties(
    sentencepiece.pb.cc
    sentencepiece_model.pb.cc
    PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation")
  set_source_files_properties(
    ${SPM_TEST_SRCS}
    PROPERTIES COMPILE_FLAGS "-Wno-sign-compare")

  # Only the real shared-library targets get -DPIC; without SPM_ENABLE_SHARED
  # these names are aliases, which cannot carry properties of their own.
  if (SPM_ENABLE_SHARED)
    set_property(TARGET sentencepiece sentencepiece_train
      APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC")
  endif()
endif()

# Command-line tools. Each one is built from <tool>_main.cc, linked against
# the core library, and added to the install list.
foreach (spm_tool spm_encode spm_decode spm_normalize spm_train spm_export_vocab)
  add_executable(${spm_tool} ${spm_tool}_main.cc)
  target_link_libraries(${spm_tool} sentencepiece)
  list(APPEND SPM_INSTALLTARGETS ${spm_tool})
endforeach()

# These two tools additionally need the training library.
target_link_libraries(spm_normalize sentencepiece_train)
target_link_libraries(spm_train sentencepiece_train)

# compile_charsmap exists only when NFKC compilation is enabled, and it is
# not part of the install list.
if (SPM_ENABLE_NFKC_COMPILE)
  add_executable(compile_charsmap compile_charsmap_main.cc)
  target_link_libraries(compile_charsmap sentencepiece sentencepiece_train)
endif()

# Install the libraries and tools. A BUNDLE destination is supplied only
# when targeting iOS; the other destinations are the same on every platform.
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
  set(spm_bundle_destination BUNDLE DESTINATION ${CMAKE_INSTALL_BINDIR})
else()
  set(spm_bundle_destination "")
endif()

install(TARGETS ${SPM_INSTALLTARGETS}
  ${spm_bundle_destination}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})

# Public headers.
install(FILES
  sentencepiece_trainer.h
  sentencepiece_processor.h
  DESTINATION ${CMAKE_INSTALL_INCDIR})

# The generated protobuf header is installed only when protobuf is external.
if (NOT SPM_USE_BUILTIN_PROTOBUF)
  install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCDIR})
endif()

# Data directory handed to the test binary via --test_srcdir, converted to
# the platform's native path format.
file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir)

# Unit tests are built when explicitly requested or when coverage is enabled.
if (SPM_BUILD_TEST OR SPM_COVERAGE)
  enable_testing()
  # SPM_TEST_SRCS already contains test_main.cc, so it is not listed a
  # second time here.
  add_executable(spm_test ${SPM_TEST_SRCS})

  target_link_libraries(spm_test sentencepiece sentencepiece_train)
  if (SPM_COVERAGE)
    # Coverage builds additionally link the gcov runtime.
    target_link_libraries(spm_test "-lgcov")
  endif()

  # Memory-check configuration: the options are set and valgrind is looked
  # up before the Dart module is included.
  set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1")
  find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind)
  include(Dart)

  add_test(NAME sentencepiece_test
    COMMAND $<TARGET_FILE:spm_test> --test_srcdir=${data_dir})
endif()

# Optional `coverage` target: runs spm_test, captures lcov data from the
# current directory (-d .), removes the listed path patterns from the
# capture, and renders an HTML report into lcov_html.
if (SPM_COVERAGE)
  add_custom_target(coverage
    # NOTE(review): no later command in this target references this directory.
    COMMAND mkdir -p coverage
    # Run the unit tests so that coverage data is produced.
    COMMAND $<TARGET_FILE:spm_test> --test_srcdir=${data_dir}
    # Capture the counters into coverage.info.
    COMMAND lcov -c -d . -o coverage.info
    # Remove the listed patterns; the result overwrites coverage.info.
    COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info
    COMMAND mkdir -p lcov_html
    COMMAND genhtml -o lcov_html coverage.info)
  # Make sure the test binary is built before the target's commands run.
  add_dependencies(coverage spm_test)
endif()

# On iOS every command-line tool gets the same bundle identifier.
# set_xcode_property() is not defined in this file; it has to be provided
# by the including project or toolchain.
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
  foreach (spm_ios_tool spm_encode spm_decode spm_normalize spm_train spm_export_vocab)
    set_xcode_property(${spm_ios_tool} PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All)
  endforeach()
endif()