Skip to content

Commit

Permalink
Build protoc from source, rather than using a binary protoc.
Browse files Browse the repository at this point in the history
Migrate to using the Bazel rules_proto and rules_python packages to build protocol buffers, rather than rolling our own protobuf generation code.

Fixes tensorflow#327
  • Loading branch information
hawkinsp committed Dec 19, 2023
1 parent 9ee9191 commit 0add39d
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 70 deletions.
41 changes: 38 additions & 3 deletions WORKSPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Workspace file for lingvo."""

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("//lingvo:repo.bzl", "cc_tf_configure", "icu", "lingvo_protoc_deps", "lingvo_testonly_deps")
load("//lingvo:repo.bzl", "cc_tf_configure", "icu", "lingvo_testonly_deps")

http_archive(
name = "org_tensorflow",
Expand All @@ -14,10 +14,45 @@ load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", "

initialize_rbe_configs()

# rules_python, pinned to release 0.25.0 and verified by sha256.
# lingvo/lingvo.bzl loads py_proto_library from
# @rules_python//python:proto.bzl, so this repository has to be defined here.
http_archive(
name = "rules_python",
sha256 = "5868e73107a8e85d8f323806e60cad7283f34b32163ea6ff1020cf27abef6036",
strip_prefix = "rules_python-0.25.0",
url = "https://github.com/bazelbuild/rules_python/releases/download/0.25.0/rules_python-0.25.0.tar.gz",
)

load("@rules_python//python:repositories.bzl", "py_repositories")

# NOTE(review): expected to define the repositories rules_python itself
# needs -- confirm against rules_python's repositories.bzl.
py_repositories()

# Protocol Buffers source archive, pinned to v21.7 and verified by sha256.
# The labels in WELL_KNOWN_PROTO_LIBS (lingvo/lingvo.bzl) all point into this
# repository. Per the commit description, protoc is now built from source
# instead of being downloaded as a prebuilt binary.
http_archive(
name = "com_google_protobuf",
sha256 = "75be42bd736f4df6d702a0e4e4d30de9ee40eac024c4b845d17ae4cc831fe4ae",
strip_prefix = "protobuf-21.7",
urls = [
# Two locations for the same v21.7 archive: the Bazel mirror, then GitHub.
"https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/v21.7.tar.gz",
"https://github.com/protocolbuffers/protobuf/archive/v21.7.tar.gz",
],
)
load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")

# NOTE(review): expected to define the repositories protobuf itself depends
# on -- confirm against protobuf's protobuf_deps.bzl.
protobuf_deps()

# rules_proto, pinned to tag 5.3.0-21.7 and verified by sha256.
# NOTE(review): the "-21.7" suffix presumably tracks the protobuf release
# (the com_google_protobuf archive in this file uses protobuf-21.7) -- confirm
# before bumping either one independently.
http_archive(
name = "rules_proto",
sha256 = "dc3fb206a2cb3441b485eb1e423165b231235a1ea9b031b4433cf7bc1fa460dd",
strip_prefix = "rules_proto-5.3.0-21.7",
urls = [
"https://github.com/bazelbuild/rules_proto/archive/refs/tags/5.3.0-21.7.tar.gz",
],
)
load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains")
# Both setup macros come from rules_proto's repositories.bzl (loaded above).
rules_proto_dependencies()
rules_proto_toolchains()


cc_tf_configure()

lingvo_testonly_deps()

lingvo_protoc_deps()

icu()
33 changes: 31 additions & 2 deletions lingvo/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ load(

# Placeholder: load py_library
# Placeholder: load py_test
load("//lingvo:lingvo.bzl", "pytype_library", "pytype_strict_library")
load("//lingvo:lingvo.bzl", "pytype_library", "pytype_strict_library",
"WELL_KNOWN_PROTO_LIBS")

package(default_visibility = ["//visibility:public"])

Expand Down Expand Up @@ -327,10 +328,38 @@ lingvo_py_binary(
],
)

# Package-relative paths of the TensorFlow .proto files used by Lingvo protos.
# The list is consumed twice in this BUILD file: as the srcs of the
# :tf_protos proto_library and as the declared outs of the :tf_dot_protos
# genrule (this commit replaces the single tf_protos.tar output with it).
# Keep the two uses in sync by editing only this list.
TF_PROTOS = [
"tensorflow/core/framework/tensor.proto",
"tensorflow/core/framework/versions.proto",
"tensorflow/core/framework/op_def.proto",
"tensorflow/core/framework/resource_handle.proto",
"tensorflow/core/framework/function.proto",
"tensorflow/core/framework/graph_debug_info.proto",
"tensorflow/core/framework/node_def.proto",
"tensorflow/core/framework/graph.proto",
"tensorflow/core/framework/attr_value.proto",
"tensorflow/core/framework/variable.proto",
"tensorflow/core/framework/full_type.proto",
"tensorflow/core/framework/tensor_shape.proto",
"tensorflow/core/framework/types.proto",
"tensorflow/core/protobuf/trackable_object_graph.proto",
"tensorflow/core/protobuf/saver.proto",
"tensorflow/core/protobuf/struct.proto",
"tensorflow/core/protobuf/saved_object_graph.proto",
"tensorflow/core/protobuf/meta_graph.proto",
]

# proto_library over the TensorFlow .proto files listed in TF_PROTOS.
# lingvo_proto_cc and lingvo_proto_py (lingvo/lingvo.bzl) add
# "//lingvo:tf_protos" to the deps of every proto_library they declare.
proto_library(
name = "tf_protos",
srcs = TF_PROTOS,
# The srcs sit under the lingvo/ package directory; stripping "/lingvo"
# lets them be imported as "tensorflow/core/..." rather than
# "lingvo/tensorflow/core/...".
strip_import_prefix = "/lingvo",
deps = WELL_KNOWN_PROTO_LIBS,
)

genrule(
name = "tf_dot_protos",
srcs = [],
outs = ["tf_protos.tar"],
outs = TF_PROTOS,
cmd =
"$(location //lingvo/tools:" +
"generate_tf_dot_protos) $(location " +
Expand Down
79 changes: 34 additions & 45 deletions lingvo/lingvo.bzl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Implements custom rules for Lingvo."""

load("@rules_python//python:proto.bzl", "py_proto_library")

def tf_copts():
# "-Wno-sign-compare", "-mavx" removed for compat with aarch64
return ["-std=c++17"] + select({
Expand Down Expand Up @@ -107,65 +109,52 @@ def lingvo_cuda_py_test(name, tags = [], deps = [], **kwargs):
**kwargs
)

def _proto_gen_cc_src(name, basename):
native.genrule(
name = name,
srcs = [basename + ".proto"],
outs = [basename + ".pb.cc", basename + ".pb.h"],
tools = [
"@protobuf_protoc//:protoc_bin",
"//lingvo:tf_dot_protos",
],
# TODO(drpng): only unpack if tf_proto dependency is requested.
cmd = """
mkdir -p $(@D)/tf_proto.$$$$;
tar -C $(@D)/tf_proto.$$$$ -xf $(location //lingvo:tf_dot_protos);
$(location @protobuf_protoc//:protoc_bin) --proto_path=$(@D)/tf_proto.$$$$ --proto_path=. --cpp_out=$(GENDIR) $(<);
rm -rf $(@D)/tf_proto.$$$$
""",
)

def _proto_gen_py_src(name, basename):
native.genrule(
name = name,
srcs = [basename + ".proto"],
outs = [basename + "_pb2.py"],
tools = [
"@protobuf_protoc//:protoc_bin",
"//lingvo:tf_dot_protos",
],
# TODO(drpng): only unpack if tf_proto dependency is requested.
cmd = """
mkdir -p $(@D)/tf_proto.$$$$;
tar -C $(@D)/tf_proto.$$$$ -xf $(location //lingvo:tf_dot_protos);
$(location @protobuf_protoc//:protoc_bin) --proto_path=$(@D)/tf_proto.$$$$ --proto_path=. --python_out=$(GENDIR) $(<);
rm -rf $(@D)/tf_proto.$$$$
""",
)
# Labels of proto targets in the @com_google_protobuf repository.
# lingvo_proto_cc and lingvo_proto_py append this list to the deps of each
# proto_library they declare; lingvo/BUILD also loads it as the deps of
# :tf_protos.
WELL_KNOWN_PROTO_LIBS = [
"@com_google_protobuf//:any_proto",
"@com_google_protobuf//:api_proto",
"@com_google_protobuf//:compiler_plugin_proto",
"@com_google_protobuf//:descriptor_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:empty_proto",
"@com_google_protobuf//:field_mask_proto",
"@com_google_protobuf//:source_context_proto",
"@com_google_protobuf//:struct_proto",
"@com_google_protobuf//:timestamp_proto",
"@com_google_protobuf//:type_proto",
"@com_google_protobuf//:wrappers_proto",
]

def lingvo_proto_cc(name, src, deps = []):
# TODO(drpng): only works with proto with no deps within lingvo.
_unused = [deps]
basename = src.replace(".proto", "")
_proto_gen_cc_src(name + "_gencc", basename)
lingvo_cc_library(
native.proto_library(
name = name,
srcs = [basename + ".pb.cc"],
hdrs = [basename + ".pb.h"],
srcs = [src],
deps = [
"//lingvo:tf_protos",
] + WELL_KNOWN_PROTO_LIBS,
)
lingvo_cc_library(
name = "%s_cc" % name,
deps = [":%s" % name],
native.cc_proto_library(
name = name + "_cc",
deps = [":" + name]
)


def lingvo_proto_py(name, src, deps = []):
# TODO(drpng): only works with proto with no deps within lingvo.
_unused = [deps]
basename = src.replace(".proto", "")
_proto_gen_py_src(name + "_genpy", basename)
native.py_library(
native.proto_library(
name = name + "_pyproto",
srcs = [src],
deps = [
"//lingvo:tf_protos",
] + WELL_KNOWN_PROTO_LIBS,
)
py_proto_library(
name = name,
srcs = [basename + "_pb2.py"],
deps = [name + "_pyproto"],
)

# Placeholders to use until bazel supports pytype_{,strict_}{library,test,binary}.
Expand Down
16 changes: 0 additions & 16 deletions lingvo/repo.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -221,22 +221,6 @@ cc_library(
strip_prefix = "googletest-release-1.10.0",
)

def lingvo_protoc_deps():
http_archive(
name = "protobuf_protoc",
build_file_content = """
filegroup(
name = "protoc_bin",
srcs = ["bin/protoc"],
visibility = ["//visibility:public"],
)
""",
urls = [
"https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip",
],
sha256 = "3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6",
)

def icu():
third_party_http_archive(
name = "icu",
Expand Down
4 changes: 0 additions & 4 deletions lingvo/tools/generate_tf_dot_protos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,3 @@ mkdir -p ${dest}/tensorflow/core/framework
mkdir -p ${dest}/tensorflow/core/protobuf

${binary} ${dest}

# genrule requires statically determined outputs, so we package all
# into a single file.
tar -C ${dest} -cf ${dest}/tf_protos.tar tensorflow/core/{framework,protobuf}

0 comments on commit 0add39d

Please sign in to comment.