Bootstrap

TensorFlow on Spark 笔记

背景:需要在 Spark 2.4 上调用 TensorFlow 1.14 的 CNN 模型处理图片,然而官方当前的 jar 包(libtensorflow_jni-1.14.0.jar)有 bug,表现如下:

spark-shell --master local[10] --driver-memory 10G --jars s3://xxx/EMR/jars/tensorflow-1.14.0.jar,s3://xxx/EMR/jars/libtensorflow-1.14.0.jar,s3://xxx/EMR/jars/libtensorflow_jni-1.14.0.jar
 

import org.tensorflow.{Graph, Session, Tensor}
val graph = new Graph()
scala> Tensor.create(Array(1,2,3))
java.lang.UnsatisfiedLinkError: /mnt/tmp/tensorflow_native_libraries-1608707431068-0/libtensorflow_jni.so: libtensorflow_framework.so.1: cannot open shared object file: No such file or directory
  at java.lang.ClassLoader$NativeLibrary.load(Native Method)
  at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1934)
  at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1817)
  at java.lang.Runtime.load0(Runtime.java:810)
  at java.lang.System.load(System.java:1088)
  at org.tensorflow.NativeLibrary.load(NativeLibrary.java:101)
  at org.tensorflow.TensorFlow.init(TensorFlow.java:66)
  at org.tensorflow.TensorFlow.<clinit>(TensorFlow.java:70)
  at org.tensorflow.Tensor.<clinit>(Tensor.java:853)
  ... 49 elided

 

尝试解决:

先试试Java命令直接运行含有TensorFlow的jar,为此先将依赖添加到环境变量中:

TF_TYPE="cpu" # Default processor is CPU. If you want GPU, set to "gpu"
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L "https://download.tensorflow.google.cn/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.14.0.tar.gz" |   tar -xz -C ./jni
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   305  100   305    0     0  10892      0 --:--:-- --:--:-- --:--:-- 10892
100   467  100   467    0     0   7915      0 --:--:-- --:--:-- --:--:--  7915
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 49.8M  100 49.8M    0     0  5484k      0  0:00:09  0:00:09 --:--:-- 6412k

ll jni/
total 181268
lrwxrwxrwx 1 hadoop hadoop        28 Jan  1  2000 libtensorflow_framework.so -> libtensorflow_framework.so.1
lrwxrwxrwx 1 hadoop hadoop        33 Jan  1  2000 libtensorflow_framework.so.1 -> libtensorflow_framework.so.1.14.0
-r-xr-xr-x 1 hadoop hadoop  34748520 Jan  1  2000 libtensorflow_framework.so.1.14.0
-r-xr-xr-x 1 hadoop hadoop 150449736 Jan  1  2000 libtensorflow_jni.so
-r-xr-xr-x 1 hadoop hadoop    414358 Jan  1  2000 LICENSE

 

vi HelloTF.java

import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;
import org.tensorflow.TensorFlow;

public class HelloTF {
  public static void main(String[] args) throws Exception {
    try (Graph g = new Graph()) {
      final String value = "Hello from " + TensorFlow.version();

      try (Tensor t = Tensor.create(value.getBytes("UTF-8"))) {
        g.opBuilder("Const", "MyConst").setAttr("dtype", t.dataType()).setAttr("value", t).build();
      }
      try (Session s = new Session(g);
           Tensor output = s.runner().fetch("MyConst").run().get(0)) {
        System.out.println(new String(output.bytesValue(), "UTF-8"));
      }
    }
  }
}

javac -cp libtensorflow-1.14.0.jar HelloTF.java

 

java -cp libtensorflow-1.14.0.jar:. -Djava.library.path=./jni HelloTF
2020-12-23 07:04:36.287193: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2020-12-23 07:04:36.309812: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2999995000 Hz
2020-12-23 07:04:36.310521: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f2a90dba590 executing computations on platform Host. Devices:
2020-12-23 07:04:36.310542: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
Hello from 1.14.0

export PATH="$PATH:/home/hadoop/jni"
source /etc/profile

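添加环境变量后,在 Spark 中问题依旧,但此时至少能让 Java 后端调用 .pb 模型了。(注:Linux 的动态链接器是按 LD_LIBRARY_PATH 而不是 PATH 查找 .so 的,所以上面的 export PATH 对加载本地库并不起作用;前面的 Java 命令能跑通,靠的是 -Djava.library.path=./jni。)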

 

最终解决办法:

在 src/main/java 下新建 org.tensorflow 包,并在其中新建 Java 类 NativeLibrary,用来覆盖 jar 包中的同名类:

package org.tensorflow;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Replacement for the {@code NativeLibrary} class shipped inside the TensorFlow jar. Placing this
 * class in the application's own {@code org.tensorflow} package is intended to shadow the bundled
 * one and work around the "cannot find libtensorflow_framework.so.1" failure seen with
 * TensorFlow 1.14.
 * Reference: https://blog.csdn.net/handong01027/article/details/101371739
 *
 * <p>Helper class for loading the TensorFlow Java native library.
 *
 * <p>The Java TensorFlow bindings require a native (JNI) library. This library
 * (libtensorflow_jni.so on Linux, libtensorflow_jni.dylib on OS X, tensorflow_jni.dll on Windows)
 * can be made available to the JVM using the java.library.path System property (e.g., using
 * -Djava.library.path command-line argument). However, doing so requires an additional step of
 * configuration.
 *
 * <p>Alternatively, the native libraries can be packaged in a .jar, making them easily usable from
 * build systems like Maven. However, in such cases, the native library has to be extracted from the
 * .jar archive.
 *
 * <p>NativeLibrary.load() takes care of this. First looking for the library in java.library.path
 * and failing that, it tries to find the OS and architecture specific version of the library in the
 * set of ClassLoader resources (under org/tensorflow/native/OS-ARCH). The resources paths used for
 * lookup must be consistent with any packaging (such as on Maven Central) of the TensorFlow Java
 * native libraries.
 */
final class NativeLibrary {
    private static final boolean DEBUG =
            System.getProperty("org.tensorflow.NativeLibrary.DEBUG") != null;
    private static final String JNI_LIBNAME = "tensorflow_jni";

    /**
     * Makes sure the TensorFlow JNI library is loaded into this JVM.
     *
     * <p>If the native code is not already available and cannot be loaded via
     * {@code System.loadLibrary}, the JNI library (and, when present, the framework library it
     * depends on) is extracted from the class-loader resources into a fresh temporary directory and
     * loaded from there.
     *
     * @throws UnsatisfiedLinkError if the JNI library is not among the resources, or if it cannot
     *     be extracted (the underlying {@link IOException} is attached as the cause)
     */
    public static void load() {
        if (isLoaded() || tryLoadLibrary()) {
            // Either:
            // (1) The native library has already been statically loaded, OR
            // (2) The required native code has been statically linked (through a custom launcher), OR
            // (3) The native code is part of another library (such as an application-level library)
            // that has already been loaded. For example, tensorflow/examples/android and
            // tensorflow/tools/android/inference_interface include the required native code in
            // differently named libraries.
            //
            // Doesn't matter how, but it seems the native code is loaded, so nothing else to do.
            return;
        }
        // Native code is not present, perhaps it has been packaged into the .jar file containing this.
        // Extract the JNI library itself
        final String jniLibName = System.mapLibraryName(JNI_LIBNAME);
        final String jniResourceName = makeResourceName(jniLibName);
        log("jniResourceName: " + jniResourceName);
        final InputStream jniResource =
                NativeLibrary.class.getClassLoader().getResourceAsStream(jniResourceName);
        // Extract the JNI's dependency
        final String frameworkLibName =
                getVersionedLibraryName(System.mapLibraryName("tensorflow_framework"));
        final String frameworkResourceName = makeResourceName(frameworkLibName);
        log("frameworkResourceName: " + frameworkResourceName);
        final InputStream frameworkResource =
                NativeLibrary.class.getClassLoader().getResourceAsStream(frameworkResourceName);
        // Do not complain if the framework resource wasn't found. This may just mean that we're
        // building with --config=monolithic (in which case it's not needed and not included).
        if (jniResource == null) {
            // The framework stream may already be open; release it before giving up.
            closeQuietly(frameworkResource);
            throw new UnsatisfiedLinkError(
                    String.format(
                            "Cannot find TensorFlow native library for OS: %s, architecture: %s. See "
                                    + "https://github.com/tensorflow/tensorflow/tree/master/tensorflow/java/README.md"
                                    + " for possible solutions (such as building the library from source). Additional"
                                    + " information on attempts to find the native library can be obtained by adding"
                                    + " org.tensorflow.NativeLibrary.DEBUG=1 to the system properties of the JVM.",
                            os(), architecture()));
        }
        try {
            // Create a temporary directory for the extracted resource and its dependencies.
            final File tempPath = createTemporaryDirectory();
            // Deletions are in the reverse order of requests, so we need to request that the directory be
            // deleted first, so that it is empty when the request is fulfilled.
            tempPath.deleteOnExit();
            final String tempDirectory = tempPath.getCanonicalPath();
            if (frameworkResource != null) {
                extractResource(frameworkResource, frameworkLibName, tempDirectory);
            } else {
                log(
                        frameworkResourceName
                                + " not found. This is fine assuming "
                                + jniResourceName
                                + " is not built to depend on it.");
            }
            System.load(extractResource(jniResource, jniLibName, tempDirectory));
        } catch (IOException e) {
            final UnsatisfiedLinkError error =
                    new UnsatisfiedLinkError(
                            String.format(
                                    "Unable to extract native library into a temporary file (%s)",
                                    e.toString()));
            // UnsatisfiedLinkError has no cause-taking constructor; keep the stack trace anyway.
            error.initCause(e);
            throw error;
        } finally {
            // copy() closes a stream once it gets to it, but a failure before that point (temporary
            // directory creation, extraction of the first resource) would otherwise leak the rest.
            // Closing an already closed stream has no effect.
            closeQuietly(frameworkResource);
            closeQuietly(jniResource);
        }
    }

    /**
     * Attempts {@code System.loadLibrary} on the JNI library name.
     *
     * @return {@code true} if the library was loaded, {@code false} if the JVM reported an
     *     {@link UnsatisfiedLinkError}
     */
    private static boolean tryLoadLibrary() {
        try {
            System.loadLibrary(JNI_LIBNAME);
            return true;
        } catch (UnsatisfiedLinkError e) {
            log("tryLoadLibraryFailed: " + e.getMessage());
            return false;
        }
    }

    /**
     * Probes whether the native code is already usable by calling {@code TensorFlow.version()}
     * (presumably backed by native code) and treating an {@link UnsatisfiedLinkError} as "not
     * loaded".
     */
    private static boolean isLoaded() {
        try {
            TensorFlow.version();
            log("isLoaded: true");
            return true;
        } catch (UnsatisfiedLinkError e) {
            return false;
        }
    }

    /** Returns whether {@code baseName} exists as a resource under the OS/arch resource folder. */
    private static boolean resourceExists(String baseName) {
        return NativeLibrary.class.getClassLoader().getResource(makeResourceName(baseName)) != null;
    }

    /**
     * Maps a platform library file name to the versioned variant that is actually packaged, if any.
     *
     * @param libFilename file name as produced by {@code System.mapLibraryName}
     * @return the versioned name when such a resource exists, otherwise {@code libFilename}
     *     unchanged
     */
    private static String getVersionedLibraryName(String libFilename) {
        final String versionName = getMajorVersionNumber();

        // If we're on darwin, the versioned libraries look like blah.1.dylib.
        final String darwinSuffix = ".dylib";
        if (libFilename.endsWith(darwinSuffix)) {
            final String prefix = libFilename.substring(0, libFilename.length() - darwinSuffix.length());
            if (versionName != null) {
                final String darwinVersionedLibrary = prefix + "." + versionName + darwinSuffix;
                if (resourceExists(darwinVersionedLibrary)) {
                    return darwinVersionedLibrary;
                }
            } else {
                // If we're here, we're on darwin, but we couldn't figure out the major version number. We
                // already tried the library name without any changes, but let's do one final try for the
                // library with a .so suffix.
                final String darwinSoName = prefix + ".so";
                if (resourceExists(darwinSoName)) {
                    return darwinSoName;
                }
            }
        } else if (libFilename.endsWith(".so") && versionName != null) {
            // Libraries ending in ".so" are versioned like "libfoo.so.1", so try that.
            final String versionedSoName = libFilename + "." + versionName;
            if (resourceExists(versionedSoName)) {
                return versionedSoName;
            }
        }

        // Otherwise, we've got no idea.
        return libFilename;
    }

    /**
     * Returns the major version number of this TensorFlow Java API.
     *
     * <p>When the package reports no implementation version (or one without a '.'), this falls back
     * to {@code "1"} rather than giving up, so that a versioned framework library such as
     * libtensorflow_framework.so.1 is still looked for. {@code null} is returned only when the text
     * before the first dot is not an integer.
     */
    private static String getMajorVersionNumber() {
        // Class.getPackage() may return null when the class loader created no Package object.
        final Package pkg = NativeLibrary.class.getPackage();
        final String version = (pkg == null) ? null : pkg.getImplementationVersion();
        // expecting a string like 1.14.0, we want to get the first '1'.
        int dotIndex;
        if (version == null || (dotIndex = version.indexOf('.')) == -1) {
            return "1";
        }
        String majorVersion = version.substring(0, dotIndex);
        try {
            Integer.parseInt(majorVersion);
            return majorVersion;
        } catch (NumberFormatException unused) {
            return null;
        }
    }

    /**
     * Copies {@code resource} into {@code extractToDirectory/resourceName} and schedules the file
     * for deletion at JVM exit.
     *
     * @return the path of the extracted file
     * @throws IOException if the copy fails
     */
    private static String extractResource(
            InputStream resource, String resourceName, String extractToDirectory) throws IOException {
        final File dst = new File(extractToDirectory, resourceName);
        dst.deleteOnExit();
        final String dstPath = dst.toString();
        log("extracting native library to: " + dstPath);
        final long nbytes = copy(resource, dst);
        log(String.format("copied %d bytes to %s", nbytes, dstPath));
        return dstPath;
    }

    /** Normalizes the {@code os.name} system property to the name used in resource paths. */
    private static String os() {
        final String p = System.getProperty("os.name").toLowerCase();
        if (p.contains("linux")) {
            return "linux";
        } else if (p.contains("os x") || p.contains("darwin")) {
            return "darwin";
        } else if (p.contains("windows")) {
            return "windows";
        } else {
            return p.replaceAll("\\s", "");
        }
    }

    /** Normalizes the {@code os.arch} system property, mapping "amd64" to "x86_64". */
    private static String architecture() {
        final String arch = System.getProperty("os.arch").toLowerCase();
        return (arch.equals("amd64")) ? "x86_64" : arch;
    }

    /** Prints {@code msg} to standard error when the DEBUG system property is set. */
    private static void log(String msg) {
        if (DEBUG) {
            System.err.println("org.tensorflow.NativeLibrary: " + msg);
        }
    }

    /** Builds the class-loader resource path for {@code baseName} on the current OS/architecture. */
    private static String makeResourceName(String baseName) {
        return "org/tensorflow/native/" + String.format("%s-%s/", os(), architecture()) + baseName;
    }

    /**
     * Copies everything from {@code src} into {@code dstFile}. Both streams are closed on every
     * path, including when the destination cannot be opened or closing one of them fails.
     *
     * @return the number of bytes copied
     * @throws IOException if opening, reading, writing or closing fails
     */
    private static long copy(InputStream src, File dstFile) throws IOException {
        try (InputStream in = src;
             FileOutputStream dst = new FileOutputStream(dstFile)) {
            final byte[] buffer = new byte[1 << 20]; // 1MB
            long ret = 0;
            int n;
            while ((n = in.read(buffer)) >= 0) {
                dst.write(buffer, 0, n);
                ret += n;
            }
            return ret;
        }
    }

    /** Closes {@code stream} if it is non-null; a failure to close is only logged. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: the outcome of load() does not depend on this close succeeding.
            log("failed to close resource stream: " + ignored);
        }
    }

    // Shamelessly adapted from Guava to avoid using java.nio, for Android API
    // compatibility.
    private static File createTemporaryDirectory() {
        File baseDirectory = new File(System.getProperty("java.io.tmpdir"));
        String directoryName = "tensorflow_native_libraries-" + System.currentTimeMillis() + "-";
        for (int attempt = 0; attempt < 1000; attempt++) {
            File temporaryDirectory = new File(baseDirectory, directoryName + attempt);
            if (temporaryDirectory.mkdir()) {
                return temporaryDirectory;
            }
        }
        throw new IllegalStateException(
                "Could not create a temporary directory (tried to make "
                        + directoryName
                        + "*) to extract TensorFlow native libraries.");
    }

    private NativeLibrary() {}
}

 

 

参考:

https://blog.csdn.net/noiplcx/article/details/83270552

https://blog.csdn.net/handong01027/article/details/101371739

 

;