背景:需要在 Spark 2.4 上调用 TensorFlow 1.14 的 CNN 模型处理图片,然而官方当前的 jar 包(libtensorflow_jni-1.14.0.jar)有 bug,表现如下:
spark-shell --master local[10] --driver-memory 10G --jars s3://xxx/EMR/jars/tensorflow-1.14.0.jar,s3://xxx/EMR/jars/libtensorflow-1.14.0.jar,s3://xxx/EMR/jars/libtensorflow_jni-1.14.0.jar
import org.tensorflow.{Graph, Session, Tensor}
val graph = new Graph()
scala> Tensor.create(Array(1,2,3))
java.lang.UnsatisfiedLinkError: /mnt/tmp/tensorflow_native_libraries-1608707431068-0/libtensorflow_jni.so: libtensorflow_framework.so.1: cannot open shared object file: No such file or directory
at java.lang.ClassLoader$NativeLibrary.load(Native Method)
at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1934)
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1817)
at java.lang.Runtime.load0(Runtime.java:810)
at java.lang.System.load(System.java:1088)
at org.tensorflow.NativeLibrary.load(NativeLibrary.java:101)
at org.tensorflow.TensorFlow.init(TensorFlow.java:66)
at org.tensorflow.TensorFlow.<clinit>(TensorFlow.java:70)
at org.tensorflow.Tensor.<clinit>(Tensor.java:853)
... 49 elided
尝试解决:
先试试用 java 命令直接运行依赖 TensorFlow 的程序,为此先把本地(JNI)依赖库下载到 ./jni 目录,稍后再将该目录加入环境变量:
TF_TYPE="cpu" # Default processor is CPU. If you want GPU, set to "gpu"
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L "https://download.tensorflow.google.cn/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.14.0.tar.gz" | tar -xz -C ./jni
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 305 100 305 0 0 10892 0 --:--:-- --:--:-- --:--:-- 10892
100 467 100 467 0 0 7915 0 --:--:-- --:--:-- --:--:-- 7915
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 49.8M 100 49.8M 0 0 5484k 0 0:00:09 0:00:09 --:--:-- 6412k
ll jni/
total 181268
lrwxrwxrwx 1 hadoop hadoop 28 Jan 1 2000 libtensorflow_framework.so -> libtensorflow_framework.so.1
lrwxrwxrwx 1 hadoop hadoop 33 Jan 1 2000 libtensorflow_framework.so.1 -> libtensorflow_framework.so.1.14.0
-r-xr-xr-x 1 hadoop hadoop 34748520 Jan 1 2000 libtensorflow_framework.so.1.14.0
-r-xr-xr-x 1 hadoop hadoop 150449736 Jan 1 2000 libtensorflow_jni.so
-r-xr-xr-x 1 hadoop hadoop 414358 Jan 1 2000 LICENSE
vi HelloTF.java
import org.tensorflow.Graph;
import org.tensorflow.Session;
import org.tensorflow.Tensor;
import org.tensorflow.TensorFlow;
/**
 * Minimal smoke test for the TensorFlow Java bindings.
 *
 * <p>Builds a graph holding a single string constant ("Hello from " followed by the TensorFlow
 * version), evaluates that constant in a session and prints the result to standard output.
 */
public class HelloTF {
  /** Name of the only operation added to the graph; also used to fetch its output. */
  private static final String CONST_NAME = "MyConst";

  /** Charset name used both to encode the greeting and to decode the fetched bytes. */
  private static final String CHARSET_NAME = "UTF-8";

  public static void main(String[] args) throws Exception {
    try (Graph graph = new Graph()) {
      final String greeting = "Hello from " + TensorFlow.version();
      addStringConstant(graph, greeting.getBytes(CHARSET_NAME));

      // Evaluate the constant and print it while the session and result tensor are still open.
      try (Session session = new Session(graph);
          Tensor fetched = session.runner().fetch(CONST_NAME).run().get(0)) {
        final byte[] raw = fetched.bytesValue();
        System.out.println(new String(raw, CHARSET_NAME));
      }
    }
  }

  /**
   * Adds a "Const" operation named {@link #CONST_NAME} to {@code graph} whose value is
   * {@code payload}. The temporary tensor carrying the payload is closed once the operation has
   * been built.
   */
  private static void addStringConstant(Graph graph, byte[] payload) {
    try (Tensor literal = Tensor.create(payload)) {
      graph
          .opBuilder("Const", CONST_NAME)
          .setAttr("dtype", literal.dataType())
          .setAttr("value", literal)
          .build();
    }
  }
}
javac -cp libtensorflow-1.14.0.jar HelloTF.java
java -cp libtensorflow-1.14.0.jar:. -Djava.library.path=./jni HelloTF
2020-12-23 07:04:36.287193: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2020-12-23 07:04:36.309812: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2999995000 Hz
2020-12-23 07:04:36.310521: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f2a90dba590 executing computations on platform Host. Devices:
2020-12-23 07:04:36.310542: I tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device (0): <undefined>, <undefined>
Hello from 1.14.0
export PATH="$PATH:/home/hadoop/jni"
source /etc/profile
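添加环境变量后,在 Spark 中问题依旧(注意:PATH 只用于查找可执行文件,Linux 动态链接器查找共享库依据的是 LD_LIBRARY_PATH 等设置,因此仅修改 PATH 对加载 .so 并没有帮助)。不过此时借助 -Djava.library.path,至少能让 Java 后端调用 .pb 模型了。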
最终解决办法:
在 src/main/java 下新建 org.tensorflow 包,并在其中新建 Java 类 NativeLibrary,用来覆盖 jar 包中的同名类:
package org.tensorflow; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; /** * 这个NativeLibrary会覆盖jar包里的方法,解决找不到tensorflow1.14 libtensorflow_framework.so.1的问题。 * 参考:https://blog.csdn.net/handong01027/article/details/101371739 * Helper class for loading the TensorFlow Java native library. * * <p>The Java TensorFlow bindings require a native (JNI) library. This library * (libtensorflow_jni.so on Linux, libtensorflow_jni.dylib on OS X, tensorflow_jni.dll on Windows) * can be made available to the JVM using the java.library.path System property (e.g., using * -Djava.library.path command-line argument). However, doing so requires an additional step of * configuration. * * <p>Alternatively, the native libraries can be packaed in a .jar, making them easily usable from * build systems like Maven. However, in such cases, the native library has to be extracted from the * .jar archive. * * <p>NativeLibrary.load() takes care of this. First looking for the library in java.library.path * and failing that, it tries to find the OS and architecture specific version of the library in the * set of ClassLoader resources (under org/tensorflow/native/OS-ARCH). The resources paths used for * lookup must be consistent with any packaging (such as on Maven Central) of the TensorFlow Java * native libraries. */ final class NativeLibrary { private static final boolean DEBUG = System.getProperty("org.tensorflow.NativeLibrary.DEBUG") != null; private static final String JNI_LIBNAME = "tensorflow_jni"; public static void load() { if (isLoaded() || tryLoadLibrary()) { // Either: // (1) The native library has already been statically loaded, OR // (2) The required native code has been statically linked (through a custom launcher), OR // (3) The native code is part of another library (such as an application-level library) // that has already been loaded. 
For example, tensorflow/examples/android and // tensorflow/tools/android/inference_interface include the required native code in // differently named libraries. // // Doesn't matter how, but it seems the native code is loaded, so nothing else to do. return; } // Native code is not present, perhaps it has been packaged into the .jar file containing this. // Extract the JNI library itself final String jniLibName = System.mapLibraryName(JNI_LIBNAME); final String jniResourceName = makeResourceName(jniLibName); log("jniResourceName: " + jniResourceName); final InputStream jniResource = NativeLibrary.class.getClassLoader().getResourceAsStream(jniResourceName); // Extract the JNI's dependency final String frameworkLibName = getVersionedLibraryName(System.mapLibraryName("tensorflow_framework")); final String frameworkResourceName = makeResourceName(frameworkLibName); log("frameworkResourceName: " + frameworkResourceName); final InputStream frameworkResource = NativeLibrary.class.getClassLoader().getResourceAsStream(frameworkResourceName); // Do not complain if the framework resource wasn't found. This may just mean that we're // building with --config=monolithic (in which case it's not needed and not included). if (jniResource == null) { throw new UnsatisfiedLinkError( String.format( "Cannot find TensorFlow native library for OS: %s, architecture: %s. See " + "https://github.com/tensorflow/tensorflow/tree/master/tensorflow/java/README.md" + " for possible solutions (such as building the library from source). Additional" + " information on attempts to find the native library can be obtained by adding" + " org.tensorflow.NativeLibrary.DEBUG=1 to the system properties of the JVM.", os(), architecture())); } try { // Create a temporary directory for the extracted resource and its dependencies. 
final File tempPath = createTemporaryDirectory(); // Deletions are in the reverse order of requests, so we need to request that the directory be // deleted first, so that it is empty when the request is fulfilled. tempPath.deleteOnExit(); final String tempDirectory = tempPath.getCanonicalPath(); if (frameworkResource != null) { extractResource(frameworkResource, frameworkLibName, tempDirectory); } else { log( frameworkResourceName + " not found. This is fine assuming " + jniResourceName + " is not built to depend on it."); } System.load(extractResource(jniResource, jniLibName, tempDirectory)); } catch (IOException e) { throw new UnsatisfiedLinkError( String.format( "Unable to extract native library into a temporary file (%s)", e.toString())); } } private static boolean tryLoadLibrary() { try { System.loadLibrary(JNI_LIBNAME); return true; } catch (UnsatisfiedLinkError e) { log("tryLoadLibraryFailed: " + e.getMessage()); return false; } } private static boolean isLoaded() { try { TensorFlow.version(); log("isLoaded: true"); return true; } catch (UnsatisfiedLinkError e) { return false; } } private static boolean resourceExists(String baseName) { return NativeLibrary.class.getClassLoader().getResource(makeResourceName(baseName)) != null; } private static String getVersionedLibraryName(String libFilename) { final String versionName = getMajorVersionNumber(); // If we're on darwin, the versioned libraries look like blah.1.dylib. final String darwinSuffix = ".dylib"; if (libFilename.endsWith(darwinSuffix)) { final String prefix = libFilename.substring(0, libFilename.length() - darwinSuffix.length()); if (versionName != null) { final String darwinVersionedLibrary = prefix + "." + versionName + darwinSuffix; if (resourceExists(darwinVersionedLibrary)) { return darwinVersionedLibrary; } } else { // If we're here, we're on darwin, but we couldn't figure out the major version number. 
We // already tried the library name without any changes, but let's do one final try for the // library with a .so suffix. final String darwinSoName = prefix + ".so"; if (resourceExists(darwinSoName)) { return darwinSoName; } } } else if (libFilename.endsWith(".so")) { // Libraries ending in ".so" are versioned like "libfoo.so.1", so try that. final String versionedSoName = libFilename + "." + versionName; if (versionName != null && resourceExists(versionedSoName)) { return versionedSoName; } } // Otherwise, we've got no idea. return libFilename; } /** * Returns the major version number of this TensorFlow Java API, or {@code null} if it cannot be * determined. */ private static String getMajorVersionNumber() { String version = NativeLibrary.class.getPackage().getImplementationVersion(); // expecting a string like 1.14.0, we want to get the first '1'. int dotIndex; if (version == null || (dotIndex = version.indexOf('.')) == -1) { return "1"; } String majorVersion = version.substring(0, dotIndex); try { Integer.parseInt(majorVersion); return majorVersion; } catch (NumberFormatException unused) { return null; } } private static String extractResource( InputStream resource, String resourceName, String extractToDirectory) throws IOException { final File dst = new File(extractToDirectory, resourceName); dst.deleteOnExit(); final String dstPath = dst.toString(); log("extracting native library to: " + dstPath); final long nbytes = copy(resource, dst); log(String.format("copied %d bytes to %s", nbytes, dstPath)); return dstPath; } private static String os() { final String p = System.getProperty("os.name").toLowerCase(); if (p.contains("linux")) { return "linux"; } else if (p.contains("os x") || p.contains("darwin")) { return "darwin"; } else if (p.contains("windows")) { return "windows"; } else { return p.replaceAll("\\s", ""); } } private static String architecture() { final String arch = System.getProperty("os.arch").toLowerCase(); return (arch.equals("amd64")) ? 
"x86_64" : arch; } private static void log(String msg) { if (DEBUG) { System.err.println("org.tensorflow.NativeLibrary: " + msg); } } private static String makeResourceName(String baseName) { return "org/tensorflow/native/" + String.format("%s-%s/", os(), architecture()) + baseName; } private static long copy(InputStream src, File dstFile) throws IOException { FileOutputStream dst = new FileOutputStream(dstFile); try { byte[] buffer = new byte[1 << 20]; // 1MB long ret = 0; int n = 0; while ((n = src.read(buffer)) >= 0) { dst.write(buffer, 0, n); ret += n; } return ret; } finally { dst.close(); src.close(); } } // Shamelessly adapted from Guava to avoid using java.nio, for Android API // compatibility. private static File createTemporaryDirectory() { File baseDirectory = new File(System.getProperty("java.io.tmpdir")); String directoryName = "tensorflow_native_libraries-" + System.currentTimeMillis() + "-"; for (int attempt = 0; attempt < 1000; attempt++) { File temporaryDirectory = new File(baseDirectory, directoryName + attempt); if (temporaryDirectory.mkdir()) { return temporaryDirectory; } } throw new IllegalStateException( "Could not create a temporary directory (tried to make " + directoryName + "*) to extract TensorFlow native libraries."); } private NativeLibrary() {} }
参考:
https://blog.csdn.net/noiplcx/article/details/83270552
https://blog.csdn.net/handong01027/article/details/101371739