说明:通过麦克风采集语音流,并通过WebSocket发送到讯飞实时语音转写服务进行解析。
1、首先注册:进入官网(讯飞开放平台-以语音交互为核心的人工智能开放平台),进入右侧控制台,登录或注册后,申请实时语音转写项目,生成APPID和APIKey,后面代码中要使用
创建新应用
然后进入该应用中,选择左侧实时语音转写,可以看到自己的APPID和APIKey,后续代码中使用
2、 下载java对应API文档(建议wss):实时语音转写 API 文档 | 讯飞开放平台文档中心
3、将demo文件导入IDEA中
4、官方demo是读取.pcm音频文件并将其转写为文字输出,而实际应用场景中往往需要通过麦克风实时监听并转写(语音会议、直播字幕等),故对demo进行改造,以下为关键代码。
设置音频属性,将音频流发送。
附 RTASRTest.java代码
package com.iflytek.voicecloud.rtasr;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.iflytek.voicecloud.rtasr.util.EncryptUtil;
import org.java_websocket.WebSocket.READYSTATE;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.drafts.Draft;
import org.java_websocket.handshake.ServerHandshake;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Date;
import java.util.Objects;
import java.util.concurrent.CountDownLatch;
/**
 * Real-time speech transcription client: captures audio from the default microphone, streams it
 * to the RTASR WebSocket endpoint and prints the transcription results to standard output.
 *
 * <p>This demo is only a simple calling example and is not suitable for production use.
 *
 * @author ming
 */
public class RTASRTest {
    // Application id (created in the iFlytek console).
    private static final String APPID = "xxx";
    // Secret key that belongs to the appid.
    private static final String SECRET_KEY = "xxx";

    // Request address.
    private static final String HOST = "rtasr.xfyun.cn/v1/ws";
    private static final String BASE_URL = "wss://" + HOST;
    private static final String ORIGIN = "https://" + HOST;

    // Size of each audio frame sent to the server: 1280 bytes
    // (40 ms of audio at 16 kHz, 16-bit, mono: 16000 * 2 * 0.04 = 1280).
    private static final int CHUNCKED_SIZE = 1280;

    // Capture format: 16 kHz sample rate, 16-bit samples, single channel.
    private static final float SAMPLE_RATE_HZ = 16000.0F;
    private static final int SAMPLE_SIZE_BITS = 16;
    private static final int CHANNELS = 1;

    // Binary message that marks the end of the uploaded audio.
    private static final String END_MARKER = "{\"end\": true}";

    // DateTimeFormatter is immutable and thread-safe, so it can be shared between the main thread
    // and the WebSocket callback thread (the previous SimpleDateFormat could not).
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");

    /**
     * Connects to the transcription service, waits for the handshake, then streams microphone
     * audio until the capture line stops delivering data or the connection is closed.
     *
     * <p>The microphone line and the WebSocket client are always released before returning.
     *
     * @param args unused
     * @throws Exception if the URI is invalid, the microphone cannot be opened, the connection is
     *     closed while sending, or the thread is interrupted while waiting
     */
    public static void main(String[] args) throws Exception {
        URI url = new URI(BASE_URL + getHandShakeParams(APPID, SECRET_KEY));
        DraftWithOrigin draft = new DraftWithOrigin(ORIGIN);
        CountDownLatch handshakeSuccess = new CountDownLatch(1);
        CountDownLatch connectClose = new CountDownLatch(1);
        MyWebSocketClient client = new MyWebSocketClient(url, draft, handshakeSuccess, connectClose);

        TargetDataLine microphone = null;
        try {
            client.connect();
            while (!client.getReadyState().equals(READYSTATE.OPEN)) {
                // onClose already fired: the socket will never reach OPEN, so stop polling.
                if (connectClose.getCount() == 0) {
                    throw new IllegalStateException("WebSocket connection closed before it was opened");
                }
                System.out.println(getCurrentTimeStr() + "\t连接中");
                Thread.sleep(1000);
            }

            // Wait for the server's "started" message.
            handshakeSuccess.await();
            System.out.println(getCurrentTimeStr() + " 开始讲话并发送。。");

            microphone = openMicrophone();
            streamMicrophone(client, microphone);

            // Capture ended: tell the server no more audio follows, then wait for it to close.
            if (!client.isClosed()) {
                send(client, END_MARKER.getBytes(StandardCharsets.UTF_8));
                connectClose.await();
            }
        } finally {
            if (microphone != null) {
                microphone.stop();
                microphone.close();
            }
            client.close();
        }
    }

    // Opens and starts the default capture line with the 16 kHz / 16-bit / mono PCM format.
    private static TargetDataLine openMicrophone() throws LineUnavailableException {
        // signed = true, bigEndian = false
        AudioFormat audioFormat = new AudioFormat(SAMPLE_RATE_HZ, SAMPLE_SIZE_BITS, CHANNELS, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, audioFormat);
        TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
        line.open(audioFormat);
        line.start();
        return line;
    }

    // Forwards captured audio to the server frame by frame until the line returns no more data.
    private static void streamMicrophone(WebSocketClient client, TargetDataLine microphone) {
        byte[] buffer = new byte[CHUNCKED_SIZE];
        int read;
        while ((read = microphone.read(buffer, 0, buffer.length)) > 0) {
            // Send only the bytes actually captured; a short read would otherwise resend stale
            // data left over in the tail of the buffer.
            send(client, read == buffer.length ? buffer : Arrays.copyOf(buffer, read));
        }
    }

    /**
     * Builds the query string carrying the handshake parameters.
     *
     * @param appId application id
     * @param secretKey secret key that belongs to {@code appId}
     * @return query string starting with {@code ?}, containing appid, ts, signa and vadMdn
     * @throws IllegalStateException if the signature cannot be computed or encoded
     */
    public static String getHandShakeParams(String appId, String secretKey) {
        // Timestamp in seconds.
        String ts = String.valueOf(System.currentTimeMillis() / 1000);
        try {
            String signa = EncryptUtil.HmacSHA1Encrypt(EncryptUtil.MD5(appId + ts), secretKey);
            return "?appid=" + appId + "&ts=" + ts + "&signa=" + URLEncoder.encode(signa, "UTF-8") + "&vadMdn=2";
        } catch (Exception e) {
            // An unsigned request can never pass the handshake, so fail here with the real cause
            // instead of returning an empty query string.
            throw new IllegalStateException("Failed to build handshake parameters for appid " + appId, e);
        }
    }

    /**
     * Sends one binary message through the client.
     *
     * @param client connected WebSocket client
     * @param bytes payload to send
     * @throws RuntimeException if the client connection is already closed
     */
    public static void send(WebSocketClient client, byte[] bytes) {
        if (client.isClosed()) {
            throw new RuntimeException("client connect closed!");
        }
        client.send(bytes);
    }

    /**
     * Returns the current local time formatted as {@code yyyy-MM-dd HH:mm:ss.SSS}.
     *
     * @return formatted timestamp
     */
    public static String getCurrentTimeStr() {
        return LocalDateTime.now().format(TIMESTAMP_FORMAT);
    }

    /** WebSocket client that logs connection events and prints transcription results. */
    public static class MyWebSocketClient extends WebSocketClient {
        private final CountDownLatch handshakeSuccess;
        private final CountDownLatch connectClose;

        /**
         * @param serverUri endpoint to connect to
         * @param protocolDraft WebSocket draft to use
         * @param handshakeSuccess counted down when the server reports action "started"
         * @param connectClose counted down when the connection is closed
         */
        public MyWebSocketClient(URI serverUri, Draft protocolDraft, CountDownLatch handshakeSuccess, CountDownLatch connectClose) {
            super(serverUri, protocolDraft);
            this.handshakeSuccess = handshakeSuccess;
            this.connectClose = connectClose;
            if (serverUri.toString().contains("wss")) {
                trustAllHosts(this);
            }
        }

        @Override
        public void onOpen(ServerHandshake handshake) {
            System.out.println(getCurrentTimeStr() + "\t连接建立成功!");
        }

        /** Dispatches a text message from the server according to its "action" field. */
        @Override
        public void onMessage(String msg) {
            JSONObject msgObj = JSON.parseObject(msg);
            String action = msgObj.getString("action");
            if (Objects.equals("started", action)) {
                // Handshake succeeded.
                System.out.println(getCurrentTimeStr() + "\t握手成功!sid: " + msgObj.getString("sid"));
                handshakeSuccess.countDown();
            } else if (Objects.equals("result", action)) {
                // Transcription result.
                System.out.println(getCurrentTimeStr() + "\tresult: " + getContent(msgObj.getString("data")));
            } else if (Objects.equals("error", action)) {
                // The server reported an error; exit with a non-zero status so callers can see the failure.
                System.out.println("Error: " + msg);
                System.exit(1);
            }
        }

        @Override
        public void onError(Exception e) {
            System.out.println(getCurrentTimeStr() + "\t连接发生错误:" + e.getMessage() + ", " + new Date());
            e.printStackTrace();
            // Non-zero status: this is a failure exit.
            System.exit(1);
        }

        @Override
        public void onClose(int arg0, String arg1, boolean arg2) {
            System.out.println(getCurrentTimeStr() + "\t链接关闭");
            connectClose.countDown();
        }

        @Override
        public void onMessage(ByteBuffer bytes) {
            // Decode only the readable region of the buffer; bytes.array() would ignore
            // position/limit and is unsupported for read-only or direct buffers.
            String text = StandardCharsets.UTF_8.decode(bytes.duplicate()).toString();
            System.out.println(getCurrentTimeStr() + "\t服务端返回:" + text);
        }

        /**
         * Installs a TLS socket on the given client whose trust manager accepts every certificate.
         *
         * <p>NOTE(security): this disables server certificate validation, so the connection is
         * open to man-in-the-middle interception. Acceptable for a local demo only; replace with
         * the default trust managers before any real use.
         *
         * @param appClient client that receives the TLS socket
         */
        public void trustAllHosts(MyWebSocketClient appClient) {
            System.out.println("wss");
            TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {
                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[]{};
                }

                @Override
                public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                    // Intentionally empty: every client certificate is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
                    // Intentionally empty: every server certificate is accepted.
                }
            }};
            try {
                SSLContext sc = SSLContext.getInstance("TLS");
                sc.init(null, trustAllCerts, new java.security.SecureRandom());
                appClient.setSocket(sc.getSocketFactory().createSocket());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Parses a transcription result into a sentence by concatenating every {@code w} value found
     * under {@code cn.st.rt[].ws[].cw[]}.
     *
     * @param message JSON text of the result's "data" field
     * @return the concatenated words, or {@code message} unchanged if it cannot be parsed into
     *     the expected structure
     */
    public static String getContent(String message) {
        StringBuilder resultBuilder = new StringBuilder();
        try {
            JSONObject messageObj = JSON.parseObject(message);
            JSONObject cn = messageObj.getJSONObject("cn");
            JSONObject st = cn.getJSONObject("st");
            JSONArray rtArr = st.getJSONArray("rt");
            for (int i = 0; i < rtArr.size(); i++) {
                JSONArray wsArr = rtArr.getJSONObject(i).getJSONArray("ws");
                for (int j = 0; j < wsArr.size(); j++) {
                    JSONArray cwArr = wsArr.getJSONObject(j).getJSONArray("cw");
                    for (int k = 0; k < cwArr.size(); k++) {
                        resultBuilder.append(cwArr.getJSONObject(k).getString("w"));
                    }
                }
            }
        } catch (Exception e) {
            // Unexpected structure: fall back to the raw payload.
            return message;
        }
        return resultBuilder.toString();
    }
}
运行结果:
另附Python实现视频教程:AI大学堂