main.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. /**
  2. * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3. * Reserved. MIT License (https://opensource.org/licenses/MIT)
  4. */
  5. /* 2022-2023 by zhaoming,mali aihealthx.com */
  // Connection: create the WebSocket connector and hand it the message and
  // connection-state callbacks defined further down in this file.
  var wsconnecter = new WebSocketConnectMethod({msgHandle:getJsonMessage,stateHandle:getConnState});
  var audioBlob;
  // Recording: create the recorder object (16-bit PCM, 16 kHz); recProcess is
  // registered as its onProcess callback.
  var rec = Recorder({
    type:"pcm",
    bitRate:16,
    sampleRate:16000,
    onProcess:recProcess
  });
  // Audio that has been captured/read but not yet sent to the server.
  var sampleBuf=new Int16Array();
  // Gate read by recProcess and toggled by start()/stop(). Declared explicitly so
  // it is never an implicit global and never read before it exists.
  var isRec = false;
  // Button wiring. Start/Stop stay disabled until a connection has been opened.
  var btnStart = document.getElementById('btnStart');
  btnStart.onclick = record;
  var btnStop = document.getElementById('btnStop');
  btnStop.onclick = stop;
  btnStop.disabled = true;
  btnStart.disabled = true;
  // Declared with var (the original assignment created an implicit global).
  var btnConnect = document.getElementById('btnConnect');
  btnConnect.onclick = start;
  var awsslink= document.getElementById('wsslink');
  var rec_text=""; // for online rec asr result
  var offline_text=""; // for offline rec asr result
  var info_div = document.getElementById('info_div');
  var upfile = document.getElementById('upfile');
  var isfilemode=false; // if it is in file mode
  var file_ext="";
  var file_sample_rate=16000; //for wav file sample rate
  var file_data_array; // array to save file data
  var totalsend=0;
  // var now_ipaddress=window.location.href;
  // now_ipaddress=now_ipaddress.replace("https://","wss://");
  // now_ipaddress=now_ipaddress.replace("static/index.html","");
  // var localport=window.location.port;
  // now_ipaddress=now_ipaddress.replace(localport,"10095");
  // document.getElementById('wssip').value=now_ipaddress;
  // Initialise the manual-authorization link from the address currently in the page.
  addresschange();
  /**
   * Refresh the "manual authorization" link so that clicking it opens the
   * HTTP(S) counterpart of the WebSocket address typed into the `wssip` field.
   *
   * Only the leading scheme is rewritten (wss:// -> https://, ws:// -> http://).
   * The previous global /wss/ replacement also rewrote "wss" inside host names
   * or paths and left plain ws:// addresses untouched.
   */
  function addresschange()
  {
    var wsAddress = document.getElementById('wssip').value;
    document.getElementById('info_wslink').innerHTML="点此处手工授权(IOS手机)";
    // "$1" carries the optional "s", so the secure/insecure distinction is preserved.
    var Uri = wsAddress.trim().replace(/^ws(s?):\/\//, "http$1://");
    console.log("addresschange uri=",Uri);
    awsslink.onclick=function(){
      window.open(Uri, '_blank');
    };
  }
  // Clicking the file input resets the controls: Connect becomes available,
  // while Start and Stop are disabled.
  upfile.onclick = function () {
    btnConnect.disabled = false;
    btnStop.disabled = true;
    btnStart.disabled = true;
  };
  // Adapted from https://github.com/xiangyuecn/Recorder/tree/master
  /**
   * Read the header of a WAV file and normalise it to a 44-byte header.
   *
   * @param {Uint8Array} bytes - Raw file bytes, starting at the RIFF header.
   * @returns {?{sampleRate:number, bitRate:number, numChannels:number,
   *            wavHead44:Uint8Array, dataPos:number}}
   *   Header fields, a rebuilt header made of the RIFF block, the fmt chunk and
   *   the 8-byte data chunk header, and the offset of the first sample byte.
   *   Returns null when the input is shorter than 44 bytes, is not a
   *   "RIFF....WAVEfmt " file, is not raw PCM with 1 or 2 channels, or has no
   *   data chunk within range.
   */
  var readWavInfo = function (bytes) {
    if (bytes.byteLength < 44) {
      return null;
    }
    var wavView = bytes;

    // Little-endian 32-bit read, forced to UNSIGNED with >>> 0. A signed read lets
    // a malformed chunk size (e.g. 0xFFFFFFF8 == -8) cancel the 8-byte chunk
    // header, so the scan below would never advance and the page would hang.
    var readUint32 = function (pos) {
      return (wavView[pos] | (wavView[pos + 1] << 8) | (wavView[pos + 2] << 16) | (wavView[pos + 3] << 24)) >>> 0;
    };

    // True when the bytes at `pos` spell the ASCII string `text`.
    var matchesAscii = function (pos, text) {
      for (var k = 0; k < text.length; k++) {
        if (wavView[pos + k] != text.charCodeAt(k)) {
          return false;
        }
      }
      return true;
    };

    if (!matchesAscii(0, "RIFF") || !matchesAscii(8, "WAVEfmt ")) {
      return null;
    }
    var numCh = wavView[22];
    // Format tag 1 = raw PCM; only mono or stereo is accepted.
    if (wavView[20] != 1 || (numCh != 1 && numCh != 2)) {
      return null;
    }

    var sampleRate = readUint32(24);
    var bitRate = wavView[34] + (wavView[35] << 8);
    // Keep only the blocks needed for a minimal header.
    var heads = [wavView.subarray(0, 12)];
    var headSize = 12;
    // Locate the data chunk: it sits at offset 36 in a canonical file, later
    // when extra chunks precede it.
    var dataPos = 0;
    for (var i = 12, iL = wavView.length - 8; i < iL;) {
      if (matchesAscii(i, "data")) {
        heads.push(wavView.subarray(i, i + 8));
        headSize += 8;
        dataPos = i + 8;
        break;
      }
      var chunkStart = i;
      // Skip the 4-byte id, the 4-byte size field and the chunk payload.
      i += 8 + readUint32(i + 4);
      if (chunkStart == 12) { // the fmt chunk is always kept
        heads.push(wavView.subarray(chunkStart, i));
        headSize += i - chunkStart;
      }
    }
    if (!dataPos) {
      return null;
    }

    var wavHead = new Uint8Array(headSize);
    for (var h = 0, n = 0; h < heads.length; h++) {
      wavHead.set(heads[h], n);
      n += heads[h].length;
    }
    return {
      sampleRate: sampleRate,
      bitRate: bitRate,
      numChannels: numCh,
      wavHead44: wavHead,
      dataPos: dataPos
    };
  };
  /**
   * File-input change handler: read each selected file once, keep its bytes in
   * `file_data_array`, and for .wav files take the sample rate from the header.
   *
   * Changes from the previous version: every file used to be read twice (once
   * for the bytes, once for the header), and a WAV file whose header could not
   * be parsed threw a TypeError inside onload because the null result of
   * readWavInfo was dereferenced. The WAV check now uses each file's own
   * extension rather than the extension of the last selected file.
   */
  upfile.onchange = function () {
    var files = this.files;
    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      const ext = file.name.split('.').pop().toLowerCase();
      file_ext = ext;

      const reader = new FileReader();
      reader.onload = function () {
        file_data_array = reader.result;
        if (ext == "wav") {
          // For wav files the sample rate is taken from the header.
          var info = readWavInfo(new Uint8Array(reader.result));
          console.log(info);
          if (info) {
            file_sample_rate = info.sampleRate;
          } else {
            // Unparseable header: fall back to 16000, the value file_sample_rate
            // is initialised with, instead of keeping a rate from an earlier file.
            console.log('could not parse wav header, assuming 16000 Hz');
            file_sample_rate = 16000;
          }
        }
        info_div.innerHTML='请点击连接进行识别';
      };
      reader.onerror = function (e) {
        console.log('error' + e);
      };
      reader.readAsArrayBuffer(file);
    }
  };
  /**
   * Expose the bytes held in `file_data_array` through the page's
   * `audio_record` element: wrap them in an "audio/wav" Blob, point the element
   * at an object URL for it and show the playback controls. Playback is not
   * started automatically.
   */
  function play_file()
  {
    var wavBytes = new Uint8Array(file_data_array);
    var wavBlob = new Blob([wavBytes], {type :"audio/wav"});
    var player = document.getElementById('audio_record');
    var urlApi = window.URL||webkitURL;
    player.src = urlApi.createObjectURL(wavBlob);
    player.controls = true;
  }
  /**
   * Stream the uploaded file to the server in 960-byte chunks, leave any
   * trailing partial chunk in `sampleBuf`, then call stop(), which flushes that
   * remainder and sends the end-of-speech request.
   *
   * Fixes over the previous version:
   *  - the chunk was stored in an undeclared (implicit global) `sendBuf`;
   *  - `totalsend` was increased by the size of the remaining buffer rather
   *    than by the number of bytes actually sent;
   *  - the remainder was re-sliced (copied) after every chunk, which is
   *    quadratic in the file size. An offset is advanced instead.
   */
  function start_file_send()
  {
    var chunk_size = 960; // for asr chunk_size [5, 10, 5]
    var fileBytes = new Uint8Array(file_data_array);
    var offset = 0;
    while (fileBytes.length - offset >= chunk_size) {
      // slice() copies, so each message owns exactly chunk_size bytes.
      wsconnecter.wsSend(fileBytes.slice(offset, offset + chunk_size));
      offset += chunk_size;
      totalsend += chunk_size;
    }
    sampleBuf = fileBytes.slice(offset);
    stop();
  }
  /**
   * React to the "recoder_mode" radio group: show the panel for the selected
   * input source (microphone vs. file) and reset the buttons accordingly.
   * Any selection other than "mic" (including none) is treated as file mode,
   * where Connect stays disabled and the user is prompted to choose a file.
   */
  function on_recoder_mode_change()
  {
    var radios = document.getElementsByName("recoder_mode");
    // First checked radio, if any.
    var selected = Array.prototype.find.call(radios, function (radio) {
      return radio.checked;
    });
    var useMic = (selected ? selected.value : null) == "mic";

    document.getElementById("mic_mode_div").style.display = useMic ? 'block' : 'none';
    document.getElementById("rec_mode_div").style.display = useMic ? 'none' : 'block';
    btnStart.disabled = true;
    btnStop.disabled = true;
    btnConnect.disabled = !useMic;
    isfilemode = !useMic;
    if (!useMic) {
      info_div.innerHTML='请点击选择文件';
    }
  }
  /**
   * Build the hot-word table from the `varHot` text field.
   *
   * Each line has the form "<word or phrase> <integer weight>"; lines that do
   * not end in a non-negative integer are ignored.
   *
   * Fixes over the previous version:
   *  - lines are split on CR/LF only (the old character class also contained
   *    "(" and ")", so phrases with parentheses were cut apart);
   *  - the weight must contain at least one digit (an empty weight, e.g. from a
   *    trailing space, used to be accepted and stored as NaN/null);
   *  - the loop variable is local instead of an implicit global, and parseInt
   *    is given an explicit radix.
   *
   * @returns {?string} JSON text mapping phrase -> weight ("{}" when no line is
   *   valid), or null when the field is missing or empty.
   */
  function getHotwords(){
    var obj = document.getElementById("varHot");
    if (obj == null || obj.value.length <= 0) {
      return null;
    }
    const val = obj.value.toString();
    console.log("hotwords="+val);
    const lines = val.split(/[\r\n]+/);
    const jsonresult = {};
    const regexNum = /^[0-9]+$/;
    for (const line of lines) {
      const fields = line.split(" ");
      const weight = fields[fields.length - 1];
      if (fields.length < 2 || !regexNum.test(weight)) {
        continue;
      }
      // Everything before the weight is the phrase (it may itself contain spaces).
      const phrase = fields.slice(0, -1).join(" ").trim();
      if (phrase.length > 0) {
        jsonresult[phrase] = parseInt(weight, 10);
      }
    }
    console.log("jsonresult="+JSON.stringify(jsonresult));
    return JSON.stringify(jsonresult);
  }
  /**
   * Return the recognition mode to request from the server: the value of the
   * checked "asr_mode" radio button (null when none is checked), overridden by
   * "offline" whenever the page is in file mode.
   */
  function getAsrMode(){
    var radios = document.getElementsByName("asr_mode");
    var checked = Array.prototype.find.call(radios, function (radio) {
      return radio.checked;
    });
    var item = checked ? checked.value : null;
    if (isfilemode) {
      // Uploaded files are always recognised offline.
      item = "offline";
    }
    console.log("asr mode"+item);
    return item;
  }
  /**
   * Prefix each sentence/word of a recognition result with its start time.
   *
   * The text is split on Chinese and ASCII punctuation and on spaces. For each
   * non-empty piece a line "<seconds>:<piece>\n" is emitted, where the time is
   * entry [0] of the current timestamp divided by 1000. A piece made only of
   * ASCII letters consumes one timestamp; any other piece consumes one
   * timestamp per character.
   *
   * Fixes over the previous version:
   *  - the separator pattern was not a valid regular expression (an
   *    alternative consisting of a bare "?"); it is now a character class that
   *    covers both full-width and ASCII punctuation;
   *  - running out of timestamps threw a TypeError; such pieces are now emitted
   *    without a time prefix.
   *
   * @param {string} tmptext - Recognised text.
   * @param {?string} tmptime - JSON text of an array of timestamp entries.
   * @returns {string} The annotated text, or `tmptext` unchanged when there is
   *   no timestamp information or the text is empty.
   */
  function handleWithTimestamp(tmptext,tmptime)
  {
    console.log( "tmptext: " + tmptext);
    console.log( "tmptime: " + tmptime);
    if (tmptime == null || tmptime == "undefined" || tmptext.length <= 0) {
      return tmptext;
    }
    // Normalise every separator to "," so a single split is enough.
    var words = tmptext.replace(/[。?,、?. ]/g, ",").split(",");
    var jsontime = JSON.parse(tmptime);
    var char_index = 0; // index into the timestamp list
    var text_withtime = "";
    for (var i = 0; i < words.length; i++) {
      var word = words[i];
      if (word == "undefined" || word.length <= 0) {
        continue;
      }
      var stamp = jsontime[char_index];
      if (stamp == null) {
        // Fewer timestamps than text units: keep the text, drop the time.
        text_withtime = text_withtime + word + "\n";
      } else {
        console.log( "words: " + word + ",time=" + stamp[0]/1000);
        text_withtime = text_withtime + stamp[0]/1000 + ":" + word + "\n";
      }
      if (/^[a-zA-Z]+$/.test(word)) {
        char_index = char_index + 1; // English: one timestamp per word
      } else {
        char_index = char_index + word.length; // Chinese: one timestamp per character
      }
    }
    return text_withtime;
  }
  /** Return a promise that resolves (with no value) after `delay` milliseconds. */
  const sleep = function (delay) {
    return new Promise(function (resolve) {
      setTimeout(resolve, delay);
    });
  };
  /**
   * POST the current session id (read from the parent document's `sessionid`
   * field) to `/is_speaking` and return the `data` member of the JSON reply.
   * Callers in this file use the result as a truthy/falsy "still speaking" flag.
   */
  async function is_speaking() {
    const sessionField = parent.document.getElementById('sessionid');
    const payload = {
      sessionid: parseInt(sessionField.value),
    };
    const requestOptions = {
      body: JSON.stringify(payload),
      headers: {
        'Content-Type': 'application/json'
      },
      method: 'POST'
    };
    const response = await fetch('/is_speaking', requestOptions);
    const data = await response.json();
    console.log('is_speaking res:',data);
    return data.data;
  }
  /**
   * Pause recording while the digital human is speaking.
   *
   * Stops the recorder, polls is_speaking() once per second for up to ten
   * attempts until speech starts, keeps polling until it ends, waits two more
   * seconds and restarts the recorder.
   *
   * Fixes over the previous version: the poll result was kept in an implicit
   * global (`bspeak`), and a failing is_speaking() call rejected the promise
   * with nobody listening and left the recorder stopped. The recorder is now
   * always restarted and polling errors are logged; on error the two-second
   * wait is skipped.
   */
  async function waitSpeakingEnd() {
    rec.stop(); // pause recording
    try {
      // Wait for the digital human to start speaking (at most 10 polls).
      for (let attempt = 0; attempt < 10; attempt++) {
        if (await is_speaking()) {
          break;
        }
        await sleep(1000);
      }
      // Wait for the digital human to finish speaking.
      while (await is_speaking()) {
        await sleep(1000);
      }
      await sleep(2000);
    } catch (err) {
      console.log("waitSpeakingEnd: polling failed: " + err);
    } finally {
      rec.start();
    }
  }
  /**
   * WebSocket message handler: parse a recognition result and append it to the
   * `varArea` text box.
   *
   * Offline results ("2pass-offline" / "offline") are appended, with spaces
   * removed and a trailing newline, to `offline_text`, which then replaces the
   * displayed text; the sentence is also POSTed to `/human` and recording is
   * paused through waitSpeakingEnd(). Any other result is appended to
   * `rec_text` as is. In file mode a final result plays the file, closes the
   * connection and re-enables Connect.
   *
   * Fixes over the previous version: the payload is parsed once instead of five
   * times; malformed or non-object payloads are logged and ignored instead of
   * throwing; a missing `text` no longer appends the literal "undefined"; the
   * `/human` request and waitSpeakingEnd() get rejection handlers.
   *
   * @param {{data: string}} jsonMsg - Message event whose `data` is JSON text.
   */
  function getJsonMessage( jsonMsg ) {
    var msg;
    try {
      msg = JSON.parse(jsonMsg.data);
    } catch (err) {
      console.log("getJsonMessage: ignoring malformed message: " + err);
      return;
    }
    if (msg === null || typeof msg !== "object") {
      console.log("getJsonMessage: ignoring unexpected message: " + jsonMsg.data);
      return;
    }

    console.log( "message: " + msg['text'] );
    var rectxt = msg['text'] == null ? "" : "" + msg['text'];
    var asrmodel = msg['mode'];
    var is_final = msg['is_final'];

    if (asrmodel == "2pass-offline" || asrmodel == "offline") {
      var sentence = rectxt.replace(/ +/g,"");
      offline_text = offline_text + sentence + '\n';
      rec_text = offline_text;
      // Hand the recognised sentence to the digital human.
      fetch('/human', {
        body: JSON.stringify({
          text: sentence,
          type: 'chat',
          sessionid: parseInt(parent.document.getElementById('sessionid').value, 10),
        }),
        headers: {
          'Content-Type': 'application/json'
        },
        method: 'POST'
      }).catch(function (err) {
        console.log("getJsonMessage: POST /human failed: " + err);
      });
      // Deliberately not awaited: the handler must return immediately.
      Promise.resolve(waitSpeakingEnd()).catch(function (err) {
        console.log("getJsonMessage: waitSpeakingEnd failed: " + err);
      });
    } else {
      rec_text = rec_text + rectxt;
    }

    var varArea = document.getElementById('varArea');
    varArea.value = rec_text;
    console.log( "offline_text: " + asrmodel+","+offline_text);
    console.log( "rec_text: " + rec_text);

    if (isfilemode == true && is_final == true) {
      console.log("call stop ws!");
      play_file();
      wsconnecter.wsStop();
      info_div.innerHTML="请点击连接";
      btnStart.disabled = true;
      btnStop.disabled = true;
      btnConnect.disabled=false;
    }
  }
  /**
   * Connection-state callback for the WebSocket connector.
   *
   * @param {number} connState - 0: connection opened; 1: no action is taken
   *   here; 2: connection error. Any other value is ignored.
   */
  function getConnState( connState ) {
    switch (connState) {
      case 0: // on open
        info_div.innerHTML='连接成功!请点击开始';
        if (isfilemode==true) {
          // File mode starts streaming the file as soon as the socket is open.
          info_div.innerHTML='请耐心等待,大文件等待时间更长';
          start_file_send();
        } else {
          btnStart.disabled = false;
          btnStop.disabled = true;
          btnConnect.disabled=true;
        }
        break;
      case 1:
        // Nothing to do (a stop() call used to be here).
        break;
      case 2:
        stop();
        console.log( 'connecttion error' );
        alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口。或试试界面上手动授权,再连接。");
        btnStart.disabled = true;
        btnStop.disabled = true;
        btnConnect.disabled=false;
        info_div.innerHTML='请点击连接';
        break;
      default:
        break;
    }
  }
  /**
   * Start button handler: open the recorder and, once it is open, start
   * capturing and switch the buttons to the "recording" state.
   *
   * A failure callback is now passed to rec.open(); previously a denied or
   * unavailable microphone failed silently and the page looked idle.
   */
  function record()
  {
    rec.open(function(){
      rec.start();
      console.log("开始");
      btnStart.disabled = true;
      btnStop.disabled = false;
      btnConnect.disabled=true;
    }, function(msg, isUserNotAllow){
      // Buttons are left untouched so the user can fix the problem and retry.
      console.log("rec.open failed: " + msg + (isUserNotAllow ? " (permission denied)" : ""));
      info_div.textContent = "打开麦克风失败: " + msg;
    });
  }
  // Recognition start / stop / clear operations.
  /**
   * Connect button handler: clear the previous transcript and open the
   * WebSocket connection.
   *
   * @returns {number} 1 when wsStart() reported success (the page then waits
   *   for the connection-state callback), 0 otherwise (Connect is re-enabled).
   */
  function start() {
    clear();
    console.log("isfilemode"+isfilemode);

    // wsStart() returns 1 on success and 0 on error.
    var connecting = wsconnecter.wsStart() == 1;

    // Start and Stop are disabled in both outcomes.
    btnStart.disabled = true;
    btnStop.disabled = true;

    if (!connecting) {
      info_div.innerHTML="请点击开始";
      btnConnect.disabled=false;
      return 0;
    }

    info_div.innerHTML="正在连接asr服务器,请等待...";
    isRec = true;
    btnConnect.disabled=true;
    return 1;
  }
  /**
   * Finish the current utterance: flush any buffered audio, tell the server
   * that speaking has ended, and update the page.
   *
   * In microphone mode the buttons are disabled, the connection is closed three
   * seconds later (leaving time for the final result) and the recording is
   * converted to WAV for playback in the `audio_record` element. In file mode
   * the connection is closed by getJsonMessage when the final result arrives.
   *
   * Fix over the previous version: the options object given to
   * Recorder.pcm2wav was written as `data = {...}`, which assigned an implicit
   * global named `data`; the unused `audioBlob` local is gone as well.
   */
  function stop() {
    var request = {
      "chunk_size": [5, 10, 5],
      "wav_name": "h5",
      "is_speaking": false,
      "chunk_interval":10,
      "mode":getAsrMode(),
    };
    console.log(request);

    // Send whatever is left over from chunking before the end-of-speech request.
    if (sampleBuf.length > 0) {
      wsconnecter.wsSend(sampleBuf);
      console.log("sampleBuf.length"+sampleBuf.length);
      sampleBuf = new Int16Array();
    }
    wsconnecter.wsSend( JSON.stringify(request) );

    // Update control state.
    isRec = false;
    info_div.innerHTML="发送完数据,请等候,正在识别...";

    if (isfilemode == false) {
      btnStop.disabled = true;
      btnStart.disabled = true;
      btnConnect.disabled=true;
      // Wait 3s for the asr result before closing the connection.
      setTimeout(function(){
        console.log("call stop ws!");
        wsconnecter.wsStop();
        btnConnect.disabled=false;
        info_div.innerHTML="请点击连接";
      }, 3000);

      rec.stop(function(blob,duration){
        console.log(blob);
        Recorder.pcm2wav({sampleRate:16000, bitRate:16, blob:blob},
          function(theblob,duration){
            console.log(theblob);
            var audio_record = document.getElementById('audio_record');
            audio_record.src = (window.URL||webkitURL).createObjectURL(theblob);
            audio_record.controls=true;
            //audio_record.play();
          },
          function(msg){
            console.log(msg);
          }
        );
      },function(errMsg){
        console.log("errMsg: " + errMsg);
      });
    }
  }
  /** Empty the `varArea` text box and reset both transcript accumulators. */
  function clear() {
    document.getElementById('varArea').value = "";
    offline_text = "";
    rec_text = "";
  }
  /**
   * Recorder onProcess callback: resample the newest captured frame to 16 kHz,
   * append it to `sampleBuf` and send every complete 960-sample chunk to the
   * server. Does nothing unless `isRec` is true.
   *
   * Fixes over the previous version: the outgoing chunk lived in an implicit
   * global (`sendBuf`); buffers were concatenated by spreading both typed
   * arrays into a plain array; and the remainder was re-sliced after every
   * chunk. A single typed-array copy and a moving offset are used instead.
   *
   * @param {Array} buffer - Frames captured so far; only the last one is used.
   * @param {number} powerLevel - Unused.
   * @param {number} bufferDuration - Shown in the info area as seconds
   *   (value / 1000).
   * @param {number} bufferSampleRate - Sample rate of the frames in `buffer`.
   * @param {number} newBufferIdx - Unused.
   * @param {*} asyncEnd - Unused.
   */
  function recProcess( buffer, powerLevel, bufferDuration, bufferSampleRate,newBufferIdx,asyncEnd ) {
    if (isRec !== true) {
      return;
    }
    var latestFrame = buffer[buffer.length-1];
    var data_16k = Recorder.SampleData([latestFrame], bufferSampleRate, 16000).data;

    // Pending samples followed by the new ones.
    var pending = new Int16Array(sampleBuf.length + data_16k.length);
    pending.set(sampleBuf, 0);
    pending.set(data_16k, sampleBuf.length);

    info_div.innerHTML=""+bufferDuration/1000+"s";

    var chunk_size = 960; // for asr chunk_size [5, 10, 5]
    var offset = 0;
    while (pending.length - offset >= chunk_size) {
      wsconnecter.wsSend(pending.slice(offset, offset + chunk_size));
      offset += chunk_size;
    }
    // Keep the incomplete tail for the next callback (or for stop() to flush).
    sampleBuf = pending.slice(offset);
  }
  /**
   * Report whether the checked "use_itn" radio button has the value "true".
   * Returns false when no button in the group is checked.
   */
  function getUseITN() {
    var radios = document.getElementsByName("use_itn");
    var checked = Array.prototype.find.call(radios, function (radio) {
      return radio.checked;
    });
    return checked ? checked.value === "true" : false;
  }