|
【1】采用 HALCON 软件提取语音标注文本中所有出现过的不重复字符(标签 ID)
- * Build the label vocabulary from the transcript file: read every line,
- * concatenate the text that follows the first blank of each line, collect
- * each distinct non-blank character once, and write the characters to
- * LabelWav.json as a JSON array of one-character strings, with " " last.
- dev_update_off ()
- dev_set_draw ('margin')
- pathwav := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/wav/train'
- pathtrans := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/transcript'
- pathtranscript := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/transcript/aishell_transcript_v0.8.txt'
- *
- * Read the transcript file line by line.
- pathtranscriptWords := []
- open_file (pathtranscript, 'input', FileHandle)
- IsEOF := 0
- while (IsEOF == 0)
- fread_line (FileHandle, Line, IsEOF)
- * The read that reports end-of-file may deliver an empty string;
- * skip it so that no blank entry is stored.
- tuple_strlen (Line, LineLength)
- if (LineLength > 0)
- pathtranscriptWords := [pathtranscriptWords, Line]
- endif
- endwhile
- close_file (FileHandle)
- *
- * Collect the transcript text of every line. Each line is expected to look
- * like '<utterance id> <text>' (assumption taken from how the line is split).
- WavNameOutput := ''
- for k := 0 to |pathtranscriptWords| - 1 by 1
- scriptWord := pathtranscriptWords[k]
- * Cut at the first blank instead of calling tuple_split with the id as the
- * separator: tuple_split treats every character of the separator string as
- * a delimiter, which would truncate text containing any id character.
- tuple_strchr (scriptWord, ' ', SpacePos)
- tuple_strlen (scriptWord, LineLength)
- if (SpacePos >= 0 and SpacePos < LineLength - 1)
- tuple_substr (scriptWord, SpacePos + 1, LineLength - 1, Transcript)
- * Drop the trailing line feed kept by fread_line.
- tuple_split (Transcript, '\n', Substrings)
- if (|Substrings| > 0)
- WavNameOutput := WavNameOutput + Substrings[0]
- endif
- endif
- endfor
- * WriteTxt (FilePath + WavName + '.txt', WavNameOutput)
- * WavInputOut := WavInputOut + WavFile + ',' + FilePath + WavName + '.txt' + '\n'
- *
- * resPath := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/'
- * WriteTxt (resPath + 'ASRInfo.txt', WavInputOut)
- *
- * Keep the first occurrence of every character except the blank.
- UniqueWord := ''
- tuple_strlen (WavNameOutput, Length2)
- for i := 0 to Length2 - 1 by 1
- tuple_substr (WavNameOutput, i, i, CurChar)
- tuple_strchr (UniqueWord, CurChar, Position)
- if (Position == -1 and CurChar != ' ')
- UniqueWord := UniqueWord + CurChar
- endif
- endfor
- * WriteTxt (FilePath + WavName + '.txt', WavNameOutput)
- *
- * Write the characters as a JSON array. Opening in 'output' mode replaces an
- * existing file, so no separate delete step is needed.
- File_Name := 'D:/2-LearningCode/902-ASR/AISHELL-1/data_aishell/LabelWav.json'
- open_file (File_Name, 'output', FileHandle)
- fwrite_string (FileHandle, '[' + '\n')
- tuple_strlen (UniqueWord, Length3)
- for i := 0 to Length3 - 1 by 1
- tuple_substr (UniqueWord, i, i, Substring1)
- * A double quote or backslash must be escaped inside a JSON string.
- if (Substring1 == '"' or Substring1 == '\\')
- Substring1 := '\\' + Substring1
- endif
- * Every entry is followed by a comma because the blank entry below is
- * always written last.
- fwrite_string (FileHandle, '"' + Substring1 + '",' + '\n')
- endfor
- fwrite_string (FileHandle, '"' + ' ' + '"' + '\n')
- fwrite_string (FileHandle, ']' + '\n')
- close_file (FileHandle)
输出如下:
- [
- "_",
- "'",
- "a",
- "b",
- "c",
- "d",
- "e",
- "f",
- "g",
- "h",
- "i",
- "j",
- "k",
- "l",
- "m",
- "n",
- "o",
- "p",
- "q",
- "r",
- "s",
- "t",
- "u",
- "v",
- "w",
- "x",
- "y",
- "z",
- " "
- ]
复制代码
|
|