Java读取并解析wav格式文件
本文将主要介绍如何使用 Java 语言读取文件并解析 wave 格式,并以代码形式进行展开。代码主要包含三个功能类,分别为:Wave、DataParseUtils 和 Common。
本代码已用于 GitHub 上开源的 ASRT 语音识别系统的 Java 语言 SDK 项目: https://github.com/nl8590687/ASRT_SDK_Java
class Wave:
/**
 * In-memory holder for PCM audio read from a RIFF/WAVE byte stream.
 *
 * <p>The payload is always interpreted as a sequence of 16-bit little-endian values, whatever
 * bit depth the header declares; {@link #sampleWidth} merely reports the header's bit depth
 * divided by 8.
 */
public class Wave {
    /** Size of the leading "RIFF" + length + "WAVE" header. */
    private static final int RIFF_HEADER_SIZE = 12;
    /** Size of a chunk header: 4-byte tag followed by a 4-byte little-endian body length. */
    private static final int CHUNK_HEADER_SIZE = 8;
    /** Number of fmt-chunk bytes this parser reads (format tag through bits-per-sample). */
    private static final int FMT_MIN_SIZE = 16;
    /** Format tag accepted by {@link #deserialize(byte[])}. */
    private static final int FORMAT_PCM = 1;

    /** Payload decoded as 16-bit values, in file order. */
    public short[] samples;
    /** The same payload as little-endian bytes (two per entry of {@link #samples}). */
    public byte[] sampleBytes;
    /** Sample rate taken from the fmt chunk or supplied to a constructor. */
    public int sampleRate;
    /** Channel count taken from the fmt chunk or supplied to a constructor. */
    public int channels;
    /** Bytes per sample: header bit depth / 8, or the value supplied to a constructor. */
    public int sampleWidth;

    /** Creates an empty instance; populate it with {@link #deserialize(byte[])}. */
    public Wave() {}

    /**
     * Creates an instance from 16-bit samples; {@link #sampleBytes} is derived from them.
     *
     * @param samples     16-bit samples
     * @param sampleRate  sample rate to store
     * @param channels    channel count to store
     * @param sampleWidth bytes per sample to store
     */
    public Wave(short[] samples, int sampleRate, int channels, int sampleWidth) {
        this.samples = samples;
        this.sampleRate = sampleRate;
        this.channels = channels;
        this.sampleWidth = sampleWidth;
        // short[] -> byte[]
        this.sampleBytes = this.samplesToBytes(samples);
    }

    /**
     * Creates an instance from little-endian sample bytes; {@link #samples} is derived from them.
     *
     * @param sampleBytes little-endian 16-bit sample bytes
     * @param sampleRate  sample rate to store
     * @param channels    channel count to store
     * @param sampleWidth bytes per sample to store
     */
    public Wave(byte[] sampleBytes, int sampleRate, int channels, int sampleWidth) {
        this.sampleBytes = sampleBytes;
        this.sampleRate = sampleRate;
        this.channels = channels;
        this.sampleWidth = sampleWidth;
        // byte[] -> short[]
        this.samples = this.bytesToSamples(sampleBytes);
    }

    /**
     * Parses a complete WAVE file held in memory and fills this instance's fields.
     *
     * <p>Chunks are walked in order using their little-endian length fields, so JUNK, LIST or
     * any other chunk located before or after the fmt chunk is skipped. Only format tag 1 (PCM)
     * is accepted. On failure the exception is printed to standard output and the fields are
     * left untouched.
     *
     * @param wavBytes the full contents of the file
     * @return {@code true} if the header was understood and the fields were populated,
     *         {@code false} otherwise
     */
    public boolean deserialize(byte[] wavBytes) {
        try {
            parse(wavBytes);
            return true;
        } catch (Exception ex) {
            System.out.println(ex);
            return false;
        }
    }

    /** Walks the RIFF chunks, reads the fmt chunk and decodes the data chunk into the fields. */
    private void parse(byte[] wavBytes) throws Exception {
        if (wavBytes.length < RIFF_HEADER_SIZE
                || !hasTag(wavBytes, 0, "RIFF")
                || !hasTag(wavBytes, 8, "WAVE")) {
            throw new Exception("该文件不是WAVE文件");
        }
        // The RIFF length field (bytes 4..7) is deliberately not validated: the original
        // implementation tolerated files whose recorded length disagrees with the real one.

        boolean fmtSeen = false;
        int parsedChannels = 0;
        int parsedRate = 0;
        int parsedBits = 0;

        // long, because a corrupt chunk length of up to 0xFFFFFFFF must not overflow the cursor.
        long pos = RIFF_HEADER_SIZE;
        while (pos + CHUNK_HEADER_SIZE <= wavBytes.length) {
            int header = (int) pos;
            long size = readUnsignedInt(wavBytes, header + 4);
            int body = header + CHUNK_HEADER_SIZE;

            if (hasTag(wavBytes, header, "fmt ")) {
                if (size < FMT_MIN_SIZE || (long) body + FMT_MIN_SIZE > wavBytes.length) {
                    throw new Exception("无法找到WAVE文件fmt标记");
                }
                if (readUnsignedShort(wavBytes, body) != FORMAT_PCM) {
                    // Non-PCM encodings are not supported.
                    throw new Exception("WAVE文件不是pcm格式,暂时不支持");
                }
                parsedChannels = readUnsignedShort(wavBytes, body + 2);
                parsedRate = (int) readUnsignedInt(wavBytes, body + 4);
                // body + 8: bytes per second, body + 12: block align -- not needed here.
                parsedBits = readUnsignedShort(wavBytes, body + 14);
                fmtSeen = true;
            } else if (hasTag(wavBytes, header, "data")) {
                if (!fmtSeen) {
                    throw new Exception("无法找到WAVE文件fmt标记");
                }
                long available = wavBytes.length - body;
                long count = size / 2;
                // A zero length, or one larger than what is actually present (e.g. the
                // 0xFFFFFFFF written by streaming encoders), means "everything that follows".
                if (count == 0 || size > available) {
                    count = available / 2;
                }
                short[] decoded = decodeSamples(wavBytes, body, (int) count);
                this.samples = decoded;
                this.sampleBytes = this.samplesToBytes(decoded);
                this.sampleRate = parsedRate;
                this.channels = parsedChannels;
                this.sampleWidth = parsedBits / 8;
                return;
            }
            // Chunk bodies are padded to an even number of bytes.
            pos = body + size + (size & 1);
        }
        throw new Exception(fmtSeen ? "无法找到WAVE文件data标记" : "无法找到WAVE文件fmt标记");
    }

    /** Serialization is not implemented; always returns {@code null}. */
    private byte[] serialize() {
        return null;
    }

    /**
     * Returns the sample payload as bytes.
     *
     * @return the {@link #sampleBytes} array itself (not a copy)
     */
    public byte[] getRawSamples() {
        return this.sampleBytes;
    }

    /**
     * Encodes 16-bit samples as little-endian bytes.
     *
     * @param samples samples to encode
     * @return a new array of {@code samples.length * 2} bytes, low byte first
     */
    protected byte[] samplesToBytes(short[] samples) {
        byte[] encoded = new byte[samples.length * 2];
        for (int i = 0; i < samples.length; i++) {
            short value = samples[i];
            encoded[2 * i] = (byte) value;
            encoded[2 * i + 1] = (byte) (value >> 8);
        }
        return encoded;
    }

    /**
     * Decodes little-endian bytes into 16-bit samples; a trailing odd byte is ignored.
     *
     * @param sampleBytes bytes to decode, low byte first
     * @return a new array of {@code sampleBytes.length / 2} samples
     */
    protected short[] bytesToSamples(byte[] sampleBytes) {
        return decodeSamples(sampleBytes, 0, sampleBytes.length / 2);
    }

    /** Decodes {@code count} little-endian 16-bit values starting at {@code offset}. */
    private static short[] decodeSamples(byte[] source, int offset, int count) {
        short[] decoded = new short[count];
        for (int i = 0; i < count; i++) {
            decoded[i] = (short) readUnsignedShort(source, offset + 2 * i);
        }
        return decoded;
    }

    /** Returns whether the four bytes at {@code offset} equal the ASCII characters of {@code tag}. */
    private static boolean hasTag(byte[] buffer, int offset, String tag) {
        for (int i = 0; i < 4; i++) {
            if (buffer[offset + i] != (byte) tag.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /** Reads an unsigned little-endian 16-bit integer at {@code offset}. */
    private static int readUnsignedShort(byte[] buffer, int offset) {
        return (buffer[offset] & 0xFF) | (buffer[offset + 1] & 0xFF) << 8;
    }

    /** Reads an unsigned little-endian 32-bit integer at {@code offset}. */
    private static long readUnsignedInt(byte[] buffer, int offset) {
        return (buffer[offset] & 0xFFL)
                | (buffer[offset + 1] & 0xFFL) << 8
                | (buffer[offset + 2] & 0xFFL) << 16
                | (buffer[offset + 3] & 0xFFL) << 24;
    }
}
class DataParseUtils:
/**
 * Static helpers that assemble integers from individual bytes and split shorts into bytes.
 * In every multi-byte reader the first argument is the least significant byte.
 */
class DataParseUtils {
    /**
     * Builds a signed 16-bit value from two bytes.
     *
     * @param b1 least significant byte
     * @param b2 most significant byte (carries the sign)
     * @return the value, in the range -32768..32767
     */
    public static int convertTwoSignInt(byte b1, byte b2) {
        // Narrowing to short reinstates the sign carried by b2.
        return (short) (Byte.toUnsignedInt(b2) << 8 | Byte.toUnsignedInt(b1));
    }

    /**
     * Builds a signed 32-bit value from four bytes.
     *
     * @param b1 least significant byte
     * @param b2 second byte
     * @param b3 third byte
     * @param b4 most significant byte (carries the sign)
     * @return the assembled int
     */
    public static int convertFourSignInt(byte b1, byte b2, byte b3, byte b4) {
        int value = b4;
        value = (value << 8) | (b3 & 0xFF);
        value = (value << 8) | (b2 & 0xFF);
        value = (value << 8) | (b1 & 0xFF);
        return value;
    }

    /**
     * Builds an unsigned 16-bit value from two bytes.
     *
     * @param b1 least significant byte
     * @param b2 most significant byte
     * @return the value, in the range 0..65535
     */
    public static int convertTwoUnsignInt(byte b1, byte b2) {
        int high = Byte.toUnsignedInt(b2);
        int low = Byte.toUnsignedInt(b1);
        return (high << 8) + low;
    }

    /**
     * Builds an unsigned 32-bit value from four bytes.
     *
     * @param b1 least significant byte
     * @param b2 second byte
     * @param b3 third byte
     * @param b4 most significant byte
     * @return the value, in the range 0..4294967295
     */
    public static long convertFoutUnsignLong(byte b1, byte b2, byte b3, byte b4) {
        // Same bit pattern as the signed form, with the upper 32 bits of the long masked off.
        return convertFourSignInt(b1, b2, b3, b4) & 0xFFFFFFFFL;
    }

    /**
     * Splits a short into its two bytes.
     *
     * @param shortNumber value to split
     * @param big         {@code true} for most-significant-byte-first order,
     *                    {@code false} for least-significant-byte-first order
     * @return a new two-element array in the requested order
     */
    public static byte[] convertShortToBytes(Short shortNumber, boolean big) {
        short value = shortNumber;
        byte low = (byte) value;
        byte high = (byte) (value >> 8);
        return big ? new byte[] {high, low} : new byte[] {low, high};
    }
}
class Common:
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/** File helpers. */
public class Common {
    /**
     * Reads the entire contents of a file into a byte array.
     *
     * @param filename path of the file to read
     * @return the bytes of the file; an empty array if the file is empty
     * @throws RuntimeException wrapping the underlying {@link IOException} when the file
     *                          cannot be opened or read
     */
    public static byte[] readBinFile(String filename) {
        try {
            // Files.readAllBytes opens, reads and closes the file itself, so a failure to open
            // it surfaces as the real I/O error rather than a NullPointerException from cleanup.
            return Files.readAllBytes(Paths.get(filename));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
参考资料 References
- AI 柠檬,ASRT 开源语音识别项目 Java SDK. GitHub, https://github.com/nl8590687/ASRT_SDK_Java
原文:https://blog.ailemon.net/2022/11/07/java-read-and-parse-wave-format-file/