001/* A library of audio operations. 002 003 Copyright (c) 1998-2014 The Regents of the University of California. 004 All rights reserved. 005 006 Permission is hereby granted, without written agreement and without 007 license or royalty fees, to use, copy, modify, and distribute this 008 software and its documentation for any purpose, provided that the above 009 copyright notice and the following two paragraphs appear in all copies 010 of this software. 011 012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 016 SUCH DAMAGE. 017 018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 023 ENHANCEMENTS, OR MODIFICATIONS. 024 025 PT_COPYRIGHT_VERSION_2 026 COPYRIGHTENDKEY 027 028 */ 029package ptolemy.media; 030 031import java.io.DataInputStream; 032import java.io.DataOutputStream; 033import java.io.IOException; 034 035/////////////////////////////////////////////////////////////////// 036//// Audio 037 038/** 039 Instances of this class represent audio data equivalent to that 040 contained by a Sun/NeXT audio file (.au file). The class also 041 includes a set of utility static methods for manipulating audio 042 signals. 043 Currently, only an 8kHz sample rate, mu-law encoded, monophonic 044 audio format is supported. 045 <p> 046 The format of an audio file is:</p> 047 048 <table border="1"> 049 <caption>Audio File Format</caption> 050 <tr><th>byte</th><th>type</th><th>field name</th><th>field value</th></tr> 051 <tr><td>0x00</td><td>byte </td><td>magic[4] </td> 052 <td> 0x2E736E64 '.snd' in ASCII </td></tr> 053 <tr><td>0x04</td><td>int </td><td>offset </td> 054 <td> offset of audio data relative to the start of the stream </td></tr> 055 <tr><td>0x08</td><td>int </td><td>size </td> 056 <td> number of bytes of data </td></tr> 057 <tr><td>0x0C</td><td>int </td><td>format </td> 058 <td> format code: 1 for 8-bit u-law </td></tr> 059 <tr><td>0x10</td><td>int </td><td>samplingRate </td> 060 <td> the sampling rate </td></tr> 061 <tr><td>0x14</td><td>int </td><td>numChannels </td> 062 <td> the number of channels </td></tr> 063 <tr><td>0x18</td><td>byte</td><td>info[] </td> 064 <td> optional text information </td></tr> 065 </table> 066 <p> 067 The design of this class is based on the web page of 068 <a href="mailto:donahu@cooper.edu">Billy Donahue</a>. 069 <a href="http://www.cooper.edu/~donahu/auformat/auFormat.html#in_browser"> http://www.cooper.edu/~donahu/auformat/auFormat.html</a>. 070 That page no longer exists. This file was created on 1998-08-31, 071 The Wayback machine yields this page for that date: 072 <a href="http://web.archive.org/web/19980709194800/http://www.cooper.edu/~donahu/auformat/auFormat.html#in_browser">http://web.archive.org/web/19980709194800/http://www.cooper.edu/~donahu/auformat/auFormat.html</a>.</p> 073 074<p> Note that this class serves the same role as 075 the class by the same name in the sun.audio package, but is much 076 more public about its information. For example, the Sun version 077 does not give any access to the audio data itself.</p> 078 079 @author Edward A. Lee 080 @version $Id$ 081 @since Ptolemy II 0.2 082 @Pt.ProposedRating Red (eal) 083 @Pt.AcceptedRating Red (cxh) 084 */ 085public class Audio { 086 /** Construct an instance initialized with the audio 087 * signal given by the argument. The argument is an array 088 * of bytes that are mu-law encoded. The audio signal is assumed 089 * to have an 8kHz sample rate, with a single channel. 090 * @param audio An audio signal. 091 */ 092 public Audio(byte[] audio) { 093 String ptinfo = "Ptolemy audio"; 094 095 // NOTE: This uses the platform encoding, which is probably wrong. 096 info = ptinfo.getBytes(); 097 offset = 24 + info.length; 098 size = audio.length; 099 format = 1; 100 sampleRate = 8000; 101 numChannels = 1; 102 this.audio = new byte[1][]; 103 this.audio[0] = audio; 104 } 105 106 /** Construct an instance initialized with the audio 107 * signal given by the argument. The argument is an array 108 * of double-precision, floating point audio samples assumed to 109 * be normalized to be in the range -1.0 to 1.0. 110 * The data will be encoded according to the mu-law standard at an 111 * 8kHz sample rate, with a single channel. 112 * @param audio An audio signal. 113 */ 114 public Audio(double[] audio) { 115 String ptinfo = "Ptolemy audio"; 116 117 // NOTE: This uses the platform encoding, which is probably wrong. 118 info = ptinfo.getBytes(); 119 offset = 24 + info.length; 120 size = audio.length; 121 format = 1; 122 sampleRate = 8000; 123 numChannels = 1; 124 this.audio = new byte[1][size]; 125 126 for (int i = size - 1; i >= 0; i--) { 127 this.audio[0][i] = lin2mu((int) (audio[i] * 31616.0)); 128 } 129 } 130 131 /** Construct an instance and initialize it by reading 132 * the specified stream. 133 * @param input The input stream. 134 * @exception IOException If an error occurs reading the input data 135 * (e.g. a premature end of file). 136 */ 137 public Audio(DataInputStream input) throws IOException { 138 int bytesRead = input.read(magic, 0, 4); 139 if (bytesRead != 4) { 140 throw new IOException( 141 "Read only " + bytesRead + " bytes, expecting " + 4); 142 } 143 144 // Check the magic number, which should be 0x2E736E64, '.snd' 145 // in ASCII. 146 if (magic[0] != 0x2E || magic[1] != 0x73 || magic[2] != 0x6E 147 || magic[3] != 0x64) { 148 throw new IllegalArgumentException( 149 "ptolemy.media.Audio: bad magic number in " 150 + "stream header. Not an audio file?"); 151 } 152 153 offset = input.readInt(); 154 size = input.readInt(); 155 format = input.readInt(); 156 sampleRate = input.readInt(); 157 numChannels = input.readInt(); 158 159 if (offset < 0 || offset > 10000) { 160 throw new IllegalArgumentException("ptolemy.media.Audio:" 161 + " offset value '" + offset + "' is out of range 0-10000"); 162 } 163 164 info = new byte[offset - 24]; 165 bytesRead = input.read(info, 0, offset - 24); 166 if (bytesRead != offset - 24) { 167 throw new IOException("Read only " + bytesRead 168 + " bytes, expecting " + (offset - 24)); 169 } 170 171 if (format != 1) { 172 throw new IllegalArgumentException("ptolemy.media.Audio:" 173 + " Sorry, only 8-bit mu-law encoded data can be read, " 174 + format + " formats seen, 1 expected."); 175 } 176 177 if (numChannels != 1) { 178 throw new IllegalArgumentException("ptolemy.media.Audio:" 179 + " Sorry, only one-channel audio data can be read, " 180 + numChannels + " channels seen, 1 expected."); 181 } 182 183 // Finally read the audio data. 184 audio = new byte[1][size]; 185 input.readFully(audio[0]); 186 } 187 188 /////////////////////////////////////////////////////////////////// 189 //// public members //// 190 191 /** The file type identifier, 0x2E736E64 or '.snd' in ASCII. */ 192 public byte[] magic = { (byte) '.', (byte) 's', (byte) 'n', (byte) 'd' }; 193 194 /** Offset of audio data relative to the start of the stream. */ 195 public int offset; 196 197 /** Number of bytes of audio data. */ 198 public int size; 199 200 /** Format code; 1 for 8-bit mu-law. */ 201 public int format; 202 203 /** The sampling rate. */ 204 public int sampleRate; 205 206 /** The number of channels. */ 207 public int numChannels; 208 209 /** Four byte info field. */ 210 public byte[] info; 211 212 /** Audio data, by channel. */ 213 public byte[][] audio; 214 215 /////////////////////////////////////////////////////////////////// 216 //// public methods //// 217 218 /** Convert an integer linear representation of an audio sample 219 * into a mu-255 companded representation. Mu law is the standard 220 * used in Sun .au files as well as throughout the telephone network. 221 * <p> 222 * The integer argument is a 16-bit representation of the sample. 223 * Anything outside the range -32635 to 32635 will be clipped to within 224 * that range. 225 * <p> 226 * The mu-255 representation is a byte SEEEMMMM where S is the sign 227 * bit, EEE is the three-bit exponent, and MMMM is the four-bit 228 * mantissa. The bits are flipped, so that the binary 10000000 229 * is the largest positive number and 00000000 is the largest negative 230 * number. If you have called that static method setZeroTrap() with a 231 * <i>true</i> argument, then 232 * per MIL-STD-188-113, the 00000000 representation 233 * is never used, replaced instead with 00000010 (0x02). 234 * By default, this trap is not used. 235 * <p> 236 * This implementation was written by Anthony Hursh, who included with it 237 * the following information: 238 * <p> 239 * Copyright 1997 by Anthony Hursh 240 * <hursha@saturn.math.uaa.alaska.edu> 241 * This code may be freely used as long as proper credit 242 * is given. It was originally written in C by 243 * Craig Reese (IDA/Supercomputing Research Center) and 244 * Joe Campbell (Department of Defense), and 245 * ported to Java by Tony Hursh, January 1997. 246 * References: 247 * <ol> 248 * <li> CCITT Recommendation G.711 (very difficult to follow) 249 * <li> "A New Digital Technique for Implementation of Any 250 * Continuous PCM Companding Law," Villeret, Michel, 251 * et al. 1973 IEEE Int. Conf. on Communications, Vol 1, 252 * 1973, pg. 11.12-11.17 253 * <li> MIL-STD-188-113,"Interoperability and Performance Standards 254 * for Analog-to_Digital Conversion Techniques," 255 * 17 February 1987 256 * </ol> 257 * 258 * @param sample A linear representation of the sample. 259 * @return A mu-255 representation of the sample. 260 */ 261 public static byte lin2mu(int sample) { 262 int sign = 0; 263 264 if (sample < 0) { 265 sample = -sample; 266 sign = 0x80; 267 } 268 269 // clip the magnitude 270 if (sample > CLIP) { 271 sample = CLIP; 272 } 273 274 sample = sample + BIAS; 275 276 int exponent = exp_lut[sample >> 7 & 0xFF]; 277 int mantissa = sample >> exponent + 3 & 0x0F; 278 int ulawbyte = sign | exponent << 4 | mantissa; 279 280 // System.out.println(" sign = " + sign + " exponent = " + 281 // exponent + " mantissa = " + mantissa ); 282 ulawbyte = ~ulawbyte; 283 ulawbyte &= 0xFF; 284 285 if (_zerotrap && ulawbyte == 0) { 286 // optional CCITT trap 287 ulawbyte = 0x02; 288 } 289 290 return (byte) ulawbyte; 291 } 292 293 /** Convert mu-255 companded representation of an audio sample 294 * into an integer linear representation. Mu law is the standard 295 * used in Sun .au files as well as throughout the telephone network. 296 * This implementation is based on the web page by 297 * <a href=mailto:donahu@cooper.edu>Billy Donahue</a>: 298 * <a href=http://www.cooper.edu/~donahu/auformat/auFormat.html> 299 * http://www.cooper.edu/~donahu/auformat/auFormat.html</a>. 300 * The resulting integer values are scaled to be in the range -31616 301 * to 31616. This uses the low order 16 bits of the resulting integer, 302 * and thus provides a convenient 16-bit linear encoding. 303 * <p> 304 * The mu-255 representation is a byte SEEEMMMM where S is the sign 305 * bit, EEE is the three-bit exponent, and MMMM is the four-bit 306 * mantissa. The bits are flipped, so that the binary 10000000 307 * is the largest positive number and 00000000 is the largest negative 308 * number. 309 * <p> 310 * If you have called setZeroTrap() with a <i>true</i> argument, then 311 * this will not be an exact inverse of lin2mu because the zero code 312 * is interpreted as being the largest negative number, -31616. 313 * 314 * @param b A mu-255 representation of the sample. 315 * @return A linear representation of the sample. 316 */ 317 public static int mu2lin(byte b) { 318 // flip the bits 319 int mu = b ^ 0xFF; 320 int sign = (mu & 0x80) >> 7; 321 int exponent = (mu & 0x70) >> 4; 322 int mantissa = mu & 0x0F; 323 324 // System.out.println(" sign = " + sign + " exponent = " + 325 // exponent + " mantissa = " + mantissa ); 326 int linear = (mantissa << exponent + 1) - 0x20 + (0x20 << exponent); 327 328 // Make into a 16 bit sample. 329 linear <<= 2; 330 return sign == 1 ? -linear : linear; 331 } 332 333 /** Read Sun audio file (.au) format and return the audio data as an array. 334 * The argument stream may represent a file, a URL, or a byte array that 335 * contains the .au format. 336 * For example, given a URL called "url," you can read the audio file 337 * as follows: 338 * <pre> 339 * double audio[] = readAudio(new DataInputStream(url.openStream()); 340 * </pre> 341 * <p> 342 * The returned values lie in the range -1.0 to 1.0. 343 * 344 * @param input The input stream. 345 * @return The audio data as an array of doubles. 346 * @exception IOException If an I/O error occurs reading the stream. 347 */ 348 public static double[] readAudio(DataInputStream input) throws IOException { 349 Audio audio = new Audio(input); 350 return audio.toDouble(0); 351 } 352 353 /** Configure all instances of this class to use the MIL-STD zero trap. 354 * I.e., per MIL-STD-188-113, the 00000000 representation 355 * is never used, replaced instead with 00000010 (0x02). 356 * For some reason, an all zero 357 * mu-law code is sometimes undesirable. By default, this class does 358 * not use this trap, so you must call this with a <i>true</i> argument 359 * to use the trap. 360 * @param boole If true, use zero-trap encoding. 361 */ 362 public static void setZeroTrap(boolean boole) { 363 _zerotrap = boole; 364 } 365 366 /** Convert the audio data to linear double encoding (from mu-law). 367 * The returned numbers lie in the range -1.0 to 1.0. 368 * @param channel The channel number. 369 * @return A new array of integers, or null if there is no audio data. 370 */ 371 public double[] toDouble(int channel) { 372 int[] intdata = toLinear(channel); 373 374 if (intdata != null) { 375 double[] result = new double[intdata.length]; 376 377 for (int i = intdata.length - 1; i >= 0; i--) { 378 result[i] = intdata[i] / 31616.0; 379 } 380 381 return result; 382 } 383 384 return null; 385 } 386 387 /** Convert the audio data to linear integer encoding (from mu-law). 388 * The returned integers use the low-order 16 bits only, lying 389 * in the range -31616 to 31616. 390 * @param channel The channel number. 391 * @return A new array of integers, or null if there is no audio data. 392 */ 393 public int[] toLinear(int channel) { 394 if (audio != null) { 395 if (audio.length > channel && audio[channel] != null) { 396 int[] result = new int[audio[channel].length]; 397 398 for (int i = audio[channel].length - 1; i >= 0; i--) { 399 result[i] = mu2lin(audio[channel][i]); 400 } 401 402 return result; 403 } 404 } 405 406 return null; 407 } 408 409 /** Return a readable representation of the header data. */ 410 @Override 411 public String toString() { 412 return "file ID tag = " + new String(magic) + "\n" + "offset = " 413 + offset + "\n" + "size = " + size + "\n" + "format code = " 414 + format + "\n" + "sampleRate = " + sampleRate + "\n" 415 + "number of channels = " + numChannels + "\n" + "info field = " 416 + new String(info).trim(); 417 } 418 419 /** Write the audio data to an output stream in the Sun audio format. 420 * 421 * @param output The output stream. 422 * @exception IOException If an error occurs writing to the stream. 423 */ 424 public void write(DataOutputStream output) throws IOException { 425 output.write(magic, 0, 4); 426 output.writeInt(offset); 427 output.writeInt(size); 428 output.writeInt(format); 429 output.writeInt(sampleRate); 430 output.writeInt(numChannels); 431 output.write(info, 0, offset - 24); 432 output.write(audio[0], 0, size); 433 } 434 435 /** Write the raw audio data to an output stream. 436 * This method can be used to play the audio data using the 437 * (undocumented and unsupported) sun.audio package as follows: 438 * <pre> 439 * // The constructor argument below is optional 440 * ByteArrayOutputStream out = 441 * new ByteArrayOutputStream(sound.size); 442 * try { 443 * sound.writeRaw(new DataOutputStream(out)); 444 * } catch (IOException ex) { 445 * throw new RuntimeException("Audio output failed"); 446 * } 447 * byte[] iobuffer = out.toByteArray(); 448 * ByteArrayInputStream instream = 449 * new ByteArrayInputStream(_iobuffer); 450 * AudioPlayer.player.start(instream); 451 * </pre> 452 * The above code assumes we have an sun.audio.AudioData object 453 * called "sound". 454 * Although it would seem reasonable to include a "play" method in this 455 * class to do this, we wish to avoid a dependence on the sun.audio 456 * package in this Ptolemy package, so you will have to implement 457 * the above code yourself. 458 * 459 * @param output The output stream. 460 * @exception IOException If an error occurs writing to the stream. 461 */ 462 public void writeRaw(DataOutputStream output) throws IOException { 463 output.write(audio[0], 0, size); 464 } 465 466 /** Write Sun audio file (.au) format from an array. 467 * The argument is an array of doubles assumed to lie in the range of 468 * -1.0 to 1.0. The data is converted to one-channel mu-law samples 469 * at 8kHz. 470 * 471 * @param audio The audio data, as an array of doubles. 472 * @param output The output stream. 473 * @exception IOException If an I/O error occurs writing to the stream. 474 */ 475 public static void writeAudio(double[] audio, DataOutputStream output) 476 throws IOException { 477 Audio obj = new Audio(audio); 478 obj.write(output); 479 } 480 481 /////////////////////////////////////////////////////////////////// 482 //// private members //// 483 // The following are used for mu-law conversion. 484 // Turn on the trap as per the MIL-STD (this prevents a result of 0). 485 private static boolean _zerotrap = false; 486 487 // define the add-in bias for 16 bit samples. 488 private static final int BIAS = 0x84; 489 490 // clipping value for inputs. 491 private static final int CLIP = 32635; 492 493 // lookup table for the exponent. 494 private static final byte[] exp_lut = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 495 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 496 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 497 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 498 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 499 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 500 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; 506}