001/* A library of audio operations.
002
003 Copyright (c) 1998-2014 The Regents of the University of California.
004 All rights reserved.
005
006 Permission is hereby granted, without written agreement and without
007 license or royalty fees, to use, copy, modify, and distribute this
008 software and its documentation for any purpose, provided that the above
009 copyright notice and the following two paragraphs appear in all copies
010 of this software.
011
012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
016 SUCH DAMAGE.
017
018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
023 ENHANCEMENTS, OR MODIFICATIONS.
024
025 PT_COPYRIGHT_VERSION_2
026 COPYRIGHTENDKEY
027
028 */
029package ptolemy.media;
030
031import java.io.DataInputStream;
032import java.io.DataOutputStream;
033import java.io.IOException;
034
035///////////////////////////////////////////////////////////////////
036//// Audio
037
/**
 Instances of this class represent audio data equivalent to that
 contained by a Sun/NeXT audio file (.au file).  The class also
 includes a set of utility static methods for manipulating audio
 signals.
 Currently, only an 8kHz sample rate, mu-law encoded, monophonic
 audio format is supported.
 <p>
 The format of an audio file is:</p>

 <table border="1">
 <caption>Audio File Format</caption>
 <tr><th>byte</th><th>type</th><th>field name</th><th>field value</th></tr>
 <tr><td>0x00</td><td>byte </td><td>magic[4]        </td>
 <td> 0x2E736E64 '.snd' in ASCII     </td></tr>
 <tr><td>0x04</td><td>int </td><td>offset </td>
 <td> offset of audio data relative to the start of the stream </td></tr>
 <tr><td>0x08</td><td>int </td><td>size     </td>
 <td> number of bytes of data        </td></tr>
 <tr><td>0x0C</td><td>int </td><td>format   </td>
 <td> format code: 1 for 8-bit u-law </td></tr>
 <tr><td>0x10</td><td>int </td><td>samplingRate </td>
 <td> the sampling rate              </td></tr>
 <tr><td>0x14</td><td>int </td><td>numChannels </td>
 <td> the number of channels         </td></tr>
 <tr><td>0x18</td><td>byte</td><td>info[]      </td>
 <td> optional text information      </td></tr>
 </table>
 <p>
 The design of this class is based on the web page of
 <a href="mailto:donahu@cooper.edu">Billy Donahue</a>:
 <a href="http://www.cooper.edu/~donahu/auformat/auFormat.html#in_browser"> http://www.cooper.edu/~donahu/auformat/auFormat.html</a>.
 That page no longer exists.  This file was created on 1998-08-31.
 The Wayback machine yields this page for that date:
 <a href="http://web.archive.org/web/19980709194800/http://www.cooper.edu/~donahu/auformat/auFormat.html#in_browser">http://web.archive.org/web/19980709194800/http://www.cooper.edu/~donahu/auformat/auFormat.html</a>.</p>

 <p> Note that this class serves the same role as
 the class by the same name in the sun.audio package, but is much
 more public about its information.  For example, the Sun version
 does not give any access to the audio data itself.</p>

 @author Edward A. Lee
 @version $Id$
 @since Ptolemy II 0.2
 @Pt.ProposedRating Red (eal)
 @Pt.AcceptedRating Red (cxh)
 */
public class Audio {
    /** Construct an instance initialized with the audio
     *  signal given by the argument.  The argument is an array
     *  of bytes that are mu-law encoded.  The audio signal is assumed
     *  to have an 8kHz sample rate, with a single channel.
     *  The array is not copied; it is stored directly as channel 0.
     *  @param audio An audio signal.
     */
    public Audio(byte[] audio) {
        _initHeader(audio.length);
        this.audio = new byte[1][];
        this.audio[0] = audio;
    }

    /** Construct an instance initialized with the audio
     *  signal given by the argument.  The argument is an array
     *  of double-precision, floating point audio samples assumed to
     *  be normalized to be in the range -1.0 to 1.0.
     *  The data will be encoded according to the mu-law standard at an
     *  8kHz sample rate, with a single channel.  Samples outside the
     *  nominal range are clipped by lin2mu().
     *  @param audio An audio signal.
     */
    public Audio(double[] audio) {
        _initHeader(audio.length);
        this.audio = new byte[1][size];

        for (int i = 0; i < size; i++) {
            this.audio[0][i] = lin2mu((int) (audio[i] * 31616.0));
        }
    }

    /** Construct an instance and initialize it by reading
     *  the specified stream.  If the size field of the header holds
     *  the "unknown size" marker (0xFFFFFFFF), then all remaining
     *  bytes of the stream are taken as audio data and the size field
     *  of this object is set to the number of bytes actually read.
     *  @param input The input stream.
     *  @exception IOException If an error occurs reading the input data
     *   (e.g. a premature end of file).
     *  @exception IllegalArgumentException If the header is not that of
     *   a supported audio stream (bad magic number, offset or size out
     *   of range, format other than 8-bit mu-law, or more than one
     *   channel).
     */
    public Audio(DataInputStream input) throws IOException {
        // A single read() may legally return fewer bytes than requested,
        // so keep reading until the buffer is full.
        _readExactly(input, magic);

        // Check the magic number, which should be 0x2E736E64, '.snd'
        // in ASCII.
        if (magic[0] != 0x2E || magic[1] != 0x73 || magic[2] != 0x6E
                || magic[3] != 0x64) {
            throw new IllegalArgumentException(
                    "ptolemy.media.Audio: bad magic number in "
                            + "stream header.  Not an audio file?");
        }

        offset = input.readInt();
        size = input.readInt();
        format = input.readInt();
        sampleRate = input.readInt();
        numChannels = input.readInt();

        // The fixed part of the header occupies 24 bytes, so the data
        // cannot start before that.
        if (offset < _HEADER_SIZE || offset > 10000) {
            throw new IllegalArgumentException(
                    "ptolemy.media.Audio:" + " offset value '" + offset
                            + "' is out of range " + _HEADER_SIZE + "-10000");
        }

        info = new byte[offset - _HEADER_SIZE];
        _readExactly(input, info);

        if (format != 1) {
            throw new IllegalArgumentException("ptolemy.media.Audio:"
                    + " Sorry, only 8-bit mu-law encoded data can be read, "
                    + format + " formats seen, 1 expected.");
        }

        if (numChannels != 1) {
            throw new IllegalArgumentException("ptolemy.media.Audio:"
                    + " Sorry, only one-channel audio data can be read, "
                    + numChannels + " channels seen, 1 expected.");
        }

        // Finally read the audio data.
        if (size == _UNKNOWN_SIZE) {
            audio = new byte[1][];
            audio[0] = _readToEnd(input);
            size = audio[0].length;
        } else if (size < 0) {
            throw new IllegalArgumentException("ptolemy.media.Audio:"
                    + " size value '" + size + "' is out of range");
        } else {
            audio = new byte[1][size];
            input.readFully(audio[0]);
        }
    }

    ///////////////////////////////////////////////////////////////////
    ////                      public members                         ////

    /** The file type identifier, 0x2E736E64 or '.snd' in ASCII. */
    public byte[] magic = { (byte) '.', (byte) 's', (byte) 'n', (byte) 'd' };

    /** Offset of audio data relative to the start of the stream. */
    public int offset;

    /** Number of bytes of audio data. */
    public int size;

    /** Format code; 1 for 8-bit mu-law. */
    public int format;

    /** The sampling rate. */
    public int sampleRate;

    /** The number of channels. */
    public int numChannels;

    /** Optional text information; its length is the offset minus the
     *  24 bytes of the fixed header.
     */
    public byte[] info;

    /** Audio data, by channel. */
    public byte[][] audio;

    ///////////////////////////////////////////////////////////////////
    ////                         public methods                    ////

    /** Convert an integer linear representation of an audio sample
     *  into a mu-255 companded representation.  Mu law is the standard
     *  used in Sun .au files as well as throughout the telephone network.
     *  <p>
     *  The integer argument is a 16-bit representation of the sample.
     *  Anything outside the range -32635 to 32635 will be clipped to within
     *  that range.
     *  <p>
     *  The mu-255 representation is a byte SEEEMMMM where S is the sign
     *  bit, EEE is the three-bit exponent, and MMMM is the four-bit
     *  mantissa. The bits are flipped, so that the binary 10000000
     *  is the largest positive number and 00000000 is the largest negative
     *  number. If you have called the static method setZeroTrap() with a
     *  <i>true</i> argument, then
     *  per MIL-STD-188-113, the 00000000 representation
     *  is never used, replaced instead with 00000010 (0x02).
     *  By default, this trap is not used.
     *  <p>
     *  This implementation was written by Anthony Hursh, who included with it
     *  the following information:
     *  <p>
     *  Copyright 1997 by Anthony Hursh
     *  &lt;hursha@saturn.math.uaa.alaska.edu&gt;
     *  This code may be freely used as long as proper credit
     *  is given.  It was originally written in C by
     *  Craig Reese (IDA/Supercomputing Research Center) and
     *  Joe Campbell (Department of Defense), and
     *  ported to Java by Tony Hursh, January 1997.
     *  References:
     *  <ol>
     *  <li> CCITT Recommendation G.711  (very difficult to follow)
     *  <li> "A New Digital Technique for Implementation of Any
     *     Continuous PCM Companding Law," Villeret, Michel,
     *     et al. 1973 IEEE Int. Conf. on Communications, Vol 1,
     *     1973, pg. 11.12-11.17
     *  <li> MIL-STD-188-113,"Interoperability and Performance Standards
     *     for Analog-to_Digital Conversion Techniques,"
     *     17 February 1987
     *  </ol>
     *
     *  @param sample A linear representation of the sample.
     *  @return A mu-255 representation of the sample.
     */
    public static byte lin2mu(int sample) {
        int sign = 0;
        int magnitude;

        if (sample < 0) {
            sign = 0x80;
            // Clip before negating: -Integer.MIN_VALUE overflows and
            // would otherwise stay negative and escape the clip below.
            magnitude = sample < -CLIP ? CLIP : -sample;
        } else {
            magnitude = sample > CLIP ? CLIP : sample;
        }

        magnitude += BIAS;

        // After clipping and biasing, the magnitude is at most 0x7FFF,
        // so bits 7 and above select the segment (exponent).
        int exponent = exp_lut[(magnitude >> 7) & 0xFF];
        int mantissa = (magnitude >> (exponent + 3)) & 0x0F;
        int ulawbyte = sign | (exponent << 4) | mantissa;

        // The encoded bits are stored inverted.
        ulawbyte = ~ulawbyte & 0xFF;

        if (_zerotrap && ulawbyte == 0) {
            // optional CCITT trap
            ulawbyte = 0x02;
        }

        return (byte) ulawbyte;
    }

    /** Convert mu-255 companded representation of an audio sample
     *  into an integer linear representation.  Mu law is the standard
     *  used in Sun .au files as well as throughout the telephone network.
     *  This implementation is based on the web page by
     *  <a href="mailto:donahu@cooper.edu">Billy Donahue</a>:
     *  <a href="http://www.cooper.edu/~donahu/auformat/auFormat.html">
     *  http://www.cooper.edu/~donahu/auformat/auFormat.html</a>.
     *  The resulting integer values are scaled to be in the range -31616
     *  to 31616.  This uses the low order 16 bits of the resulting integer,
     *  and thus provides a convenient 16-bit linear encoding.
     *  <p>
     *  The mu-255 representation is a byte SEEEMMMM where S is the sign
     *  bit, EEE is the three-bit exponent, and MMMM is the four-bit
     *  mantissa.  The bits are flipped, so that the binary 10000000
     *  is the largest positive number and 00000000 is the largest negative
     *  number.
     *  <p>
     *  If you have called setZeroTrap() with a <i>true</i> argument, then
     *  this will not be an exact inverse of lin2mu because the zero code
     *  is interpreted as being the largest negative number, -31616.
     *
     *  @param b A mu-255 representation of the sample.
     *  @return A linear representation of the sample.
     */
    public static int mu2lin(byte b) {
        // Flip the bits.  The byte is sign-extended to an int here, but
        // every use below masks off the bits of interest.
        int mu = b ^ 0xFF;
        boolean negative = (mu & 0x80) != 0;
        int exponent = (mu & 0x70) >> 4;
        int mantissa = mu & 0x0F;

        int linear = (mantissa << (exponent + 1)) - 0x20 + (0x20 << exponent);

        // Make into a 16 bit sample.
        linear <<= 2;
        return negative ? -linear : linear;
    }

    /** Read Sun audio file (.au) format and return the audio data as an array.
     *  The argument stream may represent a file, a URL, or a byte array that
     *  contains the .au format.
     *  For example, given a URL called "url," you can read the audio file
     *  as follows:
     *  <pre>
     *     double audio[] = readAudio(new DataInputStream(url.openStream()));
     *  </pre>
     *  <p>
     *  The returned values lie in the range -1.0 to 1.0.
     *
     *  @param input The input stream.
     *  @return The audio data as an array of doubles.
     *  @exception IOException If an I/O error occurs reading the stream.
     */
    public static double[] readAudio(DataInputStream input) throws IOException {
        return new Audio(input).toDouble(0);
    }

    /** Configure all instances of this class to use the MIL-STD zero trap.
     *  I.e., per MIL-STD-188-113, the 00000000 representation
     *  is never used, replaced instead with 00000010 (0x02).
     *  For some reason, an all zero
     *  mu-law code is sometimes undesirable.  By default, this class does
     *  not use this trap, so you must call this with a <i>true</i> argument
     *  to use the trap.
     *  @param boole If true, use zero-trap encoding.
     */
    public static void setZeroTrap(boolean boole) {
        _zerotrap = boole;
    }

    /** Convert the audio data to linear double encoding (from mu-law).
     *  The returned numbers lie in the range -1.0 to 1.0.
     *  @param channel The channel number.
     *  @return A new array of doubles, or null if there is no audio data.
     */
    public double[] toDouble(int channel) {
        int[] linear = toLinear(channel);

        if (linear == null) {
            return null;
        }

        double[] result = new double[linear.length];

        for (int i = 0; i < linear.length; i++) {
            result[i] = linear[i] / 31616.0;
        }

        return result;
    }

    /** Convert the audio data to linear integer encoding (from mu-law).
     *  The returned integers use the low-order 16 bits only, lying
     *  in the range -31616 to 31616.
     *  @param channel The channel number.
     *  @return A new array of integers, or null if there is no audio data
     *   for the specified channel.
     */
    public int[] toLinear(int channel) {
        if (audio == null || channel < 0 || channel >= audio.length
                || audio[channel] == null) {
            return null;
        }

        byte[] encoded = audio[channel];
        int[] result = new int[encoded.length];

        for (int i = 0; i < encoded.length; i++) {
            result[i] = mu2lin(encoded[i]);
        }

        return result;
    }

    /** Return a readable representation of the header data. */
    @Override
    public String toString() {
        // Decode with a fixed single-byte charset so that the result does
        // not depend on the platform default encoding.
        return "file ID tag = " + new String(magic, _HEADER_CHARSET) + "\n"
                + "offset = " + offset + "\n" + "size = " + size + "\n"
                + "format code = " + format + "\n" + "sampleRate = "
                + sampleRate + "\n" + "number of channels = " + numChannels
                + "\n" + "info field = "
                + new String(info, _HEADER_CHARSET).trim();
    }

    /** Write the audio data to an output stream in the Sun audio format.
     *
     *  @param output The output stream.
     *  @exception IOException If an error occurs writing to the stream.
     */
    public void write(DataOutputStream output) throws IOException {
        output.write(magic, 0, 4);
        output.writeInt(offset);
        output.writeInt(size);
        output.writeInt(format);
        output.writeInt(sampleRate);
        output.writeInt(numChannels);
        output.write(info, 0, offset - _HEADER_SIZE);
        output.write(audio[0], 0, size);
    }

    /** Write the raw audio data to an output stream.
     *  This method can be used to play the audio data using the
     *  (undocumented and unsupported) sun.audio package as follows:
     *  <pre>
     *      // The constructor argument below is optional
     *      ByteArrayOutputStream out =
     *               new ByteArrayOutputStream(sound.size);
     *      try {
     *          sound.writeRaw(new DataOutputStream(out));
     *      } catch (IOException ex) {
     *          throw new RuntimeException("Audio output failed");
     *      }
     *      byte[] iobuffer = out.toByteArray();
     *      ByteArrayInputStream instream =
     *              new ByteArrayInputStream(iobuffer);
     *      AudioPlayer.player.start(instream);
     *  </pre>
     *  The above code assumes we have an instance of this class
     *  called "sound".
     *  Although it would seem reasonable to include a "play" method in this
     *  class to do this, we wish to avoid a dependence on the sun.audio
     *  package in this Ptolemy package, so you will have to implement
     *  the above code yourself.
     *
     *  @param output The output stream.
     *  @exception IOException If an error occurs writing to the stream.
     */
    public void writeRaw(DataOutputStream output) throws IOException {
        output.write(audio[0], 0, size);
    }

    /** Write Sun audio file (.au) format from an array.
     *  The argument is an array of doubles assumed to lie in the range of
     *  -1.0 to 1.0.  The data is converted to one-channel mu-law samples
     *  at 8kHz.
     *
     *  @param audio The audio data, as an array of doubles.
     *  @param output The output stream.
     *  @exception IOException If an I/O error occurs writing to the stream.
     */
    public static void writeAudio(double[] audio, DataOutputStream output)
            throws IOException {
        new Audio(audio).write(output);
    }

    ///////////////////////////////////////////////////////////////////
    ////                         private methods                   ////

    /** Build the lookup table that maps bits 7 and above of a biased
     *  magnitude to the mu-law exponent.  Entry i is floor(log2(i)),
     *  with entry 0 equal to 0.
     *  @return A new 256-entry table.
     */
    private static byte[] _buildExponentTable() {
        byte[] table = new byte[256];

        for (int i = 1; i < table.length; i++) {
            table[i] = (byte) (31 - Integer.numberOfLeadingZeros(i));
        }

        return table;
    }

    /** Fill in the header fields for 8kHz, single-channel, mu-law
     *  data of the given length, using a fixed "Ptolemy audio" info
     *  string.
     *  @param dataSize The number of bytes of audio data.
     */
    private void _initHeader(int dataSize) {
        // Use a fixed charset rather than the platform encoding so that
        // the header bytes are the same on every platform.
        info = "Ptolemy audio".getBytes(_HEADER_CHARSET);
        offset = _HEADER_SIZE + info.length;
        size = dataSize;
        format = 1;
        sampleRate = 8000;
        numChannels = 1;
    }

    /** Read from the stream until the buffer is full.
     *  @param input The stream to read.
     *  @param buffer The buffer to fill.
     *  @exception IOException If the stream ends before the buffer is
     *   full, or if some other I/O error occurs.
     */
    private static void _readExactly(DataInputStream input, byte[] buffer)
            throws IOException {
        int total = 0;

        while (total < buffer.length) {
            int count = input.read(buffer, total, buffer.length - total);

            if (count < 0) {
                throw new IOException("Read only " + total
                        + " bytes, expecting " + buffer.length);
            }

            total += count;
        }
    }

    /** Read all remaining bytes of the stream.
     *  @param input The stream to read.
     *  @return A new array holding the bytes that were read.
     *  @exception IOException If an I/O error occurs.
     */
    private static byte[] _readToEnd(DataInputStream input)
            throws IOException {
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int count;

        while ((count = input.read(chunk)) >= 0) {
            collected.write(chunk, 0, count);
        }

        return collected.toByteArray();
    }

    ///////////////////////////////////////////////////////////////////
    ////                         private members                   ////

    // Number of bytes in the fixed part of the header (magic number plus
    // five 32-bit fields).
    private static final int _HEADER_SIZE = 24;

    // Value of the size field (0xFFFFFFFF) that marks the data size as
    // unknown.
    private static final int _UNKNOWN_SIZE = -1;

    // Single-byte charset used for the textual header fields.  It maps
    // every byte value to a character, so decoding never loses data.
    private static final java.nio.charset.Charset _HEADER_CHARSET = java.nio.charset.Charset
            .forName("ISO-8859-1");

    // The following are used for mu-law conversion.
    // Turn on the trap as per the MIL-STD (this prevents a result of 0).
    private static boolean _zerotrap = false;

    // define the add-in bias for 16 bit samples.
    private static final int BIAS = 0x84;

    // clipping value for inputs.
    private static final int CLIP = 32635;

    // lookup table for the exponent.
    private static final byte[] exp_lut = _buildExponentTable();
}