/* Copyright (C) 1996-2001 Id Software, Inc. Copyright (C) 2010-2011 O. Sezer Copyright (C) 2010-2014 QuakeSpasm developers This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ // snd_mix.c -- portable code to mix sounds for snd_dma.c #include "q_defs.h" #define PAINTBUFFER_SIZE 2048 portable_samplepair_t paintbuffer[PAINTBUFFER_SIZE]; int32_t snd_scaletable[32][256]; int32_t *snd_p, snd_linear_count; int16_t *snd_out; static int32_t snd_vol; static void Snd_WriteLinearBlastStereo16(void) { int32_t i; int32_t val; for(i = 0; i < snd_linear_count; i += 2) { val = snd_p[i] / 256; if(val > 0x7fff) snd_out[i] = 0x7fff; else if(val < (int16_t)0x8000) snd_out[i] = (int16_t)0x8000; else snd_out[i] = val; val = snd_p[i + 1] / 256; if(val > 0x7fff) snd_out[i + 1] = 0x7fff; else if(val < (int16_t)0x8000) snd_out[i + 1] = (int16_t)0x8000; else snd_out[i + 1] = val; } } static void S_TransferStereo16(int32_t endtime) { int32_t lpos; int32_t lpaintedtime; snd_p = (int32_t *) paintbuffer; lpaintedtime = paintedtime; while(lpaintedtime < endtime) { // handle recirculating buffer issues lpos = lpaintedtime & ((shm->samples >> 1) - 1); snd_out = (int16_t *)shm->buffer + (lpos << 1); snd_linear_count = (shm->samples >> 1) - lpos; if(lpaintedtime + snd_linear_count > endtime) snd_linear_count = endtime - lpaintedtime; snd_linear_count <<= 1; // write a linear blast of samples Snd_WriteLinearBlastStereo16(); snd_p += snd_linear_count; lpaintedtime += (snd_linear_count >> 1); } } static void S_TransferPaintBuffer(int32_t endtime) { int32_t out_idx, out_mask; int32_t count, step, val; int32_t *p; if(shm->samplebits == 16 && shm->channels == 2) { S_TransferStereo16(endtime); return; } p = (int32_t *) paintbuffer; count = (endtime - paintedtime) * shm->channels; out_mask = shm->samples - 1; out_idx = paintedtime * shm->channels & out_mask; step = 3 - shm->channels; if(shm->samplebits == 16) { int16_t *out = (int16_t *)shm->buffer; while(count--) { val = *p / 256; p += step; if(val > 0x7fff) val = 0x7fff; else if(val < (int16_t)0x8000) val = (int16_t)0x8000; out[out_idx] = val; out_idx = (out_idx + 1) & out_mask; } } else if(shm->samplebits == 8 && !shm->signed8) { uint8_t *out = shm->buffer; while(count--) { val = *p / 256; p += step; if(val > 0x7fff) val = 0x7fff; else if(val < (int16_t)0x8000) val = (int16_t)0x8000; out[out_idx] = (val / 256) + 128; out_idx = (out_idx + 1) & out_mask; } } else if(shm->samplebits == 8) /* S8 format, e.g. with Amiga AHI */ { int8_t *out = (int8_t *) shm->buffer; while(count--) { val = *p / 256; p += step; if(val > 0x7fff) val = 0x7fff; else if(val < (int16_t)0x8000) val = (int16_t)0x8000; out[out_idx] = (val / 256); out_idx = (out_idx + 1) & out_mask; } } } /* ============== S_MakeBlackmanWindowKernel Makes a lowpass filter kernel, from equation 16-4 in "The Scientist and Engineer's Guide to Digital Signal Processing" M is the kernel size (not counting the center point), must be even kernel has room for M+1 floats f_c is the filter cutoff frequency, as a fraction of the samplerate ============== */ static void S_MakeBlackmanWindowKernel(float *kernel, int32_t M, float f_c) { int32_t i; for(i = 0; i <= M; i++) { if(i == M / 2) { kernel[i] = 2 * PI * f_c; } else { kernel[i] = (sin(2 * PI * f_c * (i - M / 2.0)) / (i - (M / 2.0))) * (0.42 - 0.5 * cos(2 * PI * i / (double)M) + 0.08 * cos(4 * PI * i / (double)M)); } } // normalize the kernel so all of the values sum to 1 { float sum = 0; for(i = 0; i <= M; i++) { sum += kernel[i]; } for(i = 0; i <= M; i++) { kernel[i] /= sum; } } } typedef struct { float *memory; // kernelsize floats float *kernel; // kernelsize floats int32_t kernelsize; // M+1, rounded up to be a multiple of 16 int32_t M; // M value used to make kernel, even int32_t parity; // 0-3 float f_c; // cutoff frequency, [0..1], fraction of sample rate } filter_t; static void S_UpdateFilter(filter_t *filter, int32_t M, float f_c) { if(filter->f_c != f_c || filter->M != M) { if(filter->memory != NULL) free(filter->memory); if(filter->kernel != NULL) free(filter->kernel); filter->M = M; filter->f_c = f_c; filter->parity = 0; // M + 1 rounded up to the next multiple of 16 filter->kernelsize = (M + 1) + 16 - ((M + 1) % 16); filter->memory = (float *) calloc(filter->kernelsize, sizeof(float)); filter->kernel = (float *) calloc(filter->kernelsize, sizeof(float)); S_MakeBlackmanWindowKernel(filter->kernel, M, f_c); } } /* ============== S_ApplyFilter Lowpass-filter the given buffer containing 44100Hz audio. As an optimization, it decimates the audio to 11025Hz (setting every sample position that's not a multiple of 4 to 0), then convoluting with the filter kernel is 4x faster, because we can skip 3/4 of the input samples that are known to be 0 and skip 3/4 of the filter kernel. ============== */ static void S_ApplyFilter(filter_t *filter, int32_t *data, int32_t stride, int32_t count) { int32_t i, j; float *input; const int32_t kernelsize = filter->kernelsize; const float *kernel = filter->kernel; int32_t parity; input = (float *) malloc(sizeof(float) * (filter->kernelsize + count)); // set up the input buffer // memory holds the previous filter->kernelsize samples of input. memcpy(input, filter->memory, filter->kernelsize * sizeof(float)); for(i = 0; i < count; i++) { input[filter->kernelsize + i] = data[i * stride] / (32768.0 * 256.0); } // copy out the last filter->kernelsize samples to 'memory' for next time memcpy(filter->memory, input + count, filter->kernelsize * sizeof(float)); // apply the filter parity = filter->parity; for(i = 0; i < count; i++) { const float *input_plus_i = input + i; float val[4] = {0, 0, 0, 0}; for(j = (4 - parity) % 4; j < kernelsize; j += 16) { val[0] += kernel[j] * input_plus_i[j]; val[1] += kernel[j + 4] * input_plus_i[j + 4]; val[2] += kernel[j + 8] * input_plus_i[j + 8]; val[3] += kernel[j + 12] * input_plus_i[j + 12]; } // 4.0 factor is to increase volume by 12 dB; this is to make up the // volume drop caused by the zero-filling this filter does. data[i * stride] = (val[0] + val[1] + val[2] + val[3]) * (32768.0 * 256.0 * 4.0); parity = (parity + 1) % 4; } filter->parity = parity; free(input); } /* ============== S_LowpassFilter lowpass filters 24-bit integer samples in 'data' (stored in 32-bit ints). assumes 44100Hz sample rate, and lowpasses at around 5kHz memory should be a zero-filled filter_t struct ============== */ static void S_LowpassFilter(int32_t *data, int32_t stride, int32_t count, filter_t *memory) { int32_t M; float bw, f_c; switch((int32_t)snd_filterquality.value) { case 1: M = 126; bw = 0.900; break; case 2: M = 150; bw = 0.915; break; case 3: M = 174; bw = 0.930; break; case 4: M = 198; bw = 0.945; break; case 5: default: M = 222; bw = 0.960; break; } f_c = (bw * 11025 / 2.0) / 44100.0; S_UpdateFilter(memory, M, f_c); S_ApplyFilter(memory, data, stride, count); } /* =============================================================================== CHANNEL MIXING =============================================================================== */ static void SND_PaintChannelFrom8(channel_t *ch, sfxcache_t *sc, int32_t endtime, int32_t paintbufferstart); static void SND_PaintChannelFrom16(channel_t *ch, sfxcache_t *sc, int32_t endtime, int32_t paintbufferstart); void S_PaintChannels(int32_t endtime) { int32_t i; int32_t end, ltime, count; channel_t *ch; sfxcache_t *sc; snd_vol = sfxvolume.value * 256; while(paintedtime < endtime) { // if paintbuffer is smaller than DMA buffer end = endtime; if(endtime - paintedtime > PAINTBUFFER_SIZE) end = paintedtime + PAINTBUFFER_SIZE; // clear the paint buffer memset(paintbuffer, 0, (end - paintedtime) * sizeof(portable_samplepair_t)); // paint in the channels. ch = snd_channels; for(i = 0; i < total_channels; i++, ch++) { if(!ch->sfx) continue; if(!ch->leftvol && !ch->rightvol) continue; sc = S_LoadSound(ch->sfx); if(!sc) continue; ltime = paintedtime; while(ltime < end) { // paint up to end if(ch->end < end) count = ch->end - ltime; else count = end - ltime; if(count > 0) { // the last param to SND_PaintChannelFrom is the index // to start painting to in the paintbuffer, usually 0. if(sc->width == 1) SND_PaintChannelFrom8(ch, sc, count, ltime - paintedtime); else SND_PaintChannelFrom16(ch, sc, count, ltime - paintedtime); ltime += count; } // if at end of loop, restart if(ltime >= ch->end) { if(sc->loopstart >= 0) { ch->pos = sc->loopstart; ch->end = ltime + sc->length - ch->pos; } else { // channel just stopped ch->sfx = NULL; break; } } } } // clip each sample to 0dB, then reduce by 6dB (to leave some headroom for // the lowpass filter and the music). the lowpass will smooth out the // clipping for(i = 0; i < end - paintedtime; i++) { paintbuffer[i].left = CLAMP(-32768 * 256, paintbuffer[i].left, 32767 * 256) / 2; paintbuffer[i].right = CLAMP(-32768 * 256, paintbuffer[i].right, 32767 * 256) / 2; } // apply a lowpass filter if(sndspeed.value == 11025 && shm->speed == 44100) { static filter_t memory_l, memory_r; S_LowpassFilter((int32_t *)paintbuffer, 2, end - paintedtime, &memory_l); S_LowpassFilter(((int32_t *)paintbuffer) + 1, 2, end - paintedtime, &memory_r); } // paint in the music if(s_rawend >= paintedtime) { // copy from the streaming sound source int32_t s; int32_t stop; stop = (end < s_rawend) ? end : s_rawend; for(i = paintedtime; i < stop; i++) { s = i & (MAX_RAW_SAMPLES - 1); // lower music by 6db to match sfx paintbuffer[i - paintedtime].left += s_rawsamples[s].left / 2; paintbuffer[i - paintedtime].right += s_rawsamples[s].right / 2; } // if (i != end) // Con_Printf ("partial stream\n"); // else // Con_Printf ("full stream\n"); } // transfer out according to DMA format S_TransferPaintBuffer(end); paintedtime = end; } } void SND_InitScaletable(void) { int32_t i, j; int32_t scale; for(i = 0; i < 32; i++) { scale = i * 8 * 256 * sfxvolume.value; for(j = 0; j < 256; j++) { /* When compiling with gcc-4.1.0 at optimisations O1 and higher, the tricky int8_t type conversion is not guaranteed. Therefore we explicity calculate the signed value from the index as required. From Kevin Shanahan. See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26719 */ // snd_scaletable[i][j] = ((int8_t)j) * scale; snd_scaletable[i][j] = ((j < 128) ? j : j - 256) * scale; } } } static void SND_PaintChannelFrom8(channel_t *ch, sfxcache_t *sc, int32_t count, int32_t paintbufferstart) { int32_t data; int32_t *lscale, *rscale; uint8_t *sfx; int32_t i; if(ch->leftvol > 255) ch->leftvol = 255; if(ch->rightvol > 255) ch->rightvol = 255; lscale = snd_scaletable[ch->leftvol >> 3]; rscale = snd_scaletable[ch->rightvol >> 3]; sfx = (uint8_t *)sc->data + ch->pos; for(i = 0; i < count; i++) { data = sfx[i]; paintbuffer[paintbufferstart + i].left += lscale[data]; paintbuffer[paintbufferstart + i].right += rscale[data]; } ch->pos += count; } static void SND_PaintChannelFrom16(channel_t *ch, sfxcache_t *sc, int32_t count, int32_t paintbufferstart) { int32_t data; int32_t left, right; int32_t leftvol, rightvol; int16_t *sfx; int32_t i; leftvol = ch->leftvol * snd_vol; rightvol = ch->rightvol * snd_vol; leftvol /= 256; rightvol /= 256; sfx = (int16_t *)sc->data + ch->pos; for(i = 0; i < count; i++) { data = sfx[i]; // this was causing integer overflow as observed in quakespasm // with the warpspasm mod moved >>8 to left/right volume above. // left = (data * leftvol) >> 8; // right = (data * rightvol) >> 8; left = data * leftvol; right = data * rightvol; paintbuffer[paintbufferstart + i].left += left; paintbuffer[paintbufferstart + i].right += right; } ch->pos += count; }