ffmpeg-2.8.5

git-svn-id: svn://kolibrios.org@6147 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Sergey Semyonov (Serge)
2016-02-05 22:08:02 +00:00
parent a08f61ddb9
commit a4b787f4b8
5429 changed files with 1356786 additions and 0 deletions

View File

@@ -0,0 +1,155 @@
/*
* 012v decoder
*
* Copyright (C) 2012 Carl Eugen Hoyos
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "internal.h"
#include "libavutil/intreadwrite.h"
/**
 * Set up the 012v decoder.
 *
 * The output is always planar 4:2:2 in 16-bit containers carrying 10
 * significant bits.  Streams tagged 'a12v' only trigger a sample request
 * for "transparency"; initialisation still succeeds.
 *
 * @return 0 (this function cannot fail)
 */
static av_cold int zero12v_decode_init(AVCodecContext *avctx)
{
    const int has_a12v_tag = avctx->codec_tag == MKTAG('a', '1', '2', 'v');

    avctx->bits_per_raw_sample = 10;
    avctx->pix_fmt             = AV_PIX_FMT_YUV422P16;

    if (has_a12v_tag) {
        avpriv_request_sample(avctx, "transparency");
    }

    return 0;
}
/**
 * Decode one 012v packet into a planar 4:2:2 frame with 16-bit samples.
 *
 * Every group of four little-endian 32-bit words carries six luma and
 * three Cb / three Cr 10-bit samples; each sample is moved into the top
 * ten bits of its 16-bit output word (mask 0xFFC0).
 *
 * @param avctx     codec context (width, height and codec_tag are read)
 * @param data      AVFrame that receives the picture
 * @param got_frame set to 1 once a picture has been produced
 * @param avpkt     input packet
 * @return avpkt->size on success, AVERROR_INVALIDDATA for unsupported
 *         dimensions or a packet that is too small, or the error returned
 *         by ff_get_buffer()
 */
static int zero12v_decode_frame(AVCodecContext *avctx, void *data,
                                int *got_frame, AVPacket *avpkt)
{
    int line, ret;
    const int width = avctx->width;
    AVFrame *pic = data;
    uint16_t *y, *u, *v;
    const uint8_t *line_end, *src = avpkt->data;
    /* default line size: 16 bytes for every 6 pixels, rounded down */
    int stride = avctx->width * 8 / 3;

    if (width <= 1 || avctx->height <= 0) {
        av_log(avctx, AV_LOG_ERROR, "Dimensions %dx%d not supported.\n", width, avctx->height);
        return AVERROR_INVALIDDATA;
    }

    /* '012v' packets may use a larger per-line stride; derive it from the
     * packet size when that size is an exact multiple of the height and
     * at least as large as the default stride */
    if (   avctx->codec_tag == MKTAG('0', '1', '2', 'v')
        && avpkt->size % avctx->height == 0
        && avpkt->size / avctx->height * 3 >= width * 8)
        stride = avpkt->size / avctx->height;

    if (avpkt->size < avctx->height * stride) {
        av_log(avctx, AV_LOG_ERROR, "Packet too small: %d instead of %d\n",
               avpkt->size, avctx->height * stride);
        return AVERROR_INVALIDDATA;
    }

    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
        return ret;

    pic->pict_type = AV_PICTURE_TYPE_I;
    pic->key_frame = 1;

    line_end = avpkt->data + stride;
    for (line = 0; line < avctx->height; line++) {
        /* scratch output for the last, partial group of a line; samples
         * that are never decoded keep the 0x8000 default */
        uint16_t y_temp[6] = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
        uint16_t u_temp[3] = {0x8000, 0x8000, 0x8000};
        uint16_t v_temp[3] = {0x8000, 0x8000, 0x8000};
        int x;

        y = (uint16_t *)(pic->data[0] + line * pic->linesize[0]);
        u = (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
        v = (uint16_t *)(pic->data[2] + line * pic->linesize[2]);

        for (x = 0; x < width; x += 6) {
            uint32_t t;

            /* fewer than 6 pixels or fewer than 16 input bytes left:
             * decode into the scratch arrays instead of the frame */
            if (width - x < 6 || line_end - src < 16) {
                y = y_temp;
                u = u_temp;
                v = v_temp;
            }

            if (line_end - src < 4)
                break;

            t = AV_RL32(src);
            src += 4;
            *u++ = t <<  6 & 0xFFC0;
            *y++ = t >>  4 & 0xFFC0;
            *v++ = t >> 14 & 0xFFC0;

            if (line_end - src < 4)
                break;

            t = AV_RL32(src);
            src += 4;
            *y++ = t <<  6 & 0xFFC0;
            *u++ = t >>  4 & 0xFFC0;
            *y++ = t >> 14 & 0xFFC0;

            if (line_end - src < 4)
                break;

            t = AV_RL32(src);
            src += 4;
            *v++ = t <<  6 & 0xFFC0;
            *y++ = t >>  4 & 0xFFC0;
            *u++ = t >> 14 & 0xFFC0;

            if (line_end - src < 4)
                break;

            t = AV_RL32(src);
            src += 4;
            *y++ = t <<  6 & 0xFFC0;
            *v++ = t >>  4 & 0xFFC0;
            *y++ = t >> 14 & 0xFFC0;

            if (width - x < 6)
                break;
        }

        if (x < width) {
            /* copy the partial group from the scratch arrays; the chroma
             * planes hold half as many samples as luma, rounded up */
            y = x   + (uint16_t *)(pic->data[0] + line * pic->linesize[0]);
            u = x/2 + (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
            v = x/2 + (uint16_t *)(pic->data[2] + line * pic->linesize[2]);
            memcpy(y, y_temp, sizeof(*y) * (width - x));
            /* the sample count must be halved before scaling by the sample
             * size, otherwise an even remainder copies one byte too many */
            memcpy(u, u_temp, sizeof(*u) * ((width - x + 1) / 2));
            memcpy(v, v_temp, sizeof(*v) * ((width - x + 1) / 2));
        }

        /* continue at the start of the next input line */
        line_end += stride;
        src = line_end - stride;
    }

    *got_frame = 1;

    return avpkt->size;
}
/* AVCodec descriptor for the "012v" video decoder: wires up the init and
* decode callbacks of this file and sets the AV_CODEC_CAP_DR1 capability. */
AVCodec ff_zero12v_decoder = {
.name = "012v",
.long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_012V,
.init = zero12v_decode_init,
.decode = zero12v_decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,188 @@
/*
* Quicktime Planar RGB (8BPS) Video Decoder
* Copyright (C) 2003 Roberto Togni
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* QT 8BPS Video Decoder by Roberto Togni
* For more information about the 8BPS format, visit:
* http://www.pcisys.net/~melanson/codecs/
*
* Supports: PAL8 (RGB 8bpp, paletted)
* : BGR24 (RGB 24bpp) (can also output it as RGB32)
* : RGB32 (RGB 32bpp, 4th plane is alpha)
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
#include "internal.h"
/* Candidate output formats handed to ff_get_format() for 24 bpp input,
* terminated by AV_PIX_FMT_NONE. */
static const enum AVPixelFormat pixfmt_rgb24[] = {
AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE };
/* Private decoder state of the 8BPS decoder. */
typedef struct EightBpsContext {
/* owning codec context, stored by decode_init() */
AVCodecContext *avctx;
/* number of coded planes: 1, 3 or 4, chosen by decode_init() */
unsigned char planes;
/* byte offset inside an output pixel at which each coded plane is written */
unsigned char planemap[4];
/* last palette received as packet side data; copied into frame->data[1] */
uint32_t pal[256];
} EightBpsContext;
/**
 * Decode one 8BPS frame.
 *
 * The packet starts with one big-endian 16-bit length per row and plane
 * (planes * height entries), followed by the run-length coded rows.  Each
 * plane is written into the interleaved output at the byte offset given by
 * the context's planemap, stepping by the number of planes.
 *
 * @param avctx     codec context; priv_data is the EightBpsContext
 * @param data      AVFrame that receives the picture
 * @param got_frame set to 1 once a picture has been produced
 * @param avpkt     input packet, optionally carrying palette side data
 * @return the packet size on success, AVERROR_INVALIDDATA on truncated
 *         input, or the error returned by ff_get_buffer()
 */
static int decode_frame(AVCodecContext *avctx, void *data,
                        int *got_frame, AVPacket *avpkt)
{
    AVFrame *frame = data;
    const uint8_t *buf = avpkt->data;
    int buf_size       = avpkt->size;
    EightBpsContext * const c = avctx->priv_data;
    const unsigned char *encoded = buf;
    unsigned char *pixptr, *pixptr_end;
    unsigned int height = avctx->height; // Real image height
    unsigned int dlen, p, row;
    const unsigned char *lp, *dp, *ep;
    unsigned char count;
    unsigned int planes     = c->planes;
    unsigned char *planemap = c->planemap;
    int ret;

    /* The whole line-length table must be present.  Checking this first
     * avoids allocating a frame for truncated input and guarantees that
     * every lp/dp pointer formed below stays inside the packet. */
    if (buf_size < 0 || (uint64_t)buf_size < (uint64_t)planes * height * 2)
        return AVERROR_INVALIDDATA;

    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;

    ep = encoded + buf_size;

    /* Set data pointer after line lengths */
    dp = encoded + planes * (height << 1);

    for (p = 0; p < planes; p++) {
        /* Lines length pointer for this plane */
        lp = encoded + p * (height << 1);

        /* Decode a plane */
        for (row = 0; row < height; row++) {
            pixptr     = frame->data[0] + row * frame->linesize[0] + planemap[p];
            pixptr_end = pixptr + frame->linesize[0];

            /* the table entry is inside the packet (checked above); read
             * it bytewise so no aligned or type-punned access is needed */
            dlen = AV_RB16(lp + row * 2);

            /* Decode a row of this plane */
            while (dlen > 0) {
                if (ep - dp <= 1)
                    return AVERROR_INVALIDDATA;
                if ((count = *dp++) <= 127) {
                    /* literal run: count + 1 bytes follow */
                    count++;
                    dlen -= count + 1;
                    if (pixptr_end - pixptr < count * planes)
                        break;
                    if (ep - dp < count)
                        return AVERROR_INVALIDDATA;
                    while (count--) {
                        *pixptr = *dp++;
                        pixptr += planes;
                    }
                } else {
                    /* repeat run: the next byte is written 257 - count times */
                    count = 257 - count;
                    if (pixptr_end - pixptr < count * planes)
                        break;
                    while (count--) {
                        *pixptr = *dp;
                        pixptr += planes;
                    }
                    dp++;
                    dlen -= 2;
                }
            }
        }
    }

    if (avctx->bits_per_coded_sample <= 8) {
        int pal_size = 0;
        const uint8_t *pal = av_packet_get_side_data(avpkt,
                                                     AV_PKT_DATA_PALETTE,
                                                     &pal_size);
        /* only accept a complete palette; a short side-data buffer would
         * otherwise be over-read by the memcpy below */
        if (pal && pal_size == AVPALETTE_SIZE) {
            frame->palette_has_changed = 1;
            memcpy(c->pal, pal, AVPALETTE_SIZE);
        } else if (pal) {
            av_log(avctx, AV_LOG_ERROR, "Palette size %d is wrong\n", pal_size);
        }

        memcpy (frame->data[1], c->pal, AVPALETTE_SIZE);
    }

    *got_frame = 1;

    /* always report that the buffer was completely consumed */
    return buf_size;
}
/**
 * Initialise the 8BPS decoder from the coded bit depth.
 *
 * Chooses the output pixel format, the number of coded planes and the byte
 * offset of each plane inside an output pixel.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for an unsupported depth
 */
static av_cold int decode_init(AVCodecContext *avctx)
{
    EightBpsContext * const c = avctx->priv_data;
    const int depth = avctx->bits_per_coded_sample;

    c->avctx = avctx;

    if (depth == 8) {
        /* a single plane of palette indexes */
        avctx->pix_fmt = AV_PIX_FMT_PAL8;
        c->planes      = 1;
        c->planemap[0] = 0;
    } else if (depth == 24) {
        /* planes arrive as red, green, blue */
        avctx->pix_fmt = ff_get_format(avctx, pixfmt_rgb24);
        c->planes      = 3;
        c->planemap[0] = 2;
        c->planemap[1] = 1;
        c->planemap[2] = 0;
    } else if (depth == 32) {
        /* the plane map is filled in below, together with the case of
         * rgb24 data being output as rgb32 */
        avctx->pix_fmt = AV_PIX_FMT_RGB32;
        c->planes      = 4;
    } else {
        av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n",
               depth);
        return AVERROR_INVALIDDATA;
    }

    if (avctx->pix_fmt == AV_PIX_FMT_RGB32) {
        /* red, green, blue, alpha byte positions depend on host endianness */
        if (HAVE_BIGENDIAN) {
            c->planemap[0] = 1;
            c->planemap[1] = 2;
            c->planemap[2] = 3;
            c->planemap[3] = 0;
        } else {
            c->planemap[0] = 2;
            c->planemap[1] = 1;
            c->planemap[2] = 0;
            c->planemap[3] = 3;
        }
    }

    return 0;
}
/* AVCodec descriptor for the "8bps" video decoder: registers the private
* context size and the init/decode callbacks of this file, with the
* AV_CODEC_CAP_DR1 capability. */
AVCodec ff_eightbps_decoder = {
.name = "8bps",
.long_name = NULL_IF_CONFIG_SMALL("QuickTime 8BPS video"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_8BPS,
.priv_data_size = sizeof(EightBpsContext),
.init = decode_init,
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
};

View File

@@ -0,0 +1,216 @@
/*
* Copyright (C) 2008 Jaikrishnan Menon
* Copyright (C) 2011 Stefano Sabatini
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* 8svx audio decoder
* @author Jaikrishnan Menon
*
* supports: fibonacci delta encoding
* : exponential encoding
*
* For more information about the 8SVX format:
* http://netghost.narod.ru/gff/vendspec/iff/iff.txt
* http://sox.sourceforge.net/AudioFormats-11.html
* http://aminet.net/package/mus/misc/wavepak
* http://amigan.1emu.net/reg/8SVX.txt
*
* Samples can be found here:
* http://aminet.net/mods/smpl/
*/
#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
#include "libavutil/common.h"
/** decoder context */
typedef struct EightSvxContext {
/* per-channel running sample value, passed to delta_decode() as its state */
uint8_t fib_acc[2];
/* delta table selected in eightsvx_decode_init() (fibonacci or exponential) */
const int8_t *table;
/* buffer used to store the whole first packet.
data is only sent as one large packet */
uint8_t *data[2];
/* number of compressed bytes stored per channel in data[] */
int data_size;
/* offset into data[] of the next byte to decode */
int data_idx;
} EightSvxContext;
/* Delta tables indexed by a 4-bit code; eightsvx_decode_init() selects one
* of them according to the codec id. */
static const int8_t fibonacci[16] = { -34, -21, -13, -8, -5, -3, -2, -1, 0, 1, 2, 3, 5, 8, 13, 21 };
static const int8_t exponential[16] = { -128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64 };
/* Upper bound on the compressed bytes per channel decoded by one call to
* eightsvx_decode_frame(); each byte yields two samples. */
#define MAX_FRAME_SIZE 2048
/**
 * Expand delta-coded bytes into unsigned 8-bit samples.
 *
 * Every input byte holds two 4-bit table indexes; the low nibble is applied
 * first, then the high nibble.  Each delta is added to the running value,
 * which is clipped to 0..255 and written out, so dst receives 2 * src_size
 * samples.
 *
 * @param[out]    dst      destination for the decoded samples
 * @param[in]     src      compressed input bytes
 * @param         src_size number of input bytes
 * @param[in,out] state    running value on entry; updated to the last
 *                         decoded sample for use in the next call
 * @param         table    16-entry delta table
 */
static void delta_decode(uint8_t *dst, const uint8_t *src, int src_size,
                         uint8_t *state, const int8_t *table)
{
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    uint8_t        acc = *state;

    for (; src_size--; in++) {
        const int lo = *in & 0xF;
        const int hi = *in >> 4;

        acc    = av_clip_uint8(acc + table[lo]);
        out[0] = acc;
        acc    = av_clip_uint8(acc + table[hi]);
        out[1] = acc;
        out   += 2;
    }

    *state = acc;
}
/**
 * Decode the next chunk of 8SVX audio.
 *
 * On the first call the payload of every channel (each preceded by a
 * 2-byte header) is copied into the context.  Every call, including the
 * first, then decodes at most MAX_FRAME_SIZE buffered bytes per channel.
 *
 * @return a byte count derived from the decoded chunk on success,
 *         avpkt->size when the buffered data is exhausted, or a negative
 *         AVERROR code
 */
static int eightsvx_decode_frame(AVCodecContext *avctx, void *data,
                                 int *got_frame_ptr, AVPacket *avpkt)
{
    EightSvxContext *esc = avctx->priv_data;
    AVFrame *frame       = data;
    const int hdr_size   = 2;
    int remaining, buf_size, ch, ret;

    /* decode and interleave the first packet */
    if (avpkt && !esc->data[0]) {
        const int nch       = avctx->channels;
        const int stereo    = nch == 2;
        const int chan_size = avpkt->size / nch - hdr_size;

        if (avpkt->size % nch)
            av_log(avctx, AV_LOG_WARNING, "Packet with odd size, ignoring last byte\n");

        if (avpkt->size < (hdr_size + 1) * nch) {
            av_log(avctx, AV_LOG_ERROR, "packet size is too small\n");
            return AVERROR_INVALIDDATA;
        }

        /* the second header byte of each channel seeds its accumulator */
        esc->fib_acc[0] = avpkt->data[1] + 128;
        if (stereo)
            esc->fib_acc[1] = avpkt->data[2 + chan_size + 1] + 128;

        esc->data_idx  = 0;
        esc->data_size = chan_size;

        esc->data[0] = av_malloc(chan_size);
        if (!esc->data[0])
            return AVERROR(ENOMEM);
        if (stereo) {
            esc->data[1] = av_malloc(chan_size);
            if (!esc->data[1]) {
                av_freep(&esc->data[0]);
                return AVERROR(ENOMEM);
            }
        }

        memcpy(esc->data[0], avpkt->data + hdr_size, chan_size);
        if (stereo)
            memcpy(esc->data[1], avpkt->data + 2 * hdr_size + chan_size, chan_size);
    }

    if (!esc->data[0]) {
        av_log(avctx, AV_LOG_ERROR, "unexpected empty packet\n");
        return AVERROR_INVALIDDATA;
    }

    /* decode next piece of data from the buffer */
    remaining = esc->data_size - esc->data_idx;
    buf_size  = remaining < MAX_FRAME_SIZE ? remaining : MAX_FRAME_SIZE;
    if (buf_size <= 0) {
        *got_frame_ptr = 0;
        return avpkt->size;
    }

    /* get output buffer: two samples per compressed byte */
    frame->nb_samples = buf_size * 2;
    ret = ff_get_buffer(avctx, frame, 0);
    if (ret < 0)
        return ret;

    for (ch = 0; ch < avctx->channels; ch++) {
        const uint8_t *chunk = esc->data[ch] + esc->data_idx;

        delta_decode(frame->data[ch], chunk, buf_size,
                     &esc->fib_acc[ch], esc->table);
    }

    esc->data_idx += buf_size;

    *got_frame_ptr = 1;

    return ((avctx->frame_number == 0) * hdr_size + buf_size) * avctx->channels;
}
/**
 * Validate the channel count and select the delta table for the codec id.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for an unsupported channel
 *         count or codec id
 */
static av_cold int eightsvx_decode_init(AVCodecContext *avctx)
{
    EightSvxContext *esc = avctx->priv_data;
    const int8_t *deltas;

    if (!(avctx->channels >= 1 && avctx->channels <= 2)) {
        av_log(avctx, AV_LOG_ERROR, "8SVX does not support more than 2 channels\n");
        return AVERROR_INVALIDDATA;
    }

    if (avctx->codec->id == AV_CODEC_ID_8SVX_FIB) {
        deltas = fibonacci;
    } else if (avctx->codec->id == AV_CODEC_ID_8SVX_EXP) {
        deltas = exponential;
    } else {
        av_log(avctx, AV_LOG_ERROR, "Invalid codec id %d.\n", avctx->codec->id);
        return AVERROR_INVALIDDATA;
    }

    esc->table        = deltas;
    avctx->sample_fmt = AV_SAMPLE_FMT_U8P;

    return 0;
}
/**
 * Release the buffered channel data and reset the read position.
 *
 * @return 0 always
 */
static av_cold int eightsvx_decode_close(AVCodecContext *avctx)
{
    EightSvxContext *esc = avctx->priv_data;
    int ch;

    for (ch = 0; ch < 2; ch++)
        av_freep(&esc->data[ch]);

    esc->data_idx  = 0;
    esc->data_size = 0;

    return 0;
}
#if CONFIG_EIGHTSVX_FIB_DECODER
/* AVCodec descriptor for the fibonacci variant; it shares the init, decode
* and close callbacks with the exponential variant, and the codec id makes
* eightsvx_decode_init() pick the delta table. */
AVCodec ff_eightsvx_fib_decoder = {
.name = "8svx_fib",
.long_name = NULL_IF_CONFIG_SMALL("8SVX fibonacci"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_8SVX_FIB,
.priv_data_size = sizeof (EightSvxContext),
.init = eightsvx_decode_init,
.decode = eightsvx_decode_frame,
.close = eightsvx_decode_close,
.capabilities = AV_CODEC_CAP_DR1,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
AV_SAMPLE_FMT_NONE },
};
#endif
#if CONFIG_EIGHTSVX_EXP_DECODER
/* AVCodec descriptor for the exponential variant; it shares the init, decode
* and close callbacks with the fibonacci variant, and the codec id makes
* eightsvx_decode_init() pick the delta table. */
AVCodec ff_eightsvx_exp_decoder = {
.name = "8svx_exp",
.long_name = NULL_IF_CONFIG_SMALL("8SVX exponential"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_8SVX_EXP,
.priv_data_size = sizeof (EightSvxContext),
.init = eightsvx_decode_init,
.decode = eightsvx_decode_frame,
.close = eightsvx_decode_close,
.capabilities = AV_CODEC_CAP_DR1,
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
AV_SAMPLE_FMT_NONE },
};
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
/*
* a64 video encoder - c64 colors in rgb (Pepto)
* Copyright (c) 2009 Tobias Bindhammer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* a64 video encoder - c64 colors in rgb
*/
#ifndef AVCODEC_A64COLORS_H
#define AVCODEC_A64COLORS_H
#include <stdint.h>
/* c64 palette in RGB: 16 entries of three bytes each; the table is
* static, so every file including this header gets its own copy */
static const uint8_t a64_palette[16][3] = {
{0x00, 0x00, 0x00},
{0xff, 0xff, 0xff},
{0x68, 0x37, 0x2b},
{0x70, 0xa4, 0xb2},
{0x6f, 0x3d, 0x86},
{0x58, 0x8d, 0x43},
{0x35, 0x28, 0x79},
{0xb8, 0xc7, 0x6f},
{0x6f, 0x4f, 0x25},
{0x43, 0x39, 0x00},
{0x9a, 0x67, 0x59},
{0x44, 0x44, 0x44},
{0x6c, 0x6c, 0x6c},
{0x9a, 0xd2, 0x84},
{0x6c, 0x5e, 0xb5},
{0x95, 0x95, 0x95},
};
#endif /* AVCODEC_A64COLORS_H */

View File

@@ -0,0 +1,425 @@
/*
* a64 video encoder - multicolor modes
* Copyright (c) 2009 Tobias Bindhammer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* a64 video encoder - multicolor modes
*/
#include "a64colors.h"
#include "a64tables.h"
#include "elbg.h"
#include "internal.h"
#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
/* number of dither levels; render_charset() maps luma distances to
* 0 .. DITHERSTEPS - 1 */
#define DITHERSTEPS 8
/* number of characters in a rendered charset */
#define CHARSET_CHARS 256
/* non-zero: use the interlaced dither patterns and emit a second charset
* half at offset 0x800 */
#define INTERLACED 1
/* non-zero: screen size follows the (clamped) picture size instead of the
* fixed 0x400 bytes */
#define CROP_SCREENS 1
/* picture area covered by the encoder, in pixels */
#define C64XRES 320
#define C64YRES 200
/* Private encoder state shared by the a64multi and a64multi5 encoders. */
typedef struct A64Context {
/* variables for multicolor modes */
/* random state passed to the ELBG functions */
AVLFG randctx;
/* number of frames that share one charset */
int mc_lifetime;
/* non-zero for the 5-colour codec id (A64_MULTI5) */
int mc_use_5col;
/* number of frames buffered so far for the current charset */
unsigned mc_frame_counter;
/* 32000 ints of block data per buffered frame */
int *mc_meta_charset;
/* 1000 codebook indexes per buffered frame */
int *mc_charmap;
/* ELBG codebook: CHARSET_CHARS entries of 32 ints */
int *mc_best_cb;
/* luma of each colour in mc_colors, precomputed at init */
int mc_luma_vals[5];
/* rendered charset bytes */
uint8_t *mc_charset;
/* per-character colour-ram flag written by render_charset() */
uint8_t *mc_colram;
/* NOTE(review): not referenced by the code visible here */
uint8_t *mc_palette;
/* number of colours in use: 4, or 5 in 5-colour mode */
int mc_pal_size;
/* pts of the next packet that will be output */
int64_t next_pts;
} A64Context;
/* gray gradient: indexes into a64_palette, used at init to precompute the
* luma of each colour */
static const int mc_colors[5]={0x0,0xb,0xc,0xf,0x1};
/* other possible gradients - to be tested */
//static const int mc_colors[5]={0x0,0x8,0xa,0xf,0x7};
//static const int mc_colors[5]={0x0,0x9,0x8,0xa,0x3};
/**
 * Convert one gray frame into linear 8x8-block data suitable for ELBG.
 *
 * The full C64XRES x C64YRES area is walked block by block; every block
 * contributes 8 rows of 4 values, each value being the average of two
 * horizontally adjacent pixels (or the single pixel at the right edge).
 * Positions outside the picture, which is clamped to the C64 resolution,
 * leave the corresponding dest entry untouched.
 *
 * @param avctx codec context providing the picture dimensions
 * @param p     source frame; only plane 0 is read
 * @param dest  output array with room for 32000 ints
 */
static void to_meta_with_crop(AVCodecContext *avctx,
                              const AVFrame *p, int *dest)
{
    const int crop_h      = FFMIN(avctx->height, C64YRES);
    const int crop_w      = FFMIN(avctx->width , C64XRES);
    const uint8_t *pixels = p->data[0];
    const int stride      = p->linesize[0];
    int *out              = dest;
    int by, bx, row, col;

    for (by = 0; by < C64YRES; by += 8) {
        for (bx = 0; bx < C64XRES; bx += 8) {
            const int row_end = FFMIN(by + 8, C64YRES);
            const int col_end = FFMIN(bx + 8, C64XRES);

            for (row = by; row < row_end; row++) {
                /* one output slot per pixel pair, written or not */
                for (col = bx; col < col_end; col += 2, out++) {
                    const uint8_t *px;

                    if (col >= crop_w || row >= crop_h)
                        continue;

                    px = &pixels[col + row * stride];
                    /* write blocks as linear data now so they are suitable for elbg */
                    if (col + 1 < crop_w)
                        *out = (px[0] + px[1]) / 2;
                    else
                        *out = px[0];
                }
            }
        }
    }
}
/**
* Render the ELBG codebook into C64 multicolor character bitmaps.
*
* Reads CHARSET_CHARS codebook entries of 32 values (8 rows of 4) from
* c->mc_best_cb and writes 8 bytes per character to charset, plus 8 more
* at offset 0x800 when INTERLACED is set. In 5-colour mode the codebook
* entry may be clamped in place and the character rendered again.
*
* @param avctx     codec context; priv_data is the A64Context
* @param charset   output buffer for the character bitmaps
* @param colrammap receives one flag per character (highdiff > 0)
*/
static void render_charset(AVCodecContext *avctx, uint8_t *charset,
uint8_t *colrammap)
{
A64Context *c = avctx->priv_data;
uint8_t row1, row2;
int charpos, x, y;
int a, b;
uint8_t pix;
int lowdiff, highdiff;
int *best_cb = c->mc_best_cb;
/* lookup tables indexed by luma; declared static, so they are shared by
* every call of this function */
static uint8_t index1[256];
static uint8_t index2[256];
static uint8_t dither[256];
int i;
int distance;
/* generate lookup-tables for dither and index before looping */
i = 0;
for (a=0; a < 256; a++) {
/* when a reaches the luma of the next colour, fill the dither ramp
* for the interval that just ended and move to that colour */
if(i < c->mc_pal_size -1 && a == c->mc_luma_vals[i + 1]) {
distance = c->mc_luma_vals[i + 1] - c->mc_luma_vals[i];
for(b = 0; b <= distance; b++) {
dither[c->mc_luma_vals[i] + b] = b * (DITHERSTEPS - 1) / distance;
}
i++;
}
/* at or above the last colour there is nothing to dither towards */
if(i >= c->mc_pal_size - 1) dither[a] = 0;
index1[a] = i;
index2[a] = FFMIN(i + 1, c->mc_pal_size - 1);
}
/* and render charset */
for (charpos = 0; charpos < CHARSET_CHARS; charpos++) {
lowdiff = 0;
highdiff = 0;
for (y = 0; y < 8; y++) {
row1 = 0; row2 = 0;
for (x = 0; x < 4; x++) {
pix = best_cb[y * 4 + x];
/* accumulate error for brightest/darkest color */
if (index1[pix] >= 3)
highdiff += pix - c->mc_luma_vals[3];
if (index1[pix] < 1)
lowdiff += c->mc_luma_vals[1] - pix;
/* each pixel occupies two bits of the row byte */
row1 <<= 2;
if (INTERLACED) {
row2 <<= 2;
if (interlaced_dither_patterns[dither[pix]][(y & 3) * 2 + 0][x & 3])
row1 |= 3-(index2[pix] & 3);
else
row1 |= 3-(index1[pix] & 3);
if (interlaced_dither_patterns[dither[pix]][(y & 3) * 2 + 1][x & 3])
row2 |= 3-(index2[pix] & 3);
else
row2 |= 3-(index1[pix] & 3);
}
else {
if (multi_dither_patterns[dither[pix]][(y & 3)][x & 3])
row1 |= 3-(index2[pix] & 3);
else
row1 |= 3-(index1[pix] & 3);
}
}
charset[y+0x000] = row1;
if (INTERLACED) charset[y+0x800] = row2;
}
/* do we need to adjust pixels? */
if (highdiff > 0 && lowdiff > 0 && c->mc_use_5col) {
/* clamp the codebook entry towards the side with the larger error */
if (lowdiff > highdiff) {
for (x = 0; x < 32; x++)
best_cb[x] = FFMIN(c->mc_luma_vals[3], best_cb[x]);
} else {
for (x = 0; x < 32; x++)
best_cb[x] = FFMAX(c->mc_luma_vals[1], best_cb[x]);
}
charpos--; /* redo now adjusted char */
/* no adjustment needed, all fine */
} else {
/* advance pointers */
best_cb += 32;
charset += 8;
/* remember colorram value */
colrammap[charpos] = (highdiff > 0);
}
}
}
/**
 * Free every buffer owned by the encoder context.
 *
 * av_freep() also resets each pointer to NULL.
 *
 * @return 0 always
 */
static av_cold int a64multi_close_encoder(AVCodecContext *avctx)
{
    A64Context *ctx = avctx->priv_data;

    /* int-typed work buffers */
    av_freep(&ctx->mc_meta_charset);
    av_freep(&ctx->mc_charmap);
    av_freep(&ctx->mc_best_cb);

    /* byte-typed output buffers */
    av_freep(&ctx->mc_charset);
    av_freep(&ctx->mc_colram);

    return 0;
}
/**
 * Initialise the a64 multicolor encoder.
 *
 * Derives the charset lifetime from global_quality (4 frames when it is
 * below 1), precomputes the luma of the colours in use, allocates the work
 * buffers and creates 32 bytes of extradata.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails; in that
 *         case every buffer allocated so far is released again
 */
static av_cold int a64multi_encode_init(AVCodecContext *avctx)
{
    A64Context *c = avctx->priv_data;
    int a;

    av_lfg_init(&c->randctx, 1);

    if (avctx->global_quality < 1) {
        c->mc_lifetime = 4;
    } else {
        /* note: this also rescales avctx->global_quality in place */
        c->mc_lifetime = avctx->global_quality /= FF_QP2LAMBDA;
    }

    av_log(avctx, AV_LOG_INFO, "charset lifetime set to %d frame(s)\n", c->mc_lifetime);

    c->mc_frame_counter = 0;
    c->mc_use_5col      = avctx->codec->id == AV_CODEC_ID_A64_MULTI5;
    c->mc_pal_size      = 4 + c->mc_use_5col;

    /* precalc luma values for later use */
    for (a = 0; a < c->mc_pal_size; a++) {
        c->mc_luma_vals[a]=a64_palette[mc_colors[a]][0] * 0.30 +
                           a64_palette[mc_colors[a]][1] * 0.59 +
                           a64_palette[mc_colors[a]][2] * 0.11;
    }

    if (!(c->mc_meta_charset = av_mallocz_array(c->mc_lifetime, 32000 * sizeof(int))) ||
        !(c->mc_best_cb      = av_malloc(CHARSET_CHARS * 32 * sizeof(int)))           ||
        !(c->mc_charmap      = av_mallocz_array(c->mc_lifetime, 1000 * sizeof(int)))  ||
        !(c->mc_colram       = av_mallocz(CHARSET_CHARS * sizeof(uint8_t)))           ||
        !(c->mc_charset      = av_malloc(0x800 * (INTERLACED+1) * sizeof(uint8_t)))) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate buffer memory.\n");
        /* the codec declares no init-cleanup capability, so the buffers
         * that were allocated before the failure must be freed here */
        a64multi_close_encoder(avctx);
        return AVERROR(ENOMEM);
    }

    /* set up extradata */
    if (!(avctx->extradata = av_mallocz(8 * 4 + AV_INPUT_BUFFER_PADDING_SIZE))) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory for extradata.\n");
        a64multi_close_encoder(avctx);
        return AVERROR(ENOMEM);
    }
    avctx->extradata_size = 8 * 4;
    AV_WB32(avctx->extradata, c->mc_lifetime);
    AV_WB32(avctx->extradata + 16, INTERLACED);

    if (!avctx->codec_tag)
        avctx->codec_tag = AV_RL32("a64m");

    c->next_pts = AV_NOPTS_VALUE;

    return 0;
}
/**
 * Pack the colour-ram flags of four screen quarters into 256 bytes.
 *
 * For every cell index 0..255 the flag of the character at that position in
 * each 0x100-entry quarter of charmap is looked up in colram; quarter q
 * contributes bit q.  The fourth quarter is only read for the first 0xe8
 * cells.  The packed value is stored shifted left by two.
 *
 * @param buf     output, 256 bytes
 * @param charmap character index per screen cell (0x300 + 0xe8 entries read)
 * @param colram  per-character colour-ram value
 */
static void a64_compress_colram(unsigned char *buf, int *charmap, uint8_t *colram)
{
    int cell, quarter;

    /* only needs to be done in 5col mode */
    /* XXX could be squeezed to 0x80 bytes */
    for (cell = 0; cell < 256; cell++) {
        const int quarters = cell < 0xe8 ? 4 : 3;
        unsigned packed    = 0;

        for (quarter = 0; quarter < quarters; quarter++)
            packed |= (unsigned)colram[charmap[cell + quarter * 0x100]] << quarter;

        /* reduce to 8 bits before the final shift, as a uint8_t would */
        buf[cell] = (uint8_t)packed << 2;
    }
}
/**
* Buffer input frames and emit one packet per charset lifetime.
*
* Frames are converted to block data and collected until mc_lifetime frames
* are present; no packet is produced for those calls. The call that finds
* the buffer full (or a flush with p == NULL) runs ELBG over the collected
* data, renders the charset and writes the charset followed by one screen
* (and, in 5-colour mode, one packed colour-ram block) per frame.
*
* @param avctx      codec context; priv_data is the A64Context
* @param pkt        output packet
* @param p          input frame, or NULL to flush
* @param got_packet set to 1 when pkt holds data, to 0 when a conversion
*                   ran without output; left untouched otherwise
* @return 0 on success, or the negative error code returned by
*         ff_alloc_packet2() or the ELBG functions
*/
static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
const AVFrame *p, int *got_packet)
{
A64Context *c = avctx->priv_data;
int frame;
int x, y;
int b_height;
int b_width;
int req_size, ret;
uint8_t *buf = NULL;
int *charmap = c->mc_charmap;
uint8_t *colram = c->mc_colram;
uint8_t *charset = c->mc_charset;
int *meta = c->mc_meta_charset;
int *best_cb = c->mc_best_cb;
int charset_size = 0x800 * (INTERLACED + 1);
/* colour-ram data is only written in 5-colour mode */
int colram_size = 0x100 * c->mc_use_5col;
int screen_size;
/* screen dimensions in 8x8 character cells */
if(CROP_SCREENS) {
b_height = FFMIN(avctx->height,C64YRES) >> 3;
b_width = FFMIN(avctx->width ,C64XRES) >> 3;
screen_size = b_width * b_height;
} else {
b_height = C64YRES >> 3;
b_width = C64XRES >> 3;
screen_size = 0x400;
}
/* no data, means end encoding asap */
if (!p) {
/* all done, end encoding */
if (!c->mc_lifetime) return 0;
/* no more frames in queue, prepare to flush remaining frames */
if (!c->mc_frame_counter) {
c->mc_lifetime = 0;
}
/* still frames in queue so limit lifetime to remaining frames */
else c->mc_lifetime = c->mc_frame_counter;
/* still new data available */
} else {
/* fill up mc_meta_charset with data until lifetime exceeds */
if (c->mc_frame_counter < c->mc_lifetime) {
to_meta_with_crop(avctx, p, meta + 32000 * c->mc_frame_counter);
c->mc_frame_counter++;
/* the packet takes the pts of the first frame buffered for it */
if (c->next_pts == AV_NOPTS_VALUE)
c->next_pts = p->pts;
/* lifetime is not reached so wait for next frame first */
return 0;
}
}
/* lifetime reached so now convert X frames at once */
if (c->mc_frame_counter == c->mc_lifetime) {
req_size = 0;
/* any frames to encode? */
if (c->mc_lifetime) {
int alloc_size = charset_size + c->mc_lifetime*(screen_size + colram_size);
if ((ret = ff_alloc_packet2(avctx, pkt, alloc_size, 0)) < 0)
return ret;
buf = pkt->data;
/* calc optimal new charset + charmaps */
ret = avpriv_init_elbg(meta, 32, 1000 * c->mc_lifetime, best_cb,
CHARSET_CHARS, 50, charmap, &c->randctx);
if (ret < 0)
return ret;
ret = avpriv_do_elbg(meta, 32, 1000 * c->mc_lifetime, best_cb,
CHARSET_CHARS, 50, charmap, &c->randctx);
if (ret < 0)
return ret;
/* create colorram map and a c64 readable charset */
render_charset(avctx, charset, colram);
/* copy charset to buf */
memcpy(buf, charset, charset_size);
/* advance pointers */
buf += charset_size;
req_size += charset_size;
}
/* write x frames to buf */
for (frame = 0; frame < c->mc_lifetime; frame++) {
/* copy charmap to buf. buf is uchar*, charmap is int*, so no memcpy here, sorry */
for (y = 0; y < b_height; y++) {
for (x = 0; x < b_width; x++) {
buf[y * b_width + x] = charmap[y * b_width + x];
}
}
/* advance pointers */
buf += screen_size;
req_size += screen_size;
/* compress and copy colram to buf */
if (c->mc_use_5col) {
a64_compress_colram(buf, charmap, colram);
/* advance pointers */
buf += colram_size;
req_size += colram_size;
}
/* advance to next charmap */
charmap += 1000;
}
/* record frame count and chunk sizes in the extradata */
AV_WB32(avctx->extradata + 4, c->mc_frame_counter);
AV_WB32(avctx->extradata + 8, charset_size);
AV_WB32(avctx->extradata + 12, screen_size + colram_size);
/* reset counter */
c->mc_frame_counter = 0;
pkt->pts = pkt->dts = c->next_pts;
c->next_pts = AV_NOPTS_VALUE;
av_assert0(pkt->size >= req_size);
pkt->size = req_size;
pkt->flags |= AV_PKT_FLAG_KEY;
/* a packet is only reported when something was written */
*got_packet = !!req_size;
}
return 0;
}
#if CONFIG_A64MULTI_ENCODER
/* AVCodec descriptor for the 4-colour encoder; it accepts GRAY8 input and
* sets AV_CODEC_CAP_DELAY. The callbacks are shared with a64multi5. */
AVCodec ff_a64multi_encoder = {
.name = "a64multi",
.long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_A64_MULTI,
.priv_data_size = sizeof(A64Context),
.init = a64multi_encode_init,
.encode2 = a64multi_encode_frame,
.close = a64multi_close_encoder,
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
.capabilities = AV_CODEC_CAP_DELAY,
};
#endif
#if CONFIG_A64MULTI5_ENCODER
/* AVCodec descriptor for the 5-colour encoder; the codec id is what makes
* a64multi_encode_init() enable 5-colour mode. The callbacks are shared
* with a64multi. */
AVCodec ff_a64multi5_encoder = {
.name = "a64multi5",
.long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_A64_MULTI5,
.priv_data_size = sizeof(A64Context),
.init = a64multi_encode_init,
.encode2 = a64multi_encode_frame,
.close = a64multi_close_encoder,
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
.capabilities = AV_CODEC_CAP_DELAY,
};
#endif

View File

@@ -0,0 +1,150 @@
/*
* a64 video encoder - tables used by a64 encoders
* Copyright (c) 2009 Tobias Bindhammer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* a64 video encoder - tables used by a64 encoders
*/
#ifndef AVCODEC_A64TABLES_H
#define AVCODEC_A64TABLES_H
#include <stdint.h>
/**
* dither patterns used for rendering the multicolor charset
*
* Indexed as [pattern][row][column]: nine 4x4 patterns whose number of
* set cells grows from none (pattern 0) to all (pattern 8).
*/
static const uint8_t multi_dither_patterns[9][4][4] = {
{
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}
},
{
{1, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 1, 0},
{0, 0, 0, 0}
},
{
{1, 0, 0, 0},
{0, 0, 1, 0},
{0, 1, 0, 0},
{0, 0, 0, 1}
},
{
{1, 0, 0, 0},
{0, 1, 0, 1},
{0, 0, 1, 0},
{0, 1, 0, 1}
},
{
{1, 0, 1, 0},
{0, 1, 0, 1},
{1, 0, 1, 0},
{0, 1, 0, 1}
},
{
{1, 1, 1, 0},
{0, 1, 0, 1},
{1, 0, 1, 1},
{0, 1, 0, 1}
},
{
{0, 1, 1, 1},
{1, 1, 0, 1},
{1, 0, 1, 1},
{1, 1, 1, 0}
},
{
{0, 1, 1, 1},
{1, 1, 1, 1},
{1, 1, 0, 1},
{1, 1, 1, 1}
},
{
{1, 1, 1, 1},
{1, 1, 1, 1},
{1, 1, 1, 1},
{1, 1, 1, 1}
},
};
/**
* dither patterns with eight rows per pattern
*
* Indexed as [pattern][row][column]: nine 8x4 patterns running from all
* cells clear (pattern 0) to all cells set (pattern 8).
* NOTE(review): presumably even and odd rows feed the two charset halves
* of interlaced mode - confirm against the encoder that indexes this table.
*/
static const uint8_t interlaced_dither_patterns[9][8][4] = {
{
{0, 0, 0, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 0, 0, 0},
},
{
{1, 0, 1, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 0, 0, 0},
{1, 0, 1, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 0, 0, 0},
},
{
{1, 0, 1, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 1, 0, 1},
{1, 0, 1, 0}, {0, 0, 0, 0},
{0, 0, 0, 0}, {0, 1, 0, 1},
},
{
{1, 0, 1, 0}, {0, 1, 0, 1},
{0, 1, 0, 1}, {0, 0, 0, 0},
{1, 0, 1, 0}, {0, 1, 0, 1},
{0, 1, 0, 1}, {0, 0, 0, 0},
},
{
{1, 0, 1, 0}, {0, 1, 0, 1},
{0, 1, 0, 1}, {1, 0, 1, 0},
{1, 0, 1, 0}, {0, 1, 0, 1},
{0, 1, 0, 1}, {1, 0, 1, 0},
},
{
{1, 0, 1, 0}, {0, 1, 0, 1},
{1, 1, 1, 1}, {1, 0, 1, 0},
{1, 0, 1, 0}, {0, 1, 0, 1},
{1, 1, 1, 1}, {1, 0, 1, 0},
},
{
{1, 0, 1, 0}, {1, 1, 1, 1},
{1, 1, 1, 1}, {0, 1, 0, 1},
{1, 0, 1, 0}, {1, 1, 1, 1},
{1, 1, 1, 1}, {0, 1, 0, 1},
},
{
{1, 1, 1, 1}, {1, 1, 1, 1},
{1, 1, 1, 1}, {0, 1, 0, 1},
{1, 1, 1, 1}, {1, 1, 1, 1},
{1, 1, 1, 1}, {0, 1, 0, 1},
},
{
{1, 1, 1, 1}, {1, 1, 1, 1},
{1, 1, 1, 1}, {1, 1, 1, 1},
{1, 1, 1, 1}, {1, 1, 1, 1},
{1, 1, 1, 1}, {1, 1, 1, 1},
}
};
#endif /* AVCODEC_A64TABLES_H */

View File

@@ -0,0 +1,367 @@
/*
* AAC definitions and structures
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC definitions and structures
* @author Oded Shimon ( ods15 ods15 dyndns org )
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
#ifndef AVCODEC_AAC_H
#define AVCODEC_AAC_H
#include "aac_defines.h"
#include "libavutil/float_dsp.h"
#include "libavutil/fixed_dsp.h"
#include "avcodec.h"
#if !USE_FIXED
#include "imdct15.h"
#endif
#include "fft.h"
#include "mpeg4audio.h"
#include "sbr.h"
#include <stdint.h>
/* Size limits and constants of the AAC code. MAX_ELEM_ID sizes the
* layout_map array of OutputConfiguration below.
* NOTE(review): the other constants are not used in the part of the header
* visible here - confirm their meaning against the code that uses them. */
#define MAX_CHANNELS 64
#define MAX_ELEM_ID 16
#define TNS_MAX_ORDER 20
#define MAX_LTP_LONG_SFB 40
#define CLIP_AVOIDANCE_FACTOR 0.95f
/* Element type identifiers; the enumerators take the implicit values 0..7
* in declaration order.
* NOTE(review): presumably these match the syntactic element ids of the
* AAC bitstream - confirm against the specification. */
enum RawDataBlockType {
TYPE_SCE,
TYPE_CPE,
TYPE_CCE,
TYPE_LFE,
TYPE_DSE,
TYPE_PCE,
TYPE_FIL,
TYPE_END,
};
/**
 * Extension payload identifiers.
 * The first three take the implicit values 0-2; the remaining ones are
 * assigned explicitly (0xb, 0xd, 0xe).
 */
enum ExtensionPayloadID {
EXT_FILL,
EXT_FILL_DATA,
EXT_DATA_ELEMENT,
EXT_DYNAMIC_RANGE = 0xb,
EXT_SBR_DATA = 0xd,
EXT_SBR_DATA_CRC = 0xe,
};
/**
 * Window sequence identifiers, numbered 0-3 in declaration order.
 * Stored in IndividualChannelStream.window_sequence.
 */
enum WindowSequence {
ONLY_LONG_SEQUENCE,
LONG_START_SEQUENCE,
EIGHT_SHORT_SEQUENCE,
LONG_STOP_SEQUENCE,
};
/**
 * Band type identifiers.
 * Only the values listed below are named; the values in between
 * (1-4 and 6-10) have no enumerator in this enum.
 */
enum BandType {
ZERO_BT = 0, ///< Scalefactors and spectral data are all zero.
FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word.
ESC_BT = 11, ///< Spectral data are coded with an escape sequence.
RESERVED_BT = 12, ///< Band types following are encoded differently from others.
NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream.
INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions (out of phase).
INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions (in phase).
};
/**
 * Evaluates to non-zero when bit 1 or bit 3 of (x - 1) is set (mask 10 = 0b1010).
 * NOTE(review): presumably x is a codebook / band type index and a non-zero
 * result marks codebooks that store unsigned values - confirm at the callers.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
/**
 * Channel position classes; every value is fixed explicitly (0-5).
 */
enum ChannelPosition {
AAC_CHANNEL_OFF = 0,
AAC_CHANNEL_FRONT = 1,
AAC_CHANNEL_SIDE = 2,
AAC_CHANNEL_BACK = 3,
AAC_CHANNEL_LFE = 4,
AAC_CHANNEL_CC = 5,
};
/**
 * The point during decoding at which channel coupling is applied.
 * BEFORE_TNS and BETWEEN_TNS_AND_IMDCT take the implicit values 0 and 1;
 * AFTER_IMDCT is explicitly 3, so the value 2 has no name here.
 */
enum CouplingPoint {
BEFORE_TNS,
BETWEEN_TNS_AND_IMDCT,
AFTER_IMDCT = 3,
};
/**
 * Output configuration status.
 * The enumerators take the implicit values 0 (OC_NONE) through 4 (OC_LOCKED)
 * in declaration order.
 */
enum OCStatus {
OC_NONE, ///< Output unconfigured
OC_TRIAL_PCE, ///< Output configuration under trial specified by an inband PCE
OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header
OC_GLOBAL_HDR, ///< Output configuration set in a global header but not yet locked
OC_LOCKED, ///< Output configuration locked in place
};
/**
 * Output configuration: the MPEG-4 audio config, the element layout map and
 * the resulting channel setup, together with its OCStatus.
 * AACContext holds two of these (oc[2]).
 */
typedef struct OutputConfiguration {
MPEG4AudioConfig m4ac;
uint8_t layout_map[MAX_ELEM_ID*4][3]; ///< up to MAX_ELEM_ID*4 entries of 3 bytes each
int layout_map_tags; ///< presumably the number of valid layout_map entries - TODO confirm
int channels;
uint64_t channel_layout;
enum OCStatus status; ///< state of this configuration, see enum OCStatus
} OutputConfiguration;
/**
 * Predictor State
 *
 * All members are of type AAC_FLOAT, which is selected by the build
 * (see aac_defines.h). SingleChannelElement keeps MAX_PREDICTORS of these.
 */
typedef struct PredictorState {
AAC_FLOAT cor0;
AAC_FLOAT cor1;
AAC_FLOAT var0;
AAC_FLOAT var1;
AAC_FLOAT r0;
AAC_FLOAT r1;
AAC_FLOAT k1;
AAC_FLOAT x_est;
} PredictorState;
#define MAX_PREDICTORS 672 ///< number of PredictorState entries kept per SingleChannelElement
#define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to scale difference in 512 times
#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS 255 ///< scalefactor index maximum value
#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference
#define NOISE_PRE 256 ///< preamble for NOISE_BT, put in bitstream with the first noise band
#define NOISE_PRE_BITS 9 ///< length of preamble
#define NOISE_OFFSET 90 ///< subtracted from global gain, used as offset for the preamble
/**
 * Long Term Prediction
 *
 * Embedded in IndividualChannelStream as the member ltp.
 */
typedef struct LongTermPrediction {
int8_t present; ///< presumably non-zero when LTP data is present - TODO confirm
int16_t lag;
INTFLOAT coef;
int8_t used[MAX_LTP_LONG_SFB]; ///< array of MAX_LTP_LONG_SFB flags
} LongTermPrediction;
/**
 * Individual Channel Stream
 *
 * Per-channel window, scalefactor band and prediction parameters.
 */
typedef struct IndividualChannelStream {
uint8_t max_sfb; ///< number of scalefactor bands per group
enum WindowSequence window_sequence[2]; ///< two entries; presumably current and previous - TODO confirm
uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window.
int num_window_groups;
uint8_t group_len[8];
LongTermPrediction ltp;
const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
int num_swb; ///< number of scalefactor window bands
int num_windows;
int tns_max_bands;
int predictor_present;
int predictor_initialized;
int predictor_reset_group;
int predictor_reset_count[31]; ///< used by encoder to count prediction resets
uint8_t prediction_used[41];
uint8_t window_clipping[8]; ///< set if a certain window is near clipping
float clip_avoidance_factor; ///< set if any window is near clipping to the necessary attenuation factor to avoid it
} IndividualChannelStream;
/**
 * Temporal Noise Shaping
 *
 * The arrays are dimensioned [8] or [8][4]; presumably the first index is the
 * window and the second the filter within that window - TODO confirm.
 * Each filter holds up to TNS_MAX_ORDER coefficients.
 */
typedef struct TemporalNoiseShaping {
int present;
int n_filt[8];
int length[8][4];
int direction[8][4];
int order[8][4];
int coef_idx[8][4][TNS_MAX_ORDER];
INTFLOAT coef[8][4][TNS_MAX_ORDER];
} TemporalNoiseShaping;
/**
 * Dynamic Range Control - decoded from the bitstream but not processed further.
 *
 * The per-band arrays hold 17 entries each; exclude_mask holds one entry per
 * channel (MAX_CHANNELS).
 */
typedef struct DynamicRangeControl {
int pce_instance_tag; ///< Indicates with which program the DRC info is associated.
int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative
int dyn_rng_ctl[17]; ///< DRC magnitude information
int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.
int band_incr; ///< Number of DRC bands greater than 1 having DRC info.
int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.
int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
int prog_ref_level; /**< A reference level for the long-term program audio level for all
* channels combined.
*/
} DynamicRangeControl;
/**
 * Pulse data: room for up to 4 pulses, each with a position and an amplitude.
 * Embedded in SingleChannelElement as the member pulse.
 */
typedef struct Pulse {
int num_pulse; ///< presumably the number of valid pos/amp entries - TODO confirm
int start;
int pos[4];
int amp[4];
} Pulse;
/**
 * coupling parameters
 *
 * The per-target arrays (type, id_select, ch_select) hold 8 entries each.
 */
typedef struct ChannelCoupling {
enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied.
int num_coupled; ///< number of target elements
enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE.
int id_select[8]; ///< element id
int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel;
* [2] list of gains for left channel; [3] lists of gains for both channels
*/
INTFLOAT gain[16][120]; ///< 16 gain lists of 120 entries each
} ChannelCoupling;
/**
 * Single Channel Element - used for both SCE and LFE elements.
 *
 * ChannelElement embeds two of these (ch[2]). The large sample buffers are
 * declared with DECLARE_ALIGNED using 16- or 32-byte alignment.
 */
typedef struct SingleChannelElement {
IndividualChannelStream ics;
TemporalNoiseShaping tns;
Pulse pulse;
enum BandType band_type[128]; ///< band types
enum BandType band_alt[128]; ///< alternative band type (used by encoder)
int band_type_run_end[120]; ///< band type run end points
INTFLOAT sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
float is_ener[128]; ///< Intensity stereo pos (used by encoder)
float pns_ener[128]; ///< Noise energy values (used by encoder)
DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine
DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed
DECLARE_ALIGNED(32, INTFLOAT, saved)[1536]; ///< overlap
DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer
DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP
DECLARE_ALIGNED(32, AAC_FLOAT, prcoeffs)[1024]; ///< Main prediction coefs (used by encoder)
PredictorState predictor_state[MAX_PREDICTORS]; ///< one PredictorState per predictor, MAX_PREDICTORS in total
INTFLOAT *ret; ///< PCM output
} SingleChannelElement;
/**
 * channel element - generic struct for SCE/CPE/CCE/LFE
 *
 * Holds two SingleChannelElement slots plus the CPE-, CCE- and SBR-specific
 * state, so one type can represent every element kind.
 */
typedef struct ChannelElement {
int present; ///< presumably non-zero when the element is in use - TODO confirm
// CPE specific
int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder)
uint8_t is_mode; ///< Set if any bands have been encoded using intensity stereo (used by encoder)
uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
uint8_t is_mask[128]; ///< Set if intensity stereo is used (used by encoder)
// shared
SingleChannelElement ch[2];
// CCE specific
ChannelCoupling coup;
SpectralBandReplication sbr; ///< type comes from the included sbr.h
} ChannelElement;
/**
 * main AAC context
 *
 * Only the struct tag is declared here. The AACContext typedef used by the
 * function-pointer members below is not declared in this header; presumably
 * one of the included headers provides it - TODO confirm.
 */
struct AACContext {
AVClass *class;
AVCodecContext *avctx;
AVFrame *frame;
int is_saved; ///< Set if elements have stored overlap from previous frame.
DynamicRangeControl che_drc;
/**
 * @name Channel element related data
 * @{
 */
ChannelElement *che[4][MAX_ELEM_ID]; ///< [4][MAX_ELEM_ID]; presumably indexed by element type, then element id - TODO confirm
ChannelElement *tag_che_map[4][MAX_ELEM_ID];
int tags_mapped;
int warned_remapping_once;
/** @} */
/**
 * @name Temporary aligned buffers
 * (We do not want to have these on the stack.)
 * @{
 */
DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024];
/** @} */
/**
 * @name Computed / set up during initialization
 * @{
 */
FFTContext mdct;
FFTContext mdct_small;
FFTContext mdct_ld;
FFTContext mdct_ltp;
/* The DSP context type depends on the build; mdct480 exists only in the float build. */
#if USE_FIXED
AVFixedDSPContext *fdsp;
#else
IMDCT15Context *mdct480;
AVFloatDSPContext *fdsp;
#endif /* USE_FIXED */
int random_state;
/** @} */
/**
 * @name Members used for output
 * @{
 */
SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement
/** @} */
/**
 * @name Japanese DTV specific extension
 * @{
 */
int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel
int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel
/** @} */
DECLARE_ALIGNED(32, INTFLOAT, temp)[128];
OutputConfiguration oc[2];
int warned_num_aac_frames;
/* aacdec functions pointers */
void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);
void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns,
IndividualChannelStream *ics, int decode);
void (*windowing_and_mdct_ltp)(AACContext *ac, INTFLOAT *out,
INTFLOAT *in, IndividualChannelStream *ics);
void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
void (*vector_pow43)(int *coefs, int len);
void (*subband_scale)(int *dst, int *src, int scale, int offset, int len);
};
/**
 * MIPS-specific initialization hook for an AAC context.
 * NOTE(review): presumably installs optimized versions of the function
 * pointers in AACContext; the definition is not in this header - confirm.
 */
void ff_aacdec_init_mips(AACContext *c);
#endif /* AVCODEC_AAC_H */

View File

@@ -0,0 +1,114 @@
/*
* Common AAC and AC-3 parser
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "parser.h"
#include "aac_ac3_parser.h"
/**
 * Parser callback shared by the AAC and AC-3 parsers: splits the input byte
 * stream into frames.
 *
 * Frame starts are located with the codec-specific s->sync() callback;
 * incomplete frames are buffered through ff_combine_frame().
 *
 * @param s1            parser context; priv_data is used as an AACAC3ParseContext
 * @param avctx         codec context that receives the stream parameters found
 * @param poutbuf       set to the completed frame, or to NULL if none is ready
 * @param poutbuf_size  set to the size of the completed frame, or to 0
 * @param buf           input data
 * @param buf_size      size of the input data in bytes
 * @return buf_size when ff_combine_frame() reports that no complete frame is
 *         available yet, otherwise the frame-end offset i that was handed to
 *         ff_combine_frame()
 */
int ff_aac_ac3_parse(AVCodecParserContext *s1,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
AACAC3ParseContext *s = s1->priv_data;
ParseContext *pc = &s->pc;
int len, i;
int new_frame_start;
get_next:
i=END_NOT_FOUND;
/* Only look for a frame end / next header once the bytes still owed to the
 * current frame fit into this buffer. */
if(s->remaining_size <= buf_size){
if(s->remaining_size && !s->need_next_header){
/* The current frame ends inside this buffer and no further header is needed. */
i= s->remaining_size;
s->remaining_size = 0;
}else{ //we need a header first
len=0;
/* Feed bytes one at a time into the 64-bit shift register until the
 * sync callback returns non-zero. */
for(i=s->remaining_size; i<buf_size; i++){
s->state = (s->state<<8) + buf[i];
if((len=s->sync(s->state, s, &s->need_next_header, &new_frame_start)))
break;
}
if(len<=0){
i=END_NOT_FOUND;
}else{
s->state=0;
/* i indexed the last header byte; move it back to the first one. */
i-= s->header_size -1;
s->remaining_size = len;
/* If the header does not start a new frame, or nothing precedes it
 * (pc->index+i<=0), extend the pending size by i and evaluate again. */
if(!new_frame_start || pc->index+i<=0){
s->remaining_size += i;
goto get_next;
}
}
}
}
if(ff_combine_frame(pc, i, &buf, &buf_size)<0){
/* Frame still incomplete: account for the consumed bytes and output nothing. */
s->remaining_size -= FFMIN(s->remaining_size, buf_size);
*poutbuf = NULL;
*poutbuf_size = 0;
return buf_size;
}
*poutbuf = buf;
*poutbuf_size = buf_size;
/* update codec info */
if(s->codec_id)
avctx->codec_id = s->codec_id;
/* Due to backwards compatible HE-AAC the sample rate, channel count,
and total number of samples found in an AAC ADTS header are not
reliable. Bit rate is still accurate because the total frame duration in
seconds is still correct (as is the number of bits in the frame). */
if (avctx->codec_id != AV_CODEC_ID_AAC) {
avctx->sample_rate = s->sample_rate;
/* (E-)AC-3: allow downmixing to stereo or mono */
#if FF_API_REQUEST_CHANNELS
FF_DISABLE_DEPRECATION_WARNINGS
/* Map the deprecated request_channels field onto request_channel_layout. */
if (avctx->request_channels == 1)
avctx->request_channel_layout = AV_CH_LAYOUT_MONO;
else if (avctx->request_channels == 2)
avctx->request_channel_layout = AV_CH_LAYOUT_STEREO;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
if (s->channels > 1 &&
avctx->request_channel_layout == AV_CH_LAYOUT_MONO) {
avctx->channels = 1;
avctx->channel_layout = AV_CH_LAYOUT_MONO;
} else if (s->channels > 2 &&
avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
avctx->channels = 2;
avctx->channel_layout = AV_CH_LAYOUT_STEREO;
} else {
/* No applicable downmix request: report the stream's own layout. */
avctx->channels = s->channels;
avctx->channel_layout = s->channel_layout;
}
s1->duration = s->samples;
avctx->audio_service_type = s->service_type;
}
avctx->bit_rate = s->bit_rate;
return i;
}

View File

@@ -0,0 +1,66 @@
/*
* Common AAC and AC-3 parser prototypes
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AAC_AC3_PARSER_H
#define AVCODEC_AAC_AC3_PARSER_H
#include <stdint.h>
#include "avcodec.h"
#include "parser.h"
/**
 * Error codes of the AAC / AC-3 header parsers.
 * All values are negative. The leading hex digit (1-7) distinguishes the
 * error; the low bytes 0x030c0a are common to all of them.
 */
typedef enum {
AAC_AC3_PARSE_ERROR_SYNC = -0x1030c0a,
AAC_AC3_PARSE_ERROR_BSID = -0x2030c0a,
AAC_AC3_PARSE_ERROR_SAMPLE_RATE = -0x3030c0a,
AAC_AC3_PARSE_ERROR_FRAME_SIZE = -0x4030c0a,
AAC_AC3_PARSE_ERROR_FRAME_TYPE = -0x5030c0a,
AAC_AC3_PARSE_ERROR_CRC = -0x6030c0a,
AAC_AC3_PARSE_ERROR_CHANNEL_CFG = -0x7030c0a,
} AACAC3ParseError;
/**
 * Private context of the common AAC / AC-3 parser (ff_aac_ac3_parse()).
 * The codec-specific parser init fills in header_size and sync.
 */
typedef struct AACAC3ParseContext {
ParseContext pc; ///< frame assembly state passed to ff_combine_frame()
int frame_size;
int header_size; ///< header size in bytes; used to step back from the last header byte to the first
/* Codec-specific header check. ff_aac_ac3_parse() calls it after every input
 * byte with the most recent bytes accumulated in state. A non-zero return
 * stops the scan; a positive return is taken as the frame size in bytes. */
int (*sync)(uint64_t state, struct AACAC3ParseContext *hdr_info,
int *need_next_header, int *new_frame_start);
int channels;
int sample_rate;
int bit_rate;
int samples;
uint64_t channel_layout;
int service_type;
int remaining_size; ///< bytes of the current frame that have not been consumed yet
uint64_t state; ///< shift register holding the most recently scanned input bytes
int need_next_header;
enum AVCodecID codec_id; ///< copied to avctx->codec_id by ff_aac_ac3_parse() when non-zero
} AACAC3ParseContext;
/**
 * Common parser_parse callback for the AAC and AC-3 parsers.
 * s1->priv_data is used as an AACAC3ParseContext whose header_size and sync
 * members have been set by the codec-specific parser init.
 */
int ff_aac_ac3_parse(AVCodecParserContext *s1,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size);
#endif /* AVCODEC_AAC_AC3_PARSER_H */

View File

@@ -0,0 +1,123 @@
/*
* MPEG-2/4 AAC ADTS to MPEG-4 Audio Specific Configuration bitstream filter
* Copyright (c) 2009 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "aacadtsdec.h"
#include "put_bits.h"
#include "get_bits.h"
#include "mpeg4audio.h"
#include "internal.h"
/**
 * Private state of the aac_adtstoasc bitstream filter.
 */
typedef struct AACBSFContext {
int first_frame_done; ///< set to 1 once the extradata has been generated from the first ADTS frame
} AACBSFContext;
/**
 * This filter creates an MPEG-4 AudioSpecificConfig from an MPEG-2/4
 * ADTS header and removes the ADTS header.
 *
 * On the first frame the AudioSpecificConfig (plus a copied PCE when the
 * ADTS channel configuration is 0) is stored in avctx->extradata. On every
 * frame the ADTS header, and the CRC when present, are stripped from the
 * output. The output buffer always aliases the input buffer; nothing is
 * allocated for it.
 *
 * @param bsfc          filter context; priv_data is an AACBSFContext
 * @param avctx         codec context whose extradata is (re)generated
 * @param args          unused
 * @param poutbuf       receives a pointer into buf
 * @param poutbuf_size  receives the size of the output data
 * @param buf           input packet data
 * @param buf_size      input packet size in bytes
 * @param keyframe      unused
 * @return 0 on success (including pass-through of packets that do not carry
 *         an ADTS header when extradata already exists), a negative AVERROR
 *         code on failure
 */
static int aac_adtstoasc_filter(AVBitStreamFilterContext *bsfc,
                                AVCodecContext *avctx, const char *args,
                                uint8_t **poutbuf, int *poutbuf_size,
                                const uint8_t *buf, int buf_size,
                                int keyframe)
{
    GetBitContext gb;
    PutBitContext pb;
    AACADTSHeaderInfo hdr;
    AACBSFContext *ctx = bsfc->priv_data;
    int hdr_size;

    /* Default output: the input packet, untouched. */
    *poutbuf      = (uint8_t*) buf;
    *poutbuf_size = buf_size;

    /* A packet shorter than an ADTS header cannot be parsed as one. With
     * existing extradata it is passed through like any other non-ADTS packet;
     * without extradata there is nothing to build a configuration from. */
    if (buf_size < AAC_ADTS_HEADER_SIZE) {
        if (avctx->extradata)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "Input packet too small\n");
        return AVERROR_INVALIDDATA;
    }

    init_get_bits(&gb, buf, AAC_ADTS_HEADER_SIZE*8);

    /* Extradata present and no ADTS sync word: already raw AAC, pass through. */
    if (avctx->extradata)
        if (show_bits(&gb, 12) != 0xfff)
            return 0;

    if (avpriv_aac_parse_header(&gb, &hdr) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Error parsing ADTS frame header!\n");
        return AVERROR_INVALIDDATA;
    }

    if (!hdr.crc_absent && hdr.num_aac_frames > 1) {
        avpriv_report_missing_feature(avctx,
                                      "Multiple RDBs per frame with CRC");
        return AVERROR_PATCHWELCOME;
    }

    /* Header plus the 2-byte CRC when protection is present. The packet must
     * contain payload beyond it, otherwise the size would drop to zero or
     * below and later reads would run past the buffer. */
    hdr_size = AAC_ADTS_HEADER_SIZE + 2*!hdr.crc_absent;
    if (buf_size <= hdr_size) {
        av_log(avctx, AV_LOG_ERROR, "Input packet too small\n");
        return AVERROR_INVALIDDATA;
    }
    buf      += hdr_size;
    buf_size -= hdr_size;

    if (!ctx->first_frame_done) {
        int     pce_size = 0;
        uint8_t pce_data[MAX_PCE_SIZE];

        /* Channel configuration 0: the layout is carried in a PCE, which has
         * to be the first syntax element (3-bit element id 5). */
        if (!hdr.chan_config) {
            init_get_bits(&gb, buf, buf_size * 8);
            if (get_bits(&gb, 3) != 5) {
                avpriv_report_missing_feature(avctx,
                                              "PCE-based channel configuration "
                                              "without PCE as first syntax "
                                              "element");
                return AVERROR_PATCHWELCOME;
            }
            init_put_bits(&pb, pce_data, MAX_PCE_SIZE);
            pce_size = avpriv_copy_pce_data(&pb, &gb)/8;
            flush_put_bits(&pb);
            /* The PCE moves into the extradata, so drop it from the payload. */
            buf_size -= get_bits_count(&gb)/8;
            buf      += get_bits_count(&gb)/8;
        }

        /* Replace any previous extradata: 2 bytes of config plus the PCE. */
        av_free(avctx->extradata);
        avctx->extradata_size = 2 + pce_size;
        avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!avctx->extradata) {
            avctx->extradata_size = 0;
            return AVERROR(ENOMEM);
        }

        init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
        put_bits(&pb, 5, hdr.object_type);
        put_bits(&pb, 4, hdr.sampling_index);
        put_bits(&pb, 4, hdr.chan_config);
        put_bits(&pb, 1, 0); //frame length - 1024 samples
        put_bits(&pb, 1, 0); //does not depend on core coder
        put_bits(&pb, 1, 0); //is not extension
        flush_put_bits(&pb);
        if (pce_size) {
            memcpy(avctx->extradata + 2, pce_data, pce_size);
        }

        ctx->first_frame_done = 1;
    }

    *poutbuf      = (uint8_t*) buf;
    *poutbuf_size = buf_size;

    return 0;
}
/** Registration entry for the "aac_adtstoasc" bitstream filter. */
AVBitStreamFilter ff_aac_adtstoasc_bsf = {
.name = "aac_adtstoasc",
.priv_data_size = sizeof(AACBSFContext),
.filter = aac_adtstoasc_filter,
};

View File

@@ -0,0 +1,114 @@
/*
* AAC defines
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AAC_DEFINES_H
#define AVCODEC_AAC_DEFINES_H
#ifndef USE_FIXED
#define USE_FIXED 0
#endif

/*
 * Arithmetic abstraction for code that is built twice: once as fixed-point
 * (USE_FIXED = 1, integer types with rounding shifts) and once as floating
 * point (USE_FIXED = 0, plain float operations).
 *
 * Every expansion is fully parenthesized so that a macro can be used as an
 * operand of any surrounding expression without changing its meaning.
 */
#if USE_FIXED

#include "libavutil/softfloat.h"

#define FFT_FLOAT    0
#define FFT_FIXED_32 1

#define AAC_RENAME(x)       x ## _fixed
#define AAC_RENAME_32(x)    x ## _fixed_32
#define INTFLOAT int
#define INT64FLOAT          int64_t
#define SHORTFLOAT int16_t
#define AAC_FLOAT SoftFloat
#define AAC_SIGNE           int
/* Float constant -> integer with rounding, at the scale the name implies. */
#define FIXR(a)             ((int)((a) * 1 + 0.5))
#define FIXR10(a)           ((int)((a) * 1024.0 + 0.5))
#define Q23(a)              ((int)((a) * 8388608.0 + 0.5))
#define Q30(x)              ((int)((x)*1073741824.0 + 0.5))
#define Q31(x)              ((int)((x)*2147483648.0 + 0.5))
#define RANGE15(x)          (x)
/* Multiplication by a power of two instead of a left shift: shifting a
 * negative value is undefined behaviour. */
#define GET_GAIN(x, y)      ((-(y) * (1 << (x))) + 1024)
/* Rounded fixed-point products: 64-bit multiply, add half, shift down. */
#define AAC_MUL16(x, y)     ((int)(((int64_t)(x) * (y) + 0x8000) >> 16))
#define AAC_MUL26(x, y)     ((int)(((int64_t)(x) * (y) + 0x2000000) >> 26))
#define AAC_MUL30(x, y)     ((int)(((int64_t)(x) * (y) + 0x20000000) >> 30))
#define AAC_MUL31(x, y)     ((int)(((int64_t)(x) * (y) + 0x40000000) >> 31))
#define AAC_MADD28(x, y, a, b) ((int)((((int64_t)(x) * (y)) + \
                                       ((int64_t)(a) * (b)) + \
                                        0x8000000) >> 28))
#define AAC_MADD30(x, y, a, b) ((int)((((int64_t)(x) * (y)) + \
                                       ((int64_t)(a) * (b)) + \
                                        0x20000000) >> 30))
#define AAC_MADD30_V8(x, y, a, b, c, d, e, f) ((int)((((int64_t)(x) * (y)) + \
                                                      ((int64_t)(a) * (b)) + \
                                                      ((int64_t)(c) * (d)) + \
                                                      ((int64_t)(e) * (f)) + \
                                                       0x20000000) >> 30))
#define AAC_MSUB30(x, y, a, b) ((int)((((int64_t)(x) * (y)) - \
                                       ((int64_t)(a) * (b)) + \
                                        0x20000000) >> 30))
#define AAC_MSUB30_V8(x, y, a, b, c, d, e, f) ((int)((((int64_t)(x) * (y)) + \
                                                      ((int64_t)(a) * (b)) - \
                                                      ((int64_t)(c) * (d)) - \
                                                      ((int64_t)(e) * (f)) + \
                                                       0x20000000) >> 30))
#define AAC_MSUB31_V3(x, y, z)    ((int)((((int64_t)(x) * (z)) - \
                                          ((int64_t)(y) * (z)) + \
                                           0x40000000) >> 31))
/* Each operand is halved before the addition. Without the inner parentheses
 * '+' binds tighter than '>>' and the expression becomes
 * (x) >> (1 + (y)) >> 1. */
#define AAC_HALF_SUM(x, y)  (((x) >> 1) + ((y) >> 1))
/* Arithmetic shift right by y with rounding. */
#define AAC_SRA_R(x, y)     ((int)(((x) + (1 << ((y) - 1))) >> (y)))

#else

#define FFT_FLOAT    1
#define FFT_FIXED_32 0

#define AAC_RENAME(x)       x
#define AAC_RENAME_32(x)    x
#define INTFLOAT float
#define INT64FLOAT          float
#define SHORTFLOAT float
#define AAC_FLOAT float
#define AAC_SIGNE           unsigned
#define FIXR(x)             ((float)(x))
#define FIXR10(x)           ((float)(x))
#define Q23(x)              (x)
#define Q30(x)              (x)
#define Q31(x)              (x)
#define RANGE15(x)          (32768.0 * (x))
#define GET_GAIN(x, y)      powf((x), -(y))
#define AAC_MUL16(x, y)     ((x) * (y))
#define AAC_MUL26(x, y)     ((x) * (y))
#define AAC_MUL30(x, y)     ((x) * (y))
#define AAC_MUL31(x, y)     ((x) * (y))
#define AAC_MADD28(x, y, a, b) ((x) * (y) + (a) * (b))
#define AAC_MADD30(x, y, a, b) ((x) * (y) + (a) * (b))
#define AAC_MADD30_V8(x, y, a, b, c, d, e, f) ((x) * (y) + (a) * (b) + \
                                               (c) * (d) + (e) * (f))
#define AAC_MSUB30(x, y, a, b) ((x) * (y) - (a) * (b))
#define AAC_MSUB30_V8(x, y, a, b, c, d, e, f) ((x) * (y) + (a) * (b) - \
                                               (c) * (d) - (e) * (f))
#define AAC_MSUB31_V3(x, y, z)    (((x) - (y)) * (z))
#define AAC_HALF_SUM(x, y)  (((x) + (y)) * 0.5f)
#define AAC_SRA_R(x, y)     (x)

#endif /* USE_FIXED */
#endif /* AVCODEC_AAC_DEFINES_H */

View File

@@ -0,0 +1,69 @@
/*
* Audio and Video frame extraction
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "parser.h"
#include "aac_ac3_parser.h"
#include "aacadtsdec.h"
#include "get_bits.h"
#include "mpeg4audio.h"
/**
 * Sync callback of the AAC parser: test whether the most recent bytes held in
 * state form a valid ADTS header.
 *
 * @param state             last 8 scanned bytes, most recent byte in the low bits
 * @param hdr_info          receives sample rate, channel count, sample count
 *                          and bit rate of the header that was found
 * @param need_next_header  set to 0 when a header is found
 * @param new_frame_start   set to 1 when a header is found
 * @return frame size from the header, or 0 if the bytes are not a valid header
 */
static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info,
                    int *need_next_header, int *new_frame_start)
{
    /* Backing store for the bit reader: the 8 state bytes in stream order,
     * followed by the padding the bit reader expects. */
    union {
        uint64_t u64;
        uint8_t  u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
    } scratch;
    AACADTSHeaderInfo adts;
    GetBitContext gbc;
    int frame_len;

    scratch.u64 = av_be2ne64(state);

    /* The header occupies the last AAC_ADTS_HEADER_SIZE of the 8 bytes. */
    init_get_bits(&gbc, scratch.u8+8-AAC_ADTS_HEADER_SIZE, AAC_ADTS_HEADER_SIZE * 8);

    frame_len = avpriv_aac_parse_header(&gbc, &adts);
    if (frame_len < 0)
        return 0;

    *need_next_header = 0;
    *new_frame_start  = 1;

    hdr_info->sample_rate = adts.sample_rate;
    hdr_info->channels    = ff_mpeg4audio_channels[adts.chan_config];
    hdr_info->samples     = adts.samples;
    hdr_info->bit_rate    = adts.bit_rate;

    return frame_len;
}
/**
 * Parser init callback: configure the shared AAC/AC-3 parser context for
 * ADTS by installing the ADTS sync callback and header size.
 *
 * @return always 0
 */
static av_cold int aac_parse_init(AVCodecParserContext *s1)
{
    AACAC3ParseContext *ctx = s1->priv_data;

    ctx->sync        = aac_sync;
    ctx->header_size = AAC_ADTS_HEADER_SIZE;

    return 0;
}
/**
 * AAC parser registration: ADTS-specific init combined with the parse routine
 * shared with AC-3.
 */
AVCodecParser ff_aac_parser = {
.codec_ids = { AV_CODEC_ID_AAC },
.priv_data_size = sizeof(AACAC3ParseContext),
.parser_init = aac_parse_init,
.parser_parse = ff_aac_ac3_parse,
.parser_close = ff_parse_close,
};

View File

@@ -0,0 +1,39 @@
/*
* Generate a header file for hardcoded AAC tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdlib.h>
#define CONFIG_HARDCODED_TABLES 0
#include "aac_tablegen.h"
#include "tableprint.h"
/**
 * Table generator entry point: computes the AAC scalefactor tables at run
 * time and emits them as const float arrays.
 */
int main(void)
{
    /* CONFIG_HARDCODED_TABLES is forced to 0 in this file, so this call
     * actually computes the tables. */
    ff_aac_tableinit();

    write_fileheader();

    WRITE_ARRAY("const", float, ff_aac_pow2sf_tab);
    WRITE_ARRAY("const", float, ff_aac_pow34sf_tab);

    return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,45 @@
/*
* Header file for hardcoded AAC tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AAC_TABLEGEN_H
#define AVCODEC_AAC_TABLEGEN_H
#include "aac_tablegen_decl.h"
#if CONFIG_HARDCODED_TABLES
#include "libavcodec/aac_tables.h"
#else
#include "libavutil/mathematics.h"
/* Run-time generated tables, 428 entries each; filled by ff_aac_tableinit(). */
float ff_aac_pow2sf_tab[428];
float ff_aac_pow34sf_tab[428];

/**
 * Fill both scalefactor tables.
 *
 * ff_aac_pow2sf_tab[i]  = 2^((i - POW_SF2_ZERO) / 4)
 * ff_aac_pow34sf_tab[i] = ff_aac_pow2sf_tab[i]^(3/4), computed from the
 *                         value already stored (as float) in the first table
 */
av_cold void ff_aac_tableinit(void)
{
    int idx = 0;

    while (idx < 428) {
        ff_aac_pow2sf_tab[idx]  = pow(2, (idx - POW_SF2_ZERO) / 4.0);
        ff_aac_pow34sf_tab[idx] = pow(ff_aac_pow2sf_tab[idx], 3.0/4.0);
        idx++;
    }
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_AAC_TABLEGEN_H */

View File

@@ -0,0 +1,38 @@
/*
* Header file for hardcoded AAC tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AAC_TABLEGEN_DECL_H
#define AVCODEC_AAC_TABLEGEN_DECL_H
#define POW_SF2_ZERO 200 ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);
/* With hardcoded tables the arrays are const and ff_aac_tableinit() expands
 * to nothing; otherwise they are writable and filled by ff_aac_tableinit(). */
#if CONFIG_HARDCODED_TABLES
#define ff_aac_tableinit()
extern const float ff_aac_pow2sf_tab[428];
extern const float ff_aac_pow34sf_tab[428];
#else
void ff_aac_tableinit(void);
extern float ff_aac_pow2sf_tab[428];
extern float ff_aac_pow34sf_tab[428];
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_AAC_TABLEGEN_DECL_H */

View File

@@ -0,0 +1,70 @@
/*
* Audio and Video frame extraction
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
* Copyright (c) 2009 Alex Converse
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "aac_ac3_parser.h"
#include "aacadtsdec.h"
#include "get_bits.h"
#include "mpeg4audio.h"
/**
 * Parse an ADTS frame header (fixed and variable part).
 *
 * @param[in]  gbc  bit reader positioned at the sync word
 * @param[out] hdr  receives the decoded header fields; written only on success
 * @return the frame length in bytes taken from the header on success,
 *         AAC_AC3_PARSE_ERROR_SYNC if the sync word is missing,
 *         AAC_AC3_PARSE_ERROR_SAMPLE_RATE if the sample rate index has no
 *         table entry, or AAC_AC3_PARSE_ERROR_FRAME_SIZE if the frame length
 *         is smaller than the header itself
 */
int avpriv_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
{
    int size, rdb, ch, sr;
    int aot, crc_abs;

    if (get_bits(gbc, 12) != 0xfff)
        return AAC_AC3_PARSE_ERROR_SYNC;

    skip_bits1(gbc);             /* id */
    skip_bits(gbc, 2);           /* layer */
    crc_abs = get_bits1(gbc);    /* protection_absent */
    aot     = get_bits(gbc, 2);  /* profile_objecttype */
    sr      = get_bits(gbc, 4);  /* sample_frequency_index */
    if (!avpriv_mpeg4audio_sample_rates[sr])
        return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
    skip_bits1(gbc);             /* private_bit */
    ch = get_bits(gbc, 3);       /* channel_configuration */

    skip_bits1(gbc);             /* original/copy */
    skip_bits1(gbc);             /* home */

    /* adts_variable_header */
    skip_bits1(gbc);             /* copyright_identification_bit */
    skip_bits1(gbc);             /* copyright_identification_start */
    size = get_bits(gbc, 13);    /* aac_frame_length */
    if (size < AAC_ADTS_HEADER_SIZE)
        return AAC_AC3_PARSE_ERROR_FRAME_SIZE;

    skip_bits(gbc, 11);          /* adts_buffer_fullness */
    rdb = get_bits(gbc, 2);      /* number_of_raw_data_blocks_in_frame */

    hdr->object_type    = aot + 1;
    hdr->chan_config    = ch;
    hdr->crc_absent     = crc_abs;
    hdr->num_aac_frames = rdb + 1;
    hdr->sampling_index = sr;
    hdr->sample_rate    = avpriv_mpeg4audio_sample_rates[sr];
    hdr->samples        = (rdb + 1) * 1024;
    /* size * 8 * sample_rate can exceed 32 bits (e.g. 8191 * 8 * 96000), so
     * the product is formed in 64 bits. hdr->samples is at least 1024, so the
     * quotient always fits the 32-bit field. */
    hdr->bit_rate       = (uint64_t)size * 8 * hdr->sample_rate / hdr->samples;

    return size;
}

View File

@@ -0,0 +1,54 @@
/*
* AAC ADTS header decoding prototypes and structures
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACADTSDEC_H
#define AVCODEC_AACADTSDEC_H
#include <stdint.h>
#include "get_bits.h"
/** Size in bytes of the ADTS header that avpriv_aac_parse_header() consumes. */
#define AAC_ADTS_HEADER_SIZE 7
/**
 * Fields decoded from an ADTS frame header by avpriv_aac_parse_header().
 */
typedef struct AACADTSHeaderInfo {
uint32_t sample_rate; ///< sample rate looked up from sampling_index
uint32_t samples; ///< num_aac_frames * 1024
uint32_t bit_rate; ///< frame length * 8 * sample_rate / samples
uint8_t crc_absent; ///< protection_absent bit of the header
uint8_t object_type; ///< 2-bit profile field of the header plus 1
uint8_t sampling_index; ///< sample_frequency_index of the header
uint8_t chan_config; ///< channel_configuration of the header
uint8_t num_aac_frames; ///< number_of_raw_data_blocks_in_frame plus 1
} AACADTSHeaderInfo;
/**
 * Parse AAC frame header.
 * Parse the ADTS frame header to the end of the variable header, which is
 * the first 54 bits.
 * @param[in] gbc BitContext containing the first 54 bits of the frame.
 * @param[out] hdr Pointer to struct where header info is written.
 * @return Returns the frame length in bytes (aac_frame_length) on success,
 * AAC_AC3_PARSE_ERROR_SYNC if there is a sync word mismatch,
 * AAC_AC3_PARSE_ERROR_SAMPLE_RATE if the sample rate element is invalid, or
 * AAC_AC3_PARSE_ERROR_FRAME_SIZE if the frame length is smaller than the
 * ADTS header.
 */
int avpriv_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
#endif /* AVCODEC_AACADTSDEC_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,581 @@
/*
* AAC decoder
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
* Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
*
* AAC LATM decoder
* Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
* Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC decoder
* @author Oded Shimon ( ods15 ods15 dyndns org )
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
#define FFT_FLOAT 1
#define FFT_FIXED_32 0
#define USE_FIXED 0
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "fft.h"
#include "imdct15.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin.h"
#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "cbrt_tablegen.h"
#include "sbr.h"
#include "aacsbr.h"
#include "mpeg4audio.h"
#include "aacadtsdec.h"
#include "libavutil/intfloat.h"
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <string.h>
#if ARCH_ARM
# include "arm/aac.h"
#elif ARCH_MIPS
# include "mips/aacdec_mips.h"
#endif
/**
 * Put one predictor into its initial state: history (r0/r1) and
 * correlations (cor0/cor1) are cleared, variances (var0/var1) are set to 1.
 */
static av_always_inline void reset_predict_state(PredictorState *ps)
{
    ps->var0 = 1.0f;
    ps->var1 = 1.0f;
    ps->cor0 = 0.0f;
    ps->cor1 = 0.0f;
    ps->r0   = 0.0f;
    ps->r1   = 0.0f;
}
#ifndef VMUL2
/**
 * Write two scaled table entries to dst.
 *
 * @param dst   output; two floats are written
 * @param v     lookup table, indexed with 4-bit values
 * @param idx   packed indices: bits 0-3 select the first entry,
 *              bits 4-7 the second
 * @param scale pointer to the factor applied to both entries
 * @return dst advanced by two elements
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float factor = *scale;

    dst[0] = v[idx & 15] * factor;
    dst[1] = v[(idx >> 4) & 15] * factor;
    return dst + 2;
}
#endif
#ifndef VMUL4
/**
 * Write four scaled table entries to dst.
 *
 * @param dst   output; four floats are written
 * @param v     lookup table, indexed with 2-bit values
 * @param idx   packed indices, two bits per entry starting at bit 0
 * @param scale pointer to the factor applied to all entries
 * @return dst advanced by four elements
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float factor = *scale;
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = v[(idx >> (2 * n)) & 3] * factor;
    return dst + 4;
}
#endif
#ifndef VMUL2S
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
unsigned sign, const float *scale)
{
union av_intfloat32 s0, s1;
s0.f = s1.f = *scale;
s0.i ^= sign >> 1 << 31;
s1.i ^= sign << 31;
*dst++ = v[idx & 15] * s0.f;
*dst++ = v[idx>>4 & 15] * s1.f;
return dst;
}
#endif
#ifndef VMUL4S
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
unsigned sign, const float *scale)
{
unsigned nz = idx >> 12;
union av_intfloat32 s = { .f = *scale };
union av_intfloat32 t;
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>2 & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>4 & 3] * t.f;
sign <<= nz & 1;
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>6 & 3] * t.f;
return dst;
}
#endif
static av_always_inline float flt16_round(float pf)
{
union av_intfloat32 tmp;
tmp.f = pf;
tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
return tmp.f;
}
static av_always_inline float flt16_even(float pf)
{
union av_intfloat32 tmp;
tmp.f = pf;
tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
return tmp.f;
}
static av_always_inline float flt16_trunc(float pf)
{
union av_intfloat32 pun;
pun.f = pf;
pun.i &= 0xFFFF0000U;
return pun.f;
}
/**
 * Run one step of the per-coefficient predictor and update its state.
 *
 * @param ps            predictor state, read and then overwritten
 * @param coef          coefficient; the prediction is added to it when
 *                      output_enable is non-zero
 * @param output_enable non-zero to apply the prediction to *coef; the state
 *                      is updated either way
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
int output_enable)
{
const float a = 0.953125; // 61.0 / 64
const float alpha = 0.90625; // 29.0 / 32
float e0, e1;
float pv;
float k1, k2;
float r0 = ps->r0, r1 = ps->r1;
float cor0 = ps->cor0, cor1 = ps->cor1;
float var0 = ps->var0, var1 = ps->var1;
// gains are zero until the corresponding variance exceeds 1
k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
// predicted value from the two stored history terms
pv = flt16_round(k1 * r0 + k2 * r1);
if (output_enable)
*coef += pv;
// e0 is the (possibly corrected) coefficient, e1 removes the first-stage part
e0 = *coef;
e1 = e0 - k1 * r0;
// update correlations and variances with decay factor alpha
ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
// update the history terms, attenuated by a
ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
ps->r0 = flt16_trunc(a * e0);
}
/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * For every scalefactor band of the coupling element that is not ZERO_BT,
 * the coupling channel's coefficients are scaled by the band gain and added
 * to the target's coefficients. Nothing is done (apart from logging an
 * error) when the object type is AOT_AAC_LTP.
 *
 * @param ac     decoder context
 * @param target channel receiving the coupled signal
 * @param cce    coupling channel element
 * @param index  index into coupling gain array
 */
static void apply_dependent_coupling(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *band_start   = ics->swb_offset;
    const float *in              = cce->ch[0].coeffs;
    float *out                   = target->coeffs;
    int grp, sfb, win, bin, band = 0;

    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Dependent coupling is not supported together with LTP\n");
        return;
    }

    for (grp = 0; grp < ics->num_window_groups; grp++) {
        for (sfb = 0; sfb < ics->max_sfb; sfb++, band++) {
            float gain;

            if (cce->ch[0].band_type[band] == ZERO_BT)
                continue;

            gain = cce->coup.gain[index][band];
            for (win = 0; win < ics->group_len[grp]; win++) {
                /* each window of the group occupies 128 coefficients */
                for (bin = band_start[sfb]; bin < band_start[sfb + 1]; bin++) {
                    // FIXME: SIMDify
                    out[win * 128 + bin] += gain * in[win * 128 + bin];
                }
            }
        }
        out += ics->group_len[grp] * 128;
        in  += ics->group_len[grp] * 128;
    }
}
/**
 * Apply independent channel coupling (applied after IMDCT).
 *
 * Adds the coupling channel's output, scaled by the first gain entry of
 * the selected gain row, to the target's output. 1024 samples are
 * processed, or 2048 when the m4ac.sbr field equals 1.
 *
 * @param ac     decoder context
 * @param target channel receiving the coupled signal
 * @param cce    coupling channel element
 * @param index  index into coupling gain array
 */
static void apply_independent_coupling(AACContext *ac,
                                       SingleChannelElement *target,
                                       ChannelElement *cce, int index)
{
    const int count   = 1024 << (ac->oc[1].m4ac.sbr == 1);
    const float scale = cce->coup.gain[index][0];
    const float *in   = cce->ch[0].ret;
    float *out        = target->ret;
    int n;

    for (n = 0; n < count; n++)
        out[n] += scale * in[n];
}
#include "aacdec_template.c"
#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
/**
 * Private context of the LATM decoder: the wrapped AAC decoder context plus
 * the few stream-mux parameters that are kept between packets.
 */
struct LATMContext {
AACContext aac_ctx; ///< containing AACContext
int initialized; ///< initialized after a valid extradata was seen
// parser data
int audio_mux_version_A; ///< LATM syntax version
int frame_length_type; ///< 0/1 variable/fixed frame length
int frame_length; ///< frame length for fixed frame length
};
/**
 * Read a variable-width value: a 2-bit field gives the byte count minus
 * one, followed by that many bytes (8 to 32 bits) of value.
 */
static inline uint32_t latm_get_value(GetBitContext *b)
{
    const int nb_bytes = get_bits(b, 2) + 1;

    return get_bits_long(b, nb_bytes * 8);
}
/**
 * Parse the AudioSpecificConfig embedded in a StreamMuxConfig and, when the
 * configuration is new or changed, copy it into avctx->extradata.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned at the start of the config; it is
 *                advanced by the number of bits consumed on success
 * @param asclen  config length in bits, or 0 when unknown (then everything
 *                left in gb is offered to the parser)
 * @return number of bits consumed, or a negative AVERROR code
 */
static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
                                             GetBitContext *gb, int asclen)
{
    AACContext *ac        = &latmctx->aac_ctx;
    AVCodecContext *avctx = ac->avctx;
    MPEG4AudioConfig m4ac = { 0 };
    int config_start_bit  = get_bits_count(gb);
    int sync_extension    = 0;
    int bits_consumed, esize;

    if (asclen) {
        sync_extension = 1;
        asclen         = FFMIN(asclen, get_bits_left(gb));
    } else
        asclen         = get_bits_left(gb);

    /* the config is handed over as a byte pointer, so it must be aligned */
    if (config_start_bit % 8) {
        avpriv_request_sample(latmctx->aac_ctx.avctx,
                              "Non-byte-aligned audio-specific config");
        return AVERROR_PATCHWELCOME;
    }
    if (asclen <= 0)
        return AVERROR_INVALIDDATA;
    bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
                                         gb->buffer + (config_start_bit / 8),
                                         asclen, sync_extension);

    if (bits_consumed < 0)
        return AVERROR_INVALIDDATA;

    if (!latmctx->initialized ||
        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {

        if (latmctx->initialized) {
            av_log(avctx, AV_LOG_INFO, "audio config changed\n");
        } else {
            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
        }
        latmctx->initialized = 0;

        esize = (bits_consumed + 7) / 8;

        if (avctx->extradata_size < esize) {
            av_free(avctx->extradata);
            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!avctx->extradata) {
                /* keep size and pointer consistent: with a stale size a
                 * later, smaller config would skip the reallocation and
                 * memcpy() into the NULL pointer */
                avctx->extradata_size = 0;
                return AVERROR(ENOMEM);
            }
        }

        avctx->extradata_size = esize;
        memcpy(avctx->extradata, gb->buffer + (config_start_bit / 8), esize);
        memset(avctx->extradata + esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
    }
    skip_bits_long(gb, bits_consumed);

    return bits_consumed;
}
/**
 * Parse a StreamMuxConfig element.
 *
 * Only a single program with a single layer is supported; anything else is
 * reported through avpriv_request_sample(). Nothing beyond the version bits
 * is parsed when audio_mux_version_A is set.
 *
 * @param latmctx LATM decoder context; audio_mux_version_A,
 *                frame_length_type and frame_length are updated
 * @param gb      bit reader positioned at the start of the element
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int read_stream_mux_config(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int ret, audio_mux_version = get_bits(gb, 1);

    latmctx->audio_mux_version_A = 0;
    if (audio_mux_version)
        latmctx->audio_mux_version_A = get_bits(gb, 1);

    if (!latmctx->audio_mux_version_A) {

        if (audio_mux_version)
            latm_get_value(gb);                 // taraFullness

        skip_bits(gb, 1);                       // allStreamSameTimeFraming
        skip_bits(gb, 6);                       // numSubFrames
        // numPrograms
        if (get_bits(gb, 4)) {                  // numPrograms
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
            return AVERROR_PATCHWELCOME;
        }

        // for each program (which there is only one in DVB)

        // for each layer (which there is only one in DVB)
        if (get_bits(gb, 3)) {                   // numLayer
            avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
            return AVERROR_PATCHWELCOME;
        }

        // for all but first stream: use_same_config = get_bits(gb, 1);
        if (!audio_mux_version) {
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
                return ret;
        } else {
            int ascLen = latm_get_value(gb);
            if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
                return ret;
            /* skip whatever part of the declared length was not parsed */
            ascLen -= ret;
            skip_bits_long(gb, ascLen);
        }

        latmctx->frame_length_type = get_bits(gb, 3);
        switch (latmctx->frame_length_type) {
        case 0:
            skip_bits(gb, 8);       // latmBufferFullness
            break;
        case 1:
            latmctx->frame_length = get_bits(gb, 9);
            break;
        case 3:
        case 4:
        case 5:
            skip_bits(gb, 6);       // CELP frame length table index
            break;
        case 6:
        case 7:
            skip_bits(gb, 1);       // HVXC frame length table index
            break;
        default:                    // type 2 carries no extra field here
            break;
        }

        if (get_bits(gb, 1)) {                  // other data
            if (audio_mux_version) {
                latm_get_value(gb);             // other_data_bits
            } else {
                int esc;
                do {
                    /* each round consumes 9 bits; stop on truncated input
                     * instead of reading on past the end of the buffer */
                    if (get_bits_left(gb) < 9)
                        return AVERROR_INVALIDDATA;
                    esc = get_bits(gb, 1);
                    skip_bits(gb, 8);
                } while (esc);
            }
        }

        if (get_bits(gb, 1))                     // crc present
            skip_bits(gb, 8);                    // config_crc
    }

    return 0;
}
/**
 * Read the PayloadLengthInfo for the current frame.
 *
 * @param ctx LATM context providing frame_length_type / frame_length
 * @param gb  bit reader
 * @return for frame_length_type 0 the sum of the length bytes read (each
 *         byte equal to 255 is followed by another one); for type 1 the
 *         stored fixed frame length; 0 for every other type
 */
static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
{
    switch (ctx->frame_length_type) {
    case 0: {
        int total = 0;
        uint8_t chunk;

        do {
            chunk  = get_bits(gb, 8);
            total += chunk;
        } while (chunk == 255);
        return total;
    }
    case 1:
        return ctx->frame_length;
    case 3:
    case 5:
    case 7:
        skip_bits(gb, 2);          // mux_slot_length_coded
        break;
    default:
        break;
    }
    return 0;
}
/**
 * Parse the start of an AudioMuxElement: the optional StreamMuxConfig and,
 * for audio_mux_version_A == 0, the payload length, which is checked
 * against the data remaining in the packet.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned at the start of the element
 * @return 0 on success; AVERROR(EAGAIN) when the stream refers to a
 *         previous config but no extradata is available; another negative
 *         AVERROR code on invalid data
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int err;
    uint8_t use_same_mux = get_bits(gb, 1);

    if (!use_same_mux) {
        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
            return err;
    } else if (!latmctx->aac_ctx.avctx->extradata) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
               "no decoder config found\n");
        return AVERROR(EAGAIN);
    }
    if (latmctx->audio_mux_version_A == 0) {
        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
        /* the length is input-controlled: convert to bits in 64-bit
         * arithmetic so the multiplication cannot overflow a signed int */
        int64_t payload_bits = mux_slot_length_bytes * 8LL;
        int bits_left        = get_bits_left(gb);

        if (mux_slot_length_bytes < 0 || payload_bits > bits_left) {
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
            return AVERROR_INVALIDDATA;
        } else if (payload_bits + 256 < bits_left) {
            /* here payload_bits < bits_left, so it fits an int again */
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
                   "frame length mismatch %d << %d\n",
                   mux_slot_length_bytes * 8, bits_left);
            return AVERROR_INVALIDDATA;
        }
    }
    return 0;
}
/**
 * Decode one LOAS/LATM packet.
 *
 * The packet must start with the 11-bit LOAS sync word followed by a
 * 13-bit length. After the mux element has been parsed, the AAC decoder is
 * configured from avctx->extradata if that has not happened yet, and the
 * payload is passed to the ER or the regular AAC frame decoder depending
 * on the object type.
 *
 * @return the mux length (13-bit field + 3) on success; avpkt->size with
 *         *got_frame_ptr = 0 while no configuration is available; a
 *         negative AVERROR code on failure
 */
static int latm_decode_frame(AVCodecContext *avctx, void *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    AACContext *aac             = &latmctx->aac_ctx;
    GetBitContext gb;
    int muxlength, err;

    err = init_get_bits8(&gb, avpkt->data, avpkt->size);
    if (err < 0)
        return err;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    muxlength = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (muxlength > avpkt->size)
        return AVERROR_INVALIDDATA;

    err = read_audio_mux_element(latmctx, &gb);
    if (err < 0)
        return err;

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            /* nothing to configure the decoder with yet: drop the packet */
            *got_frame_ptr = 0;
            return avpkt->size;
        }

        push_output_configuration(aac);
        err = decode_audio_specific_config(aac, avctx, &aac->oc[1].m4ac,
                                           avctx->extradata,
                                           avctx->extradata_size*8LL, 1);
        if (err < 0) {
            pop_output_configuration(aac);
            return err;
        }
        latmctx->initialized = 1;
    }

    if (show_bits(&gb, 12) == 0xfff) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (aac->oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
        break;
    }

    return err < 0 ? err : muxlength;
}
/**
 * Initialise the LATM decoder by initialising the wrapped AAC decoder.
 * When extradata was supplied, the context is marked initialized exactly
 * if that initialisation returned 0.
 *
 * @return the result of aac_decode_init()
 */
static av_cold int latm_decode_init(AVCodecContext *avctx)
{
    struct LATMContext *latmctx = avctx->priv_data;
    const int status            = aac_decode_init(avctx);

    if (avctx->extradata_size > 0)
        latmctx->initialized = (status == 0);

    return status;
}
/**
 * Codec descriptor for the AAC decoder built from this file (the file
 * defines USE_FIXED as 0 and FFT_FLOAT as 1). Output is planar float
 * (AV_SAMPLE_FMT_FLTP); the private context is an AACContext.
 */
AVCodec ff_aac_decoder = {
.name = "aac",
.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_AAC,
.priv_data_size = sizeof(AACContext),
.init = aac_decode_init,
.close = aac_decode_close,
.decode = aac_decode_frame,
/* list terminated by AV_SAMPLE_FMT_NONE */
.sample_fmts = (const enum AVSampleFormat[]) {
AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
},
.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
.channel_layouts = aac_channel_layout,
.flush = flush,
.priv_class = &aac_decoder_class,
.profiles = profiles,
};
/*
Note: This decoder filter is intended to decode LATM streams transferred
in MPEG transport streams which only contain one program.
To do a more complex LATM demuxing a separate LATM demuxer should be used.

The descriptor shares close/flush and the channel layout table with
ff_aac_decoder; only init and decode go through the LATM wrappers, and the
private context is a struct LATMContext.
*/
AVCodec ff_aac_latm_decoder = {
.name = "aac_latm",
.long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_AAC_LATM,
.priv_data_size = sizeof(struct LATMContext),
.init = latm_decode_init,
.close = aac_decode_close,
.decode = latm_decode_frame,
/* list terminated by AV_SAMPLE_FMT_NONE */
.sample_fmts = (const enum AVSampleFormat[]) {
AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
},
.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
.channel_layouts = aac_channel_layout,
.flush = flush,
.profiles = profiles,
};

View File

@@ -0,0 +1,443 @@
/*
* Copyright (c) 2013
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* AAC decoder fixed-point implementation
*
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC decoder
* @author Oded Shimon ( ods15 ods15 dyndns org )
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*
* Fixed point implementation
* @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
*/
#define FFT_FLOAT 0
#define FFT_FIXED_32 1
#define USE_FIXED 1
#include "libavutil/fixed_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "fft.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin.h"
#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "cbrt_tablegen.h"
#include "sbr.h"
#include "aacsbr.h"
#include "mpeg4audio.h"
#include "aacadtsdec.h"
#include "libavutil/intfloat.h"
#include <math.h>
#include <string.h>
/**
 * Put one fixed-point predictor into its initial state: history and
 * correlation terms get mantissa 0 / exponent 0, both variances get
 * mantissa 0x20000000 / exponent 1.
 */
static av_always_inline void reset_predict_state(PredictorState *ps)
{
    /* variances */
    ps->var0.mant = ps->var1.mant = 0x20000000;
    ps->var0.exp  = ps->var1.exp  = 1;

    /* correlations */
    ps->cor0.mant = ps->cor1.mant = 0;
    ps->cor0.exp  = ps->cor1.exp  = 0;

    /* history */
    ps->r0.mant = ps->r1.mant = 0;
    ps->r0.exp  = ps->r1.exp  = 0;
}
/* Fractional powers of two, each halved before conversion with Q31();
 * indexed with (scale & 3) by subband_scale() and noise_scale(). */
static const int exp2tab[4] = { Q31(1.0000000000/2), Q31(1.1892071150/2), Q31(1.4142135624/2), Q31(1.6817928305/2) }; // 2^0, 2^0.25, 2^0.5, 2^0.75
/**
 * Unpack a signed pair: two 4-bit fields of idx (bits 0-3 and 4-7), each
 * offset by -4, are stored to dst[0] and dst[1].
 *
 * @return dst advanced by two elements
 */
static inline int *DEC_SPAIR(int *dst, unsigned idx)
{
    const unsigned low  = idx & 15;
    const unsigned high = (idx >> 4) & 15;

    *dst++ = (int)low  - 4;
    *dst++ = (int)high - 4;
    return dst;
}
/**
 * Unpack a signed quad: four 2-bit fields of idx (starting at bit 0), each
 * offset by -1, are stored to dst[0..3].
 *
 * @return dst advanced by four elements
 */
static inline int *DEC_SQUAD(int *dst, unsigned idx)
{
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = (int)((idx >> (2 * n)) & 3) - 1;
    return dst + 4;
}
/**
 * Unpack an unsigned pair and apply signs.
 *
 * dst[0] is bits 0-3 of idx multiplied by (1 - (sign & ~1u)); dst[1] is
 * bits 4-7 of idx multiplied by (1 - 2 * (sign & 1)). For sign values
 * 0..3 this negates the first value when bit 1 is set and the second when
 * bit 0 is set. The arithmetic is done in unsigned and converted to int.
 *
 * @return dst advanced by two elements
 */
static inline int *DEC_UPAIR(int *dst, unsigned idx, unsigned sign)
{
    const unsigned mul_first  = 1 - (sign & 0xFFFFFFFE);
    const unsigned mul_second = 1 - ((sign & 1) << 1);
    const unsigned first      = idx & 15;
    const unsigned second     = (idx >> 4) & 15;

    dst[0] = first  * mul_first;
    dst[1] = second * mul_second;
    return dst + 2;
}
/**
 * Unpack an unsigned quad and apply signs.
 *
 * @param dst  output; four ints are written
 * @param idx  bits 0-7 hold four 2-bit magnitudes; idx >> 12 is a mask
 *             whose bit n says whether value n consumes a sign bit
 * @param sign sign bits, consumed from bit 31 downwards (set = negative)
 * @return dst advanced by four elements
 *
 * The sign factor is computed as 1 - 2 * (sign >> 31) rather than
 * 1 + (((int)sign >> 31) << 1): the latter left-shifts -1, which is
 * undefined behaviour in C, while the results are the same.
 */
static inline int *DEC_UQUAD(int *dst, unsigned idx, unsigned sign)
{
    unsigned nz = idx >> 12;
    int n;

    for (n = 0; n < 4; n++) {
        /* +1 when the top bit of sign is clear, -1 when it is set */
        const int factor = 1 - 2 * (int)(sign >> 31);

        dst[n] = (int)((idx >> (2 * n)) & 3) * factor;
        /* move on to the next sign bit only if this value used one */
        sign <<= nz & 1;
        nz   >>= 1;
    }
    return dst + 4;
}
/**
 * Replace every coefficient in place by the cbrt_tab entry of its absolute
 * value, keeping the original sign.
 *
 * @param coefs coefficients, modified in place
 * @param len   number of coefficients
 */
static void vector_pow43(int *coefs, int len)
{
    int *const end = coefs + len;
    int *p;

    for (p = coefs; p < end; p++) {
        const int value = *p;

        *p = value < 0 ? -(int)cbrt_tab[-value] : (int)cbrt_tab[value];
    }
}
/**
 * Scale a band of fixed-point coefficients.
 *
 * Each src[i] is multiplied by exp2tab[|scale| & 3], shifted right (with
 * rounding) by an amount derived from offset - (|scale| >> 2), and given
 * the sign of scale.
 *
 * @param dst    output coefficients
 * @param src    input coefficients
 * @param scale  signed scale; magnitude selects factor and shift
 * @param offset base shift amount
 * @param len    number of coefficients
 */
static void subband_scale(int *dst, int *src, int scale, int offset, int len)
{
    int ssign = scale < 0 ? -1 : 1;
    int s = FFABS(scale);
    unsigned int round;
    int i, out, c = exp2tab[s & 3];

    s = offset - (s >> 2);

    if (s > 31) {
        /* shifting a 32-bit value by 32 or more is undefined; every
         * result is treated as fully shifted out */
        for (i = 0; i < len; i++)
            dst[i] = 0;
    } else if (s > 0) {
        round = 1U << (s - 1);
        for (i = 0; i < len; i++) {
            out = (int)(((int64_t)src[i] * c) >> 32);
            dst[i] = ((int)(out + round) >> s) * ssign;
        }
    } else {
        /* NOTE(review): s <= -32 would still give a non-positive shift
         * count below; presumably callers never produce it - confirm */
        s = s + 32;
        /* 1U: for s == 32 the rounding term is 1 << 31, which does not
         * fit a signed int */
        round = 1U << (s - 1);
        for (i = 0; i < len; i++) {
            out = (int)((int64_t)((int64_t)src[i] * c + round) >> s);
            dst[i] = out * ssign;
        }
    }
}
/**
 * Scale a band of noise coefficients in place.
 *
 * The factor exp2tab[|scale| & 3] is divided by band_energy (after
 * band_energy has been halved until it is at most 0x7fff); the halving
 * count is added to the shift amount. Results take the sign of scale.
 *
 * @param coefs       coefficients, modified in place
 * @param scale       signed scale; magnitude selects factor and shift
 * @param band_energy divisor; assumed to be > 0 - TODO confirm against the
 *                    caller, a zero value would divide by zero
 * @param len         number of coefficients
 */
static void noise_scale(int *coefs, int scale, int band_energy, int len)
{
    int ssign = scale < 0 ? -1 : 1;
    int s = FFABS(scale);
    unsigned int round;
    int i, out, c = exp2tab[s & 3];
    int nlz = 0;

    while (band_energy > 0x7fff) {
        band_energy >>= 1;
        nlz++;
    }
    c /= band_energy;
    s = 21 + nlz - (s >> 2);

    if (s > 31) {
        /* shifting a 32-bit value by 32 or more is undefined; every
         * result is treated as fully shifted out */
        for (i = 0; i < len; i++)
            coefs[i] = 0;
    } else if (s > 0) {
        round = 1U << (s - 1);
        for (i = 0; i < len; i++) {
            out = (int)(((int64_t)coefs[i] * c) >> 32);
            coefs[i] = ((int)(out + round) >> s) * ssign;
        }
    } else {
        /* NOTE(review): s <= -32 would still give a non-positive shift
         * count below; presumably callers never produce it - confirm */
        s = s + 32;
        /* 1U: for s == 32 the rounding term is 1 << 31, which does not
         * fit a signed int */
        round = 1U << (s - 1);
        for (i = 0; i < len; i++) {
            out = (int)((int64_t)((int64_t)coefs[i] * c + round) >> s);
            coefs[i] = out * ssign;
        }
    }
}
/**
 * Round the magnitude of a SoftFloat mantissa to its upper 10 bits (adds
 * 0x00200000, then clears the low 22 bits) and restore the sign. The
 * exponent is copied unchanged.
 */
static av_always_inline SoftFloat flt16_round(SoftFloat pf)
{
    SoftFloat res;
    const int neg = pf.mant >> 31;      /* all ones for a negative mantissa */

    res.mant = (pf.mant ^ neg) - neg;   /* magnitude */
    res.mant = (res.mant + 0x00200000U) & 0xFFC00000U;
    res.mant = (res.mant ^ neg) - neg;  /* sign restored */
    res.exp  = pf.exp;
    return res;
}
/**
 * Reduce the magnitude of a SoftFloat mantissa to its upper 10 bits after
 * adding 0x001FFFFF plus one extra bit taken from the mantissa, then
 * restore the sign. The exponent is copied unchanged.
 *
 * NOTE(review): '>>' binds tighter than '&', so the extra term is
 * (mant & (0x00400000U >> 16)), i.e. mant & 0x40, not bit 22 shifted down.
 * The grouping is written out explicitly here to keep the existing
 * results; confirm whether that is the intended rounding.
 */
static av_always_inline SoftFloat flt16_even(SoftFloat pf)
{
    SoftFloat res;
    const int neg = pf.mant >> 31;      /* all ones for a negative mantissa */

    res.mant = (pf.mant ^ neg) - neg;   /* magnitude */
    res.mant = (res.mant + 0x001FFFFFU + (res.mant & (0x00400000U >> 16))) & 0xFFC00000U;
    res.mant = (res.mant ^ neg) - neg;  /* sign restored */
    res.exp  = pf.exp;
    return res;
}
/**
 * Truncate the magnitude of a SoftFloat mantissa to its upper 10 bits
 * (clears the low 22 bits) and restore the sign. The exponent is copied
 * unchanged.
 */
static av_always_inline SoftFloat flt16_trunc(SoftFloat pf)
{
    SoftFloat res;
    const int neg = pf.mant >> 31;      /* all ones for a negative mantissa */

    res.mant = (pf.mant ^ neg) - neg;   /* magnitude */
    res.mant = res.mant & 0xFFC00000U;
    res.mant = (res.mant ^ neg) - neg;  /* sign restored */
    res.exp  = pf.exp;
    return res;
}
/**
 * Run one step of the per-coefficient predictor (SoftFloat arithmetic) and
 * update its state.
 *
 * @param ps            predictor state, read and then overwritten
 * @param coef          coefficient; the prediction is added to it when
 *                      output_enable is non-zero
 * @param output_enable non-zero to apply the prediction to *coef; the state
 *                      is updated either way
 */
static av_always_inline void predict(PredictorState *ps, int *coef,
                                     int output_enable)
{
    const SoftFloat a     = { 1023410176, 0 };  // 61.0 / 64
    const SoftFloat alpha = {  973078528, 0 };  // 29.0 / 32
    SoftFloat e0, e1;
    SoftFloat pv;
    SoftFloat k1, k2;
    SoftFloat   r0 = ps->r0,     r1 = ps->r1;
    SoftFloat cor0 = ps->cor0, cor1 = ps->cor1;
    SoftFloat var0 = ps->var0, var1 = ps->var1;
    SoftFloat tmp;

    /* gains stay zero until the variance exceeds its reset value
     * (mantissa 0x20000000, exponent 1) */
    if (var0.exp > 1 || (var0.exp == 1 && var0.mant > 0x20000000)) {
        k1 = av_mul_sf(cor0, flt16_even(av_div_sf(a, var0)));
    }
    else {
        k1.mant = 0;
        k1.exp = 0;
    }

    if (var1.exp > 1 || (var1.exp == 1 && var1.mant > 0x20000000)) {
        k2 = av_mul_sf(cor1, flt16_even(av_div_sf(a, var1)));
    }
    else {
        k2.mant = 0;
        k2.exp = 0;
    }

    tmp = av_mul_sf(k1, r0);
    pv = flt16_round(av_add_sf(tmp, av_mul_sf(k2, r1)));
    if (output_enable) {
        int shift = 28 - pv.exp;

        if (shift < 31) {
            if (shift > 0) {
                *coef += (pv.mant + (1 << (shift - 1))) >> shift;
            } else {
                /* shift <= 0 used to reach "1 << (shift - 1)" and
                 * ">> shift" with a negative count (undefined); scale up
                 * instead.
                 * NOTE(review): assumes -shift < 32 - confirm the
                 * exponent range of pv */
                *coef += (unsigned)pv.mant << -shift;
            }
        }
    }

    e0 = av_int2sf(*coef, 2);
    e1 = av_sub_sf(e0, tmp);

    ps->cor1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor1), av_mul_sf(r1, e1)));
    tmp = av_add_sf(av_mul_sf(r1, r1), av_mul_sf(e1, e1));
    tmp.exp--;   /* halve the sum */
    ps->var1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var1), tmp));
    ps->cor0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor0), av_mul_sf(r0, e0)));
    tmp = av_add_sf(av_mul_sf(r0, r0), av_mul_sf(e0, e0));
    tmp.exp--;   /* halve the sum */
    ps->var0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var0), tmp));

    ps->r1 = flt16_trunc(av_mul_sf(a, av_sub_sf(r0, av_mul_sf(k1, e0))));
    ps->r0 = flt16_trunc(av_mul_sf(a, e0));
}
/* Eighth-octave factors 2^(n/8), n = 0..7, converted with Q30(); the
 * coupling functions below index this table with the low three bits of
 * the coupling gain. */
static const int cce_scale_fixed[8] = {
Q30(1.0), //2^(0/8)
Q30(1.0905077327), //2^(1/8)
Q30(1.1892071150), //2^(2/8)
Q30(1.2968395547), //2^(3/8)
Q30(1.4142135624), //2^(4/8)
Q30(1.5422108254), //2^(5/8)
Q30(1.6817928305), //2^(6/8)
Q30(1.8340080864), //2^(7/8)
};
/**
 * Apply dependent channel coupling (applied before IMDCT).
 *
 * For every scalefactor band of the coupling element that is not ZERO_BT,
 * the coupling channel's coefficients are scaled and added to the target.
 * The integer gain is split into a table factor (low three bits) and a
 * power-of-two shift ((|gain| - 1024) >> 3); a negative gain negates the
 * factor. Nothing is done (apart from logging an error) when the object
 * type is AOT_AAC_LTP.
 *
 * @param ac     decoder context
 * @param target channel receiving the coupled signal
 * @param cce    coupling channel element
 * @param index  index into coupling gain array
 */
static void apply_dependent_coupling_fixed(AACContext *ac,
                                     SingleChannelElement *target,
                                     ChannelElement *cce, int index)
{
    IndividualChannelStream *ics = &cce->ch[0].ics;
    const uint16_t *offsets = ics->swb_offset;
    int *dest = target->coeffs;
    const int *src = cce->ch[0].coeffs;
    int g, i, group, k, idx = 0;
    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Dependent coupling is not supported together with LTP\n");
        return;
    }
    for (g = 0; g < ics->num_window_groups; g++) {
        for (i = 0; i < ics->max_sfb; i++, idx++) {
            if (cce->ch[0].band_type[idx] != ZERO_BT) {
                const int gain = cce->coup.gain[index][idx];
                int shift, round, c, tmp;

                if (gain < 0) {
                    c = -cce_scale_fixed[-gain & 7];
                    shift = (-gain-1024) >> 3;
                }
                else {
                    c = cce_scale_fixed[gain & 7];
                    shift = (gain-1024) >> 3;
                }

                if (shift < -31) {
                    /* a right shift by 32 or more is undefined; the
                     * contribution is treated as fully shifted out */
                } else if (shift < 0) {
                    shift = -shift;
                    round = 1 << (shift - 1);

                    for (group = 0; group < ics->group_len[g]; group++) {
                        for (k = offsets[i]; k < offsets[i + 1]; k++) {
                            tmp = (int)(((int64_t)src[group * 128 + k] * c +
                                        (int64_t)0x1000000000) >> 37);
                            dest[group * 128 + k] += (tmp + round) >> shift;
                        }
                    }
                }
                else {
                    /* NOTE(review): assumes shift < 32 - confirm the
                     * range of the coupling gain */
                    for (group = 0; group < ics->group_len[g]; group++) {
                        for (k = offsets[i]; k < offsets[i + 1]; k++) {
                            tmp = (int)(((int64_t)src[group * 128 + k] * c +
                                        (int64_t)0x1000000000) >> 37);
                            /* multiply in unsigned: left-shifting a
                             * negative tmp is undefined */
                            dest[group * 128 + k] += tmp * (1U << shift);
                        }
                    }
                }
            }
        }
        dest += ics->group_len[g] * 128;
        src  += ics->group_len[g] * 128;
    }
}
/**
 * Apply independent channel coupling (applied after IMDCT).
 *
 * Adds the coupling channel's output, scaled by the first gain entry of
 * the selected gain row, to the target's output. The integer gain is split
 * into a table factor (low three bits) and a power-of-two shift
 * ((gain - 1024) >> 3). 1024 samples are processed, or 2048 when the
 * m4ac.sbr field equals 1.
 *
 * NOTE(review): unlike apply_dependent_coupling_fixed() there is no
 * separate branch for a negative gain here - confirm that this is
 * intended.
 *
 * @param ac     decoder context
 * @param target channel receiving the coupled signal
 * @param cce    coupling channel element
 * @param index  index into coupling gain array
 */
static void apply_independent_coupling_fixed(AACContext *ac,
                                       SingleChannelElement *target,
                                       ChannelElement *cce, int index)
{
    int i, c, shift, round, tmp;
    const int gain = cce->coup.gain[index][0];
    const int *src = cce->ch[0].ret;
    int *dest = target->ret;
    const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);

    c = cce_scale_fixed[gain & 7];
    shift = (gain-1024) >> 3;
    if (shift < -31) {
        /* a right shift by 32 or more is undefined; the contribution is
         * treated as fully shifted out */
        return;
    } else if (shift < 0) {
        shift = -shift;
        round = 1 << (shift - 1);

        for (i = 0; i < len; i++) {
            tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
            dest[i] += (tmp + round) >> shift;
        }
    }
    else {
        /* NOTE(review): assumes shift < 32 - confirm the range of the
         * coupling gain */
        for (i = 0; i < len; i++) {
            tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
            /* multiply in unsigned: left-shifting a negative tmp is
             * undefined */
            dest[i] += tmp * (1U << shift);
        }
    }
}
#include "aacdec_template.c"
/**
 * Codec descriptor for the fixed-point AAC decoder built from this file
 * (the file defines USE_FIXED as 1). Output is planar signed 32-bit
 * (AV_SAMPLE_FMT_S32P); the private context is an AACContext.
 */
AVCodec ff_aac_fixed_decoder = {
.name = "aac_fixed",
.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_AAC,
.priv_data_size = sizeof(AACContext),
.init = aac_decode_init,
.close = aac_decode_close,
.decode = aac_decode_frame,
/* list terminated by AV_SAMPLE_FMT_NONE */
.sample_fmts = (const enum AVSampleFormat[]) {
AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
},
.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
.channel_layouts = aac_channel_layout,
.flush = flush,
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,82 @@
/*
* AAC decoder data
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC decoder data
* @author Oded Shimon ( ods15 ods15 dyndns org )
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
#ifndef AVCODEC_AACDECTAB_H
#define AVCODEC_AACDECTAB_H
#include "libavutil/channel_layout.h"
#include "aac.h"
#include <stdint.h>
/* @name ltp_coef
* Table of the LTP coefficients
*
* The eight entries are wrapped in Q30() and stored as INTFLOAT; both are
* defined outside this header (presumably so the same table serves the
* float and the fixed-point build - confirm).
*/
static const INTFLOAT ltp_coef[8] = {
Q30(0.570829f), Q30(0.696616f), Q30(0.813004f), Q30(0.911304f),
Q30(0.984900f), Q30(1.067894f), Q30(1.194601f), Q30(1.369533f),
};
/* One count per channel configuration index (16 entries); presumably the
 * number of entries used in the matching aac_channel_layout_map row -
 * TODO confirm against the decoder. */
static const int8_t tags_per_config[16] = { 0, 1, 1, 2, 3, 3, 4, 5, 0, 0, 0, 4, 5, 0, 5, 0 };
/* Default element layout per row: up to five elements, each described by
 * three bytes { element type (TYPE_*), a small number (presumably the
 * element instance id - confirm), position (AAC_CHANNEL_*) }.
 * The array is declared with 16 rows but only 13 are initialised; the
 * remaining rows are zero. */
static const uint8_t aac_channel_layout_map[16][5][3] = {
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, },
{ { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_SCE, 1, AAC_CHANNEL_BACK }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 1, AAC_CHANNEL_BACK }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 1, AAC_CHANNEL_BACK }, { TYPE_LFE, 0, AAC_CHANNEL_LFE }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 1, AAC_CHANNEL_FRONT }, { TYPE_CPE, 2, AAC_CHANNEL_BACK }, { TYPE_LFE, 0, AAC_CHANNEL_LFE }, },
{ { 0, } },
{ { 0, } },
{ { 0, } },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 1, AAC_CHANNEL_BACK }, { TYPE_SCE, 1, AAC_CHANNEL_BACK }, { TYPE_LFE, 0, AAC_CHANNEL_LFE }, },
{ { TYPE_SCE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 0, AAC_CHANNEL_FRONT }, { TYPE_CPE, 1, AAC_CHANNEL_SIDE }, { TYPE_CPE, 2, AAC_CHANNEL_BACK }, { TYPE_LFE, 0, AAC_CHANNEL_LFE }, },
{ { 0, } },
/* TODO: Add 7+1 TOP configuration */
};
/* Channel layout masks, row for row parallel to aac_channel_layout_map
 * (rows without a layout hold 0). The decoders in this commit publish this
 * array through AVCodec.channel_layouts.
 * The array is declared with 16 entries but only 13 are initialised; the
 * remaining entries are zero. */
static const uint64_t aac_channel_layout[16] = {
AV_CH_LAYOUT_MONO,
AV_CH_LAYOUT_STEREO,
AV_CH_LAYOUT_SURROUND,
AV_CH_LAYOUT_4POINT0,
AV_CH_LAYOUT_5POINT0_BACK,
AV_CH_LAYOUT_5POINT1_BACK,
AV_CH_LAYOUT_7POINT1_WIDE_BACK,
0,
0,
0,
AV_CH_LAYOUT_6POINT1,
AV_CH_LAYOUT_7POINT1,
0,
/* AV_CH_LAYOUT_7POINT1_TOP, */
};
#endif /* AVCODEC_AACDECTAB_H */

View File

@@ -0,0 +1,925 @@
/*
* AAC encoder
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder
*/
/***********************************
* TODOs:
* add sane pulse detection
***********************************/
#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "put_bits.h"
#include "internal.h"
#include "mpeg4audio.h"
#include "kbdwin.h"
#include "sinewin.h"
#include "aac.h"
#include "aactab.h"
#include "aacenc.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
#include "psymodel.h"
/**
 * Write the AudioSpecificConfig for this encoder into avctx->extradata.
 * The extradata buffer and its size are taken as they are; nothing is
 * allocated here.
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;
    PutBitContext bw;

    init_put_bits(&bw, avctx->extradata, avctx->extradata_size);

    /* object type, sample rate and channel configuration */
    put_bits(&bw, 5, enc->profile + 1);       // profile
    put_bits(&bw, 4, enc->samplerate_index);  // sample rate index
    put_bits(&bw, 4, enc->channels);

    /* GASpecificConfig */
    put_bits(&bw, 1, 0);                      // frame length - 1024 samples
    put_bits(&bw, 1, 0);                      // does not depend on core coder
    put_bits(&bw, 1, 0);                      // is not extension

    /* sync extension that explicitly marks SBR as absent */
    put_bits(&bw, 11, 0x2b7);                 // sync extension
    put_bits(&bw, 5,  AOT_SBR);
    put_bits(&bw, 1,  0);

    flush_put_bits(&bw);
}
/* Emits the signature of a windowing function named apply_<type>_window.
 * The body that follows each use can refer to the parameters fdsp, sce
 * and audio. */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
SingleChannelElement *sce, \
const float *audio)
/* Window for ONLY_LONG_SEQUENCE: the first 1024 samples are multiplied by
 * the long window chosen by use_kb_window[0], the second 1024 by the
 * reversed long window chosen by use_kb_window[1]. Output goes to
 * sce->ret_buf. */
WINDOW_FUNC(only_long)
{
    float *dst         = sce->ret_buf;
    const float *first = ff_sine_1024;
    const float *second = ff_sine_1024;

    if (sce->ics.use_kb_window[0])
        first = ff_aac_kbd_long_1024;
    if (sce->ics.use_kb_window[1])
        second = ff_aac_kbd_long_1024;

    fdsp->vector_fmul(dst, audio, first, 1024);
    fdsp->vector_fmul_reverse(dst + 1024, audio + 1024, second, 1024);
}
/* Window for LONG_START_SEQUENCE. Output layout in sce->ret_buf:
 *   [0, 1024)     input times the long window (use_kb_window[1])
 *   [1024, 1472)  input copied unchanged
 *   [1472, 1600)  input times the reversed short window (use_kb_window[0])
 *   [1600, 2048)  zeros */
WINDOW_FUNC(long_start)
{
    float *dst              = sce->ret_buf;
    const float *long_win   = ff_sine_1024;
    const float *short_win  = ff_sine_128;

    if (sce->ics.use_kb_window[1])
        long_win = ff_aac_kbd_long_1024;
    if (sce->ics.use_kb_window[0])
        short_win = ff_aac_kbd_short_128;

    fdsp->vector_fmul(dst, audio, long_win, 1024);
    memcpy(dst + 1024, audio + 1024, sizeof(dst[0]) * 448);
    fdsp->vector_fmul_reverse(dst + 1024 + 448, audio + 1024 + 448, short_win, 128);
    memset(dst + 1024 + 576, 0, sizeof(dst[0]) * 448);
}
/* Window for LONG_STOP_SEQUENCE. Output layout in sce->ret_buf:
 *   [0, 448)      zeros
 *   [448, 576)    input times the short window (use_kb_window[1])
 *   [576, 1024)   input copied unchanged
 *   [1024, 2048)  input times the reversed long window (use_kb_window[0]) */
WINDOW_FUNC(long_stop)
{
    float *dst              = sce->ret_buf;
    const float *long_win   = ff_sine_1024;
    const float *short_win  = ff_sine_128;

    if (sce->ics.use_kb_window[0])
        long_win = ff_aac_kbd_long_1024;
    if (sce->ics.use_kb_window[1])
        short_win = ff_aac_kbd_short_128;

    memset(dst, 0, sizeof(dst[0]) * 448);
    fdsp->vector_fmul(dst + 448, audio + 448, short_win, 128);
    memcpy(dst + 576, audio + 576, sizeof(dst[0]) * 448);
    fdsp->vector_fmul_reverse(dst + 1024, audio + 1024, long_win, 1024);
}
/* Window for EIGHT_SHORT_SEQUENCE: eight overlapping 256-sample blocks are
 * produced, block w reading input starting at sample 448 + 128 * w. The
 * rising half of block 0 uses the use_kb_window[0] window, the rising half
 * of later blocks the use_kb_window[1] window; every falling half uses the
 * reversed use_kb_window[0] window. */
WINDOW_FUNC(eight_short)
{
    const float *cur_win  = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *prev_win = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    int blk;

    for (blk = 0; blk < 8; blk++) {
        const float *src = audio + 448 + 128 * blk;
        float *dst       = sce->ret_buf + 256 * blk;

        fdsp->vector_fmul(dst, src, blk ? prev_win : cur_win, 128);
        fdsp->vector_fmul_reverse(dst + 128, src + 128, cur_win, 128);
    }
}
/*
 * Dispatch table mapping a window sequence value to the function that
 * windows the input samples for it. Indexed with
 * sce->ics.window_sequence[0] by apply_window_and_mdct().
 */
static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
SingleChannelElement *sce,
const float *audio) = {
[ONLY_LONG_SEQUENCE] = apply_only_long_window,
[LONG_START_SEQUENCE] = apply_long_start_window,
[EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
[LONG_STOP_SEQUENCE] = apply_long_stop_window
};
/*
 * Window the input samples of one channel and transform them.
 *
 * The windowed samples go to sce->ret_buf and the MDCT output to
 * sce->coeffs (one 1024-point transform, or eight 128-point transforms for
 * EIGHT_SHORT_SEQUENCE). Afterwards the second 1024 samples of audio are
 * moved to its start and the fresh coefficients are saved in sce->pcoeffs.
 */
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    const int sequence = sce->ics.window_sequence[0];
    float *windowed = sce->ret_buf;
    int offset;

    apply_window[sequence](s->fdsp, sce, audio);

    if (sequence == EIGHT_SHORT_SEQUENCE) {
        /* each short transform consumes 256 windowed samples */
        for (offset = 0; offset < 1024; offset += 128)
            s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[offset],
                                 windowed + offset*2);
    } else {
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, windowed);
    }

    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
    memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
}
/**
 * Write the ics_info element to the encoder's bitstream.
 * Long windows carry a 6-bit max_sfb and the predictor flag; short windows
 * carry a 4-bit max_sfb and seven grouping bits derived from group_len[1..7].
 * @see Table 4.6 (syntax of ics_info)
 */
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
{
    PutBitContext *pb = &s->pb;
    int win;

    put_bits(pb, 1, 0);                         /* ics_reserved bit */
    put_bits(pb, 2, info->window_sequence[0]);
    put_bits(pb, 1, info->use_kb_window[0]);

    if (info->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        put_bits(pb, 4, info->max_sfb);
        for (win = 1; win < 8; win++)
            put_bits(pb, 1, !info->group_len[win]);
        return;
    }

    put_bits(pb, 6, info->max_sfb);
    put_bits(pb, 1, !!info->predictor_present);
}
/**
 * Write the M/S stereo information of a channel pair.
 * The 2-bit ms_mode is always written; the per-band mask follows only
 * when ms_mode is 1, one bit per band below max_sfb for each window group.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 */
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
{
    const IndividualChannelStream *ics = &cpe->ch[0].ics;
    int group, sfb;

    put_bits(pb, 2, cpe->ms_mode);
    if (cpe->ms_mode != 1)
        return;

    for (group = 0; group < ics->num_windows; group += ics->group_len[group]) {
        for (sfb = 0; sfb < ics->max_sfb; sfb++)
            put_bits(pb, 1, cpe->ms_mask[group*16 + sfb]);
    }
}
/**
 * Finalize per-frame side information of a channel element.
 *
 * For each of the chans channels: clears the pulse count, sets max_sfb to
 * the highest band count that still contains a non-zero band, and marks a
 * band of a window group as zero only when it is zero in every window of
 * that group. When two channels share a common window, both get the larger
 * max_sfb and ms_mode is derived from the number of set ms_mask entries
 * (0 = none, 1 = some, 2 = all).
 */
static void adjust_frame_information(ChannelElement *cpe, int chans)
{
    int ch, w, w2, g;

    for (ch = 0; ch < chans; ch++) {
        SingleChannelElement *sce = &cpe->ch[ch];
        IndividualChannelStream *ics = &sce->ics;
        int highest = 0;

        sce->pulse.num_pulse = 0;

        /* find the number of bands up to the last non-zero one */
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (w2 = 0; w2 < ics->group_len[w]; w2++) {
                int last = ics->num_swb;

                while (last > 0 && sce->zeroes[w*16 + last - 1])
                    last--;
                if (last > highest)
                    highest = last;
            }
        }
        ics->max_sfb = highest;

        /* adjust zero bands for window groups */
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            const int group_end = w + ics->group_len[w];

            for (g = 0; g < ics->max_sfb; g++) {
                int all_zero = 1;

                for (w2 = w; all_zero && w2 < group_end; w2++)
                    all_zero = sce->zeroes[w2*16 + g] != 0;
                sce->zeroes[w*16 + g] = all_zero;
            }
        }
    }

    if (chans > 1 && cpe->common_window) {
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int marked = 0;
        int row, sfb;

        /* both channels must signal the same band count */
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;

        for (row = 0; row < ics0->num_windows*16; row += 16) {
            for (sfb = 0; sfb < ics0->max_sfb; sfb++) {
                if (cpe->ms_mask[row + sfb])
                    marked++;
            }
        }

        if (!marked || !ics0->max_sfb)
            cpe->ms_mode = 0;
        else if (marked < ics0->max_sfb * ics0->num_windows)
            cpe->ms_mode = 1;
        else
            cpe->ms_mode = 2;
    }
}
/*
 * Apply intensity stereo to the coefficients of a channel pair.
 * For every band flagged in is_mask, channel 0 receives the scaled sum
 * (or difference, depending on channel 1's band type) of both channels and
 * channel 1 is cleared. Does nothing without a common window.
 */
static void apply_intensity_stereo(ChannelElement *cpe)
{
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    float *ch0 = cpe->ch[0].coeffs;
    float *ch1 = cpe->ch[1].coeffs;
    int w, w2, g, i;

    if (!cpe->common_window)
        return;

    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 = 0; w2 < ics->group_len[w]; w2++) {
            int pos = (w + w2) * 128;

            /* pos advances by the band size on every iteration, skipped or not */
            for (g = 0; g < ics->num_swb; pos += ics->swb_sizes[g], g++) {
                const int band = w*16 + g;
                float scale;
                int sign;

                if (!cpe->is_mask[band])
                    continue;

                /* band type 15 yields +1, band type 14 yields -1 */
                sign  = -1 + 2 * (cpe->ch[1].band_type[band] - 14);
                scale = cpe->ch[0].is_ener[band];

                for (i = 0; i < ics->swb_sizes[g]; i++) {
                    float sum = (ch0[pos+i] + sign*ch1[pos+i])*scale;
                    ch0[pos+i] = sum;
                    ch1[pos+i] = 0.0f;
                }
            }
        }
    }
}
/*
 * Convert the bands flagged in ms_mask of a channel pair.
 * Channel 0 receives the average of both channels; channel 1 receives that
 * average minus its own previous value. Does nothing without a common window.
 */
static void apply_mid_side_stereo(ChannelElement *cpe)
{
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    float *ch0 = cpe->ch[0].coeffs;
    float *ch1 = cpe->ch[1].coeffs;
    int w, w2, g, i;

    if (!cpe->common_window)
        return;

    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 = 0; w2 < ics->group_len[w]; w2++) {
            int pos = (w + w2) * 128;

            /* pos advances by the band size on every iteration, skipped or not */
            for (g = 0; g < ics->num_swb; pos += ics->swb_sizes[g], g++) {
                if (!cpe->ms_mask[w*16 + g])
                    continue;

                for (i = 0; i < ics->swb_sizes[g]; i++) {
                    float mid  = (ch0[pos+i] + ch1[pos+i]) * 0.5f;
                    float side = mid - ch1[pos+i];
                    ch0[pos+i] = mid;
                    ch1[pos+i] = side;
                }
            }
        }
    }
}
/**
 * Encode scalefactor band coding type.
 * Lets the coder fix up scalefactors of special bands first (when it
 * provides that hook), then has it encode the band info of each window group.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    const IndividualChannelStream *ics = &sce->ics;
    int group = 0;

    if (s->coder->set_special_band_scalefactors)
        s->coder->set_special_band_scalefactors(s, sce);

    while (group < ics->num_windows) {
        s->coder->encode_window_bands_info(s, sce, group,
                                           ics->group_len[group], s->lambda);
        group += ics->group_len[group];
    }
}
/**
 * Encode scalefactors.
 * Each non-zero band below max_sfb is coded as the difference to the
 * previous value of its own kind (noise, intensity or regular). The first
 * noise band is written as a fixed-width value; all other differences use
 * the scalefactor code table.
 */
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
{
    int last_sf    = sce->sf_idx[0];
    int last_noise = sce->sf_idx[0] - NOISE_OFFSET;
    int last_is    = 0;
    int noise_flag = 1;
    int w, g;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (g = 0; g < sce->ics.max_sfb; g++) {
            const int band = w*16 + g;
            int delta;

            if (sce->zeroes[band])
                continue;

            switch (sce->band_type[band]) {
            case NOISE_BT:
                delta      = sce->sf_idx[band] - last_noise;
                last_noise = sce->sf_idx[band];
                if (noise_flag-- > 0) {
                    /* first noise band: fixed-width value, no table lookup */
                    put_bits(&s->pb, NOISE_PRE_BITS, delta + NOISE_PRE);
                    continue;
                }
                break;
            case INTENSITY_BT:
            case INTENSITY_BT2:
                delta   = sce->sf_idx[band] - last_is;
                last_is = sce->sf_idx[band];
                break;
            default:
                delta   = sce->sf_idx[band] - last_sf;
                last_sf = sce->sf_idx[band];
                break;
            }

            delta += SCALE_DIFF_ZERO;
            av_assert0(delta >= 0 && delta <= 120);
            put_bits(&s->pb, ff_aac_scalefactor_bits[delta], ff_aac_scalefactor_code[delta]);
        }
    }
}
/**
 * Encode pulse data.
 * Writes the presence flag and, when pulses exist, their count, the start
 * value and the position/amplitude pair of each pulse.
 */
static void encode_pulses(AACEncContext *s, Pulse *pulse)
{
    PutBitContext *pb = &s->pb;
    int idx;

    put_bits(pb, 1, !!pulse->num_pulse);
    if (pulse->num_pulse == 0)
        return;

    put_bits(pb, 2, pulse->num_pulse - 1);
    put_bits(pb, 6, pulse->start);

    for (idx = 0; idx < pulse->num_pulse; idx++) {
        put_bits(pb, 5, pulse->pos[idx]);
        put_bits(pb, 4, pulse->amp[idx]);
    }
}
/**
 * Encode spectral coefficients processed by psychoacoustic model.
 * For every window group, each non-zero band below max_sfb is quantized
 * and written once per window of the group.
 */
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
{
    int w, w2, g;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        int offset = 0;

        /* offset advances by the band size on every iteration, skipped or not */
        for (g = 0; g < sce->ics.max_sfb; offset += sce->ics.swb_sizes[g], g++) {
            const int band = w*16 + g;

            if (sce->zeroes[band])
                continue;

            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
                s->coder->quantize_and_encode_band(s, &s->pb,
                                                   &sce->coeffs[offset + w2*128],
                                                   NULL, sce->ics.swb_sizes[g],
                                                   sce->sf_idx[band],
                                                   sce->band_type[band],
                                                   s->lambda,
                                                   sce->ics.window_clipping[w]);
            }
        }
    }
}
/**
 * Downscale spectral coefficients for near-clipping windows to avoid artifacts
 * Nothing is changed unless the channel's clip avoidance factor is below 1;
 * otherwise every coefficient of the bands below max_sfb, in every window,
 * is multiplied by that factor.
 */
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
{
    const float factor = sce->ics.clip_avoidance_factor;
    int win, sfb, k;

    if (!(factor < 1.0f))
        return;

    for (win = 0; win < sce->ics.num_windows; win++) {
        float *band_coeffs = &sce->coeffs[win*128];

        for (sfb = 0; sfb < sce->ics.max_sfb; sfb++) {
            const int band_size = sce->ics.swb_sizes[sfb];

            for (k = 0; k < band_size; k++)
                band_coeffs[k] *= factor;
            band_coeffs += band_size;
        }
    }
}
/**
 * Encode one channel of audio data.
 * Writes, in bitstream order: the first scalefactor index, the ics_info
 * (plus main prediction data) unless a common window already carried it,
 * band info, scalefactors, pulse data, TNS data, the SSR flag and finally
 * the spectral coefficients.
 * @return always 0
 */
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
{
    PutBitContext *pb = &s->pb;

    put_bits(pb, 8, sce->sf_idx[0]);

    if (common_window == 0) {
        put_ics_info(s, &sce->ics);
        if (s->coder->encode_main_pred != NULL)
            s->coder->encode_main_pred(s, sce);
    }

    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);

    put_bits(pb, 1, !!sce->tns.present);
    if (s->coder->encode_tns_info != NULL)
        s->coder->encode_tns_info(s, sce);

    put_bits(pb, 1, 0);   /* ssr */
    encode_spectral_coeffs(s, sce);

    return 0;
}
/**
 * Write some auxiliary information about the created AAC file.
 * Emits a fill element whose payload is the given name. The count field
 * covers the name plus two bytes; counts of 15 or more use the 8-bit
 * escape. The name is written byte-aligned and followed by padding bits.
 */
static void put_bitstream_info(AACEncContext *s, const char *name)
{
    PutBitContext *pb = &s->pb;
    const int text_len = strlen(name);
    const int count    = text_len + 2;
    int pad_bits, pos;

    put_bits(pb, 3, TYPE_FIL);
    put_bits(pb, 4, FFMIN(count, 15));
    if (count >= 15)
        put_bits(pb, 8, count - 14);
    put_bits(pb, 4, 0);   /* extension type - filler */

    /* number of bits the alignment below is about to consume */
    pad_bits = -put_bits_count(pb) & 7;
    avpriv_align_put_bits(pb);

    for (pos = 0; pos < text_len; pos++)
        put_bits(pb, 8, name[pos]);

    put_bits(pb, 12 - pad_bits, 0);
}
/*
 * Copy input samples.
 * Channels are reordered from libavcodec's default order to AAC order.
 * Each channel buffer holds 3072 samples: samples 2048..3071 are moved to
 * 1024..2047, the new frame (if any) is stored from 2048 on, and whatever
 * remains up to 3072 is zeroed.
 */
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
{
    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
    const size_t sample_size   = sizeof(s->planar_samples[0][0]);
    const int fresh            = frame ? frame->nb_samples : 0;
    const int end              = 2048 + fresh;
    int ch;

    for (ch = 0; ch < s->channels; ch++) {
        float *buf = s->planar_samples[ch];

        /* last 1024 samples of the previous frame become the current start */
        memcpy(&buf[1024], &buf[2048], 1024 * sample_size);

        /* new samples, remapped to AAC channel order */
        if (frame)
            memcpy(&buf[2048], frame->extended_data[channel_map[ch]],
                   fresh * sample_size);

        /* silence for whatever the frame did not fill */
        memset(&buf[end], 0, (3072 - end) * sample_size);
    }
}
/**
 * Encode one frame of audio.
 *
 * The function first queues and buffers the input, then windows and
 * transforms every channel, and finally runs a rate loop: the whole frame is
 * coded, and if it exceeds 6144 bits per channel the coefficients are
 * restored, lambda is rescaled and the frame is coded again.
 *
 * @param avctx          codec context
 * @param avpkt          packet receiving the encoded frame
 * @param frame          input samples, or NULL while flushing
 * @param got_packet_ptr set to 1 when a packet was produced
 * @return 0 on success (also when no packet is produced: on the very first
 *         call and once flushing is complete), a negative error code if
 *         queueing or packet allocation fails, AVERROR(EINVAL) if a
 *         transformed channel starts with NaN
 */
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    SingleChannelElement *sce;
    int i, ch, w, chans, tag, start_ch, ret;
    int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    /* flushing already finished */
    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    copy_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    /* the first call only fills the sample buffers */
    if (!avctx->frame_number)
        return 0;

    /* window decision, windowing and MDCT for every channel */
    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            float clip_avoidance_factor;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
            ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_swb_offset_128 [s->samplerate_index]:
                                        ff_swb_offset_1024[s->samplerate_index];
            ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_tns_max_bands_128 [s->samplerate_index]:
                                        ff_tns_max_bands_1024[s->samplerate_index];
            clip_avoidance_factor = 0.0f;
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];
            for (w = 0; w < ics->num_windows; w++) {
                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
                    ics->window_clipping[w] = 1;
                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
                } else {
                    ics->window_clipping[w] = 0;
                }
            }
            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
            } else {
                ics->clip_avoidance_factor = 1.0f;
            }

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
            /* check the channel that was just transformed, not always
             * the first channel of the element */
            if (isnan(cpe->ch[ch].coeffs[0])) {
                av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
                return AVERROR(EINVAL);
            }
            avoid_clipping(s, &cpe->ch[ch]);
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
        return ret;

    /* rate loop: code the frame, retry with a rescaled lambda if too large */
    do {
        int frame_bits;

        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
            put_bitstream_info(s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            cpe->common_window = 0;
            memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
            memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++) {
                sce = &cpe->ch[ch];
                coeffs[ch] = sce->coeffs;
                sce->ics.predictor_present = 0;
                memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
                memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
                for (w = 0; w < 128; w++)
                    if (sce->band_type[w] > RESERVED_BT)
                        sce->band_type[w] = 0;
            }
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            /* a pair shares one window when type, shape and grouping match */
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
                sce = &cpe->ch[ch];
                s->cur_channel = start_ch + ch;
                if (s->options.pns && s->coder->search_for_pns)
                    s->coder->search_for_pns(s, avctx, sce);
                if (s->options.tns && s->coder->search_for_tns)
                    s->coder->search_for_tns(s, sce);
                if (s->options.tns && s->coder->apply_tns_filt)
                    s->coder->apply_tns_filt(s, sce);
                if (sce->tns.present)
                    tns_mode = 1;
            }
            s->cur_channel = start_ch;
            if (s->options.intensity_stereo) { /* Intensity Stereo */
                if (s->coder->search_for_is)
                    s->coder->search_for_is(s, avctx, cpe);
                if (cpe->is_mode) is_mode = 1;
                apply_intensity_stereo(cpe);
            }
            if (s->options.pred) { /* Prediction */
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->search_for_pred)
                        s->coder->search_for_pred(s, sce);
                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
                }
                if (s->coder->adjust_common_prediction)
                    s->coder->adjust_common_prediction(s, cpe);
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->apply_main_pred)
                        s->coder->apply_main_pred(s, sce);
                }
                s->cur_channel = start_ch;
            }
            if (s->options.stereo_mode) { /* Mid/Side stereo */
                if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
                    s->coder->search_for_ms(s, cpe);
                else if (cpe->common_window)
                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
                /* bands coded with intensity stereo must not also use M/S */
                for (w = 0; w < 128; w++)
                    cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
                apply_mid_side_stereo(cpe);
            }
            adjust_frame_information(cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    if (s->coder->encode_main_pred)
                        s->coder->encode_main_pred(s, &cpe->ch[0]);
                    encode_ms_info(&s->pb, cpe);
                    if (cpe->ms_mode) ms_mode = 1;
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }
        if (is_mode || ms_mode || tns_mode || pred_mode) {
            for (i = 0; i < s->chan_map[0]; i++) {
                // Must restore coeffs
                /* take the element type from the channel map: "tag" still
                 * holds the type of the last element coded above */
                chans = s->chan_map[i+1] == TYPE_CPE ? 2 : 1;
                cpe = &s->cpe[i];
                for (ch = 0; ch < chans; ch++)
                    memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
            }
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    /* count flush calls; encoding stops once last_frame reaches 2 */
    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}
/*
 * Release everything owned by the encoder context: both MDCT contexts, the
 * psychoacoustic model and its optional preprocessor, the LPC context, the
 * sample buffer, the channel elements, the float DSP context and the audio
 * frame queue.
 * Returns 0.
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transform and analysis contexts */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);
    ff_psy_end(&enc->psy);
    ff_lpc_end(&enc->lpc);
    if (enc->psypp != NULL)
        ff_psy_preprocess_end(enc->psypp);

    /* heap allocations */
    av_freep(&enc->buffer.samples);
    av_freep(&enc->cpe);
    av_freep(&enc->fdsp);

    ff_af_queue_close(&enc->afq);

    return 0;
}
/*
 * Set up the signal processing helpers of the encoder: the float DSP
 * context, the KBD and sine window tables and the two MDCT contexts.
 * Returns 0 on success, AVERROR(ENOMEM) if the DSP context cannot be
 * allocated, or the negative value reported by ff_mdct_init().
 */
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int err;

    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
    if (s->fdsp == NULL)
        return AVERROR(ENOMEM);

    /* window init */
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    err = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0);
    if (err < 0)
        return err;

    err = ff_mdct_init(&s->mdct128, 8, 0, 32768.0);
    if (err < 0)
        return err;

    return 0;
}
/*
 * Allocate the zero-initialized sample buffer (3 * 1024 samples per
 * channel), the channel element array and the extradata buffer, then point
 * each planar_samples entry at its channel's slice of the sample buffer.
 * Returns 0 on success or AVERROR(ENOMEM) if any allocation fails; nothing
 * is released here on failure.
 */
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int chan;

    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels,
                            3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0],
                            sizeof(ChannelElement), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata,
                      5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for (chan = 0; chan < s->channels; chan++)
        s->planar_samples[chan] = &s->buffer.samples[3 * 1024 * chan];

    return 0;

alloc_fail:
    return AVERROR(ENOMEM);
}
/*
 * Initialize the AAC encoder.
 *
 * Validates sample rate, channel count and profile, clamps the bit rate to
 * 6144 bits per channel and frame, then sets up DSP helpers, buffers,
 * extradata, the psychoacoustic model, the coefficient coder and the audio
 * frame queue.
 *
 * Returns 0 on success or a negative error code. Failures after the
 * parameter checks release everything through aac_encode_end().
 * NOTE(review): ERROR_IF/WARN_IF are defined outside this chunk; ERROR_IF
 * presumably logs and returns an error from this function - confirm.
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    /* look up the MPEG-4 sample rate index; i == 16 means "not found" */
    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    ERROR_IF(i == 16 || i >= ff_aac_swb_size_1024_len || i >= ff_aac_swb_size_128_len,
             "Unsupported sample rate %d\n", avctx->sample_rate);
    /* channels - 1 indexes aac_chan_configs below, so reject counts below 1
     * as well as counts above the maximum */
    ERROR_IF(s->channels < 1 || s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested, clamping to max\n");

    if (avctx->profile == FF_PROFILE_AAC_MAIN) {
        s->profile = 0; /* Main */
        s->options.pred = 1;
    } else if ((avctx->profile == FF_PROFILE_AAC_LOW ||
                avctx->profile == FF_PROFILE_UNKNOWN) && s->options.pred) {
        s->profile = 0; /* Main */
        WARN_IF(1, "Prediction requested, changing profile to AAC-Main\n");
    } else if (avctx->profile == FF_PROFILE_AAC_LOW ||
               avctx->profile == FF_PROFILE_UNKNOWN) {
        s->profile = 1; /* Low */
    } else {
        ERROR_IF(1, "Unsupported profile %d\n", avctx->profile);
    }

    /* intensity stereo and PNS are only kept with the two-loop coder */
    if (s->options.aac_coder != AAC_CODER_TWOLOOP) {
        s->options.intensity_stereo = 0;
        s->options.pns = 0;
    }

    avctx->bit_rate = (int)FFMIN(
        6144 * s->channels / 1024.0 * avctx->sample_rate,
        avctx->bit_rate);

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)) < 0)
        goto fail;

    if ((ret = alloc_buffers(avctx, s)) < 0)
        goto fail;

    /* must match the extradata size allocated in alloc_buffers() */
    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = ff_aac_swb_size_1024[i];
    sizes[1]   = ff_aac_swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    /* i is reused from here on as the channel element index */
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
                           s->chan_map[0], grouping)) < 0)
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[s->options.aac_coder];
    ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);

    if (HAVE_MIPSDSPR1)
        ff_aac_coder_init_mips(s);

    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;

    ff_aac_tableinit();

    avctx->initial_padding = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}
/*
 * Flags attached to every AAC encoder option: audio encoding parameter.
 * The replacement list is parenthesized so the macro stays a single operand
 * wherever it is expanded.
 */
#define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/*
 * Private options of the AAC encoder. Each integer option stores its value
 * in a field of AACEncContext.options; the AV_OPT_TYPE_CONST rows that
 * follow an option are the named values accepted for it.
 */
static const AVOption aacenc_options[] = {
/* stereo_mode: -1 = chosen by the encoder, 0 = M/S off (default), 1 = M/S forced */
{"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
{"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
{"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
{"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
/* aac_coder: coding algorithm, defaults to the two-loop search */
{"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
{"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
{"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
{"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
{"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
/* aac_pns: perceptual noise substitution, enabled by default */
{"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "aac_pns"},
{"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
{"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
/* aac_is: intensity stereo, enabled by default */
{"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
{"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
{"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
/* aac_tns: temporal noise shaping, disabled by default */
{"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_tns"},
{"disable", "Disable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
{"enable", "Enable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
/* aac_pred: AAC-Main prediction, disabled by default */
{"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pred"},
{"disable", "Disable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
{"enable", "Enable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
/* terminator */
{NULL}
};
/*
 * AVClass of the encoder's private context; it ties the option table
 * aacenc_options to the encoder (referenced by ff_aac_encoder.priv_class).
 */
static const AVClass aacenc_class = {
"AAC encoder",
av_default_item_name,
aacenc_options,
LIBAVUTIL_VERSION_INT,
};
/*
 * Codec descriptor of the AAC encoder: wires aac_encode_init(),
 * aac_encode_frame() and aac_encode_end() into libavcodec. The encoder
 * accepts planar float samples only and is flagged as experimental.
 */
AVCodec ff_aac_encoder = {
.name = "aac",
.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
.type = AVMEDIA_TYPE_AUDIO,
.id = AV_CODEC_ID_AAC,
.priv_data_size = sizeof(AACEncContext),
.init = aac_encode_init,
.encode2 = aac_encode_frame,
.close = aac_encode_end,
.supported_samplerates = mpeg4audio_sample_rates,
.capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
AV_CODEC_CAP_EXPERIMENTAL,
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
AV_SAMPLE_FMT_NONE },
.priv_class = &aacenc_class,
};

View File

@@ -0,0 +1,113 @@
/*
* AAC encoder
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACENC_H
#define AVCODEC_AACENC_H
#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
#include "audio_frame_queue.h"
#include "psymodel.h"
#include "lpc.h"
/**
 * Identifiers of the coefficient coding algorithms selectable through the
 * "aac_coder" option. The value is used as an index into ff_aac_coders[].
 */
typedef enum AACCoder {
AAC_CODER_FAAC = 0,
AAC_CODER_ANMR,
AAC_CODER_TWOLOOP,
AAC_CODER_FAST,
AAC_CODER_NB, ///< upper bound of the option range is AAC_CODER_NB-1
}AACCoder;
/**
 * User-settable encoder options; each field is the target of one entry of
 * the encoder's AVOption table.
 */
typedef struct AACEncOptions {
int stereo_mode; ///< "stereo_mode": -1 = auto, 0 = M/S off, 1 = M/S forced
int aac_coder; ///< "aac_coder": one of the AACCoder values
int pns; ///< "aac_pns": perceptual noise substitution on/off
int tns; ///< "aac_tns": temporal noise shaping on/off
int pred; ///< "aac_pred": AAC-Main prediction on/off
int intensity_stereo; ///< "aac_is": intensity stereo on/off
} AACEncOptions;
struct AACEncContext;
/**
 * Set of callbacks implementing one coefficient coding algorithm.
 *
 * The encoder calls search_for_quantizers, encode_window_bands_info and
 * quantize_and_encode_band unconditionally; every other callback is checked
 * for NULL before use and may therefore be left unset.
 */
typedef struct AACCoefficientsEncoder {
void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s,
SingleChannelElement *sce, const float lambda);
void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
int win, int group_len, const float lambda);
void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size,
int scale_idx, int cb, const float lambda, int rtz);
void (*encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce);
void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
void (*search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce);
} AACCoefficientsEncoder;
extern AACCoefficientsEncoder ff_aac_coders[];
/**
 * AAC encoder context
 */
typedef struct AACEncContext {
AVClass *av_class;
AACEncOptions options; ///< encoding options
PutBitContext pb; ///< bitstream writer for the packet being encoded
FFTContext mdct1024; ///< long (1024 samples) frame transform context
FFTContext mdct128; ///< short (128 samples) frame transform context
AVFloatDSPContext *fdsp; ///< float DSP helpers used for windowing
float *planar_samples[6]; ///< saved preprocessed input
int profile; ///< copied from avctx
LPCContext lpc; ///< used by TNS
int samplerate_index; ///< MPEG-4 samplerate index
int channels; ///< channel count
const uint8_t *chan_map; ///< channel configuration map
ChannelElement *cpe; ///< channel elements
FFPsyContext psy; ///< psychoacoustic model context
struct FFPsyPreprocessContext* psypp; ///< optional preprocessor, may be NULL
AACCoefficientsEncoder *coder; ///< coding algorithm selected by options.aac_coder
int cur_channel; ///< index of the channel currently being processed
int last_frame; ///< counts calls made without input; encoding stops at 2
float lambda; ///< rate control parameter, rescaled after every frame
AudioFrameQueue afq; ///< queue used to derive packet pts and duration
DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
struct {
float *samples; ///< backing storage for planar_samples, 3 * 1024 samples per channel
} buffer;
} AACEncContext;
void ff_aac_coder_init_mips(AACEncContext *c);
#endif /* AVCODEC_AACENC_H */

View File

@@ -0,0 +1,136 @@
/*
* AAC encoder intensity stereo
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder Intensity Stereo
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#include "aacenc.h"
#include "aacenc_utils.h"
#include "aacenc_is.h"
#include "aacenc_quantization.h"
/*
 * Estimate whether coding band g of window group w with intensity stereo
 * is cheaper than coding both channels separately.
 *
 * For every window of the group the cost of quantizing left and right on
 * their own is accumulated in dist1; the cost of quantizing the combined
 * intensity signal, plus a spectral error term, is accumulated in dist2.
 *
 * start       offset of the band inside a window
 * ener0/1/01  energies of channel 0, channel 1 and their sum signal
 * use_pcoeffs read the saved (pcoeffs) instead of the current coefficients
 * phase       sign applied to channel 1 when forming the intensity signal
 *
 * Returns the two distortions, their absolute difference, the phase and a
 * pass flag that is set when dist2 <= dist1.
 */
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
                                         int start, int w, int g, float ener0,
                                         float ener1, float ener01,
                                         int use_pcoeffs, int phase)
{
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    float *L, *R;
    float *L34 = &s->scoefs[256*0];
    float *R34 = &s->scoefs[256*1];
    float *IS  = &s->scoefs[256*2];
    float *I34 = &s->scoefs[256*3];
    float dist1 = 0.0f, dist2 = 0.0f;
    struct AACISError is_error = {0};
    int i, w2;

    if (use_pcoeffs) {
        L = sce0->pcoeffs;
        R = sce1->pcoeffs;
    } else {
        L = sce0->coeffs;
        R = sce1->coeffs;
    }

    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
        const int win  = w + w2;
        const int band = win*16 + g;
        const int pos  = start + win*128;
        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[band];
        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[band];
        int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[band]-4);
        float e01_34 = phase*pow(ener1/ener0, 3.0/4.0);
        float maxval, dist_spec_err = 0.0f;
        float minthr = FFMIN(band0->threshold, band1->threshold);

        /* build the intensity signal for this window */
        for (i = 0; i < sce0->ics.swb_sizes[g]; i++)
            IS[i] = (L[pos+i] + phase*R[pos+i])*sqrt(ener0/ener01);

        abs_pow34_v(L34, &L[pos], sce0->ics.swb_sizes[g]);
        abs_pow34_v(R34, &R[pos], sce0->ics.swb_sizes[g]);
        abs_pow34_v(I34, IS,      sce0->ics.swb_sizes[g]);

        maxval       = find_max_val(1, sce0->ics.swb_sizes[g], I34);
        is_band_type = find_min_book(maxval, is_sf_idx);

        /* cost of coding both channels on their own */
        dist1 += quantize_band_cost(s, &L[pos], L34,
                                    sce0->ics.swb_sizes[g],
                                    sce0->sf_idx[band],
                                    sce0->band_type[band],
                                    s->lambda / band0->threshold, INFINITY, NULL, 0);
        dist1 += quantize_band_cost(s, &R[pos], R34,
                                    sce1->ics.swb_sizes[g],
                                    sce1->sf_idx[band],
                                    sce1->band_type[band],
                                    s->lambda / band1->threshold, INFINITY, NULL, 0);

        /* cost of coding the intensity signal */
        dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
                                    is_sf_idx, is_band_type,
                                    s->lambda / minthr, INFINITY, NULL, 0);

        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
            dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
            dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
        }
        dist_spec_err *= s->lambda / minthr;
        dist2 += dist_spec_err;
    }

    is_error.pass  = dist2 <= dist1;
    is_error.phase = phase;
    is_error.error = fabsf(dist1 - dist2);
    is_error.dist1 = dist1;
    is_error.dist2 = dist2;

    return is_error;
}
/*
 * Decide, band by band, whether a channel pair should use intensity stereo.
 *
 * Only pairs with a common window are considered. A band qualifies when it
 * starts above a lambda-dependent frequency limit and is neither a noise
 * band nor zeroed in either channel. Both phases are evaluated with
 * ff_aac_is_encoding_err(); if the selected candidate passes, the band is
 * flagged in is_mask, the scaling factors are stored in is_ener and
 * channel 1's band type records the phase. cpe->is_mode is set when at
 * least one band was flagged.
 */
void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
{
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    int start = 0, count = 0, w, w2, g, i;
    const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;

    if (!cpe->common_window)
        return;

    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        start = 0;
        for (g = 0;  g < sce0->ics.num_swb; g++) {
            if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
                cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
                cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
                float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
                struct AACISError ph_err1, ph_err2, *erf;

                /* band energies over all windows of the group */
                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                        float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
                        float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
                        ener0  += coef0*coef0;
                        ener1  += coef1*coef1;
                        ener01 += (coef0 + coef1)*(coef0 + coef1);
                    }
                }
                ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                                 ener0, ener1, ener01, 0, -1);
                ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                                 ener0, ener1, ener01, 0, +1);
                erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
                if (erf->pass) {
                    cpe->is_mask[w*16+g] = 1;
                    cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01);
                    cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
                    /* phase is -1 or +1, so it must be compared against 0:
                     * a plain truth test would always pick INTENSITY_BT and
                     * the band would later be applied with the wrong sign */
                    cpe->ch[1].band_type[w*16+g] = erf->phase > 0 ? INTENSITY_BT : INTENSITY_BT2;
                    count++;
                }
            }
            start += sce0->ics.swb_sizes[g];
        }
    }
    cpe->is_mode = !!count;
}

View File

@@ -0,0 +1,50 @@
/*
* AAC encoder intensity stereo
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder Intensity Stereo
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENC_IS_H
#define AVCODEC_AACENC_IS_H
#include "aacenc.h"
/** Frequency in Hz for lower limit of intensity stereo **/
#define INT_STEREO_LOW_LIMIT 6100
/**
 * Outcome of comparing the plain left/right coding of a band against its
 * intensity-stereo coding, as returned by ff_aac_is_encoding_err().
 */
struct AACISError {
int pass; /* 1 if dist2 <= dist1 */
int phase; /* -1 or +1 */
float error; /* fabs(dist1 - dist2) */
float dist1; /* From original coeffs */
float dist2; /* From IS'd coeffs */
};
/**
 * Estimate the distortion of coding one band of a channel pair with and
 * without intensity stereo, for the given phase.
 *
 * @param start  offset of the band's first coefficient within a window
 * @param w      index of the first window of the group
 * @param g      scalefactor band index
 * @param ener0  energy of the first channel in the band
 * @param ener1  energy of the second channel in the band
 * @param ener01 energy of the combined channels in the band
 * @param use_pcoeffs presumably selects which coefficient set is analysed -
 *                    TODO confirm against the definition
 * @param phase  -1 or +1, copied into the returned struct
 * @return distortions of both codings and whether intensity stereo passes
 */
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
int start, int w, int g, float ener0,
float ener1, float ener01,
int use_pcoeffs, int phase);
/**
 * Mark the bands of a common-window channel pair that are to be coded with
 * intensity stereo and set cpe->is_mode accordingly.
 */
void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
#endif /* AVCODEC_AACENC_IS_H */

View File

@@ -0,0 +1,342 @@
/*
* AAC encoder main-type prediction
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder main-type prediction
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#include "aactab.h"
#include "aacenc_pred.h"
#include "aacenc_utils.h"
#include "aacenc_is.h" /* <- Needed for common window distortions */
#include "aacenc_quantization.h"
/*
 * Undo a prediction decision for one band: if prediction is flagged for sfb,
 * clear the flag and put back the band type saved in band_alt[].
 * Wrapped in do { } while (0) so it behaves as a single statement (safe in
 * un-braced if/else); note that sfb is still evaluated more than once.
 */
#define RESTORE_PRED(sce, sfb) \
    do { \
        if ((sce)->ics.prediction_used[(sfb)]) { \
            (sce)->ics.prediction_used[(sfb)] = 0; \
            (sce)->band_type[(sfb)] = (sce)->band_alt[(sfb)]; \
        } \
    } while (0)
static inline float flt16_round(float pf)
{
union av_intfloat32 tmp;
tmp.f = pf;
tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
return tmp.f;
}
static inline float flt16_even(float pf)
{
union av_intfloat32 tmp;
tmp.f = pf;
tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
return tmp.f;
}
static inline float flt16_trunc(float pf)
{
union av_intfloat32 pun;
pun.f = pf;
pun.i &= 0xFFFF0000U;
return pun.f;
}
/**
 * Advance one backward-adaptive predictor by one sample.
 *
 * The prediction error against the previous estimate is computed; when
 * `set` is non-zero the input coefficient is replaced by that error. The
 * correlation/variance/state fields are then updated from a snapshot of the
 * old state, and the estimate for the next call is stored in ps->x_est and
 * written to *rcoef.
 *
 * @param ps    predictor state, updated in place
 * @param coef  input coefficient; overwritten with the residual if `set`
 * @param rcoef receives the new estimate
 * @param set   non-zero to replace *coef by the prediction residual
 */
static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32

    /* Snapshot of the state before any field is overwritten */
    const float old_k1   = ps->k1;
    const float old_r0   = ps->r0;
    const float old_r1   = ps->r1;
    const float old_cor0 = ps->cor0;
    const float old_cor1 = ps->cor1;
    const float old_var0 = ps->var0;
    const float old_var1 = ps->var1;

    const float err0 = *coef - ps->x_est;
    const float err1 = err0 - old_k1 * old_r0;
    float next_k2;

    if (set)
        *coef = err0;

    ps->cor0 = flt16_trunc(alpha * old_cor0 + old_r0 * err0);
    ps->var0 = flt16_trunc(alpha * old_var0 + 0.5f * (old_r0 * old_r0 + err0 * err0));
    ps->cor1 = flt16_trunc(alpha * old_cor1 + old_r1 * err1);
    ps->var1 = flt16_trunc(alpha * old_var1 + 0.5f * (old_r1 * old_r1 + err1 * err1));

    ps->r1 = flt16_trunc(a * (old_r0 - old_k1 * err0));
    ps->r0 = flt16_trunc(a * err0);

    /* Coefficients and estimate used on the next call */
    if (ps->var0 > 1)
        ps->k1 = ps->cor0 * flt16_even(a / ps->var0);
    else
        ps->k1 = 0;

    if (ps->var1 > 1)
        next_k2 = ps->cor1 * flt16_even(a / ps->var1);
    else
        next_k2 = 0;

    ps->x_est = flt16_round(ps->k1*ps->r0 + next_k2*ps->r1);
    *rcoef    = ps->x_est;
}
/**
 * Put a single predictor back into its initial state: all history and
 * correlation terms zero, both variances one.
 */
static inline void reset_predict_state(PredictorState *ps)
{
    ps->var0  = 1.0f;
    ps->var1  = 1.0f;

    ps->cor0  = 0.0f;
    ps->cor1  = 0.0f;
    ps->r0    = 0.0f;
    ps->r1    = 0.0f;
    ps->k1    = 0.0f;
    ps->x_est = 0.0f;
}
/**
 * Reset every one of the MAX_PREDICTORS predictors in the array.
 */
static inline void reset_all_predictors(PredictorState *ps)
{
    int idx = 0;

    while (idx < MAX_PREDICTORS) {
        reset_predict_state(ps + idx);
        idx++;
    }
}
/**
 * Reset the predictors belonging to one reset group: starting at index
 * group_num - 1, every 30th predictor up to MAX_PREDICTORS is reset.
 */
static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
{
    PredictorState *states = sce->predictor_state;
    int idx = group_num - 1;

    while (idx < MAX_PREDICTORS) {
        reset_predict_state(&states[idx]);
        idx += 30;
    }
}
/**
 * Run the predictors over the current frame of one channel.
 *
 * For eight-short-window frames all predictors are simply reset. Otherwise
 * each coefficient below the prediction band limit is fed through its
 * predictor (replacing the coefficient by the residual where prediction is
 * enabled for the band), and the scheduled reset group, if any, is reset.
 */
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
{
    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    int sfb, k;

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        reset_all_predictors(sce->predictor_state);
        return;
    }

    for (sfb = 0; sfb < pmax; sfb++) {
        /* Residual substitution only where prediction is active for the band */
        const int use_residual = sce->ics.predictor_present &&
                                 sce->ics.prediction_used[sfb];

        for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++)
            predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
                    use_residual);
    }

    if (sce->ics.predictor_reset_group)
        reset_predictor_group(sce, sce->ics.predictor_reset_group);
}
/*
 * Add `inc` to the per-group reset counters (groups 1..30) and return the
 * first group whose counter exceeds PRED_RESET_FRAME_MIN, or 0 if none does.
 * Counters after the returned group are left untouched for this call.
 * With inc = 0 this only queries whether any group is overdue.
 */
static inline int update_counters(IndividualChannelStream *ics, int inc)
{
    int group = 1;

    while (group < 31) {
        ics->predictor_reset_count[group] += inc;
        if (ics->predictor_reset_count[group] > PRED_RESET_FRAME_MIN)
            return group; /* Reset this immediately */
        group++;
    }
    return 0;
}
/**
 * Reconcile the per-channel prediction decisions of a common-window pair.
 *
 * Does nothing unless the pair shares a window and neither channel uses
 * eight short windows. A band keeps prediction only if both channels flagged
 * it, it lies within [PRED_SFB_START, pmax], and the better of the two
 * ff_aac_is_encoding_err() evaluations (phase -1 / +1) passes; otherwise
 * the prediction decision is rolled back for both channels. predictor_present
 * of both channels is set when at least one band keeps prediction.
 */
void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe)
{
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    const int pmax  = FFMIN(pmax0, pmax1);
    int kept = 0;
    int w, g;

    if (!cpe->common_window)
        return;
    if (sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
        return;
    if (sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
        return;

    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        int start = 0;

        /* `start` advances by the band size on every path, including continue */
        for (g = 0; g < sce0->ics.num_swb; start += sce0->ics.swb_sizes[g], g++) {
            const int sfb = w*16+g;
            const int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
            float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
            struct AACISError err_neg, err_pos, *best;
            int w2, i;

            if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
                RESTORE_PRED(sce0, sfb);
                RESTORE_PRED(sce1, sfb);
                continue;
            }

            for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                const int base = start + (w+w2)*128;

                for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                    const float c0 = sce0->pcoeffs[base+i];
                    const float c1 = sce1->pcoeffs[base+i];

                    ener0  += c0*c0;
                    ener1  += c1*c1;
                    ener01 += (c0 + c1)*(c0 + c1);
                }
            }

            err_neg = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                             ener0, ener1, ener01, 1, -1);
            err_pos = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                             ener0, ener1, ener01, 1, +1);
            if (err_neg.error < err_pos.error)
                best = &err_neg;
            else
                best = &err_pos;

            if (!best->pass) {
                RESTORE_PRED(sce0, sfb);
                RESTORE_PRED(sce1, sfb);
            } else {
                sce0->ics.prediction_used[sfb] = 1;
                sce1->ics.prediction_used[sfb] = 1;
                kept++;
            }
        }
    }

    sce0->ics.predictor_present = !!kept;
    sce1->ics.predictor_present = sce0->ics.predictor_present;
}
/**
 * Choose which predictor reset group (if any) is signalled this frame.
 *
 * The counters are first advanced by one; if any group is overdue
 * (update_counters() returns non-zero) that group is selected immediately.
 * Otherwise the group with the highest counter is selected, provided that
 * counter exceeds PRED_RESET_MIN; failing that no reset is scheduled.
 */
static void update_pred_resets(SingleChannelElement *sce)
{
    /* max_group_id_c is initialised so it is never read indeterminate */
    int i, max_group_id_c = 0, max_frame = 0;
    IndividualChannelStream *ics = &sce->ics;

    /* Update the counters and immediately update any frame behind schedule */
    if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
        return;

    /* Count-based: find the group that has gone longest without a reset */
    for (i = 1; i < 31; i++) {
        if (ics->predictor_reset_count[i] > max_frame) {
            max_group_id_c = i;
            max_frame      = ics->predictor_reset_count[i];
        }
    }

    if (max_frame > PRED_RESET_MIN) {
        ics->predictor_reset_group = max_group_id_c;
    } else {
        ics->predictor_reset_group = 0;
    }
}
/**
 * Decide per scalefactor band whether main-type prediction is used.
 *
 * For each band from PRED_SFB_START up to pmax the cost of coding the
 * original coefficients is compared against the cost of coding with
 * prediction; bands where prediction is not worse (and does not need a
 * higher codebook) are flagged in ics.prediction_used[]. If, summed over
 * all bands, the unpredicted bit cost is lower, every decision is reverted.
 * Eight-short-window frames never use prediction.
 *
 * Uses five 128-float slices of s->scoefs as scratch space.
 */
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
{
int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
/* Scratch buffers: |x|^(3/4) of original (O34), of reconstruction (P34) and
 * of the residual (S34); the residual itself (SENT); and the "out" buffer of
 * the residual quantization (QERR) */
float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
float *QERR = &s->scoefs[128*4];
if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
sce->ics.predictor_present = 0;
return;
}
/* One-time setup: clean predictor state, prcoeffs seeded from the current
 * coefficients, and reset counters staggered as count[i] = i */
if (!sce->ics.predictor_initialized) {
reset_all_predictors(sce->predictor_state);
sce->ics.predictor_initialized = 1;
memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
for (i = 1; i < 31; i++)
sce->ics.predictor_reset_count[i] = i;
}
update_pred_resets(sce);
/* band_alt starts as a copy of the current band types; RESTORE_PRED reads it */
memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
int cost1, cost2, cb_p;
float dist1, dist2, dist_spec_err = 0.0f;
const int cb_n = sce->band_type[sfb];
const int start_coef = sce->ics.swb_offset[sfb];
const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
/* Bands extending past the last predictor cannot be predicted */
if (start_coef + num_coeffs > MAX_PREDICTORS)
continue;
/* Normal coefficients */
abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
O34, num_coeffs, sce->sf_idx[sfb],
cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0);
cost_coeffs += cost1;
/* Encoded coefficients - needed for #bits, band type and quant. error */
for (i = 0; i < num_coeffs; i++)
SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
abs_pow34_v(S34, SENT, num_coeffs);
/* Only band types below RESERVED_BT get a freshly selected codebook */
if (cb_n < RESERVED_BT)
cb_p = find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]);
else
cb_p = cb_n;
/* Return value (distortion) is ignored here; only QERR and cost2 are used */
quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
&cost2, 0);
/* Reconstructed coefficients - needed for distortion measurements */
for (i = 0; i < num_coeffs; i++)
sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
abs_pow34_v(P34, &sce->prcoeffs[start_coef], num_coeffs);
if (cb_n < RESERVED_BT)
cb_p = find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]);
else
cb_p = cb_n;
dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
P34, num_coeffs, sce->sf_idx[sfb],
cb_p, s->lambda / band->threshold, INFINITY, NULL, 0);
/* Penalise the spectral difference between original and reconstruction */
for (i = 0; i < num_coeffs; i++)
dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
dist_spec_err *= s->lambda / band->threshold;
dist2 += dist_spec_err;
/* Prediction wins only if it is not worse and needs no higher codebook */
if (dist2 <= dist1 && cb_p <= cb_n) {
cost_pred += cost2;
sce->ics.prediction_used[sfb] = 1;
sce->band_alt[sfb] = cb_n;
sce->band_type[sfb] = cb_p;
count++;
} else {
cost_pred += cost1;
sce->band_alt[sfb] = cb_p;
}
}
/* Global check: if coding without prediction is cheaper in total bits,
 * roll every band back and clear all prediction flags */
if (count && cost_coeffs < cost_pred) {
count = 0;
for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
RESTORE_PRED(sce, sfb);
memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
}
sce->ics.predictor_present = !!count;
}
/**
* Encoder predictors data.
*/
/*
 * Write the prediction side info of one channel to the bitstream: a one-bit
 * reset flag, the 5-bit reset group when the flag is set, then one
 * prediction_used bit per band below the prediction band limit. Nothing is
 * written when prediction is not present.
 */
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
{
    IndividualChannelStream *ics = &sce->ics;
    int band, band_limit;

    if (!ics->predictor_present)
        return;

    band_limit = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);

    if (ics->predictor_reset_group) {
        put_bits(&s->pb, 1, 1);
        put_bits(&s->pb, 5, ics->predictor_reset_group);
    } else {
        put_bits(&s->pb, 1, 0);
    }

    for (band = 0; band < band_limit; band++)
        put_bits(&s->pb, 1, ics->prediction_used[band]);
}

View File

@@ -0,0 +1,47 @@
/*
* AAC encoder main-type prediction
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder main prediction
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENC_PRED_H
#define AVCODEC_AACENC_PRED_H
#include "aacenc.h"
/* Every predictor group needs to get reset at least once in this many frames */
#define PRED_RESET_FRAME_MIN 240
/* Any frame with less than this amount of frames since last reset is ok */
#define PRED_RESET_MIN 64
/* Raise to filter any low frequency artifacts due to prediction */
#define PRED_SFB_START 10
/** Run the predictors over one channel's coefficients (all predictors are reset for eight-short frames). */
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce);
/** Keep prediction on a band of a common-window pair only where both channels use it and the joint check passes. */
void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe);
/** Decide per band whether prediction is used for this channel and set ics.predictor_present. */
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce);
/** Write the predictor reset info and per-band prediction flags to the bitstream. */
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce);
#endif /* AVCODEC_AACENC_PRED_H */

View File

@@ -0,0 +1,260 @@
/*
* AAC encoder quantizer
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder quantizer
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENC_QUANTIZATION_H
#define AVCODEC_AACENC_QUANTIZATION_H
#include "aactab.h"
#include "aacenc.h"
#include "aacenctab.h"
#include "aacenc_utils.h"
/**
 * Calculate rate distortion cost for quantizing with given codebook
 *
 * Shared body for all codebook classes; the BT_* flags and ROUNDING are
 * passed as constants by the per-class wrappers.
 *
 * @param s         encoder context; s->qcoefs (and s->scoefs when scaled is
 *                  NULL) are used as scratch
 * @param pb        bitstream writer, or NULL to only measure
 * @param in        input coefficients
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    |in|^(3/4), or NULL to have it computed here
 * @param size      number of coefficients
 * @param scale_idx scalefactor index
 * @param cb        codebook number
 * @param lambda    weight applied to the squared error
 * @param uplim     cost limit; evaluation stops once it is reached
 * @param bits      if non-NULL, receives the bit count (not written when
 *                  the uplim early exit is taken)
 * @return lambda-weighted distortion plus bits, or uplim if that was reached
 */
static av_always_inline float quantize_and_encode_band_cost_template(
struct AACEncContext *s,
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
int *bits, int BT_ZERO, int BT_UNSIGNED,
int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
const float ROUNDING)
{
const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
const float Q = ff_aac_pow2sf_tab [q_idx];
const float Q34 = ff_aac_pow34sf_tab[q_idx];
const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
const float CLIPPED_ESCAPE = 165140.0f*IQ;
int i, j;
float cost = 0;
/* Codebook vectors hold 2 values for pair books, 4 otherwise */
const int dim = BT_PAIR ? 2 : 4;
int resbits = 0;
int off;
/* Nothing is coded for these band types: the cost is the full band energy,
 * no bits are spent and the output is all zeros */
if (BT_ZERO || BT_NOISE || BT_STEREO) {
for (i = 0; i < size; i++)
cost += in[i]*in[i];
if (bits)
*bits = 0;
if (out) {
for (i = 0; i < size; i += dim)
for (j = 0; j < dim; j++)
out[i+j] = 0.0f;
}
return cost * lambda;
}
if (!scaled) {
abs_pow34_v(s->scoefs, in, size);
scaled = s->scoefs;
}
quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb], ROUNDING);
/* Signed books are shifted by the codebook maximum so indices start at 0 */
if (BT_UNSIGNED) {
off = 0;
} else {
off = aac_cb_maxval[cb];
}
for (i = 0; i < size; i += dim) {
const float *vec;
int *quants = s->qcoefs + i;
int curidx = 0;
int curbits;
float quantized, rd = 0.0f;
/* Combine the dim quantized values into one codebook index, treating
 * them as digits in base aac_cb_range[cb] */
for (j = 0; j < dim; j++) {
curidx *= aac_cb_range[cb];
curidx += quants[j] + off;
}
curbits = ff_aac_spectral_bits[cb-1][curidx];
vec = &ff_aac_codebook_vectors[cb-1][curidx*dim];
if (BT_UNSIGNED) {
for (j = 0; j < dim; j++) {
float t = fabsf(in[i+j]);
float di;
/* A codebook value of 64 marks an escape-coded coefficient */
if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
if (t >= CLIPPED_ESCAPE) {
quantized = CLIPPED_ESCAPE;
curbits += 21;
} else {
int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
quantized = c*cbrtf(c)*IQ;
curbits += av_log2(c)*2 - 4 + 1;
}
} else {
quantized = vec[j]*IQ;
}
di = t - quantized;
if (out)
out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
/* One extra bit per non-zero value (the sign bit written below) */
if (vec[j] != 0.0f)
curbits++;
rd += di*di;
}
} else {
for (j = 0; j < dim; j++) {
quantized = vec[j]*IQ;
if (out)
out[i+j] = quantized;
rd += (in[i+j] - quantized)*(in[i+j] - quantized);
}
}
cost += rd * lambda + curbits;
resbits += curbits;
/* Early exit: *bits is left untouched on this path */
if (cost >= uplim)
return uplim;
if (pb) {
put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
if (BT_UNSIGNED)
for (j = 0; j < dim; j++)
if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
put_bits(pb, 1, in[i+j] < 0.0f);
if (BT_ESC) {
/* Escape sequence: a prefix of len - 4 + 1 bits followed by the
 * low len bits of the clipped value */
for (j = 0; j < 2; j++) {
if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
int len = av_log2(coef);
put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
put_sbits(pb, len, coef);
}
}
}
}
}
if (bits)
*bits = resbits;
return cost;
}
/**
 * Placeholder for the codebook slot that has no coder; reaching it is a
 * programming error, so it asserts unconditionally.
 */
static inline float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb,
                                                       const float *in, float *quant, const float *scaled,
                                                       int size, int scale_idx, int cb,
                                                       const float lambda, const float uplim,
                                                       int *bits)
{
    av_assert0(0);

    return 0.0f;
}
/*
 * Generate quantize_and_encode_band_cost_<NAME>(), a wrapper that calls the
 * template with the given constant BT_* flags and rounding offset.
 * When BT_ESC is set the codebook passed on is forced to ESC_BT regardless
 * of the cb argument.
 */
#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
static float quantize_and_encode_band_cost_ ## NAME( \
struct AACEncContext *s, \
PutBitContext *pb, const float *in, float *quant, \
const float *scaled, int size, int scale_idx, \
int cb, const float lambda, const float uplim, \
int *bits) { \
return quantize_and_encode_band_cost_template( \
s, pb, in, quant, scaled, size, scale_idx, \
BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
ROUNDING); \
}
/* Argument order: NAME, ZERO, UNSIGNED, PAIR, ESC, NOISE, STEREO, ROUNDING */
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0, 0, ROUND_STANDARD)
/* Same as ESC but with the smaller ROUND_TO_ZERO rounding offset */
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD)
QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1, ROUND_STANDARD)
/*
 * Dispatch table indexed by codebook number (see the
 * quantize_and_encode_band_cost() macro), standard rounding variant.
 */
static float (*const quantize_and_encode_band_cost_arr[])(
struct AACEncContext *s,
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
int *bits) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_UQUAD,
quantize_and_encode_band_cost_UQUAD,
quantize_and_encode_band_cost_SPAIR,
quantize_and_encode_band_cost_SPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_ESC,
quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
quantize_and_encode_band_cost_NOISE,
quantize_and_encode_band_cost_STEREO,
quantize_and_encode_band_cost_STEREO,
};
/*
 * Dispatch table indexed by codebook number, "round to zero" variant.
 * Identical to quantize_and_encode_band_cost_arr except for entry 11,
 * which uses the ESC_RTZ coder.
 */
static float (*const quantize_and_encode_band_cost_rtz_arr[])(
struct AACEncContext *s,
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
int *bits) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_UQUAD,
quantize_and_encode_band_cost_UQUAD,
quantize_and_encode_band_cost_SPAIR,
quantize_and_encode_band_cost_SPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_UPAIR,
quantize_and_encode_band_cost_ESC_RTZ,
quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */
quantize_and_encode_band_cost_NOISE,
quantize_and_encode_band_cost_STEREO,
quantize_and_encode_band_cost_STEREO,
};
/*
 * Dispatch to the coder for codebook cb, picking the round-to-zero table
 * when rtz is non-zero. Note that cb appears twice in the expansion (table
 * index and argument), so it must not have side effects.
 */
#define quantize_and_encode_band_cost( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
lambda, uplim, bits, rtz) \
((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
lambda, uplim, bits)
/**
 * Measure the rate-distortion cost of a band without writing any bits and
 * without producing dequantized output.
 *
 * @return the value returned by the codebook's cost function
 */
static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits, int rtz)
{
    const float band_cost =
        quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
                                      cb, lambda, uplim, bits, rtz);

    return band_cost;
}
/**
 * Quantize a band and write it to the bitstream; the |x|^(3/4) values are
 * computed internally, no cost limit applies and the cost is discarded.
 */
static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
                                            const float *in, float *out, int size, int scale_idx,
                                            int cb, const float lambda, int rtz)
{
    quantize_and_encode_band_cost(s, pb, in, out,
                                  NULL,            /* scaled: computed on demand */
                                  size, scale_idx, cb, lambda,
                                  INFINITY,        /* uplim: never stop early    */
                                  NULL,            /* bits: not needed           */
                                  rtz);
}
#endif /* AVCODEC_AACENC_QUANTIZATION_H */

View File

@@ -0,0 +1,194 @@
/*
* AAC encoder TNS
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder temporal noise shaping
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#include "aacenc.h"
#include "aacenc_tns.h"
#include "aactab.h"
#include "aacenc_utils.h"
#include "aacenc_quantization.h"
/**
 * Encode TNS data.
 * Coefficient compression saves a single bit per coefficient.
 *
 * Writes nothing when TNS is not present for the channel. For every window
 * the filter count is written, followed (when non-zero) by the coefficient
 * resolution bit and, per filter, its length, order and - for non-zero
 * order - direction, compression flag and the masked coefficient indices.
 * Field widths shrink for eight-short-window frames.
 */
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
{
    uint8_t u_coef;
    const uint8_t coef_res = TNS_Q_BITS == 4;
    int i, w, filt, coef_len, coef_compress = 0;
    const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
    TemporalNoiseShaping *tns = &sce->tns;

    if (!sce->tns.present)
        return;

    for (i = 0; i < sce->ics.num_windows; i++) {
        put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]);
        if (tns->n_filt[i]) {
            put_bits(&s->pb, 1, coef_res);
            for (filt = 0; filt < tns->n_filt[i]; filt++) {
                put_bits(&s->pb, 6 - 2 * is8, tns->length[i][filt]);
                put_bits(&s->pb, 5 - 2 * is8, tns->order[i][filt]);
                if (tns->order[i][filt]) {
                    put_bits(&s->pb, 1, !!tns->direction[i][filt]);
                    put_bits(&s->pb, 1, !!coef_compress);
                    coef_len = coef_res + 3 - coef_compress;
                    for (w = 0; w < tns->order[i][filt]; w++) {
                        /* Keep the low coef_len bits. The mask is built from
                         * an unsigned value: left-shifting ~0 (a negative
                         * int) is undefined behaviour. */
                        u_coef = (tns->coef_idx[i][filt][w]) & ((1U << coef_len) - 1U);
                        put_bits(&s->pb, coef_len, u_coef);
                    }
                }
            }
        }
    }
}
/**
 * Quantize TNS reflection coefficients.
 *
 * Each coefficient is mapped through asin() and scaled (with a different
 * factor for non-negative and negative values) to a signed index; the index
 * masked to TNS_Q_BITS bits selects the dequantized value from tns_tmp2_map.
 *
 * @param coef  input coefficients (order entries)
 * @param idx   receives the signed quantization indices
 * @param lpc   receives the dequantized coefficient values
 * @param order number of coefficients
 */
static inline void quantize_coefs(double *coef, int *idx, float *lpc, int order)
{
    int i;
    uint8_t u_coef;
    const float *quant_arr = tns_tmp2_map[TNS_Q_BITS == 4];
    const double iqfac_p = ((1 << (TNS_Q_BITS-1)) - 0.5)/(M_PI/2.0);
    const double iqfac_m = ((1 << (TNS_Q_BITS-1)) + 0.5)/(M_PI/2.0);

    for (i = 0; i < order; i++) {
        idx[i] = ceilf(asin(coef[i])*((coef[i] >= 0) ? iqfac_p : iqfac_m));
        /* Low TNS_Q_BITS bits of the index; the mask is unsigned because
         * left-shifting ~0 (a negative int) is undefined behaviour. */
        u_coef = (idx[i]) & ((1U << TNS_Q_BITS) - 1U);
        lpc[i] = quant_arr[u_coef];
    }
}
/*
 * Apply the TNS filters of a channel to its coefficients.
 *
 * For each window the filters are walked from the top band downwards; every
 * filter with a non-zero order and a non-empty coefficient range has its LPC
 * coefficients computed and is then applied to sce->coeffs, reading the
 * filter history from sce->pcoeffs, in the direction stored for the filter.
 */
void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
{
    TemporalNoiseShaping *tns = &sce->tns;
    IndividualChannelStream *ics = &sce->ics;
    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
    float lpc[TNS_MAX_ORDER];
    int w, filt;

    for (w = 0; w < ics->num_windows; w++) {
        int bottom = ics->num_swb;

        for (filt = 0; filt < tns->n_filt[w]; filt++) {
            const int top   = bottom;
            const int order = tns->order[w][filt];
            int first, last, count, step, pos, n, tap;

            bottom = FFMAX(0, top - tns->length[w][filt]);
            if (order == 0)
                continue;

            // tns_decode_coef
            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);

            first = ics->swb_offset[FFMIN(bottom, mmm)];
            last  = ics->swb_offset[FFMIN(   top, mmm)];
            count = last - first;
            if (count <= 0)
                continue;

            /* Backward filters start at the last coefficient and step down */
            step = tns->direction[w][filt] ? -1 : 1;
            pos  = tns->direction[w][filt] ? last - 1 : first;
            pos += w * 128;

            // ar filter
            for (n = 0; n < count; n++) {
                for (tap = 1; tap <= FFMIN(n, order); tap++)
                    sce->coeffs[pos] += lpc[tap-1]*sce->pcoeffs[pos - tap*step];
                pos += step;
            }
        }
    }
}
/**
 * Decide per window whether TNS is used and set up the filter(s).
 *
 * For every window the band energies are split into a lower and an upper
 * half, thresholds and spread are accumulated, and LPC reflection
 * coefficients are computed over the TNS band range. If the LPC gain, the
 * energy and the spread satisfy the TNS_* thresholds, either one filter
 * (short windows, low order, or balanced energy ratio) or two filters are
 * configured and their coefficients quantized. sce->tns.present is set when
 * at least one window uses TNS.
 */
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
{
    TemporalNoiseShaping *tns = &sce->tns;
    int w, w2, g, count = 0;
    const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
    const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
    const int order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
    int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm);
    int sfb_end   = av_clip(sce->ics.num_swb, 0, mmm);

    for (w = 0; w < sce->ics.num_windows; w++) {
        float e_ratio = 0.0f, threshold = 0.0f, spread = 0.0f, en[2] = {0.0, 0.0f};
        double gain = 0.0f, coefs[MAX_LPC_ORDER] = {0};
        /* Windows are laid out 128 coefficients apart in coeffs[] (the same
         * stride ff_aac_apply_tns() uses); the band count num_swb is not a
         * coefficient offset. */
        int coef_start = w*128 + sce->ics.swb_offset[sfb_start];
        int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];

        for (g = 0; g < sce->ics.num_swb; g++) {
            if (w*16+g < sfb_start || w*16+g > sfb_end)
                continue;
            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                /* Upper half of the band range goes to en[1], lower to en[0] */
                if ((w+w2)*16+g > sfb_start + ((sfb_end - sfb_start)/2))
                    en[1] += band->energy;
                else
                    en[0] += band->energy;
                threshold += band->threshold;
                spread    += band->spread;
            }
        }

        if (coef_len <= 0 || (sfb_end - sfb_start) <= 0)
            continue;
        else
            e_ratio = en[0]/en[1];

        /* LPC */
        gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[coef_start],
                                       coef_len, order, coefs);

        if (gain > TNS_GAIN_THRESHOLD_LOW && gain < TNS_GAIN_THRESHOLD_HIGH &&
            (en[0]+en[1]) > TNS_GAIN_THRESHOLD_LOW*threshold &&
            spread < TNS_SPREAD_THRESHOLD && order) {
            if (is8 || order < 2 || (e_ratio > TNS_E_RATIO_LOW && e_ratio < TNS_E_RATIO_HIGH)) {
                tns->n_filt[w] = 1;
                for (g = 0; g < tns->n_filt[w]; g++) {
                    tns->length[w][g] = sfb_end - sfb_start;
                    tns->direction[w][g] = en[0] < en[1];
                    tns->order[w][g] = order;
                    quantize_coefs(coefs, tns->coef_idx[w][g], tns->coef[w][g],
                                   order);
                }
            } else {  /* 2 filters due to energy disbalance */
                tns->n_filt[w] = 2;
                for (g = 0; g < tns->n_filt[w]; g++) {
                    tns->direction[w][g] = en[g] < en[!g];
                    tns->order[w][g] = !g ? order/2 : order - tns->order[w][g-1];
                    tns->length[w][g] = !g ? (sfb_end - sfb_start)/2 : \
                                        (sfb_end - sfb_start) - tns->length[w][g-1];
                    quantize_coefs(&coefs[!g ? 0 : order - tns->order[w][g-1]],
                                   tns->coef_idx[w][g], tns->coef[w][g],
                                   tns->order[w][g]);
                }
            }
            count++;
        }
    }
    sce->tns.present = !!count;
}

View File

@@ -0,0 +1,52 @@
/*
* AAC encoder TNS
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder temporal noise shaping
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENC_TNS_H
#define AVCODEC_AACENC_TNS_H
#include "aacenc.h"
/* Could be set to 3 to save an additional bit at the cost of little quality */
#define TNS_Q_BITS 4
/* TNS will only be used if the LPC gain is within these margins */
#define TNS_GAIN_THRESHOLD_LOW 1.395f
#define TNS_GAIN_THRESHOLD_HIGH 11.19f
/* If the energy ratio between the low SFBs vs the high SFBs is not between
* those two values, use 2 filters instead */
#define TNS_E_RATIO_LOW 0.77
#define TNS_E_RATIO_HIGH 1.23
/* Do not use TNS if the psy band spread is below this value */
#define TNS_SPREAD_THRESHOLD 37.081512f
/** Write the TNS filter description of a channel to the bitstream (no-op when TNS is not present). */
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
/** Apply the configured TNS filters to the channel's coefficients. */
void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce);
/** Decide per window whether TNS is used and set up its filters; sets sce->tns.present. */
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
#endif /* AVCODEC_AACENC_TNS_H */

View File

@@ -0,0 +1,143 @@
/*
* AAC encoder utilities
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder utilities
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENC_UTILS_H
#define AVCODEC_AACENC_UTILS_H
#include "aac.h"
#include "aac_tablegen_decl.h"
#include "aacenctab.h"
#define ROUND_STANDARD 0.4054f
#define ROUND_TO_ZERO 0.1054f
#define C_QUANT 0.4054f
static inline void abs_pow34_v(float *out, const float *in, const int size)
{
int i;
for (i = 0; i < size; i++) {
float a = fabsf(in[i]);
out[i] = sqrtf(a * sqrtf(a));
}
}
/**
* Quantize one coefficient.
* @return absolute value of the quantized coefficient
* @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
*/
static inline int quant(float coef, const float Q, const float rounding)
{
float a = coef * Q;
return sqrtf(a * sqrtf(a)) + rounding;
}
/**
 * Quantize a run of coefficients.
 *
 * Each scaled[i] is multiplied by Q34, offset by rounding, limited to maxval
 * and truncated to int. When is_signed is set, the result takes the sign of
 * the matching in[i].
 *
 * @param out       destination, receives size quantized values
 * @param in        original coefficients, only their sign is inspected
 * @param scaled    magnitudes to quantize
 * @param size      number of coefficients
 * @param Q34       multiplier applied to every scaled[] entry
 * @param is_signed nonzero to negate the output where in[i] < 0
 * @param maxval    upper limit of the quantized magnitude
 * @param rounding  offset added before truncation
 */
static inline void quantize_bands(int *out, const float *in, const float *scaled,
                                  int size, float Q34, int is_signed, int maxval,
                                  const float rounding)
{
    const double ceiling = (double)maxval;
    int k;

    for (k = 0; k < size; k++) {
        /* the product is formed in float and then widened, as before */
        const double qc = scaled[k] * Q34;
        int level = (int)FFMIN(qc + rounding, ceiling);

        if (is_signed && in[k] < 0.0f)
            level = -level;
        out[k] = level;
    }
}
/**
 * Find the largest value in one band across a group of windows.
 *
 * Window w2 of the group starts at scaled[w2 * 128]; the first swb_size
 * entries of each window are examined.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of entries examined per window
 * @param scaled    values to scan, 128 entries per window
 * @return the maximum found, never less than 0.0f
 */
static inline float find_max_val(int group_len, int swb_size, const float *scaled)
{
    float peak = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const int base = win * 128;

        for (k = 0; k < swb_size; k++)
            peak = FFMAX(peak, scaled[base + k]);
    }

    return peak;
}
/**
 * Pick the codebook index for a band from its largest value and scalefactor.
 *
 * maxval is multiplied by Q^(3/4), where Q is looked up in ff_aac_pow2sf_tab
 * for the scalefactor sf, offset by C_QUANT and truncated; the resulting
 * integer selects the codebook.
 *
 * @param maxval largest value of the band
 * @param sf     scalefactor
 * @return codebook index: 0, 1, 3, 5, 7, 9 or 11
 */
static inline int find_min_book(float maxval, int sf)
{
    const float Q   = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
    const float Q34 = sqrtf(Q * sqrtf(Q));
    const int qmaxval = maxval * Q34 + C_QUANT;

    switch (qmaxval) {
    case 0:
        return 0;
    case 1:
        return 1;
    case 2:
        return 3;
    default:
        break;
    }

    /* everything else (including any negative value) falls through the
     * upper-bound checks in ascending order */
    if (qmaxval <= 4)
        return 5;
    if (qmaxval <= 7)
        return 7;
    if (qmaxval <= 12)
        return 9;
    return 11;
}
/**
 * Return the minimum scalefactor where the quantized coef does not clip.
 *
 * The value is derived from log2f(coef) and clipped to the uint8_t range.
 * NOTE(review): for coef <= 0 log2f() yields -inf or NaN, and converting
 * that to int is undefined; callers presumably pass positive values - confirm.
 */
static inline uint8_t coef2minsf(float coef)
{
    const int sf = log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512;

    return av_clip_uint8(sf);
}
/**
 * Return the maximum scalefactor where the quantized coef is not zero.
 *
 * The value is derived from log2f(coef) and clipped to the uint8_t range.
 * NOTE(review): for coef <= 0 log2f() yields -inf or NaN, and converting
 * that to int is undefined; callers presumably pass positive values - confirm.
 */
static inline uint8_t coef2maxsf(float coef)
{
    const int sf = log2f(coef)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512;

    return av_clip_uint8(sf);
}
/**
 * Find the entry of an array that is closest to a given value.
 *
 * Closeness is measured by squared difference. On a tie the lowest index
 * wins, and 0 is returned when no entry compares closer than INFINITY
 * (for example when num <= 0).
 *
 * @param val value to approximate
 * @param arr candidate values
 * @param num number of candidates in arr
 * @return index of the closest candidate
 */
static inline int quant_array_idx(const float val, const float *arr, const int num)
{
    float best_err = INFINITY;
    int best = 0;
    int k;

    for (k = 0; k < num; k++) {
        const float diff = val - arr[k];
        const float sq   = diff * diff;

        if (sq < best_err) {
            best_err = sq;
            best     = k;
        }
    }

    return best;
}
/*
* If cond is true, log the printf-style message at AV_LOG_ERROR level and
* return AVERROR(EINVAL) from the enclosing function.
* Requires a variable named 'avctx' in the caller's scope.
* Note: expands to a bare 'if' block (not wrapped in do { } while (0)), so
* it must not be used as the body of an unbraced if/else.
*/
#define ERROR_IF(cond, ...) \
if (cond) { \
av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
return AVERROR(EINVAL); \
}
/*
* If cond is true, log the printf-style message at AV_LOG_WARNING level and
* continue. Requires a variable named 'avctx' in the caller's scope; the same
* bare-'if' caveat as for ERROR_IF applies.
*/
#define WARN_IF(cond, ...) \
if (cond) { \
av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
}
#endif /* AVCODEC_AACENC_UTILS_H */

View File

@@ -0,0 +1,108 @@
/*
* AAC encoder data
* Copyright (c) 2015 Rostislav Pehlivanov ( atomnuker gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "aacenctab.h"
/*
* Band width tables for 128-coefficient windows: every table below sums to
* 128. NOTE(review): the numeric suffix is presumably the sample rate class
* in kHz, and the entries the scalefactor band widths - confirm against the
* AAC specification tables.
*/
static const uint8_t swb_size_128_96[] = {
4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};
/* Same contents as swb_size_128_96. */
static const uint8_t swb_size_128_64[] = {
4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};
static const uint8_t swb_size_128_48[] = {
4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};
static const uint8_t swb_size_128_24[] = {
4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};
static const uint8_t swb_size_128_16[] = {
4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};
static const uint8_t swb_size_128_8[] = {
4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};
/*
* Band width tables for 1024-coefficient windows: every table below sums to
* 1024. NOTE(review): the numeric suffix is presumably the sample rate class
* in kHz, and the entries the scalefactor band widths - confirm against the
* AAC specification tables.
*/
static const uint8_t swb_size_1024_96[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};
static const uint8_t swb_size_1024_64[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};
/* Differs from swb_size_1024_32 only in its tail: one final band of 96
* instead of three more bands of 32. */
static const uint8_t swb_size_1024_48[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
96
};
static const uint8_t swb_size_1024_32[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};
static const uint8_t swb_size_1024_24[] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};
static const uint8_t swb_size_1024_16[] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};
static const uint8_t swb_size_1024_8[] = {
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};
/*
* Exported selectors for the width tables above, 13 entries each.
* NOTE(review): the entry order lines up with the 13 rates listed in
* mpeg4audio_sample_rates (aacenctab.h), so the index is presumably the
* sampling frequency index - confirm against callers.
* The 128-coefficient selector has no dedicated 32 kHz table and reuses
* swb_size_128_48 at that position.
*/
const uint8_t *ff_aac_swb_size_128[] = {
swb_size_128_96, swb_size_128_96, swb_size_128_64,
swb_size_128_48, swb_size_128_48, swb_size_128_48,
swb_size_128_24, swb_size_128_24, swb_size_128_16,
swb_size_128_16, swb_size_128_16, swb_size_128_8,
swb_size_128_8
};
const uint8_t *ff_aac_swb_size_1024[] = {
swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
swb_size_1024_8
};
/* Number of entries in each selector array, exported because the arrays are
* declared with unknown size in the header. */
const int ff_aac_swb_size_128_len = FF_ARRAY_ELEMS(ff_aac_swb_size_128);
const int ff_aac_swb_size_1024_len = FF_ARRAY_ELEMS(ff_aac_swb_size_1024);

View File

@@ -0,0 +1,113 @@
/*
* AAC encoder data
* Copyright (c) 2015 Rostislav Pehlivanov ( atomnuker gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC encoder data
* @author Rostislav Pehlivanov ( atomnuker gmail com )
*/
#ifndef AVCODEC_AACENCTAB_H
#define AVCODEC_AACENCTAB_H
#include "aac.h"
/** Total number of usable codebooks **/
#define CB_TOT 12
/** Total number of codebooks, including special ones **/
#define CB_TOT_ALL 15
/** Number of rows/columns of the channel remapping table aac_chan_maps **/
#define AAC_MAX_CHANNELS 6
/* Band width table selectors and their entry counts, defined in aacenctab.c.
* The arrays are declared without a size, hence the separate *_len values. */
extern const uint8_t *ff_aac_swb_size_1024[];
extern const int ff_aac_swb_size_1024_len;
extern const uint8_t *ff_aac_swb_size_128[];
extern const int ff_aac_swb_size_128_len;
/**
* default channel configurations
*
* One row per channel count (row 0 = 1 channel ... row 5 = 6 channels).
* The first value of a row is the number of syntax elements, followed by
* that many element types; unused trailing slots are zero-initialized.
*/
static const uint8_t aac_chan_configs[6][5] = {
{1, TYPE_SCE}, // 1 channel - single channel element
{1, TYPE_CPE}, // 2 channels - channel pair
{2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
{3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
{3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
{4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
};
/**
* Table to remap channels from libavcodec's default order to AAC order.
*
* Row n holds n + 1 meaningful entries; the remaining slots are
* zero-initialized. NOTE(review): rows are presumably selected by
* (channel count - 1), like aac_chan_configs - confirm against callers.
*/
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
{ 0 },
{ 0, 1 },
{ 2, 0, 1 },
{ 2, 0, 1, 3 },
{ 2, 0, 1, 3, 4 },
{ 2, 0, 1, 4, 5, 3 },
};
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
* failures
*
* Only the first 13 of the 16 entries are given; the last three are
* zero-initialized. */
static const int mpeg4audio_sample_rates[16] = {
96000, 88200, 64000, 48000, 44100, 32000,
24000, 22050, 16000, 12000, 11025, 8000, 7350
};
/** bits needed to code codebook run value for long windows
* (entries 0-30 cost 5 bits, 31-62 cost 10 bits, 63 costs 15 bits) */
static const uint8_t run_value_bits_long[64] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};
/** bits needed to code codebook run value for short windows
* (entries 0-6 cost 3 bits, 7-14 cost 6 bits, 15 costs 9 bits) */
static const uint8_t run_value_bits_short[16] = {
3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};
/* TNS starting SFBs for long and short windows
* NOTE(review): the 16 entries are presumably indexed by sampling frequency
* index - confirm against the TNS code. */
static const uint8_t tns_min_sfb_short[16] = {
2, 2, 2, 3, 3, 4, 6, 6, 8, 10, 10, 12, 12, 12, 12, 12
};
static const uint8_t tns_min_sfb_long[16] = {
12, 13, 15, 16, 17, 20, 25, 26, 24, 28, 30, 31, 31, 31, 31, 31
};
/* Selector pairs: index 0 is the long-window table, index 1 the short-window
* table. */
static const uint8_t * const tns_min_sfb[2] = {
tns_min_sfb_long, tns_min_sfb_short
};
static const uint8_t * const run_value_bits[2] = {
run_value_bits_long, run_value_bits_short
};
/** Map to convert values from BandCodingPath index to a codebook index
* (identity for 0-11, then 13, 14, 15: codebook number 12 is skipped) **/
static const uint8_t aac_cb_out_map[CB_TOT_ALL] = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
/** Inverse map to convert from codebooks to BandCodingPath indices
* (codebook number 12 has no path index of its own and maps to 0) **/
static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14};
/* Per-codebook tables for codebooks 0-11.
* NOTE(review): aac_cb_maxval looks like the largest magnitude each codebook
* can code and aac_cb_range like the number of distinct values per
* coefficient - confirm against the users of these tables. */
static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
#endif /* AVCODEC_AACENCTAB_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,86 @@
/*
* MPEG-4 Parametric Stereo definitions and declarations
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_PS_H
#define AVCODEC_PS_H
#include <stdint.h>
#include "aacpsdsp.h"
#include "avcodec.h"
#include "get_bits.h"
/* Compile-time limits that size the arrays of PSContext below.
* NOTE(review): the meanings of the individual limits are inferred from
* their names and from the arrays they dimension - confirm against the
* Parametric Stereo specification. */
#define PS_MAX_NUM_ENV 5
#define PS_MAX_NR_IIDICC 34
#define PS_MAX_NR_IPDOPD 17
#define PS_MAX_SSB 91
#define PS_MAX_AP_BANDS 50
#define PS_QMF_TIME_SLOTS 32
#define PS_MAX_DELAY 14
#define PS_AP_LINKS 3
#define PS_MAX_AP_DELAY 5
/**
* Parametric Stereo state.
*
* Groups the stereo parameters (IID/ICC/IPD/OPD, one set per envelope), the
* INTFLOAT working buffers and the DSP function table. The buffer element
* type is INTFLOAT, so the layout follows whichever numeric type that macro
* selects.
* NOTE(review): roles of fields without a comment are inferred from their
* names only - confirm against aacps.c.
*/
typedef struct PSContext {
int start;
int enable_iid;
int iid_quant;
int nr_iid_par;
int nr_ipdopd_par;
int enable_icc;
int icc_mode;
int nr_icc_par;
int enable_ext;
int frame_class;
int num_env_old;
int num_env;
int enable_ipdopd;
int border_position[PS_MAX_NUM_ENV+1];
int8_t iid_par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC]; ///< Inter-channel Intensity Difference Parameters
int8_t icc_par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC]; ///< Inter-Channel Coherence Parameters
/* ipd/opd is iid/icc sized so that the same functions can handle both */
int8_t ipd_par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC]; ///< Inter-channel Phase Difference Parameters
int8_t opd_par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC]; ///< Overall Phase Difference Parameters
int is34bands;
int is34bands_old;
/* 16-byte aligned working buffers; the trailing [2] dimension is
* presumably a real/imaginary pair - TODO confirm */
DECLARE_ALIGNED(16, INTFLOAT, in_buf)[5][44][2];
DECLARE_ALIGNED(16, INTFLOAT, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
DECLARE_ALIGNED(16, INTFLOAT, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
DECLARE_ALIGNED(16, INTFLOAT, peak_decay_nrg)[34];
DECLARE_ALIGNED(16, INTFLOAT, power_smooth)[34];
DECLARE_ALIGNED(16, INTFLOAT, peak_decay_diff_smooth)[34];
/* H11..H22: one entry per (envelope border, parameter band), two planes each */
DECLARE_ALIGNED(16, INTFLOAT, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
DECLARE_ALIGNED(16, INTFLOAT, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
DECLARE_ALIGNED(16, INTFLOAT, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
DECLARE_ALIGNED(16, INTFLOAT, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
/* NOTE(review): the literals 91 and 32 equal PS_MAX_SSB and
* PS_QMF_TIME_SLOTS; presumably the same quantities - confirm */
DECLARE_ALIGNED(16, INTFLOAT, Lbuf)[91][32][2];
DECLARE_ALIGNED(16, INTFLOAT, Rbuf)[91][32][2];
int8_t opd_hist[PS_MAX_NR_IIDICC];
int8_t ipd_hist[PS_MAX_NR_IIDICC];
PSDSPContext dsp;
} PSContext;
/* Parametric Stereo entry points. Every name is wrapped in AAC_RENAME(), so
* the exported symbol depends on how that macro is defined for the build.
* NOTE(review): judging by names and signatures, these set up global state,
* set up one PSContext, read PS data from a bitstream reader and apply PS to
* the L/R buffers - the implementations are not visible here, confirm in
* aacps.c. */
void AAC_RENAME(ff_ps_init)(void);
void AAC_RENAME(ff_ps_ctx_init)(PSContext *ps);
int AAC_RENAME(ff_ps_read_data)(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps, int bits_left);
int AAC_RENAME(ff_ps_apply)(AVCodecContext *avctx, PSContext *ps, INTFLOAT L[2][38][64], INTFLOAT R[2][38][64], int top);
#endif /* AVCODEC_PS_H */

View File

@@ -0,0 +1,24 @@
/*
* MPEG-4 Parametric Stereo decoding functions
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 1
#include "aacps.c"

View File

@@ -0,0 +1,24 @@
/*
* Generate a header file for hardcoded Parametric Stereo tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 1
#include "aacps_tablegen_template.c"

View File

@@ -0,0 +1,403 @@
/*
* Header file for hardcoded Parametric Stereo tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Note: Rounding-to-nearest used unless otherwise stated
*
*/
#ifndef AACPS_FIXED_TABLEGEN_H
#define AACPS_FIXED_TABLEGEN_H
#include <math.h>
#include <stdint.h>
#if CONFIG_HARDCODED_TABLES
#define ps_tableinit()
#define TABLE_CONST const
#include "libavcodec/aacps_fixed_tables.h"
#else
#include "libavutil/common.h"
#include "libavutil/mathematics.h"
#include "libavutil/mem.h"
#include "aac_defines.h"
#include "libavutil/softfloat.h"
/* Runtime-generated table storage, used when CONFIG_HARDCODED_TABLES is off.
* TABLE_CONST expands to nothing in this branch, so the tables stay writable
* and can be filled in at init time. */
#define NR_ALLPASS_BANDS20 30
#define NR_ALLPASS_BANDS34 50
#define PS_AP_LINKS 3
#define TABLE_CONST
static int pd_re_smooth[8*8*8];
static int pd_im_smooth[8*8*8];
static int HA[46][8][4];
static int HB[46][8][4];
/* Filter tables with the [bands][8][2] shape that make_filters_from_proto()
* writes. NOTE(review): presumably filled by that function from the g*_Q*
* prototypes - the calls are not visible here, confirm in ps_tableinit(). */
static DECLARE_ALIGNED(16, int, f20_0_8) [ 8][8][2];
static DECLARE_ALIGNED(16, int, f34_0_12)[12][8][2];
static DECLARE_ALIGNED(16, int, f34_1_8) [ 8][8][2];
static DECLARE_ALIGNED(16, int, f34_2_4) [ 4][8][2];
static TABLE_CONST DECLARE_ALIGNED(16, int, Q_fract_allpass)[2][50][3][2];
static DECLARE_ALIGNED(16, int, phi_fract)[2][50][2];
/* Fixed-point coefficient tables, 7 values each, converted with Q31().
* make_filters_from_proto() reads exactly 7 values from its 'proto' argument.
* NOTE(review): these are presumably the prototypes passed to it, with the
* numeric suffix giving the band count - confirm in ps_tableinit(). */
static const int g0_Q8[] = {
Q31(0.00746082949812f), Q31(0.02270420949825f), Q31(0.04546865930473f), Q31(0.07266113929591f),
Q31(0.09885108575264f), Q31(0.11793710567217f), Q31(0.125f)
};
static const int g0_Q12[] = {
Q31(0.04081179924692f), Q31(0.03812810994926f), Q31(0.05144908135699f), Q31(0.06399831151592f),
Q31(0.07428313801106f), Q31(0.08100347892914f), Q31(0.08333333333333f)
};
static const int g1_Q8[] = {
Q31(0.01565675600122f), Q31(0.03752716391991f), Q31(0.05417891378782f), Q31(0.08417044116767f),
Q31(0.10307344158036f), Q31(0.12222452249753f), Q31(0.125f)
};
/* The only table here with negative entries. */
static const int g2_Q4[] = {
Q31(-0.05908211155639f), Q31(-0.04871498374946f), Q31(0.0f), Q31(0.07778723915851f),
Q31( 0.16486303567403f), Q31( 0.23279856662996f), Q31(0.25f)
};
/* sin/cos of 2*pi*k/N for N = 4, 8 and 12, scaled by 2^30 (1073741824 == 1.0). */
static const int sintbl_4[4] = {
             0,  1073741824,           0, -1073741824
};
static const int costbl_4[4] = {
    1073741824,           0, -1073741824,           0
};
static const int sintbl_8[8] = {
             0,   759250125,  1073741824,   759250125,
             0,  -759250125, -1073741824,  -759250125
};
static const int costbl_8[8] = {
    1073741824,   759250125,           0,  -759250125,
   -1073741824,  -759250125,           0,   759250125
};
static const int sintbl_12[12] = {
             0,   536870912,   929887697,  1073741824,
     929887697,   536870912,           0,  -536870912,
    -929887697, -1073741824,  -929887697,  -536870912
};
static const int costbl_12[12] = {
    1073741824,   929887697,   536870912,           0,
    -536870912,  -929887697, -1073741824,  -929887697,
    -536870912,           0,   536870912,   929887697
};

/**
 * Build a bank of complex filters by modulating a 7-value prototype.
 *
 * For band q and tap n the prototype value is multiplied by cos/-sin of
 * 2*pi*(q + 0.5)*(n - 6)/bands. The integer part of the angle index comes
 * from the tables above; for odd taps the remaining half step is applied as
 * an extra rotation by pi/bands. All products are rounded to nearest and
 * shifted down by 30 bits.
 *
 * @param filter destination, filter[q][n][0] is the real part and
 *               filter[q][n][1] the imaginary part; taps 0-6 of the first
 *               'bands' rows are written
 * @param proto  the 7 prototype values
 * @param bands  number of bands: 4 or 8 pick the matching tables, any other
 *               value uses the 12-point tables
 */
static void make_filters_from_proto(int (*filter)[8][2], const int *proto, int bands)
{
    const int64_t half_lsb = 0x20000000; /* 0.5 in units of the 30-bit shift */
    const int *sin_tab, *cos_tab;
    int half_sin, half_cos;              /* sin/cos of pi/bands, scaled by 2^30 */
    int band, tap;

    switch (bands) {
    case 4:
        sin_tab  = sintbl_4;
        cos_tab  = costbl_4;
        half_sin = 759250125;
        half_cos = 759250125;
        break;
    case 8:
        sin_tab  = sintbl_8;
        cos_tab  = costbl_8;
        half_sin = 410903207;
        half_cos = 992008094;
        break;
    default:
        sin_tab  = sintbl_12;
        cos_tab  = costbl_12;
        half_sin = 277904834;
        half_cos = 1037154959;
        break;
    }

    for (band = 0; band < bands; band++) {
        for (tap = 0; tap < 7; tap++) {
            int idx = (band * (tap - 6) + (tap >> 1) - 3) % bands;
            int re, im;

            /* wrap the C remainder into [0, bands) */
            if (idx < 0)
                idx += bands;
            re = cos_tab[idx];
            im = sin_tab[idx];

            if (tap & 1) {
                /* odd taps sit half a table step further along */
                const int64_t c = re;
                const int64_t s = im;

                re = (int)((c * half_cos - s * half_sin + half_lsb) >> 30);
                im = (int)((s * half_cos + c * half_sin + half_lsb) >> 30);
            }

            filter[band][tap][0] =  (int)(((int64_t)proto[tap] * re + half_lsb) >> 30);
            filter[band][tap][1] = -(int)(((int64_t)proto[tap] * im + half_lsb) >> 30);
        }
    }
}
static void ps_tableinit(void)
{
static const int ipdopd_sin[] = { Q30(0), Q30(M_SQRT1_2), Q30(1), Q30( M_SQRT1_2), Q30( 0), Q30(-M_SQRT1_2), Q30(-1), Q30(-M_SQRT1_2) };
static const int ipdopd_cos[] = { Q30(1), Q30(M_SQRT1_2), Q30(0), Q30(-M_SQRT1_2), Q30(-1), Q30(-M_SQRT1_2), Q30( 0), Q30( M_SQRT1_2) };
int pd0, pd1, pd2;
int idx;
static const int alpha_tab[] =
{
Q30(1.5146213770f/M_PI), Q30(1.5181334019f/M_PI), Q30(1.5234849453f/M_PI), Q30(1.5369486809f/M_PI), Q30(1.5500687361f/M_PI), Q30(1.5679757595f/M_PI),
Q30(1.4455626011f/M_PI), Q30(1.4531552792f/M_PI), Q30(1.4648091793f/M_PI), Q30(1.4945238829f/M_PI), Q30(1.5239057541f/M_PI), Q30(1.5644006729f/M_PI),
Q30(1.3738563061f/M_PI), Q30(1.3851221800f/M_PI), Q30(1.4026404619f/M_PI), Q30(1.4484288692f/M_PI), Q30(1.4949874878f/M_PI), Q30(1.5604078770f/M_PI),
Q30(1.2645189762f/M_PI), Q30(1.2796478271f/M_PI), Q30(1.3038636446f/M_PI), Q30(1.3710125685f/M_PI), Q30(1.4443849325f/M_PI), Q30(1.5532352924f/M_PI),
Q30(1.1507037878f/M_PI), Q30(1.1669205427f/M_PI), Q30(1.1938756704f/M_PI), Q30(1.2754167318f/M_PI), Q30(1.3761177063f/M_PI), Q30(1.5429240465f/M_PI),
Q30(1.0079245567f/M_PI), Q30(1.0208238363f/M_PI), Q30(1.0433073044f/M_PI), Q30(1.1208510399f/M_PI), Q30(1.2424604893f/M_PI), Q30(1.5185726881f/M_PI),
Q30(0.8995233774f/M_PI), Q30(0.9069069624f/M_PI), Q30(0.9201194048f/M_PI), Q30(0.9698365927f/M_PI), Q30(1.0671583414f/M_PI), Q30(1.4647934437f/M_PI),
Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI),
Q30(0.6712729335f/M_PI), Q30(0.6638893485f/M_PI), Q30(0.6506769061f/M_PI), Q30(0.6009597182f/M_PI), Q30(0.5036380291f/M_PI), Q30(0.1060028747f/M_PI),
Q30(0.5628717542f/M_PI), Q30(0.5499725342f/M_PI), Q30(0.5274890065f/M_PI), Q30(0.4499453008f/M_PI), Q30(0.3283358216f/M_PI), Q30(0.0522236861f/M_PI),
Q30(0.4200925827f/M_PI), Q30(0.4038758278f/M_PI), Q30(0.3769206405f/M_PI), Q30(0.2953795493f/M_PI), Q30(0.1946786791f/M_PI), Q30(0.0278722942f/M_PI),
Q30(0.3062773645f/M_PI), Q30(0.2911485136f/M_PI), Q30(0.2669326365f/M_PI), Q30(0.1997837722f/M_PI), Q30(0.1264114529f/M_PI), Q30(0.0175609849f/M_PI),
Q30(0.1969399750f/M_PI), Q30(0.1856741160f/M_PI), Q30(0.1681558639f/M_PI), Q30(0.1223674342f/M_PI), Q30(0.0758088827f/M_PI), Q30(0.0103884479f/M_PI),
Q30(0.1252337098f/M_PI), Q30(0.1176410317f/M_PI), Q30(0.1059871912f/M_PI), Q30(0.0762724727f/M_PI), Q30(0.0468905345f/M_PI), Q30(0.0063956482f/M_PI),
Q30(0.0561749674f/M_PI), Q30(0.0526629239f/M_PI), Q30(0.0473113805f/M_PI), Q30(0.0338476151f/M_PI), Q30(0.0207276177f/M_PI), Q30(0.0028205961f/M_PI),
Q30(1.5676341057f/M_PI), Q30(1.5678333044f/M_PI), Q30(1.5681363344f/M_PI), Q30(1.5688960552f/M_PI), Q30(1.5696337223f/M_PI), Q30(1.5706381798f/M_PI),
Q30(1.5651730299f/M_PI), Q30(1.5655272007f/M_PI), Q30(1.5660660267f/M_PI), Q30(1.5674170256f/M_PI), Q30(1.5687289238f/M_PI), Q30(1.5705151558f/M_PI),
Q30(1.5607966185f/M_PI), Q30(1.5614265203f/M_PI), Q30(1.5623844862f/M_PI), Q30(1.5647867918f/M_PI), Q30(1.5671195984f/M_PI), Q30(1.5702962875f/M_PI),
Q30(1.5530153513f/M_PI), Q30(1.5541347265f/M_PI), Q30(1.5558375120f/M_PI), Q30(1.5601085424f/M_PI), Q30(1.5642569065f/M_PI), Q30(1.5699069500f/M_PI),
Q30(1.5391840935f/M_PI), Q30(1.5411708355f/M_PI), Q30(1.5441943407f/M_PI), Q30(1.5517836809f/M_PI), Q30(1.5591609478f/M_PI), Q30(1.5692136288f/M_PI),
Q30(1.5146213770f/M_PI), Q30(1.5181334019f/M_PI), Q30(1.5234849453f/M_PI), Q30(1.5369486809f/M_PI), Q30(1.5500687361f/M_PI), Q30(1.5679757595f/M_PI),
Q30(1.4915299416f/M_PI), Q30(1.4964480400f/M_PI), Q30(1.5039558411f/M_PI), Q30(1.5229074955f/M_PI), Q30(1.5414420366f/M_PI), Q30(1.5667995214f/M_PI),
Q30(1.4590617418f/M_PI), Q30(1.4658898115f/M_PI), Q30(1.4763505459f/M_PI), Q30(1.5029321909f/M_PI), Q30(1.5291173458f/M_PI), Q30(1.5651149750f/M_PI),
Q30(1.4136143923f/M_PI), Q30(1.4229322672f/M_PI), Q30(1.4373078346f/M_PI), Q30(1.4743183851f/M_PI), Q30(1.5113102198f/M_PI), Q30(1.5626684427f/M_PI),
Q30(1.3505556583f/M_PI), Q30(1.3628427982f/M_PI), Q30(1.3820509911f/M_PI), Q30(1.4327841997f/M_PI), Q30(1.4850014448f/M_PI), Q30(1.5590143204f/M_PI),
Q30(1.2645189762f/M_PI), Q30(1.2796478271f/M_PI), Q30(1.3038636446f/M_PI), Q30(1.3710125685f/M_PI), Q30(1.4443849325f/M_PI), Q30(1.5532352924f/M_PI),
Q30(1.1919227839f/M_PI), Q30(1.2081253529f/M_PI), Q30(1.2346779108f/M_PI), Q30(1.3123005629f/M_PI), Q30(1.4034168720f/M_PI), Q30(1.5471596718f/M_PI),
Q30(1.1061993837f/M_PI), Q30(1.1219338179f/M_PI), Q30(1.1484941244f/M_PI), Q30(1.2320860624f/M_PI), Q30(1.3421301842f/M_PI), Q30(1.5373806953f/M_PI),
Q30(1.0079245567f/M_PI), Q30(1.0208238363f/M_PI), Q30(1.0433073044f/M_PI), Q30(1.1208510399f/M_PI), Q30(1.2424604893f/M_PI), Q30(1.5185726881f/M_PI),
Q30(0.8995233774f/M_PI), Q30(0.9069069624f/M_PI), Q30(0.9201194048f/M_PI), Q30(0.9698365927f/M_PI), Q30(1.0671583414f/M_PI), Q30(1.4647934437f/M_PI),
Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI),
Q30(0.6712729335f/M_PI), Q30(0.6638893485f/M_PI), Q30(0.6506769061f/M_PI), Q30(0.6009597182f/M_PI), Q30(0.5036380291f/M_PI), Q30(0.1060028747f/M_PI),
Q30(0.5628717542f/M_PI), Q30(0.5499725342f/M_PI), Q30(0.5274890065f/M_PI), Q30(0.4499453008f/M_PI), Q30(0.3283358216f/M_PI), Q30(0.0522236861f/M_PI),
Q30(0.4645969570f/M_PI), Q30(0.4488625824f/M_PI), Q30(0.4223022461f/M_PI), Q30(0.3387103081f/M_PI), Q30(0.2286661267f/M_PI), Q30(0.0334156826f/M_PI),
Q30(0.3788735867f/M_PI), Q30(0.3626709878f/M_PI), Q30(0.3361184299f/M_PI), Q30(0.2584958076f/M_PI), Q30(0.1673794836f/M_PI), Q30(0.0236366931f/M_PI),
Q30(0.3062773645f/M_PI), Q30(0.2911485136f/M_PI), Q30(0.2669326365f/M_PI), Q30(0.1997837722f/M_PI), Q30(0.1264114529f/M_PI), Q30(0.0175609849f/M_PI),
Q30(0.2202406377f/M_PI), Q30(0.2079535723f/M_PI), Q30(0.1887452900f/M_PI), Q30(0.1380121708f/M_PI), Q30(0.0857949182f/M_PI), Q30(0.0117820343f/M_PI),
Q30(0.1571819335f/M_PI), Q30(0.1478640437f/M_PI), Q30(0.1334884763f/M_PI), Q30(0.0964778885f/M_PI), Q30(0.0594860613f/M_PI), Q30(0.0081279324f/M_PI),
Q30(0.1117345318f/M_PI), Q30(0.1049065739f/M_PI), Q30(0.0944457650f/M_PI), Q30(0.0678641573f/M_PI), Q30(0.0416790098f/M_PI), Q30(0.0056813755f/M_PI),
Q30(0.0792663917f/M_PI), Q30(0.0743482932f/M_PI), Q30(0.0668405443f/M_PI), Q30(0.0478888862f/M_PI), Q30(0.0293543357f/M_PI), Q30(0.0039967746f/M_PI),
Q30(0.0561749674f/M_PI), Q30(0.0526629239f/M_PI), Q30(0.0473113805f/M_PI), Q30(0.0338476151f/M_PI), Q30(0.0207276177f/M_PI), Q30(0.0028205961f/M_PI),
Q30(0.0316122435f/M_PI), Q30(0.0296254847f/M_PI), Q30(0.0266019460f/M_PI), Q30(0.0190126132f/M_PI), Q30(0.0116353342f/M_PI), Q30(0.0015827164f/M_PI),
Q30(0.0177809205f/M_PI), Q30(0.0166615788f/M_PI), Q30(0.0149587989f/M_PI), Q30(0.0106877899f/M_PI), Q30(0.0065393616f/M_PI), Q30(0.0008894200f/M_PI),
Q30(0.0099996664f/M_PI), Q30(0.0093698399f/M_PI), Q30(0.0084118480f/M_PI), Q30(0.0060095116f/M_PI), Q30(0.0036767013f/M_PI), Q30(0.0005000498f/M_PI),
Q30(0.0056233541f/M_PI), Q30(0.0052691097f/M_PI), Q30(0.0047303112f/M_PI), Q30(0.0033792770f/M_PI), Q30(0.0020674451f/M_PI), Q30(0.0002811795f/M_PI),
Q30(0.0031622672f/M_PI), Q30(0.0029630491f/M_PI), Q30(0.0026600463f/M_PI), Q30(0.0019002859f/M_PI), Q30(0.0011625893f/M_PI), Q30(0.0001581155f/M_PI)
};
static const int gamma_tab[] =
{
Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0433459543f/M_PI), Q30(0.0672172382f/M_PI), Q30(0.0997167900f/M_PI), Q30(0.1162951663f/M_PI), Q30(0.1250736862f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0672341362f/M_PI), Q30(0.1045235619f/M_PI), Q30(0.1558904350f/M_PI), Q30(0.1824723780f/M_PI), Q30(0.1966800541f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1315985769f/M_PI), Q30(0.2072522491f/M_PI), Q30(0.3188187480f/M_PI), Q30(0.3825501204f/M_PI), Q30(0.4193951190f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1784276664f/M_PI), Q30(0.2856673002f/M_PI), Q30(0.4630723596f/M_PI), Q30(0.5971632004f/M_PI), Q30(0.7603877187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1315985769f/M_PI), Q30(0.2072522491f/M_PI), Q30(0.3188187480f/M_PI), Q30(0.3825501204f/M_PI), Q30(0.4193951190f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0672341362f/M_PI), Q30(0.1045235619f/M_PI), Q30(0.1558904350f/M_PI), Q30(0.1824723780f/M_PI), Q30(0.1966800541f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0433459543f/M_PI), Q30(0.0672172382f/M_PI), Q30(0.0997167900f/M_PI), Q30(0.1162951663f/M_PI), Q30(0.1250736862f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0011053939f/M_PI), Q30(0.0017089852f/M_PI), Q30(0.0025254129f/M_PI), Q30(0.0029398468f/M_PI), Q30(0.0031597170f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0019607407f/M_PI), Q30(0.0030395309f/M_PI), Q30(0.0044951206f/M_PI), Q30(0.0052305623f/M_PI), Q30(0.0056152637f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0034913034f/M_PI), Q30(0.0054070661f/M_PI), Q30(0.0079917293f/M_PI), Q30(0.0092999367f/M_PI), Q30(0.0099875759f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0062100487f/M_PI), Q30(0.0096135242f/M_PI), Q30(0.0142110568f/M_PI), Q30(0.0165348612f/M_PI), Q30(0.0177587029f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0110366223f/M_PI), Q30(0.0170863140f/M_PI), Q30(0.0252620988f/M_PI), Q30(0.0293955617f/M_PI), Q30(0.0315726399f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0275881495f/M_PI), Q30(0.0427365713f/M_PI), Q30(0.0632618815f/M_PI), Q30(0.0736731067f/M_PI), Q30(0.0791663304f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0387469754f/M_PI), Q30(0.0600636788f/M_PI), Q30(0.0890387669f/M_PI), Q30(0.1037906483f/M_PI), Q30(0.1115923747f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0541138873f/M_PI), Q30(0.0839984417f/M_PI), Q30(0.1248718798f/M_PI), Q30(0.1458375156f/M_PI), Q30(0.1569785923f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0747506917f/M_PI), Q30(0.1163287833f/M_PI), Q30(0.1738867164f/M_PI), Q30(0.2038587779f/M_PI), Q30(0.2199459076f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1212290376f/M_PI), Q30(0.1903949380f/M_PI), Q30(0.2907958031f/M_PI), Q30(0.3466993868f/M_PI), Q30(0.3782821596f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1418247074f/M_PI), Q30(0.2240308374f/M_PI), Q30(0.3474813402f/M_PI), Q30(0.4202919006f/M_PI), Q30(0.4637607038f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1784276664f/M_PI), Q30(0.2856673002f/M_PI), Q30(0.4630723596f/M_PI), Q30(0.5971632004f/M_PI), Q30(0.7603877187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1418247074f/M_PI), Q30(0.2240308374f/M_PI), Q30(0.3474813402f/M_PI), Q30(0.4202919006f/M_PI), Q30(0.4637607038f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1212290376f/M_PI), Q30(0.1903949380f/M_PI), Q30(0.2907958031f/M_PI), Q30(0.3466993868f/M_PI), Q30(0.3782821596f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0747506917f/M_PI), Q30(0.1163287833f/M_PI), Q30(0.1738867164f/M_PI), Q30(0.2038587779f/M_PI), Q30(0.2199459076f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0541138873f/M_PI), Q30(0.0839984417f/M_PI), Q30(0.1248718798f/M_PI), Q30(0.1458375156f/M_PI), Q30(0.1569785923f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0387469754f/M_PI), Q30(0.0600636788f/M_PI), Q30(0.0890387669f/M_PI), Q30(0.1037906483f/M_PI), Q30(0.1115923747f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0275881495f/M_PI), Q30(0.0427365713f/M_PI), Q30(0.0632618815f/M_PI), Q30(0.0736731067f/M_PI), Q30(0.0791663304f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0110366223f/M_PI), Q30(0.0170863140f/M_PI), Q30(0.0252620988f/M_PI), Q30(0.0293955617f/M_PI), Q30(0.0315726399f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0062100487f/M_PI), Q30(0.0096135242f/M_PI), Q30(0.0142110568f/M_PI), Q30(0.0165348612f/M_PI), Q30(0.0177587029f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0034913034f/M_PI), Q30(0.0054070661f/M_PI), Q30(0.0079917293f/M_PI), Q30(0.0092999367f/M_PI), Q30(0.0099875759f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0019607407f/M_PI), Q30(0.0030395309f/M_PI), Q30(0.0044951206f/M_PI), Q30(0.0052305623f/M_PI), Q30(0.0056152637f/M_PI),
Q30(0.0000000000f/M_PI), Q30(0.0011053939f/M_PI), Q30(0.0017089852f/M_PI), Q30(0.0025254129f/M_PI), Q30(0.0029398468f/M_PI), Q30(0.0031597170f/M_PI)
};
static const int iid_par_dequant_c1[] = {
//iid_par_dequant_default
Q30(1.41198278375959f), Q30(1.40313815268360f), Q30(1.38687670404960f), Q30(1.34839972492648f),
Q30(1.29124937110028f), Q30(1.19603741667993f), Q30(1.10737240362323f), Q30(1),
Q30(0.87961716655242f), Q30(0.75464859232732f), Q30(0.57677990744575f), Q30(0.42640143271122f),
Q30(0.27671828230984f), Q30(0.17664462766713f), Q30(0.07940162697653f),
//iid_par_dequant_fine
Q30(1.41420649135832f), Q30(1.41419120222364f), Q30(1.41414285699784f), Q30(1.41399000859438f),
Q30(1.41350698548044f), Q30(1.41198278375959f), Q30(1.40977302262355f), Q30(1.40539479488545f),
Q30(1.39677960498402f), Q30(1.38005309967827f), Q30(1.34839972492648f), Q30(1.31392017367631f),
Q30(1.26431008149654f), Q30(1.19603741667993f), Q30(1.10737240362323f), Q30(1),
Q30(0.87961716655242f), Q30(0.75464859232732f), Q30(0.63365607219232f), Q30(0.52308104267543f),
Q30(0.42640143271122f), Q30(0.30895540465965f), Q30(0.22137464873077f), Q30(0.15768788954414f),
Q30(0.11198225164225f), Q30(0.07940162697653f), Q30(0.04469901562677f), Q30(0.02514469318284f),
Q30(0.01414142856998f), Q30(0.00795258154731f), Q30(0.00447211359449f),
};
static const int acos_icc_invq[] = {
Q31(0), Q31(0.178427635f/M_PI), Q31(0.28566733f/M_PI), Q31(0.46307236f/M_PI), Q31(0.59716315f/M_PI), Q31(0.78539816f/M_PI), Q31(1.10030855f/M_PI), Q31(1.57079633f/M_PI)
};
int iid, icc;
int k, m;
static const int8_t f_center_20[] = {
-3, -1, 1, 3, 5, 7, 10, 14, 18, 22,
};
static const int32_t f_center_34[] = {
Q31( 2/768.0),Q31( 6/768.0),Q31(10/768.0),Q31(14/768.0),Q31( 18/768.0),Q31( 22/768.0),Q31( 26/768.0),Q31(30/768.0),
Q31( 34/768.0),Q31(-10/768.0),Q31(-6/768.0),Q31(-2/768.0),Q31( 51/768.0),Q31( 57/768.0),Q31( 15/768.0),Q31(21/768.0),
Q31( 27/768.0),Q31( 33/768.0),Q31(39/768.0),Q31(45/768.0),Q31( 54/768.0),Q31( 66/768.0),Q31( 78/768.0),Q31(42/768.0),
Q31(102/768.0),Q31( 66/768.0),Q31(78/768.0),Q31(90/768.0),Q31(102/768.0),Q31(114/768.0),Q31(126/768.0),Q31(90/768.0)
};
static const int fractional_delay_links[] = { Q31(0.43f), Q31(0.75f), Q31(0.347f) };
const int fractional_delay_gain = Q31(0.39f);
for (pd0 = 0; pd0 < 8; pd0++) {
int pd0_re = (ipdopd_cos[pd0]+2)>>2;
int pd0_im = (ipdopd_sin[pd0]+2)>>2;
for (pd1 = 0; pd1 < 8; pd1++) {
int pd1_re = ipdopd_cos[pd1] >> 1;
int pd1_im = ipdopd_sin[pd1] >> 1;
for (pd2 = 0; pd2 < 8; pd2++) {
int shift, round;
int pd2_re = ipdopd_cos[pd2];
int pd2_im = ipdopd_sin[pd2];
int re_smooth = pd0_re + pd1_re + pd2_re;
int im_smooth = pd0_im + pd1_im + pd2_im;
SoftFloat pd_mag = av_int2sf(((ipdopd_cos[(pd0-pd1)&7]+8)>>4) + ((ipdopd_cos[(pd0-pd2)&7]+4)>>3) +
((ipdopd_cos[(pd1-pd2)&7]+2)>>2) + 0x15000000, 28);
pd_mag = av_div_sf(FLOAT_1, av_sqrt_sf(pd_mag));
shift = 30 - pd_mag.exp;
round = 1 << (shift-1);
pd_re_smooth[pd0*64+pd1*8+pd2] = (int)(((int64_t)re_smooth * pd_mag.mant + round) >> shift);
pd_im_smooth[pd0*64+pd1*8+pd2] = (int)(((int64_t)im_smooth * pd_mag.mant + round) >> shift);
}
}
}
idx = 0;
for (iid = 0; iid < 46; iid++) {
int c1, c2;
c1 = iid_par_dequant_c1[iid];
if (iid < 15)
c2 = iid_par_dequant_c1[14-iid];
else
c2 = iid_par_dequant_c1[60-iid];
for (icc = 0; icc < 8; icc++) {
/*if (PS_BASELINE || ps->icc_mode < 3)*/{
int alpha, beta;
int ca, sa, cb, sb;
alpha = acos_icc_invq[icc];
beta = (int)(((int64_t)alpha * 1518500250 + 0x40000000) >> 31);
alpha >>= 1;
beta = (int)(((int64_t)beta * (c1 - c2) + 0x40000000) >> 31);
av_sincos_sf(beta + alpha, &sa, &ca);
av_sincos_sf(beta - alpha, &sb, &cb);
HA[iid][icc][0] = (int)(((int64_t)c2 * ca + 0x20000000) >> 30);
HA[iid][icc][1] = (int)(((int64_t)c1 * cb + 0x20000000) >> 30);
HA[iid][icc][2] = (int)(((int64_t)c2 * sa + 0x20000000) >> 30);
HA[iid][icc][3] = (int)(((int64_t)c1 * sb + 0x20000000) >> 30);
} /* else */ {
int alpha_int, gamma_int;
int alpha_c_int, alpha_s_int, gamma_c_int, gamma_s_int;
alpha_int = alpha_tab[idx];
gamma_int = gamma_tab[idx];
av_sincos_sf(alpha_int, &alpha_s_int, &alpha_c_int);
av_sincos_sf(gamma_int, &gamma_s_int, &gamma_c_int);
alpha_c_int = (int)(((int64_t)alpha_c_int * 1518500250 + 0x20000000) >> 30);
alpha_s_int = (int)(((int64_t)alpha_s_int * 1518500250 + 0x20000000) >> 30);
HB[iid][icc][0] = (int)(((int64_t)alpha_c_int * gamma_c_int + 0x20000000) >> 30);
HB[iid][icc][1] = (int)(((int64_t)alpha_s_int * gamma_c_int + 0x20000000) >> 30);
HB[iid][icc][2] = -(int)(((int64_t)alpha_s_int * gamma_s_int + 0x20000000) >> 30);
HB[iid][icc][3] = (int)(((int64_t)alpha_c_int * gamma_s_int + 0x20000000) >> 30);
}
if (icc < 5 || icc > 6)
idx++;
}
}
for (k = 0; k < NR_ALLPASS_BANDS20; k++) {
int theta;
int64_t f_center;
int c, s;
if (k < FF_ARRAY_ELEMS(f_center_20))
f_center = f_center_20[k];
else
f_center = (k << 3) - 52;
for (m = 0; m < PS_AP_LINKS; m++) {
theta = (int)(((int64_t)fractional_delay_links[m] * f_center + 8) >> 4);
av_sincos_sf(-theta, &s, &c);
Q_fract_allpass[0][k][m][0] = c;
Q_fract_allpass[0][k][m][1] = s;
}
theta = (int)(((int64_t)fractional_delay_gain * f_center + 8) >> 4);
av_sincos_sf(-theta, &s, &c);
phi_fract[0][k][0] = c;
phi_fract[0][k][1] = s;
}
for (k = 0; k < NR_ALLPASS_BANDS34; k++) {
int theta, f_center;
int c, s;
if (k < FF_ARRAY_ELEMS(f_center_34))
f_center = f_center_34[k];
else
f_center = ((int64_t)k << 26) - (53 << 25);
for (m = 0; m < PS_AP_LINKS; m++) {
theta = (int)(((int64_t)fractional_delay_links[m] * f_center + 0x10000000) >> 27);
av_sincos_sf(-theta, &s, &c);
Q_fract_allpass[1][k][m][0] = c;
Q_fract_allpass[1][k][m][1] = s;
}
theta = (int)(((int64_t)fractional_delay_gain * f_center + 0x10000000) >> 27);
av_sincos_sf(-theta, &s, &c);
phi_fract[1][k][0] = c;
phi_fract[1][k][1] = s;
}
make_filters_from_proto(f20_0_8, g0_Q8, 8);
make_filters_from_proto(f34_0_12, g0_Q12, 12);
make_filters_from_proto(f34_1_8, g1_Q8, 8);
make_filters_from_proto(f34_2_4, g2_Q4, 4);
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AACPS_FIXED_TABLEGEN_H */

View File

@@ -0,0 +1,24 @@
/*
* MPEG-4 Parametric Stereo decoding functions
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 0
#include "aacps.c"

View File

@@ -0,0 +1,24 @@
/*
* Generate a header file for hardcoded Parametric Stereo tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 0
#include "aacps_tablegen_template.c"

View File

@@ -0,0 +1,217 @@
/*
* Header file for hardcoded Parametric Stereo tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AACPS_TABLEGEN_H
#define AACPS_TABLEGEN_H
#include <math.h>
#include <stdint.h>
#if CONFIG_HARDCODED_TABLES
#define ps_tableinit()
#define TABLE_CONST const
#include "libavcodec/aacps_tables.h"
#else
#include "libavutil/common.h"
#include "libavutil/libm.h"
#include "libavutil/mathematics.h"
#include "libavutil/mem.h"
#define NR_ALLPASS_BANDS20 30
#define NR_ALLPASS_BANDS34 50
#define PS_AP_LINKS 3
#define TABLE_CONST
/* Run-time tables filled by ps_tableinit() when CONFIG_HARDCODED_TABLES is 0. */
/* Unit-length smoothed phase vectors, indexed by pd0*64 + pd1*8 + pd2. */
static float pd_re_smooth[8*8*8];
static float pd_im_smooth[8*8*8];
/* Mixing coefficients indexed [iid][icc][0..3]; ps_tableinit() fills HA and HB
 * with two different formulas for the same (iid, icc) pair. */
static float HA[46][8][4];
static float HB[46][8][4];
/* Complex filter banks built by make_filters_from_proto():
 * [band][tap][0 = real, 1 = imaginary]; only taps 0..6 are written. */
static DECLARE_ALIGNED(16, float, f20_0_8) [ 8][8][2];
static DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2];
static DECLARE_ALIGNED(16, float, f34_1_8) [ 8][8][2];
static DECLARE_ALIGNED(16, float, f34_2_4) [ 4][8][2];
/* All-pass phase factors: [0 = loop over NR_ALLPASS_BANDS20, 1 = loop over
 * NR_ALLPASS_BANDS34][band k][link m][0 = cos, 1 = sin]. */
static TABLE_CONST DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2];
/* Same indexing without the link dimension: [set][band k][0 = cos, 1 = sin]. */
static DECLARE_ALIGNED(16, float, phi_fract)[2][50][2];
/* Real prototype filters, 7 taps each, consumed by make_filters_from_proto().
 * NOTE(review): presumably the first half (including the centre tap) of a
 * symmetric 13-tap prototype -- confirm against the filter users. */
static const float g0_Q8[] = {
0.00746082949812f, 0.02270420949825f, 0.04546865930473f, 0.07266113929591f,
0.09885108575264f, 0.11793710567217f, 0.125f
};
static const float g0_Q12[] = {
0.04081179924692f, 0.03812810994926f, 0.05144908135699f, 0.06399831151592f,
0.07428313801106f, 0.08100347892914f, 0.08333333333333f
};
static const float g1_Q8[] = {
0.01565675600122f, 0.03752716391991f, 0.05417891378782f, 0.08417044116767f,
0.10307344158036f, 0.12222452249753f, 0.125f
};
static const float g2_Q4[] = {
-0.05908211155639f, -0.04871498374946f, 0.0f, 0.07778723915851f,
0.16486303567403f, 0.23279856662996f, 0.25f
};
/**
 * Build a bank of complex filters by modulating a real prototype.
 *
 * For every band and each of the 7 prototype taps, the tap is multiplied by
 * exp(-i * theta) with theta = 2*pi * (band + 0.5) * (tap - 6) / bands.
 * The real part is stored in filter[band][tap][0] and the imaginary part in
 * filter[band][tap][1]. Tap index 7 of every band is not written.
 *
 * @param filter destination bank, one [8][2] slice per band
 * @param proto  real prototype, at least 7 coefficients
 * @param bands  number of bands to generate (also the modulation divisor)
 */
static av_cold void make_filters_from_proto(float (*filter)[8][2], const float *proto, int bands)
{
    int band, tap;

    for (band = 0; band < bands; band++) {
        float (*taps)[2] = filter[band];

        for (tap = 0; tap < 7; tap++) {
            /* angle is computed in double, exactly as the products below */
            double theta = 2 * M_PI * (band + 0.5) * (tap - 6) / bands;

            taps[tap][0] = proto[tap] * cos(theta);
            taps[tap][1] = proto[tap] * -sin(theta);
        }
    }
}
/**
 * Fill the floating-point Parametric Stereo tables at run time:
 * pd_re_smooth / pd_im_smooth, HA, HB, Q_fract_allpass, phi_fract and the
 * four filter banks f20_0_8, f34_0_12, f34_1_8 and f34_2_4.
 * Takes no arguments and returns nothing; all results go to the file-scope
 * tables named above.
 */
static av_cold void ps_tableinit(void)
{
/* sin and cos of k*pi/4 for k = 0..7 */
static const float ipdopd_sin[] = { 0, M_SQRT1_2, 1, M_SQRT1_2, 0, -M_SQRT1_2, -1, -M_SQRT1_2 };
static const float ipdopd_cos[] = { 1, M_SQRT1_2, 0, -M_SQRT1_2, -1, -M_SQRT1_2, 0, M_SQRT1_2 };
int pd0, pd1, pd2;
/* 15 "default" entries followed by 31 "fine" entries: 46 in total, which is
 * the first dimension of HA and HB. */
static const float iid_par_dequant[] = {
//iid_par_dequant_default
0.05623413251903, 0.12589254117942, 0.19952623149689, 0.31622776601684,
0.44668359215096, 0.63095734448019, 0.79432823472428, 1,
1.25892541179417, 1.58489319246111, 2.23872113856834, 3.16227766016838,
5.01187233627272, 7.94328234724282, 17.7827941003892,
//iid_par_dequant_fine
0.00316227766017, 0.00562341325190, 0.01, 0.01778279410039,
0.03162277660168, 0.05623413251903, 0.07943282347243, 0.11220184543020,
0.15848931924611, 0.22387211385683, 0.31622776601684, 0.39810717055350,
0.50118723362727, 0.63095734448019, 0.79432823472428, 1,
1.25892541179417, 1.58489319246111, 1.99526231496888, 2.51188643150958,
3.16227766016838, 4.46683592150963, 6.30957344480193, 8.91250938133745,
12.5892541179417, 17.7827941003892, 31.6227766016838, 56.2341325190349,
100, 177.827941003892, 316.227766016837,
};
static const float icc_invq[] = {
1, 0.937, 0.84118, 0.60092, 0.36764, 0, -0.589, -1
};
/* The values are the arccosine of the matching icc_invq entry. */
static const float acos_icc_invq[] = {
0, 0.35685527, 0.57133466, 0.92614472, 1.1943263, M_PI/2, 2.2006171, M_PI
};
int iid, icc;
int k, m;
/* Explicit centre values for the first bands of each all-pass set; bands past
 * the end of a table use a linear formula in the loops below. */
static const int8_t f_center_20[] = {
-3, -1, 1, 3, 5, 7, 10, 14, 18, 22,
};
static const int8_t f_center_34[] = {
2, 6, 10, 14, 18, 22, 26, 30,
34,-10, -6, -2, 51, 57, 15, 21,
27, 33, 39, 45, 54, 66, 78, 42,
102, 66, 78, 90,102,114,126, 90,
};
static const float fractional_delay_links[] = { 0.43f, 0.75f, 0.347f };
const float fractional_delay_gain = 0.39f;
/* Phase smoothing: weighted sum of three unit vectors (weights 1/4, 1/2, 1),
 * normalised to unit magnitude, for every combination of the 8 phase steps. */
for (pd0 = 0; pd0 < 8; pd0++) {
float pd0_re = ipdopd_cos[pd0];
float pd0_im = ipdopd_sin[pd0];
for (pd1 = 0; pd1 < 8; pd1++) {
float pd1_re = ipdopd_cos[pd1];
float pd1_im = ipdopd_sin[pd1];
for (pd2 = 0; pd2 < 8; pd2++) {
float pd2_re = ipdopd_cos[pd2];
float pd2_im = ipdopd_sin[pd2];
float re_smooth = 0.25f * pd0_re + 0.5f * pd1_re + pd2_re;
float im_smooth = 0.25f * pd0_im + 0.5f * pd1_im + pd2_im;
float pd_mag = 1 / sqrt(im_smooth * im_smooth + re_smooth * re_smooth);
pd_re_smooth[pd0*64+pd1*8+pd2] = re_smooth * pd_mag;
pd_im_smooth[pd0*64+pd1*8+pd2] = im_smooth * pd_mag;
}
}
}
/* Mixing coefficients for every (iid, icc) pair. */
for (iid = 0; iid < 46; iid++) {
float c = iid_par_dequant[iid]; ///< Linear Inter-channel Intensity Difference
/* c1 = sqrt(2 / (1 + c^2)) and c2 = c * c1, so c1^2 + c2^2 == 2 */
float c1 = (float)M_SQRT2 / sqrtf(1.0f + c*c);
float c2 = c * c1;
for (icc = 0; icc < 8; icc++) {
/* Both braces below always run: the commented-out condition is kept only as
 * a hint of which table is selected at decode time. */
/*if (PS_BASELINE || ps->icc_mode < 3)*/ {
float alpha = 0.5f * acos_icc_invq[icc];
float beta = alpha * (c1 - c2) * (float)M_SQRT1_2;
HA[iid][icc][0] = c2 * cosf(beta + alpha);
HA[iid][icc][1] = c1 * cosf(beta - alpha);
HA[iid][icc][2] = c2 * sinf(beta + alpha);
HA[iid][icc][3] = c1 * sinf(beta - alpha);
} /* else */ {
float alpha, gamma, mu, rho;
float alpha_c, alpha_s, gamma_c, gamma_s;
/* the correlation is clamped from below at 0.05 */
rho = FFMAX(icc_invq[icc], 0.05f);
alpha = 0.5f * atan2f(2.0f * c * rho, c*c - 1.0f);
mu = c + 1.0f / c;
mu = sqrtf(1 + (4 * rho * rho - 4)/(mu * mu));
gamma = atanf(sqrtf((1.0f - mu)/(1.0f + mu)));
/* negative angles are shifted up by pi/2 */
if (alpha < 0) alpha += M_PI/2;
alpha_c = cosf(alpha);
alpha_s = sinf(alpha);
gamma_c = cosf(gamma);
gamma_s = sinf(gamma);
HB[iid][icc][0] = M_SQRT2 * alpha_c * gamma_c;
HB[iid][icc][1] = M_SQRT2 * alpha_s * gamma_c;
HB[iid][icc][2] = -M_SQRT2 * alpha_s * gamma_s;
HB[iid][icc][3] = M_SQRT2 * alpha_c * gamma_s;
}
}
}
/* Phase factors, set 0: table values are scaled by 1/8, later bands use k - 6.5. */
for (k = 0; k < NR_ALLPASS_BANDS20; k++) {
double f_center, theta;
if (k < FF_ARRAY_ELEMS(f_center_20))
f_center = f_center_20[k] * 0.125;
else
f_center = k - 6.5f;
for (m = 0; m < PS_AP_LINKS; m++) {
/* theta = -pi * delay * f_center; cos goes to [0], sin to [1] */
theta = -M_PI * fractional_delay_links[m] * f_center;
Q_fract_allpass[0][k][m][0] = cos(theta);
Q_fract_allpass[0][k][m][1] = sin(theta);
}
theta = -M_PI*fractional_delay_gain*f_center;
phi_fract[0][k][0] = cos(theta);
phi_fract[0][k][1] = sin(theta);
}
/* Phase factors, set 1: table values are scaled by 1/24, later bands use k - 26.5. */
for (k = 0; k < NR_ALLPASS_BANDS34; k++) {
double f_center, theta;
if (k < FF_ARRAY_ELEMS(f_center_34))
f_center = f_center_34[k] / 24.0;
else
f_center = k - 26.5f;
for (m = 0; m < PS_AP_LINKS; m++) {
theta = -M_PI * fractional_delay_links[m] * f_center;
Q_fract_allpass[1][k][m][0] = cos(theta);
Q_fract_allpass[1][k][m][1] = sin(theta);
}
theta = -M_PI*fractional_delay_gain*f_center;
phi_fract[1][k][0] = cos(theta);
phi_fract[1][k][1] = sin(theta);
}
/* Modulate each real prototype into its complex filter bank. */
make_filters_from_proto(f20_0_8, g0_Q8, 8);
make_filters_from_proto(f34_0_12, g0_Q12, 12);
make_filters_from_proto(f34_1_8, g1_Q8, 8);
make_filters_from_proto(f34_2_4, g2_Q4, 4);
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AACPS_TABLEGEN_H */

View File

@@ -0,0 +1,107 @@
/*
* Generate a header file for hardcoded Parametric Stereo tables
*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdlib.h>
#define CONFIG_HARDCODED_TABLES 0
#include "aac_defines.h"
#if USE_FIXED
#define TYPE_NAME "int32_t"
#define INT32FLOAT int32_t
#define ARRAY_RENAME(x) write_int32_t_ ## x
#define ARRAY_URENAME(x) write_uint32_t_ ## x
#include "aacps_fixed_tablegen.h"
#else
#define TYPE_NAME "float"
#define INT32FLOAT float
#define ARRAY_RENAME(x) write_float_ ## x
#define ARRAY_URENAME(x) write_float_ ## x
#include "aacps_tablegen.h"
#endif /* USE_FIXED */
#include "tableprint.h"
/**
 * Print a b x c x d array as b brace-delimited 2-D initialisers.
 *
 * @param p       first element of the array, read as INT32FLOAT
 * @param b, c, d array dimensions, outermost first
 */
void ARRAY_RENAME(3d_array) (const void *p, int b, int c, int d)
{
    const INT32FLOAT *slice = p;
    int left;

    /* each pass prints one c x d slice and then steps over it */
    for (left = b; left > 0; left--, slice += c * d) {
        printf("{\n");
        ARRAY_URENAME(2d_array)(slice, c, d);
        printf("},\n");
    }
}
/**
 * Print an a x b x c x d array as a brace-delimited 3-D initialisers.
 *
 * @param p          first element of the array, read as INT32FLOAT
 * @param a, b, c, d array dimensions, outermost first
 */
void ARRAY_RENAME(4d_array) (const void *p, int a, int b, int c, int d)
{
    const INT32FLOAT *cube = p;
    int left;

    /* each pass prints one b x c x d sub-array and then steps over it */
    for (left = a; left > 0; left--, cube += b * c * d) {
        printf("{\n");
        ARRAY_RENAME(3d_array)(cube, b, c, d);
        printf("},\n");
    }
}
/**
 * Table generator entry point: compute the Parametric Stereo tables and
 * print them to stdout as C initialisers, one declaration per table.
 */
int main(void)
{
    ps_tableinit();

    write_fileheader();

    /* flat phase-smoothing tables */
    printf("static const %s pd_re_smooth[8*8*8] = {\n", TYPE_NAME);
    ARRAY_RENAME(array)(pd_re_smooth, 8*8*8);
    puts("};");

    printf("static const %s pd_im_smooth[8*8*8] = {\n", TYPE_NAME);
    ARRAY_RENAME(array)(pd_im_smooth, 8*8*8);
    puts("};");

    /* mixing coefficient tables */
    printf("static const %s HA[46][8][4] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(HA, 46, 8, 4);
    puts("};");

    printf("static const %s HB[46][8][4] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(HB, 46, 8, 4);
    puts("};");

    /* filter banks */
    printf("static const DECLARE_ALIGNED(16, %s, f20_0_8)[8][8][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(f20_0_8, 8, 8, 2);
    puts("};");

    printf("static const DECLARE_ALIGNED(16, %s, f34_0_12)[12][8][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(f34_0_12, 12, 8, 2);
    puts("};");

    printf("static const DECLARE_ALIGNED(16, %s, f34_1_8)[8][8][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(f34_1_8, 8, 8, 2);
    puts("};");

    printf("static const DECLARE_ALIGNED(16, %s, f34_2_4)[4][8][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(f34_2_4, 4, 8, 2);
    puts("};");

    /* phase factor tables */
    printf("static const DECLARE_ALIGNED(16, %s, Q_fract_allpass)[2][50][3][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(4d_array)(Q_fract_allpass, 2, 50, 3, 2);
    puts("};");

    printf("static const DECLARE_ALIGNED(16, %s, phi_fract)[2][50][2] = {\n", TYPE_NAME);
    ARRAY_RENAME(3d_array)(phi_fract, 2, 50, 2);
    puts("};");

    return 0;
}

View File

@@ -0,0 +1,163 @@
/*
* MPEG-4 Parametric Stereo data tables
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Huffman tables come in pairs: NAME_bits[i] is the code length in bits of
 * entry i and NAME_codes[i] is the matching code word.
 * NOTE(review): "df"/"dt" presumably mean frequency- and time-differential
 * coding, and the 1/0 suffix the fine/default IID resolution -- confirm
 * against the code that builds the VLCs.
 */
/* IID, df1: 61 entries */
static const uint8_t huff_iid_df1_bits[] = {
18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 18, 17, 17, 16, 16, 15, 14, 14,
13, 12, 12, 11, 10, 10, 8, 7, 6, 5, 4, 3, 1, 3, 4, 5, 6, 7,
8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 16, 17, 17, 18, 17, 18, 18,
18, 18, 18, 18, 18, 18, 18,
};
static const uint32_t huff_iid_df1_codes[] = {
0x01FEB4, 0x01FEB5, 0x01FD76, 0x01FD77, 0x01FD74, 0x01FD75, 0x01FE8A,
0x01FE8B, 0x01FE88, 0x00FE80, 0x01FEB6, 0x00FE82, 0x00FEB8, 0x007F42,
0x007FAE, 0x003FAF, 0x001FD1, 0x001FE9, 0x000FE9, 0x0007EA, 0x0007FB,
0x0003FB, 0x0001FB, 0x0001FF, 0x00007C, 0x00003C, 0x00001C, 0x00000C,
0x000000, 0x000001, 0x000001, 0x000002, 0x000001, 0x00000D, 0x00001D,
0x00003D, 0x00007D, 0x0000FC, 0x0001FC, 0x0003FC, 0x0003F4, 0x0007EB,
0x000FEA, 0x001FEA, 0x001FD6, 0x003FD0, 0x007FAF, 0x007F43, 0x00FEB9,
0x00FE83, 0x01FEB7, 0x00FE81, 0x01FE89, 0x01FE8E, 0x01FE8F, 0x01FE8C,
0x01FE8D, 0x01FEB2, 0x01FEB3, 0x01FEB0, 0x01FEB1,
};
/* IID, dt1: 61 entries; the longest code is 16 bits, hence uint16_t codes */
static const uint8_t huff_iid_dt1_bits[] = {
16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14, 14, 13,
13, 13, 12, 12, 11, 10, 9, 9, 7, 6, 5, 3, 1, 2, 5, 6, 7, 8,
9, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16,
};
static const uint16_t huff_iid_dt1_codes[] = {
0x004ED4, 0x004ED5, 0x004ECE, 0x004ECF, 0x004ECC, 0x004ED6, 0x004ED8,
0x004F46, 0x004F60, 0x002718, 0x002719, 0x002764, 0x002765, 0x00276D,
0x0027B1, 0x0013B7, 0x0013D6, 0x0009C7, 0x0009E9, 0x0009ED, 0x0004EE,
0x0004F7, 0x000278, 0x000139, 0x00009A, 0x00009F, 0x000020, 0x000011,
0x00000A, 0x000003, 0x000001, 0x000000, 0x00000B, 0x000012, 0x000021,
0x00004C, 0x00009B, 0x00013A, 0x000279, 0x000270, 0x0004EF, 0x0004E2,
0x0009EA, 0x0009D8, 0x0013D7, 0x0013D0, 0x0027B2, 0x0027A2, 0x00271A,
0x00271B, 0x004F66, 0x004F67, 0x004F61, 0x004F47, 0x004ED9, 0x004ED7,
0x004ECD, 0x004ED2, 0x004ED3, 0x004ED0, 0x004ED1,
};
/* IID, df0: 29 entries */
static const uint8_t huff_iid_df0_bits[] = {
17, 17, 17, 17, 16, 15, 13, 10, 9, 7, 6, 5, 4, 3, 1, 3, 4, 5,
6, 6, 8, 11, 13, 14, 14, 15, 17, 18, 18,
};
static const uint32_t huff_iid_df0_codes[] = {
0x01FFFB, 0x01FFFC, 0x01FFFD, 0x01FFFA, 0x00FFFC, 0x007FFC, 0x001FFD,
0x0003FE, 0x0001FE, 0x00007E, 0x00003C, 0x00001D, 0x00000D, 0x000005,
0x000000, 0x000004, 0x00000C, 0x00001C, 0x00003D, 0x00003E, 0x0000FE,
0x0007FE, 0x001FFC, 0x003FFC, 0x003FFD, 0x007FFD, 0x01FFFE, 0x03FFFE,
0x03FFFF,
};
/* IID, dt0: 29 entries */
static const uint8_t huff_iid_dt0_bits[] = {
19, 19, 19, 20, 20, 20, 17, 15, 12, 10, 8, 6, 4, 2, 1, 3, 5, 7,
9, 11, 13, 14, 17, 19, 20, 20, 20, 20, 20,
};
static const uint32_t huff_iid_dt0_codes[] = {
0x07FFF9, 0x07FFFA, 0x07FFFB, 0x0FFFF8, 0x0FFFF9, 0x0FFFFA, 0x01FFFD,
0x007FFE, 0x000FFE, 0x0003FE, 0x0000FE, 0x00003E, 0x00000E, 0x000002,
0x000000, 0x000006, 0x00001E, 0x00007E, 0x0001FE, 0x0007FE, 0x001FFE,
0x003FFE, 0x01FFFC, 0x07FFF8, 0x0FFFFB, 0x0FFFFC, 0x0FFFFD, 0x0FFFFE,
0x0FFFFF,
};
/*
 * Huffman tables for the ICC, IPD and OPD parameters. As with the IID tables,
 * NAME_bits[i] is the length in bits of the code word NAME_codes[i].
 * NOTE(review): "df"/"dt" presumably mean frequency- and time-differential
 * coding -- confirm against the code that builds the VLCs.
 */
/* ICC: 15 entries per table */
static const uint8_t huff_icc_df_bits[] = {
14, 14, 12, 10, 7, 5, 3, 1, 2, 4, 6, 8, 9, 11, 13,
};
static const uint16_t huff_icc_df_codes[] = {
0x3FFF, 0x3FFE, 0x0FFE, 0x03FE, 0x007E, 0x001E, 0x0006, 0x0000,
0x0002, 0x000E, 0x003E, 0x00FE, 0x01FE, 0x07FE, 0x1FFE,
};
static const uint8_t huff_icc_dt_bits[] = {
14, 13, 11, 9, 7, 5, 3, 1, 2, 4, 6, 8, 10, 12, 14,
};
static const uint16_t huff_icc_dt_codes[] = {
0x3FFE, 0x1FFE, 0x07FE, 0x01FE, 0x007E, 0x001E, 0x0006, 0x0000,
0x0002, 0x000E, 0x003E, 0x00FE, 0x03FE, 0x0FFE, 0x3FFF,
};
/* IPD: 8 entries per table */
static const uint8_t huff_ipd_df_bits[] = {
1, 3, 4, 4, 4, 4, 4, 4,
};
static const uint8_t huff_ipd_df_codes[] = {
0x01, 0x00, 0x06, 0x04, 0x02, 0x03, 0x05, 0x07,
};
static const uint8_t huff_ipd_dt_bits[] = {
1, 3, 4, 5, 5, 4, 4, 3,
};
static const uint8_t huff_ipd_dt_codes[] = {
0x01, 0x02, 0x02, 0x03, 0x02, 0x00, 0x03, 0x03,
};
/* OPD: 8 entries per table */
static const uint8_t huff_opd_df_bits[] = {
1, 3, 4, 4, 5, 5, 4, 3,
};
static const uint8_t huff_opd_df_codes[] = {
0x01, 0x01, 0x06, 0x04, 0x0F, 0x0E, 0x05, 0x00,
};
static const uint8_t huff_opd_dt_bits[] = {
1, 3, 4, 5, 5, 4, 4, 3,
};
static const uint8_t huff_opd_dt_codes[] = {
0x01, 0x02, 0x01, 0x07, 0x06, 0x00, 0x02, 0x03,
};
/*
 * One offset per Huffman table, ten in total. The non-zero values equal the
 * middle index of a 61-, 29- and 15-entry table respectively (30, 14, 7).
 * NOTE(review): presumably listed in the same order as the tables above and
 * subtracted from the decoded index to obtain a signed delta -- confirm
 * against the decoder.
 */
static const int8_t huff_offset[] = {
30, 30,
14, 14,
7, 7,
0, 0,
0, 0,
};
/*
 * Index remapping tables (71 and 91 entries).
 * NOTE(review): judging by the names these map a sub-band index k to a
 * parameter band index i for the 20- and 34-band configurations, and the
 * table numbers presumably refer to the Parametric Stereo specification --
 * confirm against the users of these tables.
 */
///Table 8.48
static const int8_t k_to_i_20[] = {
1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15,
15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19
};
///Table 8.49
static const int8_t k_to_i_34[] = {
0, 1, 2, 3, 4, 5, 6, 6, 7, 2, 1, 0, 10, 10, 4, 5, 6, 7, 8,
9, 10, 11, 12, 9, 14, 11, 12, 13, 14, 15, 16, 13, 16, 17, 18, 19, 20, 21,
22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29,
30, 30, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33
};
/*
 * Seven filter coefficients wrapped in Q31() and stored as INTFLOAT.
 * NOTE(review): presumably the first half (including the centre tap 0.5) of a
 * symmetric real prototype for a 2-band split -- confirm against the
 * filtering code that reads it.
 */
static const INTFLOAT g1_Q2[] = {
Q31(0.0f), Q31(0.01899487526049f), Q31(0.0f), Q31(-0.07293139167538f),
Q31(0.0f), Q31(0.30596630545168f), Q31(0.5f)
};

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2012 Mans Rullgard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef LIBAVCODEC_AACPSDSP_H
#define LIBAVCODEC_AACPSDSP_H
#include "aac_defines.h"
#define PS_QMF_TIME_SLOTS 32
#define PS_AP_LINKS 3
#define PS_MAX_AP_DELAY 5
/**
 * Function table for the Parametric Stereo signal-processing kernels.
 * Complex samples are stored as [0] = real, [1] = imaginary.
 * The descriptions below follow the reference C implementations.
 */
typedef struct PSDSPContext {
/* dst[i] += re*re + im*im of src[i] (via AAC_MADD28), for i < n */
void (*add_squares)(INTFLOAT *dst, const INTFLOAT (*src)[2], int n);
/* dst[i] = src0[i] scaled by the real factor src1[i] (via AAC_MUL16), for i < n */
void (*mul_pair_single)(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1,
int n);
/* apply n filters to the 13 samples in[0..12]; result i goes to out[i * stride] */
void (*hybrid_analysis)(INTFLOAT (*out)[2], INTFLOAT (*in)[2],
const INTFLOAT (*filter)[8][2],
int stride, int n);
/* for bands i..63 and slots j < len: out[band][j] = { L[0][j][band], L[1][j][band] } */
void (*hybrid_analysis_ileave)(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
int i, int len);
/* inverse copy: out[0][n][band], out[1][n][band] = in[band][n], for bands i..63 and n < len */
void (*hybrid_synthesis_deint)(INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
int i, int len);
/* rotate delay[] by phi_fract, run it through PS_AP_LINKS all-pass links whose
 * history lives in ap_delay, then scale by transient_gain[] into out[] */
void (*decorrelate)(INTFLOAT (*out)[2], INTFLOAT (*delay)[2],
INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2],
const INTFLOAT *transient_gain,
INTFLOAT g_decay_slope,
int len);
/* in-place mix of l[] and r[] with coefficients h that grow by h_step per sample.
 * NOTE(review): presumably [0] is the variant using only h[0] and [1] the
 * IPD/OPD variant that also uses h[1] -- confirm in ff_psdsp_init(). */
void (*stereo_interpolate[2])(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
INTFLOAT h[2][4], INTFLOAT h_step[2][4],
int len);
} PSDSPContext;
void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s);
void ff_psdsp_init_arm(PSDSPContext *s);
void ff_psdsp_init_mips(PSDSPContext *s);
void ff_psdsp_init_x86(PSDSPContext *s);
#endif /* LIBAVCODEC_AACPSDSP_H */

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 1
#include "aacpsdsp_template.c"

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define USE_FIXED 0
#include "aacpsdsp_template.c"

View File

@@ -0,0 +1,230 @@
/*
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Note: Rounding-to-nearest used unless otherwise stated
*
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "aacpsdsp.h"
/**
 * Accumulate the squared magnitude of complex samples.
 *
 * For each of the n samples, dst[i] is increased by
 * AAC_MADD28(re, re, im, im), i.e. re*re + im*im in that macro's scaling.
 */
static void ps_add_squares_c(INTFLOAT *dst, const INTFLOAT (*src)[2], int n)
{
    for (; n > 0; n--, dst++, src++) {
        const INTFLOAT re = (*src)[0];
        const INTFLOAT im = (*src)[1];

        *dst += AAC_MADD28(re, re, im, im);
    }
}
/**
 * Scale n complex samples by per-sample real factors.
 *
 * Both components of src0[i] are multiplied by src1[i] with AAC_MUL16 and
 * written to dst[i].
 */
static void ps_mul_pair_single_c(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1,
                                 int n)
{
    int idx;

    for (idx = 0; idx < n; idx++) {
        const INTFLOAT *sample = src0[idx];
        INTFLOAT *result = dst[idx];

        /* the factor is re-read for each component, as in the plain indexed form */
        result[0] = AAC_MUL16(sample[0], src1[idx]);
        result[1] = AAC_MUL16(sample[1], src1[idx]);
    }
}
/**
 * Apply n complex filters to a window of 13 complex input samples.
 *
 * Only taps 0..6 of each filter are stored. Input samples j and 12-j share
 * tap j: the real coefficient is applied to their sum and the imaginary
 * coefficient to their difference. The centre sample in[6] uses only the real
 * coefficient filter[i][6][0].
 *
 * @param out    destination; result i is written to out[i * stride]
 * @param in     13 complex input samples, in[0]..in[12]
 * @param filter n filters of [8][2] coefficients (taps 0..6 are read)
 * @param stride distance, in complex samples, between consecutive outputs
 * @param n      number of filters to apply
 */
static void ps_hybrid_analysis_c(INTFLOAT (*out)[2], INTFLOAT (*in)[2],
                                 const INTFLOAT (*filter)[8][2],
                                 int stride, int n)
{
    int i, j;

    for (i = 0; i < n; i++) {
        INT64FLOAT sum_re = (INT64FLOAT)filter[i][6][0] * in[6][0];
        INT64FLOAT sum_im = (INT64FLOAT)filter[i][6][0] * in[6][1];

        for (j = 0; j < 6; j++) {
            /*
             * Load the samples in the accumulator type before adding or
             * subtracting them: in the fixed-point build the sum of two
             * INTFLOAT values can exceed the int range, which is undefined
             * behaviour if it is formed in the narrow type.
             */
            INT64FLOAT in0_re = in[j][0];
            INT64FLOAT in0_im = in[j][1];
            INT64FLOAT in1_re = in[12-j][0];
            INT64FLOAT in1_im = in[12-j][1];

            sum_re += (INT64FLOAT)filter[i][j][0] * (in0_re + in1_re) -
                      (INT64FLOAT)filter[i][j][1] * (in0_im - in1_im);
            sum_im += (INT64FLOAT)filter[i][j][0] * (in0_im + in1_im) +
                      (INT64FLOAT)filter[i][j][1] * (in0_re - in1_re);
        }
#if USE_FIXED
        /* round to nearest while dropping the 31 fractional bits */
        out[i * stride][0] = (int)((sum_re + 0x40000000) >> 31);
        out[i * stride][1] = (int)((sum_im + 0x40000000) >> 31);
#else
        out[i * stride][0] = sum_re;
        out[i * stride][1] = sum_im;
#endif /* USE_FIXED */
    }
}
/**
 * Gather two planes of L into interleaved pairs, one output row per band.
 *
 * For every band from i up to 63 and every slot below len:
 * out[band][slot] = { L[0][slot][band], L[1][slot][band] }.
 */
static void ps_hybrid_analysis_ileave_c(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
                                        int i, int len)
{
    int band, slot;

    for (band = i; band < 64; band++) {
        INTFLOAT (*row)[2] = out[band];

        for (slot = 0; slot < len; slot++) {
            row[slot][0] = L[0][slot][band];
            row[slot][1] = L[1][slot][band];
        }
    }
}
/**
 * Scatter interleaved pairs back into two separate planes.
 *
 * For every band from i up to 63 and every slot below len:
 * out[0][slot][band] = in[band][slot][0] and
 * out[1][slot][band] = in[band][slot][1].
 */
static void ps_hybrid_synthesis_deint_c(INTFLOAT out[2][38][64],
                                        INTFLOAT (*in)[32][2],
                                        int i, int len)
{
    int band, slot;

    for (band = i; band < 64; band++) {
        INTFLOAT (*row)[2] = in[band];

        for (slot = 0; slot < len; slot++) {
            out[0][slot][band] = row[slot][0];
            out[1][slot][band] = row[slot][1];
        }
    }
}
/**
 * Decorrelate len complex samples.
 *
 * Each delayed input sample is rotated by phi_fract and then passed through
 * PS_AP_LINKS chained all-pass links. Link m reads its history at
 * ap_delay[m][n + 2 - m], rotates it by Q_fract[m], and stores its new state
 * at ap_delay[m][n + 5]. The output of the last link is scaled by
 * transient_gain[n] and written to out[n].
 *
 * @param out            destination samples
 * @param delay          delayed input samples
 * @param ap_delay       per-link history buffers, updated in place
 * @param phi_fract      rotation applied to the input, { re, im }
 * @param Q_fract        rotation applied inside each link, { re, im } per link
 * @param transient_gain per-sample output gain
 * @param g_decay_slope  factor applied to every link coefficient
 * @param len            number of samples to process
 */
static void ps_decorrelate_c(INTFLOAT (*out)[2], INTFLOAT (*delay)[2],
                             INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
                             const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2],
                             const INTFLOAT *transient_gain,
                             INTFLOAT g_decay_slope,
                             int len)
{
    static const INTFLOAT link_coeff[] = { Q31(0.65143905753106f),
                                           Q31(0.56471812200776f),
                                           Q31(0.48954165955695f) };
    INTFLOAT link_gain[PS_AP_LINKS];
    int link, slot;

    for (link = 0; link < PS_AP_LINKS; link++)
        link_gain[link] = AAC_MUL30(link_coeff[link], g_decay_slope);

    for (slot = 0; slot < len; slot++) {
        INTFLOAT cur_re = AAC_MSUB30(delay[slot][0], phi_fract[0], delay[slot][1], phi_fract[1]);
        INTFLOAT cur_im = AAC_MADD30(delay[slot][0], phi_fract[1], delay[slot][1], phi_fract[0]);

        for (link = 0; link < PS_AP_LINKS; link++) {
            INTFLOAT (*history)[2] = ap_delay[link];
            INTFLOAT fb_re   = AAC_MUL31(link_gain[link], cur_re);
            INTFLOAT fb_im   = AAC_MUL31(link_gain[link], cur_im);
            INTFLOAT old_re  = history[slot + 2 - link][0];
            INTFLOAT old_im  = history[slot + 2 - link][1];
            INTFLOAT rot_re  = Q_fract[link][0];
            INTFLOAT rot_im  = Q_fract[link][1];
            INTFLOAT prev_re = cur_re;
            INTFLOAT prev_im = cur_im;

            /* link output: rotated history minus the scaled link input */
            cur_re  = AAC_MSUB30(old_re, rot_re,
                                 old_im, rot_im);
            cur_re -= fb_re;
            cur_im  = AAC_MADD30(old_re, rot_im,
                                 old_im, rot_re);
            cur_im -= fb_im;

            /* new history entry: link input plus the scaled link output */
            history[slot + 5][0] = prev_re + AAC_MUL31(link_gain[link], cur_re);
            history[slot + 5][1] = prev_im + AAC_MUL31(link_gain[link], cur_im);
        }
        out[slot][0] = AAC_MUL16(transient_gain[slot], cur_re);
        out[slot][1] = AAC_MUL16(transient_gain[slot], cur_im);
    }
}
/**
 * Mix the l and r pair streams in place with a 2x2 coefficient set that is
 * advanced by a fixed step before every sample.
 *
 * Only row 0 of h and h_step is read. For each sample the four coefficients
 * are first incremented by their steps, then
 *   l = c0 * l + c2 * r
 *   r = c1 * l + c3 * r
 * is evaluated on both elements of the pair, using the values of l and r
 * from before the update.
 *
 * @param l      first stream, len pairs, updated in place
 * @param r      second stream, len pairs, updated in place
 * @param h      starting coefficients (row 0 used)
 * @param h_step per-sample coefficient increments (row 0 used)
 * @param len    number of pairs to process
 */
static void ps_stereo_interpolate_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                                    INTFLOAT h[2][4], INTFLOAT h_step[2][4],
                                    int len)
{
    INTFLOAT c0 = h[0][0], step0 = h_step[0][0];
    INTFLOAT c1 = h[0][1], step1 = h_step[0][1];
    INTFLOAT c2 = h[0][2], step2 = h_step[0][2];
    INTFLOAT c3 = h[0][3], step3 = h_step[0][3];
    int pos;

    for (pos = 0; pos < len; pos++) {
        /* snapshot the inputs: both outputs depend on both old values */
        INTFLOAT s_re = l[pos][0];
        INTFLOAT s_im = l[pos][1];
        INTFLOAT d_re = r[pos][0];
        INTFLOAT d_im = r[pos][1];

        c0 += step0;
        c1 += step1;
        c2 += step2;
        c3 += step3;

        l[pos][0] = AAC_MADD30(c0, s_re, c2, d_re);
        l[pos][1] = AAC_MADD30(c0, s_im, c2, d_im);
        r[pos][0] = AAC_MADD30(c1, s_re, c3, d_re);
        r[pos][1] = AAC_MADD30(c1, s_im, c3, d_im);
    }
}
/**
 * Variant of the stereo mix that uses both rows of h and h_step.
 *
 * All eight coefficients are advanced by their steps before every sample;
 * the row-0 and row-1 coefficients are then handed, together with the
 * pre-update values of l and r, to AAC_MSUB30_V8 (first element of each
 * pair) and AAC_MADD30_V8 (second element).
 *
 * @param l      first stream, len pairs, updated in place
 * @param r      second stream, len pairs, updated in place
 * @param h      starting coefficients, rows 0 and 1
 * @param h_step per-sample coefficient increments, rows 0 and 1
 * @param len    number of pairs to process
 */
static void ps_stereo_interpolate_ipdopd_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
                                           INTFLOAT h[2][4], INTFLOAT h_step[2][4],
                                           int len)
{
    /* row 0 coefficients and steps */
    INTFLOAT a0 = h[0][0], a1 = h[0][1], a2 = h[0][2], a3 = h[0][3];
    INTFLOAT da0 = h_step[0][0], da1 = h_step[0][1];
    INTFLOAT da2 = h_step[0][2], da3 = h_step[0][3];
    /* row 1 coefficients and steps */
    INTFLOAT b0 = h[1][0], b1 = h[1][1], b2 = h[1][2], b3 = h[1][3];
    INTFLOAT db0 = h_step[1][0], db1 = h_step[1][1];
    INTFLOAT db2 = h_step[1][2], db3 = h_step[1][3];
    int pos;

    for (pos = 0; pos < len; pos++) {
        /* snapshot the inputs: every output depends on all four old values */
        INTFLOAT s_re = l[pos][0];
        INTFLOAT s_im = l[pos][1];
        INTFLOAT d_re = r[pos][0];
        INTFLOAT d_im = r[pos][1];

        a0 += da0;
        a1 += da1;
        a2 += da2;
        a3 += da3;
        b0 += db0;
        b1 += db1;
        b2 += db2;
        b3 += db3;

        l[pos][0] = AAC_MSUB30_V8(a0, s_re, a2, d_re, b0, s_im, b2, d_im);
        l[pos][1] = AAC_MADD30_V8(a0, s_im, a2, d_im, b0, s_re, b2, d_re);
        r[pos][0] = AAC_MSUB30_V8(a1, s_re, a3, d_re, b1, s_im, b3, d_im);
        r[pos][1] = AAC_MADD30_V8(a1, s_im, a3, d_im, b1, s_re, b3, d_re);
    }
}
/**
 * Fill a PSDSPContext with the C implementations from this file and, in
 * non-fixed builds only, let the ARM, MIPS and x86 initialisers run
 * afterwards on the same context.
 *
 * @param s context whose function pointers are set
 */
av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s)
{
    /* stereo mixing: slot 0 is the plain variant, slot 1 the ipdopd variant */
    s->stereo_interpolate[0]  = ps_stereo_interpolate_c;
    s->stereo_interpolate[1]  = ps_stereo_interpolate_ipdopd_c;

    /* hybrid filterbank helpers */
    s->hybrid_analysis        = ps_hybrid_analysis_c;
    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c;
    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c;

    /* remaining kernels */
    s->add_squares            = ps_add_squares_c;
    s->mul_pair_single        = ps_mul_pair_single_c;
    s->decorrelate            = ps_decorrelate_c;

#if !USE_FIXED
    if (ARCH_ARM)
        ff_psdsp_init_arm(s);
    if (ARCH_MIPS)
        ff_psdsp_init_mips(s);
    if (ARCH_X86)
        ff_psdsp_init_x86(s);
#endif /* !USE_FIXED */
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,362 @@
/*
* AAC Spectral Band Replication decoding functions
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
* Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC Spectral Band Replication decoding functions
* @author Robert Swain ( rob opendot cl )
*/
#define USE_FIXED 0
#include "aac.h"
#include "sbr.h"
#include "aacsbr.h"
#include "aacsbrdata.h"
#include "aacsbr_tablegen.h"
#include "fft.h"
#include "aacps.h"
#include "sbrdsp.h"
#include "libavutil/internal.h"
#include "libavutil/libm.h"
#include "libavutil/avassert.h"
#include <stdint.h>
#include <float.h>
#include <math.h>
#if ARCH_MIPS
#include "mips/aacsbr_mips.h"
#endif /* ARCH_MIPS */
/* Ten VLC slots; the SBR_INIT_VLC_STATIC macro in aacsbr.h fills vlc_sbr[num]. */
static VLC vlc_sbr[10];
/* Forward declaration; the definition is not in the visible part of this file
 * (presumably it comes from aacsbr_template.c, included at the end - confirm). */
static void aacsbr_func_ptr_init(AACSBRContext *c);
static void make_bands(int16_t* bands, int start, int stop, int num_bands)
{
int k, previous, present;
float base, prod;
base = powf((float)stop / start, 1.0f / num_bands);
prod = start;
previous = start;
for (k = 0; k < num_bands-1; k++) {
prod *= base;
present = lrintf(prod);
bands[k] = present - previous;
previous = present;
}
bands[num_bands-1] = stop - previous;
}
/**
 * Dequantization and stereo decoding (14496-3 sp04 p203).
 *
 * Replaces the values stored in env_facs and noise_facs by powers of two
 * computed from them, in place. Envelopes 1..bs_num_env and noise floors
 * 1..bs_num_noise are processed. For a coupled channel pair, the values of
 * channel 0 and channel 1 are combined into a shared factor that is split
 * between both channels; otherwise each channel is converted on its own.
 * Envelope values above 1E20 are reported and replaced by 1.
 *
 * @param sbr    SBR context whose data[] entries are converted
 * @param id_aac element type; TYPE_CPE selects two-channel handling
 */
static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
{
    int e, k;
    int ch;

    if (id_aac != TYPE_CPE || !sbr->bs_coupling) {
        /* SCE or one non-coupled CPE */
        for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
            float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;

            for (e = 1; e <= sbr->data[ch].bs_num_env; e++) {
                for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++) {
                    sbr->data[ch].env_facs[e][k] =
                        exp2f(alpha * sbr->data[ch].env_facs[e][k] + 6.0f);
                    if (sbr->data[ch].env_facs[e][k] > 1E20) {
                        av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                        sbr->data[ch].env_facs[e][k] = 1;
                    }
                }
            }

            for (e = 1; e <= sbr->data[ch].bs_num_noise; e++) {
                for (k = 0; k < sbr->n_q; k++) {
                    sbr->data[ch].noise_facs[e][k] =
                        exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs[e][k]);
                }
            }
        }
        return;
    }

    /* coupled channel pair */
    {
        float alpha      = sbr->data[0].bs_amp_res ?  1.0f :  0.5f;
        float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;

        for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
            for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
                float level = exp2f(sbr->data[0].env_facs[e][k] * alpha + 7.0f);
                float pan   = exp2f((pan_offset - sbr->data[1].env_facs[e][k]) * alpha);
                float shared;

                if (level > 1E20) {
                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                    level = 1;
                }
                shared = level / (1.0f + pan);
                sbr->data[0].env_facs[e][k] = shared;
                sbr->data[1].env_facs[e][k] = shared * pan;
            }
        }

        for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
            for (k = 0; k < sbr->n_q; k++) {
                float level = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs[e][k] + 1);
                float pan   = exp2f(12 - sbr->data[1].noise_facs[e][k]);
                float shared;

                if (level > 1E20) {
                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                    level = 1;
                }
                shared = level / (1.0f + pan);
                sbr->data[0].noise_facs[e][k] = shared;
                sbr->data[1].noise_facs[e][k] = shared * pan;
            }
        }
    }
}
/** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
 * (14496-3 sp04 p214)
 * Warning: This routine does not seem numerically stable.
 *
 * For each of the first k0 rows of X_low, computes the two coefficient
 * pairs alpha0[k] and alpha1[k] from the values that dsp->autocorrelate()
 * stores in phi.
 *
 * @param dsp    DSP context providing autocorrelate()
 * @param alpha0 output, one pair per row
 * @param alpha1 output, one pair per row
 * @param X_low  input rows handed to autocorrelate()
 * @param k0     number of rows to process
 */
static void sbr_hf_inverse_filter(SBRDSPContext *dsp,
float (*alpha0)[2], float (*alpha1)[2],
const float X_low[32][40][2], int k0)
{
int k;
for (k = 0; k < k0; k++) {
LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
float dk;
dsp->autocorrelate(X_low[k], phi);
/* Denominator of alpha1; the squared-magnitude term is divided by
 * 1.000001f before the subtraction. */
dk = phi[2][1][0] * phi[1][0][0] -
(phi[1][1][0] * phi[1][1][0] + phi[1][1][1] * phi[1][1][1]) / 1.000001f;
if (!dk) {
/* Avoid dividing by zero: alpha1 is zero in that case. */
alpha1[k][0] = 0;
alpha1[k][1] = 0;
} else {
float temp_real, temp_im;
temp_real = phi[0][0][0] * phi[1][1][0] -
phi[0][0][1] * phi[1][1][1] -
phi[0][1][0] * phi[1][0][0];
temp_im = phi[0][0][0] * phi[1][1][1] +
phi[0][0][1] * phi[1][1][0] -
phi[0][1][1] * phi[1][0][0];
alpha1[k][0] = temp_real / dk;
alpha1[k][1] = temp_im / dk;
}
if (!phi[1][0][0]) {
/* Same zero-denominator guard for alpha0. */
alpha0[k][0] = 0;
alpha0[k][1] = 0;
} else {
float temp_real, temp_im;
/* alpha0 depends on the alpha1 value computed just above. */
temp_real = phi[0][0][0] + alpha1[k][0] * phi[1][1][0] +
alpha1[k][1] * phi[1][1][1];
temp_im = phi[0][0][1] + alpha1[k][1] * phi[1][1][0] -
alpha1[k][0] * phi[1][1][1];
alpha0[k][0] = -temp_real / phi[1][0][0];
alpha0[k][1] = -temp_im / phi[1][0][0];
}
/* If either pair has a squared magnitude of 16 or more, all four
 * coefficients of this row are reset to zero. */
if (alpha1[k][0] * alpha1[k][0] + alpha1[k][1] * alpha1[k][1] >= 16.0f ||
alpha0[k][0] * alpha0[k][0] + alpha0[k][1] * alpha0[k][1] >= 16.0f) {
alpha1[k][0] = 0;
alpha1[k][1] = 0;
alpha0[k][0] = 0;
alpha0[k][1] = 0;
}
}
}
/**
 * Chirp Factors (14496-3 sp04 p214).
 *
 * For each of the sbr->n_q entries, picks a target value from the two
 * bs_invf_mode rows (0.6 when the two modes sum to 1, otherwise a table
 * lookup on row 0), blends it with the stored bw_array value using one of
 * two weightings depending on whether the target is below the stored value,
 * and stores the result back, flushing anything below 0.015625 to zero.
 *
 * @param sbr     SBR context, supplies n_q
 * @param ch_data channel data whose bw_array is updated in place
 */
static void sbr_chirp(SpectralBandReplication *sbr, SBRData *ch_data)
{
    static const float bw_tab[] = { 0.0f, 0.75f, 0.9f, 0.98f };
    int band;

    for (band = 0; band < sbr->n_q; band++) {
        float bw;

        if (ch_data->bs_invf_mode[0][band] + ch_data->bs_invf_mode[1][band] == 1)
            bw = 0.6f;
        else
            bw = bw_tab[ch_data->bs_invf_mode[0][band]];

        if (bw < ch_data->bw_array[band])
            bw = 0.75f * bw + 0.25f * ch_data->bw_array[band];
        else
            bw = 0.90625f * bw + 0.09375f * ch_data->bw_array[band];

        if (bw < 0.015625f)
            bw = 0.0f;
        ch_data->bw_array[band] = bw;
    }
}
/**
 * Calculation of levels of additional HF signal components (14496-3 sp04 p219)
 * and Calculation of gain (14496-3 sp04 p219)
 *
 * For every envelope and every limiter band, fills sbr->q_m, sbr->s_m and
 * sbr->gain, caps them with a maximum gain, and finally scales all three by
 * a boost factor.
 *
 * @param ac      not referenced in this function body
 * @param sbr     SBR context; reads e_origmapped, q_mapped, s_mapped, e_curr,
 *                f_tablelim, kx[1], n_lim and bs_limiter_gains; writes q_m,
 *                s_m and gain
 * @param ch_data channel data; supplies bs_num_env and s_indexmapped
 * @param e_a     two envelope indices; for an envelope equal to either of
 *                them, delta is 0
 */
static void sbr_gain_calc(AACContext *ac, SpectralBandReplication *sbr,
SBRData *ch_data, const int e_a[2])
{
int e, k, m;
// max gain limits : -3dB, 0dB, 3dB, inf dB (limiter off)
static const float limgain[4] = { 0.70795, 1.0, 1.41254, 10000000000 };
for (e = 0; e < ch_data->bs_num_env; e++) {
/* delta is 1 unless e matches one of the two e_a entries. */
int delta = !((e == e_a[1]) || (e == e_a[0]));
for (k = 0; k < sbr->n_lim; k++) {
float gain_boost, gain_max;
float sum[2] = { 0.0f, 0.0f };
/* Pass 1: per-entry q_m, s_m and gain before any limiting. The index m
 * is relative to kx[1]. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
const float temp = sbr->e_origmapped[e][m] / (1.0f + sbr->q_mapped[e][m]);
sbr->q_m[e][m] = sqrtf(temp * sbr->q_mapped[e][m]);
/* Note the e + 1 row index into s_indexmapped. */
sbr->s_m[e][m] = sqrtf(temp * ch_data->s_indexmapped[e + 1][m]);
if (!sbr->s_mapped[e][m]) {
sbr->gain[e][m] = sqrtf(sbr->e_origmapped[e][m] /
((1.0f + sbr->e_curr[e][m]) *
(1.0f + sbr->q_mapped[e][m] * delta)));
} else {
sbr->gain[e][m] = sqrtf(sbr->e_origmapped[e][m] * sbr->q_mapped[e][m] /
((1.0f + sbr->e_curr[e][m]) *
(1.0f + sbr->q_mapped[e][m])));
}
}
/* Pass 2: sums of e_origmapped and e_curr over the limiter band. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sum[0] += sbr->e_origmapped[e][m];
sum[1] += sbr->e_curr[e][m];
}
/* FLT_EPSILON keeps the ratio defined when both sums are zero. */
gain_max = limgain[sbr->bs_limiter_gains] * sqrtf((FLT_EPSILON + sum[0]) / (FLT_EPSILON + sum[1]));
gain_max = FFMIN(100000.f, gain_max);
/* Pass 3: cap gain at gain_max and cap q_m at q_m * gain_max / gain;
 * q_m_max is computed from the gain before it is capped. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
float q_m_max = sbr->q_m[e][m] * gain_max / sbr->gain[e][m];
sbr->q_m[e][m] = FFMIN(sbr->q_m[e][m], q_m_max);
sbr->gain[e][m] = FFMIN(sbr->gain[e][m], gain_max);
}
/* Pass 4: compare e_origmapped with what the limited values yield. */
sum[0] = sum[1] = 0.0f;
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sum[0] += sbr->e_origmapped[e][m];
sum[1] += sbr->e_curr[e][m] * sbr->gain[e][m] * sbr->gain[e][m]
+ sbr->s_m[e][m] * sbr->s_m[e][m]
+ (delta && !sbr->s_m[e][m]) * sbr->q_m[e][m] * sbr->q_m[e][m];
}
gain_boost = sqrtf((FLT_EPSILON + sum[0]) / (FLT_EPSILON + sum[1]));
gain_boost = FFMIN(1.584893192f, gain_boost);
/* Pass 5: apply the (capped) boost to all three outputs. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sbr->gain[e][m] *= gain_boost;
sbr->q_m[e][m] *= gain_boost;
sbr->s_m[e][m] *= gain_boost;
}
}
}
}
/**
 * Assembling HF Signals (14496-3 sp04 p220)
 *
 * Copies the per-envelope gain and q_m rows into the per-time-slot history
 * buffers of ch_data, optionally smooths them over h_SL + 1 slots, and then
 * for every time slot applies the gain through sbr->dsp.hf_g_filt and adds
 * either noise (sbr->dsp.hf_apply_noise) or the s_m values directly.
 *
 * @param Y1      output, indexed [time slot][band][pair element]
 * @param X_high  input handed to hf_g_filt
 * @param sbr     SBR context (gain, q_m, s_m, kx[1], m[1], reset,
 *                bs_smoothing_mode, dsp)
 * @param ch_data channel data; g_temp, q_temp, f_indexnoise and f_indexsine
 *                are updated
 * @param e_a     two envelope indices that are excluded from smoothing and
 *                from noise addition
 */
static void sbr_hf_assemble(float Y1[38][64][2],
const float X_high[64][40][2],
SpectralBandReplication *sbr, SBRData *ch_data,
const int e_a[2])
{
int e, i, j, m;
/* Smoothing length: 4 when bs_smoothing_mode is 0, otherwise 0. */
const int h_SL = 4 * !sbr->bs_smoothing_mode;
const int kx = sbr->kx[1];
const int m_max = sbr->m[1];
/* Smoothing weights, indexed by the distance j from the current slot. */
static const float h_smooth[5] = {
0.33333333333333,
0.30150283239582,
0.21816949906249,
0.11516383427084,
0.03183050093751,
};
float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
int indexnoise = ch_data->f_indexnoise;
int indexsine = ch_data->f_indexsine;
/* Prime the h_SL history rows in front of the first envelope: from the
 * first envelope's values on reset, otherwise from the rows stored at
 * t_env_num_env_old. */
if (sbr->reset) {
for (i = 0; i < h_SL; i++) {
memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
}
} else if (h_SL) {
for (i = 0; i < 4; i++) {
memcpy(g_temp[i + 2 * ch_data->t_env[0]],
g_temp[i + 2 * ch_data->t_env_num_env_old],
sizeof(g_temp[0]));
memcpy(q_temp[i + 2 * ch_data->t_env[0]],
q_temp[i + 2 * ch_data->t_env_num_env_old],
sizeof(q_temp[0]));
}
}
/* Replicate each envelope's row into every time slot it covers, offset by
 * h_SL rows. */
for (e = 0; e < ch_data->bs_num_env; e++) {
for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
memcpy(g_temp[h_SL + i], sbr->gain[e], m_max * sizeof(sbr->gain[0][0]));
memcpy(q_temp[h_SL + i], sbr->q_m[e], m_max * sizeof(sbr->q_m[0][0]));
}
}
for (e = 0; e < ch_data->bs_num_env; e++) {
for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
float *g_filt, *q_filt;
if (h_SL && e != e_a[0] && e != e_a[1]) {
/* Weighted sum over the current row and the h_SL rows before it. */
g_filt = g_filt_tab;
q_filt = q_filt_tab;
for (m = 0; m < m_max; m++) {
const int idx1 = i + h_SL;
g_filt[m] = 0.0f;
q_filt[m] = 0.0f;
for (j = 0; j <= h_SL; j++) {
g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
}
}
} else {
/* No smoothing: use the stored rows directly. Note that q_filt is taken
 * from row i, not i + h_SL; in this branch either h_SL is 0 or e equals
 * an e_a entry, and q_filt is only read below when e differs from both. */
g_filt = g_temp[i + h_SL];
q_filt = q_temp[i];
}
sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
i + ENVELOPE_ADJUSTMENT_OFFSET);
if (e != e_a[0] && e != e_a[1]) {
sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
q_filt, indexnoise,
kx, m_max);
} else {
/* Add s_m directly. idx selects which element of the pair is written;
 * A and B are each +1 or -1 and give the sign applied to even and odd m. */
int idx = indexsine&1;
int A = (1-((indexsine+(kx & 1))&2));
int B = (A^(-idx)) + idx;
float *out = &Y1[i][kx][idx];
float *in = sbr->s_m[e];
/* out is a flat view, so band m is at offset 2*m. */
for (m = 0; m+1 < m_max; m+=2) {
out[2*m ] += in[m ] * A;
out[2*m+2] += in[m+1] * B;
}
/* Odd m_max: handle the last band, which the pairwise loop skipped. */
if(m_max&1)
out[2*m ] += in[m ] * A;
}
/* Both indices advance for every time slot and wrap around. */
indexnoise = (indexnoise + m_max) & 0x1ff;
indexsine = (indexsine + 1) & 3;
}
}
ch_data->f_indexnoise = indexnoise;
ch_data->f_indexsine = indexsine;
}
#include "aacsbr_template.c"

View File

@@ -0,0 +1,96 @@
/*
* AAC Spectral Band Replication function declarations
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC Spectral Band Replication function declarations
* @author Robert Swain ( rob opendot cl )
*/
#ifndef AVCODEC_AACSBR_H
#define AVCODEC_AACSBR_H
#include "get_bits.h"
#include "aac.h"
#include "sbr.h"
/* Added to the time-slot index passed to hf_g_filt in sbr_hf_assemble(). */
#define ENVELOPE_ADJUSTMENT_OFFSET 2
/* Offset from which the transmitted noise floor value is subtracted in
 * sbr_dequant(). FIXR() is defined elsewhere; presumably it yields 6.0f in
 * the float build and an integer in the fixed-point build - confirm. */
#define NOISE_FLOOR_OFFSET FIXR(6.0f)
/**
 * SBR VLC tables
 *
 * Ten entries, the same count as vlc_sbr[] and vlc_sbr_lav[]; presumably
 * these values are the indices into both arrays - confirm at the call sites.
 */
enum {
T_HUFFMAN_ENV_1_5DB,
F_HUFFMAN_ENV_1_5DB,
T_HUFFMAN_ENV_BAL_1_5DB,
F_HUFFMAN_ENV_BAL_1_5DB,
T_HUFFMAN_ENV_3_0DB,
F_HUFFMAN_ENV_3_0DB,
T_HUFFMAN_ENV_BAL_3_0DB,
F_HUFFMAN_ENV_BAL_3_0DB,
T_HUFFMAN_NOISE_3_0DB,
T_HUFFMAN_NOISE_BAL_3_0DB,
};
/**
 * bs_frame_class - frame class of current SBR frame (14496-3 sp04 p98)
 *
 * The enumerators take the values 0 to 3 in the order listed.
 */
enum {
FIXFIX,
FIXVAR,
VARFIX,
VARVAR,
};
/**
 * SBR extension identifiers.
 * NOTE(review): EXTENSION_ID_PS presumably marks Parametric Stereo extension
 * data - confirm against the bitstream parser.
 */
enum {
EXTENSION_ID_PS = 2,
};
/**
 * One value per SBR VLC table (ten entries, like vlc_sbr[]).
 * NOTE(review): "lav" presumably stands for the largest absolute value of
 * each table, used as an offset when decoding - confirm at the use sites.
 */
static const int8_t vlc_sbr_lav[10] =
{ 60, 60, 24, 24, 31, 31, 12, 12, 31, 12 };
/**
 * Initialize vlc_sbr[num] with INIT_VLC_STATIC from row num of sbr_tmp.
 * The number of codes is table_size / elem_size of that row; size is passed
 * through as the last INIT_VLC_STATIC argument. An array named sbr_tmp must
 * be in scope where the macro is expanded.
 */
#define SBR_INIT_VLC_STATIC(num, size) \
INIT_VLC_STATIC(&vlc_sbr[num], 9, sbr_tmp[num].table_size / sbr_tmp[num].elem_size, \
sbr_tmp[num].sbr_bits , 1, 1, \
sbr_tmp[num].sbr_codes, sbr_tmp[num].elem_size, sbr_tmp[num].elem_size, \
size)
/**
 * Build one initializer row from the arrays name_codes and name_bits:
 * codes pointer, bits pointer, total size of the codes array in bytes and
 * size of one code element in bytes.
 */
#define SBR_VLC_ROW(name) \
{ name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
/** Initialize SBR. */
void AAC_RENAME(ff_aac_sbr_init)(void);
/** Initialize one SBR context. */
void AAC_RENAME(ff_aac_sbr_ctx_init)(AACContext *ac, SpectralBandReplication *sbr);
/** Close one SBR context. */
void AAC_RENAME(ff_aac_sbr_ctx_close)(SpectralBandReplication *sbr);
/** Decode one SBR element. */
int AAC_RENAME(ff_decode_sbr_extension)(AACContext *ac, SpectralBandReplication *sbr,
GetBitContext *gb, int crc, int cnt, int id_aac);
/** Apply one SBR element to one AAC element. */
void AAC_RENAME(ff_sbr_apply)(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
INTFLOAT* L, INTFLOAT *R);
/**
 * MIPS-specific initialisation of an AACSBRContext. Unlike the functions
 * above, this name is not wrapped in AAC_RENAME.
 */
void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c);
#endif /* AVCODEC_AACSBR_H */

View File

@@ -0,0 +1,597 @@
/*
* Copyright (c) 2013
* MIPS Technologies, Inc., California.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* AAC Spectral Band Replication decoding functions (fixed-point)
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
* Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC Spectral Band Replication decoding functions (fixed-point)
* Note: Rounding-to-nearest used unless otherwise stated
* @author Robert Swain ( rob opendot cl )
* @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
*/
#define USE_FIXED 1
#include "aac.h"
#include "sbr.h"
#include "aacsbr.h"
#include "aacsbrdata.h"
#include "aacsbr_fixed_tablegen.h"
#include "fft.h"
#include "aacps.h"
#include "sbrdsp.h"
#include "libavutil/internal.h"
#include "libavutil/libm.h"
#include "libavutil/avassert.h"
#include <stdint.h>
#include <float.h>
#include <math.h>
/* Ten VLC slots; the SBR_INIT_VLC_STATIC macro in aacsbr.h fills vlc_sbr[num]. */
static VLC vlc_sbr[10];
/* Forward declaration; the definition is not in the visible part of this file
 * (presumably it comes from aacsbr_template.c, included at the end - confirm). */
static void aacsbr_func_ptr_init(AACSBRContext *c);
/* Fixed-point constants built with Q31(); used by make_bands() (CONST_LN2)
 * or available to code outside this view. */
static const int CONST_LN2 = Q31(0.6931471806/256); // ln(2)/256
static const int CONST_RECIP_LN2 = Q31(0.7213475204); // 0.5/ln(2)
static const int CONST_076923 = Q31(0.76923076923076923077f);
/* Reciprocals 1/2 .. 1/11: the coefficients of the alternating series
 * evaluated by fixed_log(). */
static const int fixed_log_table[10] =
{
Q31(1.0/2), Q31(1.0/3), Q31(1.0/4), Q31(1.0/5), Q31(1.0/6),
Q31(1.0/7), Q31(1.0/8), Q31(1.0/9), Q31(1.0/10), Q31(1.0/11)
};
/**
 * Evaluate the alternating series
 *   x - x^2/2 + x^3/3 - ... + x^11/11
 * in fixed point, i.e. an 11-term approximation of ln(1 + x).
 *
 * Every product is rounded to nearest and shifted right by 31 bits, so x,
 * the table coefficients and the result share the same 31-fractional-bit
 * scale.
 *
 * @param x argument of the series
 * @return  the series sum
 */
static int fixed_log(int x)
{
    int acc   = x;
    int power = x;
    int term, n;

    for (n = 0; n < 10; n++) {
        /* next power of x, then its table-weighted term */
        power = (int)(((int64_t)power * x                  + 0x40000000) >> 31);
        term  = (int)(((int64_t)power * fixed_log_table[n] + 0x40000000) >> 31);

        /* even table slots are subtracted, odd ones added */
        if (n & 1)
            acc += term;
        else
            acc -= term;
    }
    return acc;
}
/* Reciprocal factorials 1/2! .. 1/8!: the coefficients of the power series
 * evaluated by fixed_exp(). */
static const int fixed_exp_table[7] =
{
Q31(1.0/2), Q31(1.0/6), Q31(1.0/24), Q31(1.0/120),
Q31(1.0/720), Q31(1.0/5040), Q31(1.0/40320)
};
/**
 * Evaluate the power series
 *   1 + x + x^2/2! + ... + x^8/8!
 * in fixed point, i.e. a 9-term approximation of exp(x).
 *
 * x and the result carry 23 fractional bits (1.0 is 0x800000); powers of x
 * are renormalised with a rounded 23-bit shift and the table coefficients
 * with a rounded 31-bit shift.
 *
 * @param x argument of the series, 23 fractional bits
 * @return  the series sum, 23 fractional bits
 */
static int fixed_exp(int x)
{
    const int *coef;
    int sum   = 0x800000 + x;
    int power = x;

    for (coef = fixed_exp_table; coef < fixed_exp_table + 7; coef++) {
        power = (int)(((int64_t)power * x     + 0x400000)   >> 23);
        sum  += (int)(((int64_t)power * *coef + 0x40000000) >> 31);
    }
    return sum;
}
/**
 * Fixed-point band splitter: divide [start, stop] into num_bands
 * geometrically growing bands and store the width of each band.
 *
 * The per-band growth factor is obtained as exp(ln(stop / start) / num_bands)
 * using fixed_log() and fixed_exp(). The running edge is kept with
 * 23 fractional bits and rounded to an integer for every band but the last;
 * the last band takes whatever remains up to stop.
 *
 * @param bands     output, num_bands band widths
 * @param start     lower edge of the range
 * @param stop      upper edge of the range
 * @param num_bands number of bands to produce
 */
static void make_bands(int16_t* bands, int start, int stop, int num_bands)
{
    int ratio, log_step, factor;
    int edge, last, cur;
    int norm_shifts;
    int idx;

    /* stop / start with 23 fractional bits, then normalised upwards until it
     * reaches 0x40000000 while counting the shifts */
    ratio = (stop << 23) / start;
    for (norm_shifts = 0; ratio < 0x40000000; norm_shifts++)
        ratio <<= 1;

    /* logarithm of the normalised ratio, corrected for the normalisation
     * shifts and divided by the band count */
    log_step = fixed_log(ratio - 0x80000000);
    log_step = (((log_step + 0x80) >> 8) + (8 - norm_shifts) * CONST_LN2) / num_bands;
    factor   = fixed_exp(log_step);

    last = start;
    edge = start << 23;
    for (idx = 0; idx + 1 < num_bands; idx++) {
        edge       = (int)(((int64_t)edge * factor + 0x400000) >> 23);
        cur        = (edge + 0x400000) >> 23;
        bands[idx] = cur - last;
        last       = cur;
    }
    bands[num_bands - 1] = stop - last;
}
/**
 * Dequantization and stereo decoding (14496-3 sp04 p203)
 *
 * Fixed-point counterpart working on SoftFloat values: the integer stored in
 * the .mant field of each env_facs / noise_facs entry is turned into a power
 * of two expressed as a (mant, exp) pair, in place. Envelopes 1..bs_num_env
 * and noise floors 1..bs_num_noise are processed.
 *
 * Envelope exponents are first computed in half steps; an odd half-step
 * count selects the mantissa 759250125 (about 0x20000000 * sqrt(2)) and an
 * even one 0x20000000, after which the count is halved.
 *
 * @param sbr    SBR context whose data[] entries are converted
 * @param id_aac element type; TYPE_CPE selects two-channel handling
 */
static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
{
int k, e;
int ch;
if (id_aac == TYPE_CPE && sbr->bs_coupling) {
/* alpha counts half steps of the exponent per quantizer step. */
int alpha = sbr->data[0].bs_amp_res ? 2 : 1;
int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
SoftFloat temp1, temp2, fac;
temp1.exp = sbr->data[0].env_facs[e][k].mant * alpha + 14;
if (temp1.exp & 1)
temp1.mant = 759250125;
else
temp1.mant = 0x20000000;
temp1.exp = (temp1.exp >> 1) + 1;
if (temp1.exp > 66) { // temp1 > 1E20
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
temp1 = FLOAT_1;
}
temp2.exp = (pan_offset - sbr->data[1].env_facs[e][k].mant) * alpha;
if (temp2.exp & 1)
temp2.mant = 759250125;
else
temp2.mant = 0x20000000;
temp2.exp = (temp2.exp >> 1) + 1;
/* Shared factor temp1 / (1 + temp2); channel 1 gets it times temp2. */
fac = av_div_sf(temp1, av_add_sf(FLOAT_1, temp2));
sbr->data[0].env_facs[e][k] = fac;
sbr->data[1].env_facs[e][k] = av_mul_sf(fac, temp2);
}
}
for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
for (k = 0; k < sbr->n_q; k++) {
SoftFloat temp1, temp2, fac;
temp1.exp = NOISE_FLOOR_OFFSET - \
sbr->data[0].noise_facs[e][k].mant + 2;
temp1.mant = 0x20000000;
if (temp1.exp > 66) { // temp1 > 1E20
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
temp1 = FLOAT_1;
}
temp2.exp = 12 - sbr->data[1].noise_facs[e][k].mant + 1;
temp2.mant = 0x20000000;
fac = av_div_sf(temp1, av_add_sf(FLOAT_1, temp2));
sbr->data[0].noise_facs[e][k] = fac;
sbr->data[1].noise_facs[e][k] = av_mul_sf(fac, temp2);
}
}
} else { // SCE or one non-coupled CPE
for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
int alpha = sbr->data[ch].bs_amp_res ? 2 : 1;
for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
SoftFloat temp1;
temp1.exp = alpha * sbr->data[ch].env_facs[e][k].mant + 12;
if (temp1.exp & 1)
temp1.mant = 759250125;
else
temp1.mant = 0x20000000;
temp1.exp = (temp1.exp >> 1) + 1;
if (temp1.exp > 66) { // temp1 > 1E20
av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
temp1 = FLOAT_1;
}
sbr->data[ch].env_facs[e][k] = temp1;
}
for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
for (k = 0; k < sbr->n_q; k++){
/* The exponent is derived from the old .mant before .mant is overwritten. */
sbr->data[ch].noise_facs[e][k].exp = NOISE_FLOOR_OFFSET - \
sbr->data[ch].noise_facs[e][k].mant + 1;
sbr->data[ch].noise_facs[e][k].mant = 0x20000000;
}
}
}
}
/** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
* (14496-3 sp04 p214)
* Warning: This routine does not seem numerically stable.
*/
static void sbr_hf_inverse_filter(SBRDSPContext *dsp,
int (*alpha0)[2], int (*alpha1)[2],
const int X_low[32][40][2], int k0)
{
int k;
int shift, round;
for (k = 0; k < k0; k++) {
SoftFloat phi[3][2][2];
SoftFloat a00, a01, a10, a11;
SoftFloat dk;
dsp->autocorrelate(X_low[k], phi);
dk = av_sub_sf(av_mul_sf(phi[2][1][0], phi[1][0][0]),
av_mul_sf(av_add_sf(av_mul_sf(phi[1][1][0], phi[1][1][0]),
av_mul_sf(phi[1][1][1], phi[1][1][1])), FLOAT_0999999));
if (!dk.mant) {
a10 = FLOAT_0;
a11 = FLOAT_0;
} else {
SoftFloat temp_real, temp_im;
temp_real = av_sub_sf(av_sub_sf(av_mul_sf(phi[0][0][0], phi[1][1][0]),
av_mul_sf(phi[0][0][1], phi[1][1][1])),
av_mul_sf(phi[0][1][0], phi[1][0][0]));
temp_im = av_sub_sf(av_add_sf(av_mul_sf(phi[0][0][0], phi[1][1][1]),
av_mul_sf(phi[0][0][1], phi[1][1][0])),
av_mul_sf(phi[0][1][1], phi[1][0][0]));
a10 = av_div_sf(temp_real, dk);
a11 = av_div_sf(temp_im, dk);
}
if (!phi[1][0][0].mant) {
a00 = FLOAT_0;
a01 = FLOAT_0;
} else {
SoftFloat temp_real, temp_im;
temp_real = av_add_sf(phi[0][0][0],
av_add_sf(av_mul_sf(a10, phi[1][1][0]),
av_mul_sf(a11, phi[1][1][1])));
temp_im = av_add_sf(phi[0][0][1],
av_sub_sf(av_mul_sf(a11, phi[1][1][0]),
av_mul_sf(a10, phi[1][1][1])));
temp_real.mant = -temp_real.mant;
temp_im.mant = -temp_im.mant;
a00 = av_div_sf(temp_real, phi[1][0][0]);
a01 = av_div_sf(temp_im, phi[1][0][0]);
}
shift = a00.exp;
if (shift >= 3)
alpha0[k][0] = 0x7fffffff;
else {
a00.mant <<= 1;
shift = 2-shift;
if (shift == 0)
alpha0[k][0] = a00.mant;
else {
round = 1 << (shift-1);
alpha0[k][0] = (a00.mant + round) >> shift;
}
}
shift = a01.exp;
if (shift >= 3)
alpha0[k][1] = 0x7fffffff;
else {
a01.mant <<= 1;
shift = 2-shift;
if (shift == 0)
alpha0[k][1] = a01.mant;
else {
round = 1 << (shift-1);
alpha0[k][1] = (a01.mant + round) >> shift;
}
}
shift = a10.exp;
if (shift >= 3)
alpha1[k][0] = 0x7fffffff;
else {
a10.mant <<= 1;
shift = 2-shift;
if (shift == 0)
alpha1[k][0] = a10.mant;
else {
round = 1 << (shift-1);
alpha1[k][0] = (a10.mant + round) >> shift;
}
}
shift = a11.exp;
if (shift >= 3)
alpha1[k][1] = 0x7fffffff;
else {
a11.mant <<= 1;
shift = 2-shift;
if (shift == 0)
alpha1[k][1] = a11.mant;
else {
round = 1 << (shift-1);
alpha1[k][1] = (a11.mant + round) >> shift;
}
}
shift = (int)(((int64_t)(alpha1[k][0]>>1) * (alpha1[k][0]>>1) + \
(int64_t)(alpha1[k][1]>>1) * (alpha1[k][1]>>1) + \
0x40000000) >> 31);
if (shift >= 0x20000000){
alpha1[k][0] = 0;
alpha1[k][1] = 0;
alpha0[k][0] = 0;
alpha0[k][1] = 0;
}
shift = (int)(((int64_t)(alpha0[k][0]>>1) * (alpha0[k][0]>>1) + \
(int64_t)(alpha0[k][1]>>1) * (alpha0[k][1]>>1) + \
0x40000000) >> 31);
if (shift >= 0x20000000){
alpha1[k][0] = 0;
alpha1[k][1] = 0;
alpha0[k][0] = 0;
alpha0[k][1] = 0;
}
}
}
/**
 * Chirp Factors (14496-3 sp04 p214).
 *
 * Fixed-point version; all values carry 31 fractional bits (the constants
 * are 0.75, 0.9, 0.98, 0.6, 0.25, 0.90625, 0.09375 and 0.015625 times 2^31).
 *
 * For each of the sbr->n_q entries, picks a target value from the two
 * bs_invf_mode rows, blends it with the stored bw_array value using one of
 * two weightings depending on whether the target is below the stored value,
 * and stores the rounded result back, flushing anything below 0x2000000 to
 * zero.
 *
 * @param sbr     SBR context, supplies n_q
 * @param ch_data channel data whose bw_array is updated in place
 */
static void sbr_chirp(SpectralBandReplication *sbr, SBRData *ch_data)
{
    static const int bw_tab[] = { 0, 1610612736, 1932735283, 2104533975 };
    int band;

    for (band = 0; band < sbr->n_q; band++) {
        int64_t acc;
        int bw;

        bw = (ch_data->bs_invf_mode[0][band] + ch_data->bs_invf_mode[1][band] == 1)
             ? 1288490189
             : bw_tab[ch_data->bs_invf_mode[0][band]];

        if (bw < ch_data->bw_array[band]) {
            acc  = (int64_t)bw * 1610612736;
            acc += (int64_t)ch_data->bw_array[band] * 0x20000000;
        } else {
            acc  = (int64_t)bw * 1946157056;
            acc += (int64_t)ch_data->bw_array[band] * 201326592;
        }
        /* round to nearest and drop the extra 31 fractional bits */
        bw = (int)((acc + 0x40000000) >> 31);

        ch_data->bw_array[band] = bw < 0x2000000 ? 0 : bw;
    }
}
/**
 * Calculation of levels of additional HF signal components (14496-3 sp04 p219)
 * and Calculation of gain (14496-3 sp04 p219)
 *
 * SoftFloat version. For every envelope and every limiter band, fills
 * sbr->q_m, sbr->s_m and sbr->gain, caps them with a maximum gain, and
 * finally scales all three by a boost factor.
 *
 * @param ac      not referenced in this function body
 * @param sbr     SBR context; reads e_origmapped, q_mapped, s_mapped, e_curr,
 *                f_tablelim, kx[1], n_lim and bs_limiter_gains; writes q_m,
 *                s_m and gain
 * @param ch_data channel data; supplies bs_num_env and s_indexmapped
 * @param e_a     two envelope indices; for an envelope equal to either of
 *                them, delta is 0
 */
static void sbr_gain_calc(AACContext *ac, SpectralBandReplication *sbr,
SBRData *ch_data, const int e_a[2])
{
int e, k, m;
// max gain limits : -3dB, 0dB, 3dB, inf dB (limiter off)
static const SoftFloat limgain[4] = { { 760155524, 0 }, { 0x20000000, 1 },
{ 758351638, 1 }, { 625000000, 34 } };
for (e = 0; e < ch_data->bs_num_env; e++) {
/* delta is 1 unless e matches one of the two e_a entries. */
int delta = !((e == e_a[1]) || (e == e_a[0]));
for (k = 0; k < sbr->n_lim; k++) {
SoftFloat gain_boost, gain_max;
SoftFloat sum[2];
sum[0] = sum[1] = FLOAT_0;
/* Pass 1: per-entry q_m, s_m and gain before any limiting. The index m
 * is relative to kx[1]. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
const SoftFloat temp = av_div_sf(sbr->e_origmapped[e][m],
av_add_sf(FLOAT_1, sbr->q_mapped[e][m]));
sbr->q_m[e][m] = av_sqrt_sf(av_mul_sf(temp, sbr->q_mapped[e][m]));
/* Note the e + 1 row index into s_indexmapped. */
sbr->s_m[e][m] = av_sqrt_sf(av_mul_sf(temp, av_int2sf(ch_data->s_indexmapped[e + 1][m], 0)));
if (!sbr->s_mapped[e][m]) {
/* The q_mapped term is part of the denominator only when delta is set. */
if (delta) {
sbr->gain[e][m] = av_sqrt_sf(av_div_sf(sbr->e_origmapped[e][m],
av_mul_sf(av_add_sf(FLOAT_1, sbr->e_curr[e][m]),
av_add_sf(FLOAT_1, sbr->q_mapped[e][m]))));
} else {
sbr->gain[e][m] = av_sqrt_sf(av_div_sf(sbr->e_origmapped[e][m],
av_add_sf(FLOAT_1, sbr->e_curr[e][m])));
}
} else {
sbr->gain[e][m] = av_sqrt_sf(
av_div_sf(
av_mul_sf(sbr->e_origmapped[e][m], sbr->q_mapped[e][m]),
av_mul_sf(
av_add_sf(FLOAT_1, sbr->e_curr[e][m]),
av_add_sf(FLOAT_1, sbr->q_mapped[e][m]))));
}
}
/* Pass 2: sums of e_origmapped and e_curr over the limiter band. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sum[0] = av_add_sf(sum[0], sbr->e_origmapped[e][m]);
sum[1] = av_add_sf(sum[1], sbr->e_curr[e][m]);
}
/* FLOAT_EPSILON keeps the ratio defined when both sums are zero. */
gain_max = av_mul_sf(limgain[sbr->bs_limiter_gains],
av_sqrt_sf(
av_div_sf(
av_add_sf(FLOAT_EPSILON, sum[0]),
av_add_sf(FLOAT_EPSILON, sum[1]))));
if (av_gt_sf(gain_max, FLOAT_100000))
gain_max = FLOAT_100000;
/* Pass 3: cap gain at gain_max and cap q_m at q_m * gain_max / gain;
 * q_m_max is computed from the gain before it is capped. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
SoftFloat q_m_max = av_div_sf(
av_mul_sf(sbr->q_m[e][m], gain_max),
sbr->gain[e][m]);
if (av_gt_sf(sbr->q_m[e][m], q_m_max))
sbr->q_m[e][m] = q_m_max;
if (av_gt_sf(sbr->gain[e][m], gain_max))
sbr->gain[e][m] = gain_max;
}
/* Pass 4: compare e_origmapped with what the limited values yield. */
sum[0] = sum[1] = FLOAT_0;
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sum[0] = av_add_sf(sum[0], sbr->e_origmapped[e][m]);
sum[1] = av_add_sf(sum[1],
av_mul_sf(
av_mul_sf(sbr->e_curr[e][m],
sbr->gain[e][m]),
sbr->gain[e][m]));
sum[1] = av_add_sf(sum[1],
av_mul_sf(sbr->s_m[e][m], sbr->s_m[e][m]));
/* The q_m term counts only when delta is set and s_m is zero. */
if (delta && !sbr->s_m[e][m].mant)
sum[1] = av_add_sf(sum[1],
av_mul_sf(sbr->q_m[e][m], sbr->q_m[e][m]));
}
gain_boost = av_sqrt_sf(
av_div_sf(
av_add_sf(FLOAT_EPSILON, sum[0]),
av_add_sf(FLOAT_EPSILON, sum[1])));
if (av_gt_sf(gain_boost, FLOAT_1584893192))
gain_boost = FLOAT_1584893192;
/* Pass 5: apply the (capped) boost to all three outputs. */
for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) {
sbr->gain[e][m] = av_mul_sf(sbr->gain[e][m], gain_boost);
sbr->q_m[e][m] = av_mul_sf(sbr->q_m[e][m], gain_boost);
sbr->s_m[e][m] = av_mul_sf(sbr->s_m[e][m], gain_boost);
}
}
}
}
/// Assembling HF Signals (14496-3 sp04 p220)
static void sbr_hf_assemble(int Y1[38][64][2],
const int X_high[64][40][2],
SpectralBandReplication *sbr, SBRData *ch_data,
const int e_a[2])
{
int e, i, j, m;
const int h_SL = 4 * !sbr->bs_smoothing_mode;
const int kx = sbr->kx[1];
const int m_max = sbr->m[1];
static const SoftFloat h_smooth[5] = {
{ 715827883, -1 },
{ 647472402, -1 },
{ 937030863, -2 },
{ 989249804, -3 },
{ 546843842, -4 },
};
SoftFloat (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
int indexnoise = ch_data->f_indexnoise;
int indexsine = ch_data->f_indexsine;
if (sbr->reset) {
for (i = 0; i < h_SL; i++) {
memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
}
} else if (h_SL) {
for (i = 0; i < 4; i++) {
memcpy(g_temp[i + 2 * ch_data->t_env[0]],
g_temp[i + 2 * ch_data->t_env_num_env_old],
sizeof(g_temp[0]));
memcpy(q_temp[i + 2 * ch_data->t_env[0]],
q_temp[i + 2 * ch_data->t_env_num_env_old],
sizeof(q_temp[0]));
}
}
for (e = 0; e < ch_data->bs_num_env; e++) {
for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
memcpy(g_temp[h_SL + i], sbr->gain[e], m_max * sizeof(sbr->gain[0][0]));
memcpy(q_temp[h_SL + i], sbr->q_m[e], m_max * sizeof(sbr->q_m[0][0]));
}
}
for (e = 0; e < ch_data->bs_num_env; e++) {
for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
SoftFloat g_filt_tab[48];
SoftFloat q_filt_tab[48];
SoftFloat *g_filt, *q_filt;
if (h_SL && e != e_a[0] && e != e_a[1]) {
g_filt = g_filt_tab;
q_filt = q_filt_tab;
for (m = 0; m < m_max; m++) {
const int idx1 = i + h_SL;
g_filt[m].mant = g_filt[m].exp = 0;
q_filt[m].mant = q_filt[m].exp = 0;
for (j = 0; j <= h_SL; j++) {
g_filt[m] = av_add_sf(g_filt[m],
av_mul_sf(g_temp[idx1 - j][m],
h_smooth[j]));
q_filt[m] = av_add_sf(q_filt[m],
av_mul_sf(q_temp[idx1 - j][m],
h_smooth[j]));
}
}
} else {
g_filt = g_temp[i + h_SL];
q_filt = q_temp[i];
}
sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
i + ENVELOPE_ADJUSTMENT_OFFSET);
if (e != e_a[0] && e != e_a[1]) {
sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
q_filt, indexnoise,
kx, m_max);
} else {
int idx = indexsine&1;
int A = (1-((indexsine+(kx & 1))&2));
int B = (A^(-idx)) + idx;
int *out = &Y1[i][kx][idx];
int shift, round;
SoftFloat *in = sbr->s_m[e];
for (m = 0; m+1 < m_max; m+=2) {
shift = 22 - in[m ].exp;
round = 1 << (shift-1);
out[2*m ] += (in[m ].mant * A + round) >> shift;
shift = 22 - in[m+1].exp;
round = 1 << (shift-1);
out[2*m+2] += (in[m+1].mant * B + round) >> shift;
}
if(m_max&1)
{
shift = 22 - in[m ].exp;
round = 1 << (shift-1);
out[2*m ] += (in[m ].mant * A + round) >> shift;
}
}
indexnoise = (indexnoise + m_max) & 0x1ff;
indexsine = (indexsine + 1) & 3;
}
}
ch_data->f_indexnoise = indexnoise;
ch_data->f_indexsine = indexsine;
}
#include "aacsbr_template.c"

View File

@@ -0,0 +1,42 @@
/*
* Generate a header file for hardcoded AAC SBR windows
*
* Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdlib.h>
#include "libavutil/internal.h"
#include "libavutil/common.h"
#undef CONFIG_HARDCODED_TABLES
#define CONFIG_HARDCODED_TABLES 0
#define USE_FIXED 1
#include "aacsbr_fixed_tablegen.h"
#include "tableprint.h"
/**
* Entry point of the fixed-point SBR window table generator.
*
* CONFIG_HARDCODED_TABLES is forced to 0 and USE_FIXED to 1 before the
* tablegen header is included, so aacsbr_tableinit() is called as a real
* initialiser here. Both QMF window arrays are then passed to the
* tableprint.h writer macros with a "static const" prefix, an alignment
* argument of 32 and int32_t as the element type.
* NOTE(review): write_fileheader() and WRITE_ARRAY_ALIGNED() are defined in
* tableprint.h, which is not visible here; presumably they print the
* generated header text -- confirm there.
*
* @return always 0
*/
int main(void)
{
/* Fill in the window tables before anything is written out. */
aacsbr_tableinit();
write_fileheader();
WRITE_ARRAY_ALIGNED("static const", 32, int32_t, sbr_qmf_window_ds);
WRITE_ARRAY_ALIGNED("static const", 32, int32_t, sbr_qmf_window_us);
return 0;
}

View File

@@ -0,0 +1,32 @@
/*
* Header file for hardcoded AAC SBR windows
*
* Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACSBR_FIXED_TABLEGEN_H
#define AVCODEC_AACSBR_FIXED_TABLEGEN_H
#include "aacsbr_tablegen_common.h"
#if CONFIG_HARDCODED_TABLES
#include "libavcodec/aacsbr_fixed_tables.h"
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_AACSBR_FIXED_TABLEGEN_H */

View File

@@ -0,0 +1,42 @@
/*
* Generate a header file for hardcoded AAC SBR windows
*
* Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdlib.h>
#include "libavutil/internal.h"
#include "libavutil/common.h"
#undef CONFIG_HARDCODED_TABLES
#define CONFIG_HARDCODED_TABLES 0
#define USE_FIXED 0
#include "aacsbr_tablegen.h"
#include "tableprint.h"
/**
* Entry point of the floating-point SBR window table generator.
*
* CONFIG_HARDCODED_TABLES and USE_FIXED are both forced to 0 before the
* tablegen header is included, so aacsbr_tableinit() is called as a real
* initialiser here. Both QMF window arrays are then passed to the
* tableprint.h writer macros with a "static const" prefix, an alignment
* argument of 32 and float as the element type.
* NOTE(review): write_fileheader() and WRITE_ARRAY_ALIGNED() are defined in
* tableprint.h, which is not visible here; presumably they print the
* generated header text -- confirm there.
*
* @return always 0
*/
int main(void)
{
/* Fill in the window tables before anything is written out. */
aacsbr_tableinit();
write_fileheader();
WRITE_ARRAY_ALIGNED("static const", 32, float, sbr_qmf_window_ds);
WRITE_ARRAY_ALIGNED("static const", 32, float, sbr_qmf_window_us);
return 0;
}

View File

@@ -0,0 +1,32 @@
/*
* Header file for hardcoded AAC SBR windows
*
* Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACSBR_TABLEGEN_H
#define AVCODEC_AACSBR_TABLEGEN_H
#include "aacsbr_tablegen_common.h"
#if CONFIG_HARDCODED_TABLES
#include "libavcodec/aacsbr_tables.h"
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_AACSBR_TABLEGEN_H */

View File

@@ -0,0 +1,129 @@
/*
* Header file for hardcoded AAC SBR windows
*
* Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACSBR_TABLEGEN_COMMON_H
#define AVCODEC_AACSBR_TABLEGEN_COMMON_H
#include "aac.h"
#if CONFIG_HARDCODED_TABLES
#define aacsbr_tableinit()
#else
/// window coefficients for analysis/synthesis QMF banks
static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320];
static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_us)[640] = {
Q31( 0.0000000000f), Q31(-0.0005525286f), Q31(-0.0005617692f), Q31(-0.0004947518f),
Q31(-0.0004875227f), Q31(-0.0004893791f), Q31(-0.0005040714f), Q31(-0.0005226564f),
Q31(-0.0005466565f), Q31(-0.0005677802f), Q31(-0.0005870930f), Q31(-0.0006132747f),
Q31(-0.0006312493f), Q31(-0.0006540333f), Q31(-0.0006777690f), Q31(-0.0006941614f),
Q31(-0.0007157736f), Q31(-0.0007255043f), Q31(-0.0007440941f), Q31(-0.0007490598f),
Q31(-0.0007681371f), Q31(-0.0007724848f), Q31(-0.0007834332f), Q31(-0.0007779869f),
Q31(-0.0007803664f), Q31(-0.0007801449f), Q31(-0.0007757977f), Q31(-0.0007630793f),
Q31(-0.0007530001f), Q31(-0.0007319357f), Q31(-0.0007215391f), Q31(-0.0006917937f),
Q31(-0.0006650415f), Q31(-0.0006341594f), Q31(-0.0005946118f), Q31(-0.0005564576f),
Q31(-0.0005145572f), Q31(-0.0004606325f), Q31(-0.0004095121f), Q31(-0.0003501175f),
Q31(-0.0002896981f), Q31(-0.0002098337f), Q31(-0.0001446380f), Q31(-0.0000617334f),
Q31( 0.0000134949f), Q31( 0.0001094383f), Q31( 0.0002043017f), Q31( 0.0002949531f),
Q31( 0.0004026540f), Q31( 0.0005107388f), Q31( 0.0006239376f), Q31( 0.0007458025f),
Q31( 0.0008608443f), Q31( 0.0009885988f), Q31( 0.0011250155f), Q31( 0.0012577884f),
Q31( 0.0013902494f), Q31( 0.0015443219f), Q31( 0.0016868083f), Q31( 0.0018348265f),
Q31( 0.0019841140f), Q31( 0.0021461583f), Q31( 0.0023017254f), Q31( 0.0024625616f),
Q31( 0.0026201758f), Q31( 0.0027870464f), Q31( 0.0029469447f), Q31( 0.0031125420f),
Q31( 0.0032739613f), Q31( 0.0034418874f), Q31( 0.0036008268f), Q31( 0.0037603922f),
Q31( 0.0039207432f), Q31( 0.0040819753f), Q31( 0.0042264269f), Q31( 0.0043730719f),
Q31( 0.0045209852f), Q31( 0.0046606460f), Q31( 0.0047932560f), Q31( 0.0049137603f),
Q31( 0.0050393022f), Q31( 0.0051407353f), Q31( 0.0052461166f), Q31( 0.0053471681f),
Q31( 0.0054196775f), Q31( 0.0054876040f), Q31( 0.0055475714f), Q31( 0.0055938023f),
Q31( 0.0056220643f), Q31( 0.0056455196f), Q31( 0.0056389199f), Q31( 0.0056266114f),
Q31( 0.0055917128f), Q31( 0.0055404363f), Q31( 0.0054753783f), Q31( 0.0053838975f),
Q31( 0.0052715758f), Q31( 0.0051382275f), Q31( 0.0049839687f), Q31( 0.0048109469f),
Q31( 0.0046039530f), Q31( 0.0043801861f), Q31( 0.0041251642f), Q31( 0.0038456408f),
Q31( 0.0035401246f), Q31( 0.0032091885f), Q31( 0.0028446757f), Q31( 0.0024508540f),
Q31( 0.0020274176f), Q31( 0.0015784682f), Q31( 0.0010902329f), Q31( 0.0005832264f),
Q31( 0.0000276045f), Q31(-0.0005464280f), Q31(-0.0011568135f), Q31(-0.0018039472f),
Q31(-0.0024826723f), Q31(-0.0031933778f), Q31(-0.0039401124f), Q31(-0.0047222596f),
Q31(-0.0055337211f), Q31(-0.0063792293f), Q31(-0.0072615816f), Q31(-0.0081798233f),
Q31(-0.0091325329f), Q31(-0.0101150215f), Q31(-0.0111315548f), Q31(-0.0121849995f),
Q31( 0.0132718220f), Q31( 0.0143904666f), Q31( 0.0155405553f), Q31( 0.0167324712f),
Q31( 0.0179433381f), Q31( 0.0191872431f), Q31( 0.0204531793f), Q31( 0.0217467550f),
Q31( 0.0230680169f), Q31( 0.0244160992f), Q31( 0.0257875847f), Q31( 0.0271859429f),
Q31( 0.0286072173f), Q31( 0.0300502657f), Q31( 0.0315017608f), Q31( 0.0329754081f),
Q31( 0.0344620948f), Q31( 0.0359697560f), Q31( 0.0374812850f), Q31( 0.0390053679f),
Q31( 0.0405349170f), Q31( 0.0420649094f), Q31( 0.0436097542f), Q31( 0.0451488405f),
Q31( 0.0466843027f), Q31( 0.0482165720f), Q31( 0.0497385755f), Q31( 0.0512556155f),
Q31( 0.0527630746f), Q31( 0.0542452768f), Q31( 0.0557173648f), Q31( 0.0571616450f),
Q31( 0.0585915683f), Q31( 0.0599837480f), Q31( 0.0613455171f), Q31( 0.0626857808f),
Q31( 0.0639715898f), Q31( 0.0652247106f), Q31( 0.0664367512f), Q31( 0.0676075985f),
Q31( 0.0687043828f), Q31( 0.0697630244f), Q31( 0.0707628710f), Q31( 0.0717002673f),
Q31( 0.0725682583f), Q31( 0.0733620255f), Q31( 0.0741003642f), Q31( 0.0747452558f),
Q31( 0.0753137336f), Q31( 0.0758008358f), Q31( 0.0761992479f), Q31( 0.0764992170f),
Q31( 0.0767093490f), Q31( 0.0768173975f), Q31( 0.0768230011f), Q31( 0.0767204924f),
Q31( 0.0765050718f), Q31( 0.0761748321f), Q31( 0.0757305756f), Q31( 0.0751576255f),
Q31( 0.0744664394f), Q31( 0.0736406005f), Q31( 0.0726774642f), Q31( 0.0715826364f),
Q31( 0.0703533073f), Q31( 0.0689664013f), Q31( 0.0674525021f), Q31( 0.0657690668f),
Q31( 0.0639444805f), Q31( 0.0619602779f), Q31( 0.0598166570f), Q31( 0.0575152691f),
Q31( 0.0550460034f), Q31( 0.0524093821f), Q31( 0.0495978676f), Q31( 0.0466303305f),
Q31( 0.0434768782f), Q31( 0.0401458278f), Q31( 0.0366418116f), Q31( 0.0329583930f),
Q31( 0.0290824006f), Q31( 0.0250307561f), Q31( 0.0207997072f), Q31( 0.0163701258f),
Q31( 0.0117623832f), Q31( 0.0069636862f), Q31( 0.0019765601f), Q31(-0.0032086896f),
Q31(-0.0085711749f), Q31(-0.0141288827f), Q31(-0.0198834129f), Q31(-0.0258227288f),
Q31(-0.0319531274f), Q31(-0.0382776572f), Q31(-0.0447806821f), Q31(-0.0514804176f),
Q31(-0.0583705326f), Q31(-0.0654409853f), Q31(-0.0726943300f), Q31(-0.0801372934f),
Q31(-0.0877547536f), Q31(-0.0955533352f), Q31(-0.1035329531f), Q31(-0.1116826931f),
Q31(-0.1200077984f), Q31(-0.1285002850f), Q31(-0.1371551761f), Q31(-0.1459766491f),
Q31(-0.1549607071f), Q31(-0.1640958855f), Q31(-0.1733808172f), Q31(-0.1828172548f),
Q31(-0.1923966745f), Q31(-0.2021250176f), Q31(-0.2119735853f), Q31(-0.2219652696f),
Q31(-0.2320690870f), Q31(-0.2423016884f), Q31(-0.2526480309f), Q31(-0.2631053299f),
Q31(-0.2736634040f), Q31(-0.2843214189f), Q31(-0.2950716717f), Q31(-0.3059098575f),
Q31(-0.3168278913f), Q31(-0.3278113727f), Q31(-0.3388722693f), Q31(-0.3499914122f),
Q31( 0.3611589903f), Q31( 0.3723795546f), Q31( 0.3836350013f), Q31( 0.3949211761f),
Q31( 0.4062317676f), Q31( 0.4175696896f), Q31( 0.4289119920f), Q31( 0.4402553754f),
Q31( 0.4515996535f), Q31( 0.4629308085f), Q31( 0.4742453214f), Q31( 0.4855253091f),
Q31( 0.4967708254f), Q31( 0.5079817500f), Q31( 0.5191234970f), Q31( 0.5302240895f),
Q31( 0.5412553448f), Q31( 0.5522051258f), Q31( 0.5630789140f), Q31( 0.5738524131f),
Q31( 0.5845403235f), Q31( 0.5951123086f), Q31( 0.6055783538f), Q31( 0.6159109932f),
Q31( 0.6261242695f), Q31( 0.6361980107f), Q31( 0.6461269695f), Q31( 0.6559016302f),
Q31( 0.6655139880f), Q31( 0.6749663190f), Q31( 0.6842353293f), Q31( 0.6933282376f),
Q31( 0.7022388719f), Q31( 0.7109410426f), Q31( 0.7194462634f), Q31( 0.7277448900f),
Q31( 0.7358211758f), Q31( 0.7436827863f), Q31( 0.7513137456f), Q31( 0.7587080760f),
Q31( 0.7658674865f), Q31( 0.7727780881f), Q31( 0.7794287519f), Q31( 0.7858353120f),
Q31( 0.7919735841f), Q31( 0.7978466413f), Q31( 0.8034485751f), Q31( 0.8087695004f),
Q31( 0.8138191270f), Q31( 0.8185776004f), Q31( 0.8230419890f), Q31( 0.8272275347f),
Q31( 0.8311038457f), Q31( 0.8346937361f), Q31( 0.8379717337f), Q31( 0.8409541392f),
Q31( 0.8436238281f), Q31( 0.8459818469f), Q31( 0.8480315777f), Q31( 0.8497805198f),
Q31( 0.8511971524f), Q31( 0.8523047035f), Q31( 0.8531020949f), Q31( 0.8535720573f),
Q31( 0.8537385600f),
};
/**
 * Complete the SBR QMF window tables at run time.
 *
 * The static initializer of sbr_qmf_window_us supplies entries up to and
 * including index 320. This routine fills entries 321..639 as a mirror
 * image around index 320, negates entries 384 and 512 afterwards, and
 * finally builds sbr_qmf_window_ds from every second entry of
 * sbr_qmf_window_us.
 */
static av_cold void aacsbr_tableinit(void)
{
    int lo, hi, k;

    /* Reflect entries 319..1 into 321..639; source and destination
     * ranges do not overlap, so the copy direction is irrelevant. */
    for (lo = 319, hi = 321; lo >= 1; lo--, hi++)
        sbr_qmf_window_us[hi] = sbr_qmf_window_us[lo];

    /* Two of the mirrored entries take the opposite sign. */
    sbr_qmf_window_us[384] = -sbr_qmf_window_us[384];
    sbr_qmf_window_us[512] = -sbr_qmf_window_us[512];

    /* Keep every second entry for the 320-entry table. */
    for (k = 0; k < 320; k++)
        sbr_qmf_window_ds[k] = sbr_qmf_window_us[k << 1];
}
#endif /* CONFIG_HARDCODED_TABLES */
#endif /* AVCODEC_AACSBR_TABLEGEN_COMMON_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,535 @@
/*
* AAC Spectral Band Replication decoding data
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC Spectral Band Replication decoding data
* @author Robert Swain ( rob opendot cl )
*/
#ifndef AVCODEC_AACSBRDATA_H
#define AVCODEC_AACSBRDATA_H
#include <stdint.h>
#include "libavutil/mem.h"
#include "aac_defines.h"
/// Huffman tables for SBR
static const uint8_t t_huffman_env_1_5dB_bits[121] = {
18, 18, 18, 18, 18, 18, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 17, 18, 16, 17, 18, 17,
16, 16, 16, 16, 15, 14, 14, 13,
13, 12, 11, 10, 9, 8, 7, 6,
5, 4, 3, 2, 2, 3, 4, 5,
6, 7, 8, 9, 10, 12, 13, 14,
14, 15, 16, 17, 16, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19,
};
static const uint32_t t_huffman_env_1_5dB_codes[121] = {
0x3ffd6, 0x3ffd7, 0x3ffd8, 0x3ffd9, 0x3ffda, 0x3ffdb, 0x7ffb8, 0x7ffb9,
0x7ffba, 0x7ffbb, 0x7ffbc, 0x7ffbd, 0x7ffbe, 0x7ffbf, 0x7ffc0, 0x7ffc1,
0x7ffc2, 0x7ffc3, 0x7ffc4, 0x7ffc5, 0x7ffc6, 0x7ffc7, 0x7ffc8, 0x7ffc9,
0x7ffca, 0x7ffcb, 0x7ffcc, 0x7ffcd, 0x7ffce, 0x7ffcf, 0x7ffd0, 0x7ffd1,
0x7ffd2, 0x7ffd3, 0x1ffe6, 0x3ffd4, 0x0fff0, 0x1ffe9, 0x3ffd5, 0x1ffe7,
0x0fff1, 0x0ffec, 0x0ffed, 0x0ffee, 0x07ff4, 0x03ff9, 0x03ff7, 0x01ffa,
0x01ff9, 0x00ffb, 0x007fc, 0x003fc, 0x001fd, 0x000fd, 0x0007d, 0x0003d,
0x0001d, 0x0000d, 0x00005, 0x00001, 0x00000, 0x00004, 0x0000c, 0x0001c,
0x0003c, 0x0007c, 0x000fc, 0x001fc, 0x003fd, 0x00ffa, 0x01ff8, 0x03ff6,
0x03ff8, 0x07ff5, 0x0ffef, 0x1ffe8, 0x0fff2, 0x7ffd4, 0x7ffd5, 0x7ffd6,
0x7ffd7, 0x7ffd8, 0x7ffd9, 0x7ffda, 0x7ffdb, 0x7ffdc, 0x7ffdd, 0x7ffde,
0x7ffdf, 0x7ffe0, 0x7ffe1, 0x7ffe2, 0x7ffe3, 0x7ffe4, 0x7ffe5, 0x7ffe6,
0x7ffe7, 0x7ffe8, 0x7ffe9, 0x7ffea, 0x7ffeb, 0x7ffec, 0x7ffed, 0x7ffee,
0x7ffef, 0x7fff0, 0x7fff1, 0x7fff2, 0x7fff3, 0x7fff4, 0x7fff5, 0x7fff6,
0x7fff7, 0x7fff8, 0x7fff9, 0x7fffa, 0x7fffb, 0x7fffc, 0x7fffd, 0x7fffe,
0x7ffff,
};
static const uint8_t f_huffman_env_1_5dB_bits[121] = {
19, 19, 20, 20, 20, 20, 20, 20,
20, 19, 20, 20, 20, 20, 19, 20,
19, 19, 20, 18, 20, 20, 20, 19,
20, 20, 20, 19, 20, 19, 18, 19,
18, 18, 17, 18, 17, 17, 17, 16,
16, 16, 15, 15, 14, 13, 13, 12,
12, 11, 10, 9, 9, 8, 7, 6,
5, 4, 3, 2, 2, 3, 4, 5,
6, 8, 8, 9, 10, 11, 11, 11,
12, 12, 13, 13, 14, 14, 16, 16,
17, 17, 18, 18, 18, 18, 18, 18,
18, 20, 19, 20, 20, 20, 20, 20,
20, 19, 20, 20, 20, 20, 19, 20,
18, 20, 20, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20,
20,
};
static const uint32_t f_huffman_env_1_5dB_codes[121] = {
0x7ffe7, 0x7ffe8, 0xfffd2, 0xfffd3, 0xfffd4, 0xfffd5, 0xfffd6, 0xfffd7,
0xfffd8, 0x7ffda, 0xfffd9, 0xfffda, 0xfffdb, 0xfffdc, 0x7ffdb, 0xfffdd,
0x7ffdc, 0x7ffdd, 0xfffde, 0x3ffe4, 0xfffdf, 0xfffe0, 0xfffe1, 0x7ffde,
0xfffe2, 0xfffe3, 0xfffe4, 0x7ffdf, 0xfffe5, 0x7ffe0, 0x3ffe8, 0x7ffe1,
0x3ffe0, 0x3ffe9, 0x1ffef, 0x3ffe5, 0x1ffec, 0x1ffed, 0x1ffee, 0x0fff4,
0x0fff3, 0x0fff0, 0x07ff7, 0x07ff6, 0x03ffa, 0x01ffa, 0x01ff9, 0x00ffa,
0x00ff8, 0x007f9, 0x003fb, 0x001fc, 0x001fa, 0x000fb, 0x0007c, 0x0003c,
0x0001c, 0x0000c, 0x00005, 0x00001, 0x00000, 0x00004, 0x0000d, 0x0001d,
0x0003d, 0x000fa, 0x000fc, 0x001fb, 0x003fa, 0x007f8, 0x007fa, 0x007fb,
0x00ff9, 0x00ffb, 0x01ff8, 0x01ffb, 0x03ff8, 0x03ff9, 0x0fff1, 0x0fff2,
0x1ffea, 0x1ffeb, 0x3ffe1, 0x3ffe2, 0x3ffea, 0x3ffe3, 0x3ffe6, 0x3ffe7,
0x3ffeb, 0xfffe6, 0x7ffe2, 0xfffe7, 0xfffe8, 0xfffe9, 0xfffea, 0xfffeb,
0xfffec, 0x7ffe3, 0xfffed, 0xfffee, 0xfffef, 0xffff0, 0x7ffe4, 0xffff1,
0x3ffec, 0xffff2, 0xffff3, 0x7ffe5, 0x7ffe6, 0xffff4, 0xffff5, 0xffff6,
0xffff7, 0xffff8, 0xffff9, 0xffffa, 0xffffb, 0xffffc, 0xffffd, 0xffffe,
0xfffff,
};
static const uint8_t t_huffman_env_bal_1_5dB_bits[49] = {
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 12, 11, 9, 7, 5, 3,
1, 2, 4, 6, 8, 11, 12, 15,
16, 16, 16, 16, 16, 16, 16, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17,
};
static const uint32_t t_huffman_env_bal_1_5dB_codes[49] = {
0x0ffe4, 0x0ffe5, 0x0ffe6, 0x0ffe7, 0x0ffe8, 0x0ffe9, 0x0ffea, 0x0ffeb,
0x0ffec, 0x0ffed, 0x0ffee, 0x0ffef, 0x0fff0, 0x0fff1, 0x0fff2, 0x0fff3,
0x0fff4, 0x0ffe2, 0x00ffc, 0x007fc, 0x001fe, 0x0007e, 0x0001e, 0x00006,
0x00000, 0x00002, 0x0000e, 0x0003e, 0x000fe, 0x007fd, 0x00ffd, 0x07ff0,
0x0ffe3, 0x0fff5, 0x0fff6, 0x0fff7, 0x0fff8, 0x0fff9, 0x0fffa, 0x1fff6,
0x1fff7, 0x1fff8, 0x1fff9, 0x1fffa, 0x1fffb, 0x1fffc, 0x1fffd, 0x1fffe,
0x1ffff,
};
static const uint8_t f_huffman_env_bal_1_5dB_bits[49] = {
18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 16,
17, 14, 11, 11, 8, 7, 4, 2,
1, 3, 5, 6, 9, 11, 12, 15,
16, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 19,
19,
};
static const uint32_t f_huffman_env_bal_1_5dB_codes[49] = {
0x3ffe2, 0x3ffe3, 0x3ffe4, 0x3ffe5, 0x3ffe6, 0x3ffe7, 0x3ffe8, 0x3ffe9,
0x3ffea, 0x3ffeb, 0x3ffec, 0x3ffed, 0x3ffee, 0x3ffef, 0x3fff0, 0x0fff7,
0x1fff0, 0x03ffc, 0x007fe, 0x007fc, 0x000fe, 0x0007e, 0x0000e, 0x00002,
0x00000, 0x00006, 0x0001e, 0x0003e, 0x001fe, 0x007fd, 0x00ffe, 0x07ffa,
0x0fff6, 0x3fff1, 0x3fff2, 0x3fff3, 0x3fff4, 0x3fff5, 0x3fff6, 0x3fff7,
0x3fff8, 0x3fff9, 0x3fffa, 0x3fffb, 0x3fffc, 0x3fffd, 0x3fffe, 0x7fffe,
0x7ffff,
};
static const uint8_t t_huffman_env_3_0dB_bits[63] = {
18, 18, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 17, 16, 16, 16, 14, 14, 14,
13, 12, 11, 8, 6, 4, 2, 1,
3, 5, 7, 9, 11, 13, 14, 14,
15, 16, 17, 18, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19,
};
static const uint32_t t_huffman_env_3_0dB_codes[63] = {
0x3ffed, 0x3ffee, 0x7ffde, 0x7ffdf, 0x7ffe0, 0x7ffe1, 0x7ffe2, 0x7ffe3,
0x7ffe4, 0x7ffe5, 0x7ffe6, 0x7ffe7, 0x7ffe8, 0x7ffe9, 0x7ffea, 0x7ffeb,
0x7ffec, 0x1fff4, 0x0fff7, 0x0fff9, 0x0fff8, 0x03ffb, 0x03ffa, 0x03ff8,
0x01ffa, 0x00ffc, 0x007fc, 0x000fe, 0x0003e, 0x0000e, 0x00002, 0x00000,
0x00006, 0x0001e, 0x0007e, 0x001fe, 0x007fd, 0x01ffb, 0x03ff9, 0x03ffc,
0x07ffa, 0x0fff6, 0x1fff5, 0x3ffec, 0x7ffed, 0x7ffee, 0x7ffef, 0x7fff0,
0x7fff1, 0x7fff2, 0x7fff3, 0x7fff4, 0x7fff5, 0x7fff6, 0x7fff7, 0x7fff8,
0x7fff9, 0x7fffa, 0x7fffb, 0x7fffc, 0x7fffd, 0x7fffe, 0x7ffff,
};
static const uint8_t f_huffman_env_3_0dB_bits[63] = {
20, 20, 20, 20, 20, 20, 20, 18,
19, 19, 19, 19, 18, 18, 20, 19,
17, 18, 17, 16, 16, 15, 14, 12,
11, 10, 9, 8, 6, 4, 2, 1,
3, 5, 8, 9, 10, 11, 12, 13,
14, 15, 15, 16, 16, 17, 17, 18,
18, 18, 20, 19, 19, 19, 20, 19,
19, 20, 20, 20, 20, 20, 20,
};
static const uint32_t f_huffman_env_3_0dB_codes[63] = {
0xffff0, 0xffff1, 0xffff2, 0xffff3, 0xffff4, 0xffff5, 0xffff6, 0x3fff3,
0x7fff5, 0x7ffee, 0x7ffef, 0x7fff6, 0x3fff4, 0x3fff2, 0xffff7, 0x7fff0,
0x1fff5, 0x3fff0, 0x1fff4, 0x0fff7, 0x0fff6, 0x07ff8, 0x03ffb, 0x00ffd,
0x007fd, 0x003fd, 0x001fd, 0x000fd, 0x0003e, 0x0000e, 0x00002, 0x00000,
0x00006, 0x0001e, 0x000fc, 0x001fc, 0x003fc, 0x007fc, 0x00ffc, 0x01ffc,
0x03ffa, 0x07ff9, 0x07ffa, 0x0fff8, 0x0fff9, 0x1fff6, 0x1fff7, 0x3fff5,
0x3fff6, 0x3fff1, 0xffff8, 0x7fff1, 0x7fff2, 0x7fff3, 0xffff9, 0x7fff7,
0x7fff4, 0xffffa, 0xffffb, 0xffffc, 0xffffd, 0xffffe, 0xfffff,
};
static const uint8_t t_huffman_env_bal_3_0dB_bits[25] = {
13, 13, 13, 13, 13, 13, 13, 12,
8, 7, 4, 3, 1, 2, 5, 6,
9, 13, 13, 13, 13, 13, 13, 14,
14,
};
static const uint16_t t_huffman_env_bal_3_0dB_codes[25] = {
0x1ff2, 0x1ff3, 0x1ff4, 0x1ff5, 0x1ff6, 0x1ff7, 0x1ff8, 0x0ff8,
0x00fe, 0x007e, 0x000e, 0x0006, 0x0000, 0x0002, 0x001e, 0x003e,
0x01fe, 0x1ff9, 0x1ffa, 0x1ffb, 0x1ffc, 0x1ffd, 0x1ffe, 0x3ffe,
0x3fff,
};
static const uint8_t f_huffman_env_bal_3_0dB_bits[25] = {
13, 13, 13, 13, 13, 14, 14, 11,
8, 7, 4, 2, 1, 3, 5, 6,
9, 12, 13, 14, 14, 14, 14, 14,
14,
};
static const uint16_t f_huffman_env_bal_3_0dB_codes[25] = {
0x1ff7, 0x1ff8, 0x1ff9, 0x1ffa, 0x1ffb, 0x3ff8, 0x3ff9, 0x07fc,
0x00fe, 0x007e, 0x000e, 0x0002, 0x0000, 0x0006, 0x001e, 0x003e,
0x01fe, 0x0ffa, 0x1ff6, 0x3ffa, 0x3ffb, 0x3ffc, 0x3ffd, 0x3ffe,
0x3fff,
};
static const uint8_t t_huffman_noise_3_0dB_bits[63] = {
13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 11, 8, 6, 4, 3, 1,
2, 5, 8, 10, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 14, 14,
};
static const uint16_t t_huffman_noise_3_0dB_codes[63] = {
0x1fce, 0x1fcf, 0x1fd0, 0x1fd1, 0x1fd2, 0x1fd3, 0x1fd4, 0x1fd5,
0x1fd6, 0x1fd7, 0x1fd8, 0x1fd9, 0x1fda, 0x1fdb, 0x1fdc, 0x1fdd,
0x1fde, 0x1fdf, 0x1fe0, 0x1fe1, 0x1fe2, 0x1fe3, 0x1fe4, 0x1fe5,
0x1fe6, 0x1fe7, 0x07f2, 0x00fd, 0x003e, 0x000e, 0x0006, 0x0000,
0x0002, 0x001e, 0x00fc, 0x03f8, 0x1fcc, 0x1fe8, 0x1fe9, 0x1fea,
0x1feb, 0x1fec, 0x1fcd, 0x1fed, 0x1fee, 0x1fef, 0x1ff0, 0x1ff1,
0x1ff2, 0x1ff3, 0x1ff4, 0x1ff5, 0x1ff6, 0x1ff7, 0x1ff8, 0x1ff9,
0x1ffa, 0x1ffb, 0x1ffc, 0x1ffd, 0x1ffe, 0x3ffe, 0x3fff,
};
static const uint8_t t_huffman_noise_bal_3_0dB_bits[25] = {
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 5, 2, 1, 3, 6, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8,
};
static const uint8_t t_huffman_noise_bal_3_0dB_codes[25] = {
0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0x1c, 0x02, 0x00, 0x06, 0x3a, 0xf6,
0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
0xff,
};
/**
* Signed offset table with 6 rows of 16 entries each; every row belongs to
* one SBR sampling-rate range (fs_sbr), as given in the per-row comments.
* NOTE(review): the code that indexes this table is not visible here;
* presumably the column is selected by a bitstream start-frequency field --
* confirm against the user of sbr_offset.
*/
static const int8_t sbr_offset[6][16] = {
{-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}, // fs_sbr = 16000 Hz
{-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 13}, // fs_sbr = 22050 Hz
{-5, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 13, 16}, // fs_sbr = 24000 Hz
{-6, -4, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 13, 16}, // fs_sbr = 32000 Hz
{-4, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 13, 16, 20}, // 44100 Hz <= fs_sbr <= 64000 Hz
{-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 13, 16, 20, 24}, // 64000 Hz < fs_sbr
};
/* First eight entries repeated at end to simplify SIMD implementations. */
const DECLARE_ALIGNED(16, INTFLOAT, AAC_RENAME(ff_sbr_noise_table))[][2] = {
{Q31(-0.99948153278296f), Q31(-0.59483417516607f)}, {Q31( 0.97113454393991f), Q31(-0.67528515225647f)},
{Q31( 0.14130051758487f), Q31(-0.95090983575689f)}, {Q31(-0.47005496701697f), Q31(-0.37340549728647f)},
{Q31( 0.80705063769351f), Q31( 0.29653668284408f)}, {Q31(-0.38981478896926f), Q31( 0.89572605717087f)},
{Q31(-0.01053049862020f), Q31(-0.66959058036166f)}, {Q31(-0.91266367957293f), Q31(-0.11522938140034f)},
{Q31( 0.54840422910309f), Q31( 0.75221367176302f)}, {Q31( 0.40009252867955f), Q31(-0.98929400334421f)},
{Q31(-0.99867974711855f), Q31(-0.88147068645358f)}, {Q31(-0.95531076805040f), Q31( 0.90908757154593f)},
{Q31(-0.45725933317144f), Q31(-0.56716323646760f)}, {Q31(-0.72929675029275f), Q31(-0.98008272727324f)},
{Q31( 0.75622801399036f), Q31( 0.20950329995549f)}, {Q31( 0.07069442601050f), Q31(-0.78247898470706f)},
{Q31( 0.74496252926055f), Q31(-0.91169004445807f)}, {Q31(-0.96440182703856f), Q31(-0.94739918296622f)},
{Q31( 0.30424629369539f), Q31(-0.49438267012479f)}, {Q31( 0.66565033746925f), Q31( 0.64652935542491f)},
{Q31( 0.91697008020594f), Q31( 0.17514097332009f)}, {Q31(-0.70774918760427f), Q31( 0.52548653416543f)},
{Q31(-0.70051415345560f), Q31(-0.45340028808763f)}, {Q31(-0.99496513054797f), Q31(-0.90071908066973f)},
{Q31( 0.98164490790123f), Q31(-0.77463155528697f)}, {Q31(-0.54671580548181f), Q31(-0.02570928536004f)},
{Q31(-0.01689629065389f), Q31( 0.00287506445732f)}, {Q31(-0.86110349531986f), Q31( 0.42548583726477f)},
{Q31(-0.98892980586032f), Q31(-0.87881132267556f)}, {Q31( 0.51756627678691f), Q31( 0.66926784710139f)},
{Q31(-0.99635026409640f), Q31(-0.58107730574765f)}, {Q31(-0.99969370862163f), Q31( 0.98369989360250f)},
{Q31( 0.55266258627194f), Q31( 0.59449057465591f)}, {Q31( 0.34581177741673f), Q31( 0.94879421061866f)},
{Q31( 0.62664209577999f), Q31(-0.74402970906471f)}, {Q31(-0.77149701404973f), Q31(-0.33883658042801f)},
{Q31(-0.91592244254432f), Q31( 0.03687901376713f)}, {Q31(-0.76285492357887f), Q31(-0.91371867919124f)},
{Q31( 0.79788337195331f), Q31(-0.93180971199849f)}, {Q31( 0.54473080610200f), Q31(-0.11919206037186f)},
{Q31(-0.85639281671058f), Q31( 0.42429854760451f)}, {Q31(-0.92882402971423f), Q31( 0.27871809078609f)},
{Q31(-0.11708371046774f), Q31(-0.99800843444966f)}, {Q31( 0.21356749817493f), Q31(-0.90716295627033f)},
{Q31(-0.76191692573909f), Q31( 0.99768118356265f)}, {Q31( 0.98111043100884f), Q31(-0.95854459734407f)},
{Q31(-0.85913269895572f), Q31( 0.95766566168880f)}, {Q31(-0.93307242253692f), Q31( 0.49431757696466f)},
{Q31( 0.30485754879632f), Q31(-0.70540034357529f)}, {Q31( 0.85289650925190f), Q31( 0.46766131791044f)},
{Q31( 0.91328082618125f), Q31(-0.99839597361769f)}, {Q31(-0.05890199924154f), Q31( 0.70741827819497f)},
{Q31( 0.28398686150148f), Q31( 0.34633555702188f)}, {Q31( 0.95258164539612f), Q31(-0.54893416026939f)},
{Q31(-0.78566324168507f), Q31(-0.75568541079691f)}, {Q31(-0.95789495447877f), Q31(-0.20423194696966f)},
{Q31( 0.82411158711197f), Q31( 0.96654618432562f)}, {Q31(-0.65185446735885f), Q31(-0.88734990773289f)},
{Q31(-0.93643603134666f), Q31( 0.99870790442385f)}, {Q31( 0.91427159529618f), Q31(-0.98290505544444f)},
{Q31(-0.70395684036886f), Q31( 0.58796798221039f)}, {Q31( 0.00563771969365f), Q31( 0.61768196727244f)},
{Q31( 0.89065051931895f), Q31( 0.52783352697585f)}, {Q31(-0.68683707712762f), Q31( 0.80806944710339f)},
{Q31( 0.72165342518718f), Q31(-0.69259857349564f)}, {Q31(-0.62928247730667f), Q31( 0.13627037407335f)},
{Q31( 0.29938434065514f), Q31(-0.46051329682246f)}, {Q31(-0.91781958879280f), Q31(-0.74012716684186f)},
{Q31( 0.99298717043688f), Q31( 0.40816610075661f)}, {Q31( 0.82368298622748f), Q31(-0.74036047190173f)},
{Q31(-0.98512833386833f), Q31(-0.99972330709594f)}, {Q31(-0.95915368242257f), Q31(-0.99237800466040f)},
{Q31(-0.21411126572790f), Q31(-0.93424819052545f)}, {Q31(-0.68821476106884f), Q31(-0.26892306315457f)},
{Q31( 0.91851997982317f), Q31( 0.09358228901785f)}, {Q31(-0.96062769559127f), Q31( 0.36099095133739f)},
{Q31( 0.51646184922287f), Q31(-0.71373332873917f)}, {Q31( 0.61130721139669f), Q31( 0.46950141175917f)},
{Q31( 0.47336129371299f), Q31(-0.27333178296162f)}, {Q31( 0.90998308703519f), Q31( 0.96715662938132f)},
{Q31( 0.44844799194357f), Q31( 0.99211574628306f)}, {Q31( 0.66614891079092f), Q31( 0.96590176169121f)},
{Q31( 0.74922239129237f), Q31(-0.89879858826087f)}, {Q31(-0.99571588506485f), Q31( 0.52785521494349f)},
{Q31( 0.97401082477563f), Q31(-0.16855870075190f)}, {Q31( 0.72683747733879f), Q31(-0.48060774432251f)},
{Q31( 0.95432193457128f), Q31( 0.68849603408441f)}, {Q31(-0.72962208425191f), Q31(-0.76608443420917f)},
{Q31(-0.85359479233537f), Q31( 0.88738125901579f)}, {Q31(-0.81412430338535f), Q31(-0.97480768049637f)},
{Q31(-0.87930772356786f), Q31( 0.74748307690436f)}, {Q31(-0.71573331064977f), Q31(-0.98570608178923f)},
{Q31( 0.83524300028228f), Q31( 0.83702537075163f)}, {Q31(-0.48086065601423f), Q31(-0.98848504923531f)},
{Q31( 0.97139128574778f), Q31( 0.80093621198236f)}, {Q31( 0.51992825347895f), Q31( 0.80247631400510f)},
{Q31(-0.00848591195325f), Q31(-0.76670128000486f)}, {Q31(-0.70294374303036f), Q31( 0.55359910445577f)},
{Q31(-0.95894428168140f), Q31(-0.43265504344783f)}, {Q31( 0.97079252950321f), Q31( 0.09325857238682f)},
{Q31(-0.92404293670797f), Q31( 0.85507704027855f)}, {Q31(-0.69506469500450f), Q31( 0.98633412625459f)},
{Q31( 0.26559203620024f), Q31( 0.73314307966524f)}, {Q31( 0.28038443336943f), Q31( 0.14537913654427f)},
{Q31(-0.74138124825523f), Q31( 0.99310339807762f)}, {Q31(-0.01752795995444f), Q31(-0.82616635284178f)},
{Q31(-0.55126773094930f), Q31(-0.98898543862153f)}, {Q31( 0.97960898850996f), Q31(-0.94021446752851f)},
{Q31(-0.99196309146936f), Q31( 0.67019017358456f)}, {Q31(-0.67684928085260f), Q31( 0.12631491649378f)},
{Q31( 0.09140039465500f), Q31(-0.20537731453108f)}, {Q31(-0.71658965751996f), Q31(-0.97788200391224f)},
{Q31( 0.81014640078925f), Q31( 0.53722648362443f)}, {Q31( 0.40616991671205f), Q31(-0.26469008598449f)},
{Q31(-0.67680188682972f), Q31( 0.94502052337695f)}, {Q31( 0.86849774348749f), Q31(-0.18333598647899f)},
{Q31(-0.99500381284851f), Q31(-0.02634122068550f)}, {Q31( 0.84329189340667f), Q31( 0.10406957462213f)},
{Q31(-0.09215968531446f), Q31( 0.69540012101253f)}, {Q31( 0.99956173327206f), Q31(-0.12358542001404f)},
{Q31(-0.79732779473535f), Q31(-0.91582524736159f)}, {Q31( 0.96349973642406f), Q31( 0.96640458041000f)},
{Q31(-0.79942778496547f), Q31( 0.64323902822857f)}, {Q31(-0.11566039853896f), Q31( 0.28587846253726f)},
{Q31(-0.39922954514662f), Q31( 0.94129601616966f)}, {Q31( 0.99089197565987f), Q31(-0.92062625581587f)},
{Q31( 0.28631285179909f), Q31(-0.91035047143603f)}, {Q31(-0.83302725605608f), Q31(-0.67330410892084f)},
{Q31( 0.95404443402072f), Q31( 0.49162765398743f)}, {Q31(-0.06449863579434f), Q31( 0.03250560813135f)},
{Q31(-0.99575054486311f), Q31( 0.42389784469507f)}, {Q31(-0.65501142790847f), Q31( 0.82546114655624f)},
{Q31(-0.81254441908887f), Q31(-0.51627234660629f)}, {Q31(-0.99646369485481f), Q31( 0.84490533520752f)},
{Q31( 0.00287840603348f), Q31( 0.64768261158166f)}, {Q31( 0.70176989408455f), Q31(-0.20453028573322f)},
{Q31( 0.96361882270190f), Q31( 0.40706967140989f)}, {Q31(-0.68883758192426f), Q31( 0.91338958840772f)},
{Q31(-0.34875585502238f), Q31( 0.71472290693300f)}, {Q31( 0.91980081243087f), Q31( 0.66507455644919f)},
{Q31(-0.99009048343881f), Q31( 0.85868021604848f)}, {Q31( 0.68865791458395f), Q31( 0.55660316809678f)},
{Q31(-0.99484402129368f), Q31(-0.20052559254934f)}, {Q31( 0.94214511408023f), Q31(-0.99696425367461f)},
{Q31(-0.67414626793544f), Q31( 0.49548221180078f)}, {Q31(-0.47339353684664f), Q31(-0.85904328834047f)},
{Q31( 0.14323651387360f), Q31(-0.94145598222488f)}, {Q31(-0.29268293575672f), Q31( 0.05759224927952f)},
{Q31( 0.43793861458754f), Q31(-0.78904969892724f)}, {Q31(-0.36345126374441f), Q31( 0.64874435357162f)},
{Q31(-0.08750604656825f), Q31( 0.97686944362527f)}, {Q31(-0.96495267812511f), Q31(-0.53960305946511f)},
{Q31( 0.55526940659947f), Q31( 0.78891523734774f)}, {Q31( 0.73538215752630f), Q31( 0.96452072373404f)},
{Q31(-0.30889773919437f), Q31(-0.80664389776860f)}, {Q31( 0.03574995626194f), Q31(-0.97325616900959f)},
{Q31( 0.98720684660488f), Q31( 0.48409133691962f)}, {Q31(-0.81689296271203f), Q31(-0.90827703628298f)},
{Q31( 0.67866860118215f), Q31( 0.81284503870856f)}, {Q31(-0.15808569732583f), Q31( 0.85279555024382f)},
{Q31( 0.80723395114371f), Q31(-0.24717418514605f)}, {Q31( 0.47788757329038f), Q31(-0.46333147839295f)},
{Q31( 0.96367554763201f), Q31( 0.38486749303242f)}, {Q31(-0.99143875716818f), Q31(-0.24945277239809f)},
{Q31( 0.83081876925833f), Q31(-0.94780851414763f)}, {Q31(-0.58753191905341f), Q31( 0.01290772389163f)},
{Q31( 0.95538108220960f), Q31(-0.85557052096538f)}, {Q31(-0.96490920476211f), Q31(-0.64020970923102f)},
{Q31(-0.97327101028521f), Q31( 0.12378128133110f)}, {Q31( 0.91400366022124f), Q31( 0.57972471346930f)},
{Q31(-0.99925837363824f), Q31( 0.71084847864067f)}, {Q31(-0.86875903507313f), Q31(-0.20291699203564f)},
{Q31(-0.26240034795124f), Q31(-0.68264554369108f)}, {Q31(-0.24664412953388f), Q31(-0.87642273115183f)},
{Q31( 0.02416275806869f), Q31( 0.27192914288905f)}, {Q31( 0.82068619590515f), Q31(-0.85087787994476f)},
{Q31( 0.88547373760759f), Q31(-0.89636802901469f)}, {Q31(-0.18173078152226f), Q31(-0.26152145156800f)},
{Q31( 0.09355476558534f), Q31( 0.54845123045604f)}, {Q31(-0.54668414224090f), Q31( 0.95980774020221f)},
{Q31( 0.37050990604091f), Q31(-0.59910140383171f)}, {Q31(-0.70373594262891f), Q31( 0.91227665827081f)},
{Q31(-0.34600785879594f), Q31(-0.99441426144200f)}, {Q31(-0.68774481731008f), Q31(-0.30238837956299f)},
{Q31(-0.26843291251234f), Q31( 0.83115668004362f)}, {Q31( 0.49072334613242f), Q31(-0.45359708737775f)},
{Q31( 0.38975993093975f), Q31( 0.95515358099121f)}, {Q31(-0.97757125224150f), Q31( 0.05305894580606f)},
{Q31(-0.17325552859616f), Q31(-0.92770672250494f)}, {Q31( 0.99948035025744f), Q31( 0.58285545563426f)},
{Q31(-0.64946246527458f), Q31( 0.68645507104960f)}, {Q31(-0.12016920576437f), Q31(-0.57147322153312f)},
{Q31(-0.58947456517751f), Q31(-0.34847132454388f)}, {Q31(-0.41815140454465f), Q31( 0.16276422358861f)},
{Q31( 0.99885650204884f), Q31( 0.11136095490444f)}, {Q31(-0.56649614128386f), Q31(-0.90494866361587f)},
{Q31( 0.94138021032330f), Q31( 0.35281916733018f)}, {Q31(-0.75725076534641f), Q31( 0.53650549640587f)},
{Q31( 0.20541973692630f), Q31(-0.94435144369918f)}, {Q31( 0.99980371023351f), Q31( 0.79835913565599f)},
{Q31( 0.29078277605775f), Q31( 0.35393777921520f)}, {Q31(-0.62858772103030f), Q31( 0.38765693387102f)},
{Q31( 0.43440904467688f), Q31(-0.98546330463232f)}, {Q31(-0.98298583762390f), Q31( 0.21021524625209f)},
{Q31( 0.19513029146934f), Q31(-0.94239832251867f)}, {Q31(-0.95476662400101f), Q31( 0.98364554179143f)},
{Q31( 0.93379635304810f), Q31(-0.70881994583682f)}, {Q31(-0.85235410573336f), Q31(-0.08342347966410f)},
{Q31(-0.86425093011245f), Q31(-0.45795025029466f)}, {Q31( 0.38879779059045f), Q31( 0.97274429344593f)},
{Q31( 0.92045124735495f), Q31(-0.62433652524220f)}, {Q31( 0.89162532251878f), Q31( 0.54950955570563f)},
{Q31(-0.36834336949252f), Q31( 0.96458298020975f)}, {Q31( 0.93891760988045f), Q31(-0.89968353740388f)},
{Q31( 0.99267657565094f), Q31(-0.03757034316958f)}, {Q31(-0.94063471614176f), Q31( 0.41332338538963f)},
{Q31( 0.99740224117019f), Q31(-0.16830494996370f)}, {Q31(-0.35899413170555f), Q31(-0.46633226649613f)},
{Q31( 0.05237237274947f), Q31(-0.25640361602661f)}, {Q31( 0.36703583957424f), Q31(-0.38653265641875f)},
{Q31( 0.91653180367913f), Q31(-0.30587628726597f)}, {Q31( 0.69000803499316f), Q31( 0.90952171386132f)},
{Q31(-0.38658751133527f), Q31( 0.99501571208985f)}, {Q31(-0.29250814029851f), Q31( 0.37444994344615f)},
{Q31(-0.60182204677608f), Q31( 0.86779651036123f)}, {Q31(-0.97418588163217f), Q31( 0.96468523666475f)},
{Q31( 0.88461574003963f), Q31( 0.57508405276414f)}, {Q31( 0.05198933055162f), Q31( 0.21269661669964f)},
{Q31(-0.53499621979720f), Q31( 0.97241553731237f)}, {Q31(-0.49429560226497f), Q31( 0.98183865291903f)},
{Q31(-0.98935142339139f), Q31(-0.40249159006933f)}, {Q31(-0.98081380091130f), Q31(-0.72856895534041f)},
{Q31(-0.27338148835532f), Q31( 0.99950922447209f)}, {Q31( 0.06310802338302f), Q31(-0.54539587529618f)},
{Q31(-0.20461677199539f), Q31(-0.14209977628489f)}, {Q31( 0.66223843141647f), Q31( 0.72528579940326f)},
{Q31(-0.84764345483665f), Q31( 0.02372316801261f)}, {Q31(-0.89039863483811f), Q31( 0.88866581484602f)},
{Q31( 0.95903308477986f), Q31( 0.76744927173873f)}, {Q31( 0.73504123909879f), Q31(-0.03747203173192f)},
{Q31(-0.31744434966056f), Q31(-0.36834111883652f)}, {Q31(-0.34110827591623f), Q31( 0.40211222807691f)},
{Q31( 0.47803883714199f), Q31(-0.39423219786288f)}, {Q31( 0.98299195879514f), Q31( 0.01989791390047f)},
{Q31(-0.30963073129751f), Q31(-0.18076720599336f)}, {Q31( 0.99992588229018f), Q31(-0.26281872094289f)},
{Q31(-0.93149731080767f), Q31(-0.98313162570490f)}, {Q31( 0.99923472302773f), Q31(-0.80142993767554f)},
{Q31(-0.26024169633417f), Q31(-0.75999759855752f)}, {Q31(-0.35712514743563f), Q31( 0.19298963768574f)},
{Q31(-0.99899084509530f), Q31( 0.74645156992493f)}, {Q31( 0.86557171579452f), Q31( 0.55593866696299f)},
{Q31( 0.33408042438752f), Q31( 0.86185953874709f)}, {Q31( 0.99010736374716f), Q31( 0.04602397576623f)},
{Q31(-0.66694269691195f), Q31(-0.91643611810148f)}, {Q31( 0.64016792079480f), Q31( 0.15649530836856f)},
{Q31( 0.99570534804836f), Q31( 0.45844586038111f)}, {Q31(-0.63431466947340f), Q31( 0.21079116459234f)},
{Q31(-0.07706847005931f), Q31(-0.89581437101329f)}, {Q31( 0.98590090577724f), Q31( 0.88241721133981f)},
{Q31( 0.80099335254678f), Q31(-0.36851896710853f)}, {Q31( 0.78368131392666f), Q31( 0.45506999802597f)},
{Q31( 0.08707806671691f), Q31( 0.80938994918745f)}, {Q31(-0.86811883080712f), Q31( 0.39347308654705f)},
{Q31(-0.39466529740375f), Q31(-0.66809432114456f)}, {Q31( 0.97875325649683f), Q31(-0.72467840967746f)},
{Q31(-0.95038560288864f), Q31( 0.89563219587625f)}, {Q31( 0.17005239424212f), Q31( 0.54683053962658f)},
{Q31(-0.76910792026848f), Q31(-0.96226617549298f)}, {Q31( 0.99743281016846f), Q31( 0.42697157037567f)},
{Q31( 0.95437383549973f), Q31( 0.97002324109952f)}, {Q31( 0.99578905365569f), Q31(-0.54106826257356f)},
{Q31( 0.28058259829990f), Q31(-0.85361420634036f)}, {Q31( 0.85256524470573f), Q31(-0.64567607735589f)},
{Q31(-0.50608540105128f), Q31(-0.65846015480300f)}, {Q31(-0.97210735183243f), Q31(-0.23095213067791f)},
{Q31( 0.95424048234441f), Q31(-0.99240147091219f)}, {Q31(-0.96926570524023f), Q31( 0.73775654896574f)},
{Q31( 0.30872163214726f), Q31( 0.41514960556126f)}, {Q31(-0.24523839572639f), Q31( 0.63206633394807f)},
{Q31(-0.33813265086024f), Q31(-0.38661779441897f)}, {Q31(-0.05826828420146f), Q31(-0.06940774188029f)},
{Q31(-0.22898461455054f), Q31( 0.97054853316316f)}, {Q31(-0.18509915019881f), Q31( 0.47565762892084f)},
{Q31(-0.10488238045009f), Q31(-0.87769947402394f)}, {Q31(-0.71886586182037f), Q31( 0.78030982480538f)},
{Q31( 0.99793873738654f), Q31( 0.90041310491497f)}, {Q31( 0.57563307626120f), Q31(-0.91034337352097f)},
{Q31( 0.28909646383717f), Q31( 0.96307783970534f)}, {Q31( 0.42188998312520f), Q31( 0.48148651230437f)},
{Q31( 0.93335049681047f), Q31(-0.43537023883588f)}, {Q31(-0.97087374418267f), Q31( 0.86636445711364f)},
{Q31( 0.36722871286923f), Q31( 0.65291654172961f)}, {Q31(-0.81093025665696f), Q31( 0.08778370229363f)},
{Q31(-0.26240603062237f), Q31(-0.92774095379098f)}, {Q31( 0.83996497984604f), Q31( 0.55839849139647f)},
{Q31(-0.99909615720225f), Q31(-0.96024605713970f)}, {Q31( 0.74649464155061f), Q31( 0.12144893606462f)},
{Q31(-0.74774595569805f), Q31(-0.26898062008959f)}, {Q31( 0.95781667469567f), Q31(-0.79047927052628f)},
{Q31( 0.95472308713099f), Q31(-0.08588776019550f)}, {Q31( 0.48708332746299f), Q31( 0.99999041579432f)},
{Q31( 0.46332038247497f), Q31( 0.10964126185063f)}, {Q31(-0.76497004940162f), Q31( 0.89210929242238f)},
{Q31( 0.57397389364339f), Q31( 0.35289703373760f)}, {Q31( 0.75374316974495f), Q31( 0.96705214651335f)},
{Q31(-0.59174397685714f), Q31(-0.89405370422752f)}, {Q31( 0.75087906691890f), Q31(-0.29612672982396f)},
{Q31(-0.98607857336230f), Q31( 0.25034911730023f)}, {Q31(-0.40761056640505f), Q31(-0.90045573444695f)},
{Q31( 0.66929266740477f), Q31( 0.98629493401748f)}, {Q31(-0.97463695257310f), Q31(-0.00190223301301f)},
{Q31( 0.90145509409859f), Q31( 0.99781390365446f)}, {Q31(-0.87259289048043f), Q31( 0.99233587353666f)},
{Q31(-0.91529461447692f), Q31(-0.15698707534206f)}, {Q31(-0.03305738840705f), Q31(-0.37205262859764f)},
{Q31( 0.07223051368337f), Q31(-0.88805001733626f)}, {Q31( 0.99498012188353f), Q31( 0.97094358113387f)},
{Q31(-0.74904939500519f), Q31( 0.99985483641521f)}, {Q31( 0.04585228574211f), Q31( 0.99812337444082f)},
{Q31(-0.89054954257993f), Q31(-0.31791913188064f)}, {Q31(-0.83782144651251f), Q31( 0.97637632547466f)},
{Q31( 0.33454804933804f), Q31(-0.86231516800408f)}, {Q31(-0.99707579362824f), Q31( 0.93237990079441f)},
{Q31(-0.22827527843994f), Q31( 0.18874759397997f)}, {Q31( 0.67248046289143f), Q31(-0.03646211390569f)},
{Q31(-0.05146538187944f), Q31(-0.92599700120679f)}, {Q31( 0.99947295749905f), Q31( 0.93625229707912f)},
{Q31( 0.66951124390363f), Q31( 0.98905825623893f)}, {Q31(-0.99602956559179f), Q31(-0.44654715757688f)},
{Q31( 0.82104905483590f), Q31( 0.99540741724928f)}, {Q31( 0.99186510988782f), Q31( 0.72023001312947f)},
{Q31(-0.65284592392918f), Q31( 0.52186723253637f)}, {Q31( 0.93885443798188f), Q31(-0.74895312615259f)},
{Q31( 0.96735248738388f), Q31( 0.90891816978629f)}, {Q31(-0.22225968841114f), Q31( 0.57124029781228f)},
{Q31(-0.44132783753414f), Q31(-0.92688840659280f)}, {Q31(-0.85694974219574f), Q31( 0.88844532719844f)},
{Q31( 0.91783042091762f), Q31(-0.46356892383970f)}, {Q31( 0.72556974415690f), Q31(-0.99899555770747f)},
{Q31(-0.99711581834508f), Q31( 0.58211560180426f)}, {Q31( 0.77638976371966f), Q31( 0.94321834873819f)},
{Q31( 0.07717324253925f), Q31( 0.58638399856595f)}, {Q31(-0.56049829194163f), Q31( 0.82522301569036f)},
{Q31( 0.98398893639988f), Q31( 0.39467440420569f)}, {Q31( 0.47546946844938f), Q31( 0.68613044836811f)},
{Q31( 0.65675089314631f), Q31( 0.18331637134880f)}, {Q31( 0.03273375457980f), Q31(-0.74933109564108f)},
{Q31(-0.38684144784738f), Q31( 0.51337349030406f)}, {Q31(-0.97346267944545f), Q31(-0.96549364384098f)},
{Q31(-0.53282156061942f), Q31(-0.91423265091354f)}, {Q31( 0.99817310731176f), Q31( 0.61133572482148f)},
{Q31(-0.50254500772635f), Q31(-0.88829338134294f)}, {Q31( 0.01995873238855f), Q31( 0.85223515096765f)},
{Q31( 0.99930381973804f), Q31( 0.94578896296649f)}, {Q31( 0.82907767600783f), Q31(-0.06323442598128f)},
{Q31(-0.58660709669728f), Q31( 0.96840773806582f)}, {Q31(-0.17573736667267f), Q31(-0.48166920859485f)},
{Q31( 0.83434292401346f), Q31(-0.13023450646997f)}, {Q31( 0.05946491307025f), Q31( 0.20511047074866f)},
{Q31( 0.81505484574602f), Q31(-0.94685947861369f)}, {Q31(-0.44976380954860f), Q31( 0.40894572671545f)},
{Q31(-0.89746474625671f), Q31( 0.99846578838537f)}, {Q31( 0.39677256130792f), Q31(-0.74854668609359f)},
{Q31(-0.07588948563079f), Q31( 0.74096214084170f)}, {Q31( 0.76343198951445f), Q31( 0.41746629422634f)},
{Q31(-0.74490104699626f), Q31( 0.94725911744610f)}, {Q31( 0.64880119792759f), Q31( 0.41336660830571f)},
{Q31( 0.62319537462542f), Q31(-0.93098313552599f)}, {Q31( 0.42215817594807f), Q31(-0.07712787385208f)},
{Q31( 0.02704554141885f), Q31(-0.05417518053666f)}, {Q31( 0.80001773566818f), Q31( 0.91542195141039f)},
{Q31(-0.79351832348816f), Q31(-0.36208897989136f)}, {Q31( 0.63872359151636f), Q31( 0.08128252493444f)},
{Q31( 0.52890520960295f), Q31( 0.60048872455592f)}, {Q31( 0.74238552914587f), Q31( 0.04491915291044f)},
{Q31( 0.99096131449250f), Q31(-0.19451182854402f)}, {Q31(-0.80412329643109f), Q31(-0.88513818199457f)},
{Q31(-0.64612616129736f), Q31( 0.72198674804544f)}, {Q31( 0.11657770663191f), Q31(-0.83662833815041f)},
{Q31(-0.95053182488101f), Q31(-0.96939905138082f)}, {Q31(-0.62228872928622f), Q31( 0.82767262846661f)},
{Q31( 0.03004475787316f), Q31(-0.99738896333384f)}, {Q31(-0.97987214341034f), Q31( 0.36526129686425f)},
{Q31(-0.99986980746200f), Q31(-0.36021610299715f)}, {Q31( 0.89110648599879f), Q31(-0.97894250343044f)},
{Q31( 0.10407960510582f), Q31( 0.77357793811619f)}, {Q31( 0.95964737821728f), Q31(-0.35435818285502f)},
{Q31( 0.50843233159162f), Q31( 0.96107691266205f)}, {Q31( 0.17006334670615f), Q31(-0.76854025314829f)},
{Q31( 0.25872675063360f), Q31( 0.99893303933816f)}, {Q31(-0.01115998681937f), Q31( 0.98496019742444f)},
{Q31(-0.79598702973261f), Q31( 0.97138411318894f)}, {Q31(-0.99264708948101f), Q31(-0.99542822402536f)},
{Q31(-0.99829663752818f), Q31( 0.01877138824311f)}, {Q31(-0.70801016548184f), Q31( 0.33680685948117f)},
{Q31(-0.70467057786826f), Q31( 0.93272777501857f)}, {Q31( 0.99846021905254f), Q31(-0.98725746254433f)},
{Q31(-0.63364968534650f), Q31(-0.16473594423746f)}, {Q31(-0.16258217500792f), Q31(-0.95939125400802f)},
{Q31(-0.43645594360633f), Q31(-0.94805030113284f)}, {Q31(-0.99848471702976f), Q31( 0.96245166923809f)},
{Q31(-0.16796458968998f), Q31(-0.98987511890470f)}, {Q31(-0.87979225745213f), Q31(-0.71725725041680f)},
{Q31( 0.44183099021786f), Q31(-0.93568974498761f)}, {Q31( 0.93310180125532f), Q31(-0.99913308068246f)},
{Q31(-0.93941931782002f), Q31(-0.56409379640356f)}, {Q31(-0.88590003188677f), Q31( 0.47624600491382f)},
{Q31( 0.99971463703691f), Q31(-0.83889954253462f)}, {Q31(-0.75376385639978f), Q31( 0.00814643438625f)},
{Q31( 0.93887685615875f), Q31(-0.11284528204636f)}, {Q31( 0.85126435782309f), Q31( 0.52349251543547f)},
{Q31( 0.39701421446381f), Q31( 0.81779634174316f)}, {Q31(-0.37024464187437f), Q31(-0.87071656222959f)},
{Q31(-0.36024828242896f), Q31( 0.34655735648287f)}, {Q31(-0.93388812549209f), Q31(-0.84476541096429f)},
{Q31(-0.65298804552119f), Q31(-0.18439575450921f)}, {Q31( 0.11960319006843f), Q31( 0.99899346780168f)},
{Q31( 0.94292565553160f), Q31( 0.83163906518293f)}, {Q31( 0.75081145286948f), Q31(-0.35533223142265f)},
{Q31( 0.56721979748394f), Q31(-0.24076836414499f)}, {Q31( 0.46857766746029f), Q31(-0.30140233457198f)},
{Q31( 0.97312313923635f), Q31(-0.99548191630031f)}, {Q31(-0.38299976567017f), Q31( 0.98516909715427f)},
{Q31( 0.41025800019463f), Q31( 0.02116736935734f)}, {Q31( 0.09638062008048f), Q31( 0.04411984381457f)},
{Q31(-0.85283249275397f), Q31( 0.91475563922421f)}, {Q31( 0.88866808958124f), Q31(-0.99735267083226f)},
{Q31(-0.48202429536989f), Q31(-0.96805608884164f)}, {Q31( 0.27572582416567f), Q31( 0.58634753335832f)},
{Q31(-0.65889129659168f), Q31( 0.58835634138583f)}, {Q31( 0.98838086953732f), Q31( 0.99994349600236f)},
{Q31(-0.20651349620689f), Q31( 0.54593044066355f)}, {Q31(-0.62126416356920f), Q31(-0.59893681700392f)},
{Q31( 0.20320105410437f), Q31(-0.86879180355289f)}, {Q31(-0.97790548600584f), Q31( 0.96290806999242f)},
{Q31( 0.11112534735126f), Q31( 0.21484763313301f)}, {Q31(-0.41368337314182f), Q31( 0.28216837680365f)},
{Q31( 0.24133038992960f), Q31( 0.51294362630238f)}, {Q31(-0.66393410674885f), Q31(-0.08249679629081f)},
{Q31(-0.53697829178752f), Q31(-0.97649903936228f)}, {Q31(-0.97224737889348f), Q31( 0.22081333579837f)},
{Q31( 0.87392477144549f), Q31(-0.12796173740361f)}, {Q31( 0.19050361015753f), Q31( 0.01602615387195f)},
{Q31(-0.46353441212724f), Q31(-0.95249041539006f)}, {Q31(-0.07064096339021f), Q31(-0.94479803205886f)},
{Q31(-0.92444085484466f), Q31(-0.10457590187436f)}, {Q31(-0.83822593578728f), Q31(-0.01695043208885f)},
{Q31( 0.75214681811150f), Q31(-0.99955681042665f)}, {Q31(-0.42102998829339f), Q31( 0.99720941999394f)},
{Q31(-0.72094786237696f), Q31(-0.35008961934255f)}, {Q31( 0.78843311019251f), Q31( 0.52851398958271f)},
{Q31( 0.97394027897442f), Q31(-0.26695944086561f)}, {Q31( 0.99206463477946f), Q31(-0.57010120849429f)},
{Q31( 0.76789609461795f), Q31(-0.76519356730966f)}, {Q31(-0.82002421836409f), Q31(-0.73530179553767f)},
{Q31( 0.81924990025724f), Q31( 0.99698425250579f)}, {Q31(-0.26719850873357f), Q31( 0.68903369776193f)},
{Q31(-0.43311260380975f), Q31( 0.85321815947490f)}, {Q31( 0.99194979673836f), Q31( 0.91876249766422f)},
{Q31(-0.80692001248487f), Q31(-0.32627540663214f)}, {Q31( 0.43080003649976f), Q31(-0.21919095636638f)},
{Q31( 0.67709491937357f), Q31(-0.95478075822906f)}, {Q31( 0.56151770568316f), Q31(-0.70693811747778f)},
{Q31( 0.10831862810749f), Q31(-0.08628837174592f)}, {Q31( 0.91229417540436f), Q31(-0.65987351408410f)},
{Q31(-0.48972893932274f), Q31( 0.56289246362686f)}, {Q31(-0.89033658689697f), Q31(-0.71656563987082f)},
{Q31( 0.65269447475094f), Q31( 0.65916004833932f)}, {Q31( 0.67439478141121f), Q31(-0.81684380846796f)},
{Q31(-0.47770832416973f), Q31(-0.16789556203025f)}, {Q31(-0.99715979260878f), Q31(-0.93565784007648f)},
{Q31(-0.90889593602546f), Q31( 0.62034397054380f)}, {Q31(-0.06618622548177f), Q31(-0.23812217221359f)},
{Q31( 0.99430266919728f), Q31( 0.18812555317553f)}, {Q31( 0.97686402381843f), Q31(-0.28664534366620f)},
{Q31( 0.94813650221268f), Q31(-0.97506640027128f)}, {Q31(-0.95434497492853f), Q31(-0.79607978501983f)},
{Q31(-0.49104783137150f), Q31( 0.32895214359663f)}, {Q31( 0.99881175120751f), Q31( 0.88993983831354f)},
{Q31( 0.50449166760303f), Q31(-0.85995072408434f)}, {Q31( 0.47162891065108f), Q31(-0.18680204049569f)},
{Q31(-0.62081581361840f), Q31( 0.75000676218956f)}, {Q31(-0.43867015250812f), Q31( 0.99998069244322f)},
{Q31( 0.98630563232075f), Q31(-0.53578899600662f)}, {Q31(-0.61510362277374f), Q31(-0.89515019899997f)},
{Q31(-0.03841517601843f), Q31(-0.69888815681179f)}, {Q31(-0.30102157304644f), Q31(-0.07667808922205f)},
{Q31( 0.41881284182683f), Q31( 0.02188098922282f)}, {Q31(-0.86135454941237f), Q31( 0.98947480909359f)},
{Q31( 0.67226861393788f), Q31(-0.13494389011014f)}, {Q31(-0.70737398842068f), Q31(-0.76547349325992f)},
{Q31( 0.94044946687963f), Q31( 0.09026201157416f)}, {Q31(-0.82386352534327f), Q31( 0.08924768823676f)},
{Q31(-0.32070666698656f), Q31( 0.50143421908753f)}, {Q31( 0.57593163224487f), Q31(-0.98966422921509f)},
{Q31(-0.36326018419965f), Q31( 0.07440243123228f)}, {Q31( 0.99979044674350f), Q31(-0.14130287347405f)},
{Q31(-0.92366023326932f), Q31(-0.97979298068180f)}, {Q31(-0.44607178518598f), Q31(-0.54233252016394f)},
{Q31( 0.44226800932956f), Q31( 0.71326756742752f)}, {Q31( 0.03671907158312f), Q31( 0.63606389366675f)},
{Q31( 0.52175424682195f), Q31(-0.85396826735705f)}, {Q31(-0.94701139690956f), Q31(-0.01826348194255f)},
{Q31(-0.98759606946049f), Q31( 0.82288714303073f)}, {Q31( 0.87434794743625f), Q31( 0.89399495655433f)},
{Q31(-0.93412041758744f), Q31( 0.41374052024363f)}, {Q31( 0.96063943315511f), Q31( 0.93116709541280f)},
{Q31( 0.97534253457837f), Q31( 0.86150930812689f)}, {Q31( 0.99642466504163f), Q31( 0.70190043427512f)},
{Q31(-0.94705089665984f), Q31(-0.29580042814306f)}, {Q31( 0.91599807087376f), Q31(-0.98147830385781f)},
// Duplicate of the first eight entries of the table
{Q31(-0.99948153278296f), Q31(-0.59483417516607f)}, {Q31( 0.97113454393991f), Q31(-0.67528515225647f)},
{Q31( 0.14130051758487f), Q31(-0.95090983575689f)}, {Q31(-0.47005496701697f), Q31(-0.37340549728647f)},
{Q31( 0.80705063769351f), Q31( 0.29653668284408f)}, {Q31(-0.38981478896926f), Q31( 0.89572605717087f)},
{Q31(-0.01053049862020f), Q31(-0.66959058036166f)}, {Q31(-0.91266367957293f), Q31(-0.11522938140034f)},
};
#endif /* AVCODEC_AACSBRDATA_H */

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
/*
* AAC data declarations
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAC data declarations
* @author Oded Shimon ( ods15 ods15 dyndns org )
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
#ifndef AVCODEC_AACTAB_H
#define AVCODEC_AACTAB_H
#include "libavutil/mem.h"
#include "aac.h"
#include "aac_tablegen_decl.h"
#include <stdint.h>
/* NOTE:
* Tables in this file are used by the AAC decoder and will be used by the AAC
* encoder.
*/
/* @name tns_tmp2_map
* Tables of the tmp2[] arrays of LPC coefficients used for TNS.
* The suffix _M_N[] indicates the values of coef_compress and coef_res
* respectively.
* @{
*/
/*
 * TNS coefficient values for coef_compress = 1, coef_res = 3 (4 entries).
 * These are the same values as entries 0, 1, 6 and 7 of tns_tmp2_map_0_3.
 */
static const INTFLOAT tns_tmp2_map_1_3[4] = {
    Q31( 0.00000000f),
    Q31(-0.43388373f),
    Q31( 0.64278758f),
    Q31( 0.34202015f),
};
/*
 * TNS coefficient values for coef_compress = 0, coef_res = 3 (8 entries).
 * Entries 1..3 are negative, entries 4..7 positive.
 */
static const INTFLOAT tns_tmp2_map_0_3[8] = {
    Q31( 0.00000000f),
    Q31(-0.43388373f),
    Q31(-0.78183150f),
    Q31(-0.97492790f),
    Q31( 0.98480773f),
    Q31( 0.86602539f),
    Q31( 0.64278758f),
    Q31( 0.34202015f),
};
/*
 * TNS coefficient values for coef_compress = 1, coef_res = 4 (8 entries).
 * These are the same values as entries 0..3 and 12..15 of tns_tmp2_map_0_4.
 */
static const INTFLOAT tns_tmp2_map_1_4[8] = {
    Q31( 0.00000000f),
    Q31(-0.20791170f),
    Q31(-0.40673664f),
    Q31(-0.58778524f),
    Q31( 0.67369562f),
    Q31( 0.52643216f),
    Q31( 0.36124167f),
    Q31( 0.18374951f),
};
/*
 * TNS coefficient values for coef_compress = 0, coef_res = 4 (16 entries).
 * Entries 1..7 are negative, entries 8..15 positive.
 */
static const INTFLOAT tns_tmp2_map_0_4[16] = {
    Q31( 0.00000000f),
    Q31(-0.20791170f),
    Q31(-0.40673664f),
    Q31(-0.58778524f),
    Q31(-0.74314481f),
    Q31(-0.86602539f),
    Q31(-0.95105654f),
    Q31(-0.99452192f),
    Q31( 0.99573416f),
    Q31( 0.96182561f),
    Q31( 0.89516330f),
    Q31( 0.79801720f),
    Q31( 0.67369562f),
    Q31( 0.52643216f),
    Q31( 0.36124167f),
    Q31( 0.18374951f),
};
/*
 * Lookup of the four coefficient tables, ordered by
 * (coef_compress, coef_res) = (0,3), (0,4), (1,3), (1,4).
 * NOTE(review): the index is presumably 2 * coef_compress + (coef_res - 3);
 * confirm against the code that indexes this table.
 */
static const INTFLOAT * const tns_tmp2_map[4] = {
    [0] = tns_tmp2_map_0_3,
    [1] = tns_tmp2_map_0_4,
    [2] = tns_tmp2_map_1_3,
    [3] = tns_tmp2_map_1_4,
};
// @}
/* @name window coefficients
* @{
*/
/* Window coefficient tables. These are extern declarations only; the
 * storage is defined elsewhere. The array length is part of each
 * declaration. The leading 32 is passed to DECLARE_ALIGNED -- presumably
 * the requested alignment in bytes (confirm in libavutil/mem.h). */
/* Not const-qualified, so these tables can be written at run time. */
DECLARE_ALIGNED(32, extern float, ff_aac_kbd_long_1024)[1024];
DECLARE_ALIGNED(32, extern float, ff_aac_kbd_short_128)[128];
/* int-typed "_fixed" tables; presumably fixed-point counterparts of the
 * float windows -- confirm against the definitions. */
DECLARE_ALIGNED(32, extern int, ff_aac_kbd_long_1024_fixed)[1024];
DECLARE_ALIGNED(32, extern int, ff_aac_kbd_long_512_fixed)[512];
DECLARE_ALIGNED(32, extern int, ff_aac_kbd_short_128_fixed)[128];
/* const tables, each in a float and an int ("_fixed") variant:
 * 1920 entries for the _512 window and 1800 entries for the _480 window. */
const DECLARE_ALIGNED(32, extern float, ff_aac_eld_window_512)[1920];
const DECLARE_ALIGNED(32, extern int, ff_aac_eld_window_512_fixed)[1920];
const DECLARE_ALIGNED(32, extern float, ff_aac_eld_window_480)[1800];
const DECLARE_ALIGNED(32, extern int, ff_aac_eld_window_480_fixed)[1800];
// @}
/* @name number of scalefactor window bands for long and short transform windows respectively
* @{
*/
/* Declared with unspecified length; the element counts are fixed by the
 * definitions elsewhere.
 * NOTE(review): the 1024/512/480/128 suffixes presumably name the transform
 * window length each table applies to -- confirm against the definitions. */
extern const uint8_t ff_aac_num_swb_1024[];
extern const uint8_t ff_aac_num_swb_512 [];
extern const uint8_t ff_aac_num_swb_480 [];
extern const uint8_t ff_aac_num_swb_128 [];
// @}
/* Remaining shared tables. These are extern declarations only; the data is
 * defined elsewhere. */
extern const uint8_t ff_aac_pred_sfb_max [];
/* Two 121-entry tables; presumably the codeword and its bit length for the
 * same index -- confirm against the definitions. */
extern const uint32_t ff_aac_scalefactor_code[121];
extern const uint8_t ff_aac_scalefactor_bits[121];
/* Three 11-entry tables: pointers to per-table code and bit arrays, plus a
 * uint16_t size for each (presumably the length of those arrays). */
extern const uint16_t * const ff_aac_spectral_codes[11];
extern const uint8_t * const ff_aac_spectral_bits [11];
extern const uint16_t ff_aac_spectral_sizes[11];
/* Arrays of pointers to const data. The pointer slots themselves are not
 * const-qualified, and the lengths are left to the definitions. */
extern const float *ff_aac_codebook_vectors[];
extern const float *ff_aac_codebook_vector_vals[];
extern const uint16_t *ff_aac_codebook_vector_idx[];
/* 13 pointers per window-size variant.
 * NOTE(review): presumably one per sampling-frequency index -- confirm. */
extern const uint16_t * const ff_swb_offset_1024[13];
extern const uint16_t * const ff_swb_offset_512 [13];
extern const uint16_t * const ff_swb_offset_480 [13];
extern const uint16_t * const ff_swb_offset_128 [13];
/* 13 entries per window-size variant, matching the pointer tables above. */
extern const uint8_t ff_tns_max_bands_1024[13];
extern const uint8_t ff_tns_max_bands_512 [13];
extern const uint8_t ff_tns_max_bands_480 [13];
extern const uint8_t ff_tns_max_bands_128 [13];
#endif /* AVCODEC_AACTAB_H */

View File

@@ -0,0 +1,47 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAN (Arai, Agui and Nakajima) (I)DCT tables
*/
#include <stdint.h>
/**
 * AAN scale factors for an 8x8 block, stored row by row.
 * The values are precomputed and scaled up by 14 bits (1.0 == 16384).
 * The table is symmetric: entry [r][c] equals entry [c][r].
 */
const uint16_t ff_aanscales[64] = {
    /* row 0 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 1 */ 22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    /* row 2 */ 21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    /* row 3 */ 19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    /* row 4 */ 16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    /* row 5 */ 12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    /* row 6 */  8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    /* row 7 */  4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247,
};
/**
 * Inverse AAN scale factors for an 8x8 block, stored row by row.
 * Each entry is approximately (1 << 26) / ff_aanscales[i]; the first entry
 * is exactly 4096. The table is symmetric, like ff_aanscales.
 */
const uint16_t ff_inv_aanscales[64] = {
    /* row 0 */  4096,  2953,  3135,  3483,  4096,  5213,  7568, 14846,
    /* row 1 */  2953,  2129,  2260,  2511,  2953,  3759,  5457, 10703,
    /* row 2 */  3135,  2260,  2399,  2666,  3135,  3990,  5793, 11363,
    /* row 3 */  3483,  2511,  2666,  2962,  3483,  4433,  6436, 12625,
    /* row 4 */  4096,  2953,  3135,  3483,  4096,  5213,  7568, 14846,
    /* row 5 */  5213,  3759,  3990,  4433,  5213,  6635,  9633, 18895,
    /* row 6 */  7568,  5457,  5793,  6436,  7568,  9633, 13985, 27432,
    /* row 7 */ 14846, 10703, 11363, 12625, 14846, 18895, 27432, 53809,
};

View File

@@ -0,0 +1,32 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* AAN (Arai, Agui and Nakajima) (I)DCT tables
*/
#ifndef AVCODEC_AANDCTTAB_H
#define AVCODEC_AANDCTTAB_H
#include <stdint.h>
/* 64-entry (8x8) AAN scale table. */
extern const uint16_t ff_aanscales[64];
/* 64-entry (8x8) inverse AAN scale table. */
extern const uint16_t ff_inv_aanscales[64];
#endif /* AVCODEC_AANDCTTAB_H */

View File

@@ -0,0 +1,30 @@
# Init objects, added to OBJS when the matching component is enabled.
OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_IMDCT15) += aarch64/imdct15_init.o
OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
# Decoder-specific init objects.
OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
# Objects collected in the ARMV8-OBJS list.
ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
# Objects collected in the NEON-OBJS list.
NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
aarch64/h264idct_neon.o
NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o
# H264QPEL also lists hpeldsp_neon.o, which HPELDSP lists again below.
NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_IMDCT15) += aarch64/imdct15_neon.o
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_neon.o

View File

@@ -0,0 +1,30 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AARCH64_ASM_OFFSETS_H
#define AVCODEC_AARCH64_ASM_OFFSETS_H
/* CeltIMDCTContext
 * Hard-coded byte offsets of CeltIMDCTContext members.
 * NOTE(review): these presumably have to be kept in sync with the C struct
 * layout by hand; nothing visible here enforces it - confirm. */
#define CELT_EXPTAB 0x20
#define CELT_FFT_N 0x00
#define CELT_LEN2 0x04
/* LEN4 sits 4 bytes after LEN2 so both can be fetched with one pair load. */
#define CELT_LEN4 (CELT_LEN2 + 0x4) // loaded as pair
#define CELT_TMP 0x10
/* TWIDDLE sits 8 bytes after TMP so both can be fetched with one pair load. */
#define CELT_TWIDDLE (CELT_TMP + 0x8) // loaded as pair
#endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */

View File

@@ -0,0 +1,104 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AARCH64_CABAC_H
#define AVCODEC_AARCH64_CABAC_H
#include "config.h"
#if HAVE_INLINE_ASM
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavcodec/cabac.h"
/* Route get_cabac_inline to the AArch64 inline-asm version below. */
#define get_cabac_inline get_cabac_inline_aarch64
/**
 * Decode one CABAC bin with AArch64 inline assembly.
 *
 * Reads and updates c->low and c->range, rewrites *state with the next
 * context state, and may advance c->bytestream by two bytes on refill.
 *
 * @param c     CABAC decoder context
 * @param state pointer to the context state byte (read, then overwritten)
 * @return the decoded bin (0 or 1), taken from bit 0 of the final state value
 */
static av_always_inline int get_cabac_inline_aarch64(CABACContext *c,
uint8_t *const state)
{
int bit;
void *reg_a, *reg_b, *reg_c, *tmp;
__asm__ volatile(
/* bit = *state; look up the LPS range at
 * tables[lps_off + bit + 2 * (range & 0xC0)]; tmp keeps the old range. */
"ldrb %w[bit] , [%[state]] \n\t"
"add %[r_b] , %[tables] , %[lps_off] \n\t"
"mov %w[tmp] , %w[range] \n\t"
"and %w[range] , %w[range] , #0xC0 \n\t"
"lsl %w[r_c] , %w[range] , #1 \n\t"
"add %[r_b] , %[r_b] , %w[bit], UXTW \n\t"
"ldrb %w[range] , [%[r_b], %w[r_c], SXTW] \n\t"
/* r_c = old range - LPS range; compare (r_c << 17) against low.
 * If (r_c << 17) is below low (unsigned): subtract it from low, keep the
 * LPS range and invert bit. Otherwise low is untouched; the signed
 * "greater than" test selects r_c as the new range. */
"sub %w[r_c] , %w[tmp] , %w[range] \n\t"
"lsl %w[tmp] , %w[r_c] , #17 \n\t"
"cmp %w[tmp] , %w[low] \n\t"
"csel %w[tmp] , %w[tmp] , wzr , cc \n\t"
"csel %w[range] , %w[r_c] , %w[range], gt \n\t"
"cinv %w[bit] , %w[bit] , cc \n\t"
"sub %w[low] , %w[low] , %w[tmp] \n\t"
/* Renormalise: shift low and range left by tables[norm_off + range], and
 * store tables[mlps_off + bit] back as the new context state. */
"add %[r_b] , %[tables] , %[norm_off] \n\t"
"add %[r_a] , %[tables] , %[mlps_off] \n\t"
"ldrb %w[tmp] , [%[r_b], %w[range], SXTW] \n\t"
"ldrb %w[r_a] , [%[r_a], %w[bit], SXTW] \n\t"
"lsl %w[low] , %w[low] , %w[tmp] \n\t"
"lsl %w[range] , %w[range] , %w[tmp] \n\t"
"uxth %w[r_c] , %w[low] \n\t"
"strb %w[r_a] , [%[state]] \n\t"
/* Refill is only needed when the low 16 bits of low are all zero. */
"cbnz %w[r_c] , 2f \n\t"
/* Read 16 bits at c->bytestream; the pointer is advanced by 2 only while
 * it is still below c->bytestream_end. The load itself is unconditional. */
"ldr %[r_c] , [%[c], %[byte]] \n\t"
"ldr %[r_a] , [%[c], %[end]] \n\t"
"ldrh %w[tmp] , [%[r_c]] \n\t"
"cmp %[r_c] , %[r_a] \n\t"
"b.ge 1f \n\t"
"add %[r_a] , %[r_c] , #2 \n\t"
"str %[r_a] , [%[c], %[byte]] \n\t"
"1: \n\t"
/* rev + lsr #15 turns the loaded halfword into (byte-swapped value << 1).
 * The shift applied to it is 7 - norm_table[(low ^ (low - 1)) >> 15]
 * (r_b still points at the norm-shift table here), and 0xFFFF is
 * subtracted before the result is added to low. */
"sub %w[r_c] , %w[low] , #1 \n\t"
"eor %w[r_c] , %w[r_c] , %w[low] \n\t"
"rev %w[tmp] , %w[tmp] \n\t"
"lsr %w[r_c] , %w[r_c] , #15 \n\t"
"lsr %w[tmp] , %w[tmp] , #15 \n\t"
"ldrb %w[r_c] , [%[r_b], %w[r_c], SXTW] \n\t"
"mov %w[r_b] , #0xFFFF \n\t"
"mov %w[r_a] , #7 \n\t"
"sub %w[tmp] , %w[tmp] , %w[r_b] \n\t"
"sub %w[r_c] , %w[r_a] , %w[r_c] \n\t"
"lsl %w[tmp] , %w[tmp] , %w[r_c] \n\t"
"add %w[low] , %w[low] , %w[tmp] \n\t"
"2: \n\t"
/* Outputs: all early-clobber, since inputs are still read after the first
 * outputs are written. low and range are read-write. */
: [bit]"=&r"(bit),
[low]"+&r"(c->low),
[range]"+&r"(c->range),
[r_a]"=&r"(reg_a),
[r_b]"=&r"(reg_b),
[r_c]"=&r"(reg_c),
[tmp]"=&r"(tmp)
/* Inputs: context, state pointer, table base and compile-time offsets. */
: [c]"r"(c),
[state]"r"(state),
[tables]"r"(ff_h264_cabac_tables),
[byte]"i"(offsetof(CABACContext, bytestream)),
[end]"i"(offsetof(CABACContext, bytestream_end)),
[norm_off]"I"(H264_NORM_SHIFT_OFFSET),
[lps_off]"I"(H264_LPS_RANGE_OFFSET),
[mlps_off]"I"(H264_MLPS_STATE_OFFSET + 128)
/* The asm writes *state and c->bytestream, and changes the flags. */
: "memory", "cc"
);
/* bit may have been inverted as a whole word; only bit 0 is the result. */
return bit & 1;
}
#endif /* HAVE_INLINE_ASM */
#endif /* AVCODEC_AARCH64_CABAC_H */

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/fft.h"
/* NEON routines implemented outside this file; they are installed into
 * FFTContext by ff_fft_init_aarch64() below. */
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
/**
 * Install the AArch64 NEON FFT routines into an FFT context.
 *
 * The context is left untouched when NEON is not available. When MDCT
 * support is compiled in, the MDCT entry points and the interleaved
 * permutation mode are set as well.
 *
 * @param s FFT context whose function pointers are overridden
 */
av_cold void ff_fft_init_aarch64(FFTContext *s)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to override without NEON. */
    if (!have_neon(flags))
        return;

    s->fft_calc    = ff_fft_calc_neon;
    s->fft_permute = ff_fft_permute_neon;
#if CONFIG_MDCT
    s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE;
    s->mdct_calc        = ff_mdct_calc_neon;
    s->imdct_half       = ff_imdct_half_neon;
    s->imdct_calc       = ff_imdct_calc_neon;
#endif
}

View File

@@ -0,0 +1,442 @@
/*
* ARM NEON optimised FFT
*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2009 Naotoshi Nojiri
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This algorithm (though not any of the implementation details) is
* based on libdjbfft by D. J. Bernstein.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
// 1/sqrt(2), used to build the mppm constant at the end of this file.
#define M_SQRT1_2 0.70710678118654752440
// transpose d0, d1, s0, s1
// d0 receives the even-numbered elements of s0/s1 interleaved (trn1),
// d1 receives the odd-numbered ones (trn2).
.macro transpose d0, d1, s0, s1
trn1 \d0, \s0, \s1
trn2 \d1, \s0, \s1
.endm
// fft4_neon: in-place transform of 4 complex floats.
//   x0: pointer to the 4 complex values (32 bytes), read and overwritten
// Writes v0-v7, v16 and v17; x0 is left unchanged.
function fft4_neon
ld1 {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
fadd v4.2s, v0.2s, v1.2s // r0+r1,i0+i1
fsub v6.2s, v0.2s, v1.2s // r0-r1,i0-i1
// v16 = {hi(v2), lo(v3)}, v17 = {hi(v3), lo(v2)}: crossed re/im pairs
ext v16.8b, v2.8b, v3.8b, #4
ext v17.8b, v3.8b, v2.8b, #4
fadd v5.2s, v2.2s, v3.2s // i2+i3,r2+r3
fsub v7.2s, v16.2s, v17.2s // r3-r2,i2-i3
// final butterflies
fadd v0.2s, v4.2s, v5.2s
fsub v2.2s, v4.2s, v5.2s
fadd v1.2s, v6.2s, v7.2s
fsub v3.2s, v6.2s, v7.2s
st1 {v0.2s,v1.2s,v2.2s,v3.2s}, [x0]
ret
endfunc
// fft8_neon: in-place transform of 8 complex floats.
//   x0: pointer to the 8 complex values (64 bytes), read and overwritten
// Expects v28 to hold the mppm constant (loaded in ff_fft_calc_neon).
// On return x1 points at the start of the data and x0 at its second half.
// Writes v0-v7 and v16-v27.
function fft8_neon
mov x1, x0
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x0], #32
ld1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
ext v22.8b, v2.8b, v3.8b, #4
ext v23.8b, v3.8b, v2.8b, #4
fadd v4.2s, v16.2s, v17.2s // r4+r5,i4+i5
fadd v5.2s, v18.2s, v19.2s // r6+r7,i6+i7
fsub v17.2s, v16.2s, v17.2s // r4-r5,i4-i5
fsub v19.2s, v18.2s, v19.2s // r6-r7,i6-i7
rev64 v27.2s, v28.2s // v27 = low two lanes of v28 swapped, i.e. {+w, -w}
fadd v20.2s, v0.2s, v1.2s // r0+r1,i0+i1
fadd v21.2s, v2.2s, v3.2s // r2+r3,i2+i3
fmul v26.2s, v17.2s, v28.2s // -a2r*w,a2i*w
ext v6.8b, v4.8b, v5.8b, #4
ext v7.8b, v5.8b, v4.8b, #4
fmul v27.2s, v19.2s, v27.2s // a3r*w,-a3i*w
fsub v23.2s, v22.2s, v23.2s // i2-i3,r3-r2
fsub v22.2s, v0.2s, v1.2s // r0-r1,i0-i1
fmul v24.2s, v17.2s, v28.s[1] // a2r*w,a2i*w
fmul v25.2s, v19.2s, v28.s[1] // a3r*w,a3i*w
fadd v0.2s, v20.2s, v21.2s
fsub v2.2s, v20.2s, v21.2s
fadd v1.2s, v22.2s, v23.2s
rev64 v26.2s, v26.2s
rev64 v27.2s, v27.2s
fsub v3.2s, v22.2s, v23.2s
fsub v6.2s, v6.2s, v7.2s
fadd v24.2s, v24.2s, v26.2s // a2r+a2i,a2i-a2r t1,t2
fadd v25.2s, v25.2s, v27.2s // a3r-a3i,a3i+a3r t5,t6
fadd v7.2s, v4.2s, v5.2s
fsub v18.2s, v2.2s, v6.2s
ext v26.8b, v24.8b, v25.8b, #4
ext v27.8b, v25.8b, v24.8b, #4
fadd v2.2s, v2.2s, v6.2s
fsub v16.2s, v0.2s, v7.2s
fadd v5.2s, v25.2s, v24.2s
fsub v4.2s, v26.2s, v27.2s
fadd v0.2s, v0.2s, v7.2s
fsub v17.2s, v1.2s, v5.2s
fsub v19.2s, v3.2s, v4.2s
fadd v3.2s, v3.2s, v4.2s
fadd v1.2s, v1.2s, v5.2s
// second half goes through x0 (already advanced), first half through x1
st1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0]
st1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x1]
ret
endfunc
// fft16_neon: in-place transform of 16 complex floats.
//   x0: pointer to the 16 complex values (128 bytes), read and overwritten
// Expects the constants set up in ff_fft_calc_neon:
//   x14 = address of ff_cos_16, v28 = mppm, v29 = pmmp,
//   v30 = trans4_float, v31 = trans8_float
// The first 8 values go through the same sequence as fft8_neon, the last 8
// through two 4-point transforms, then one combining pass is applied.
// x0 and x1 are both advanced past the data on return.
function fft16_neon
mov x1, x0
ld1 {v0.2s, v1.2s, v2.2s, v3.2s}, [x0], #32
ld1 {v16.2s,v17.2s,v18.2s,v19.2s}, [x0], #32
ext v22.8b, v2.8b, v3.8b, #4
ext v23.8b, v3.8b, v2.8b, #4
fadd v4.2s, v16.2s, v17.2s // r4+r5,i4+i5
fadd v5.2s, v18.2s, v19.2s // r6+r7,i6+i7
fsub v17.2s, v16.2s, v17.2s // r4-r5,i4-i5
fsub v19.2s, v18.2s, v19.2s // r6-r7,i6-i7
rev64 v27.2s, v28.2s // v27 = low two lanes of v28 swapped, i.e. {+w, -w}
fadd v20.2s, v0.2s, v1.2s // r0+r1,i0+i1
fadd v21.2s, v2.2s, v3.2s // r2+r3,i2+i3
fmul v26.2s, v17.2s, v28.2s // -a2r*w,a2i*w
ext v6.8b, v4.8b, v5.8b, #4
ext v7.8b, v5.8b, v4.8b, #4
fmul v27.2s, v19.2s, v27.2s // a3r*w,-a3i*w
fsub v23.2s, v22.2s, v23.2s // i2-i3,r3-r2
fsub v22.2s, v0.2s, v1.2s // r0-r1,i0-i1
fmul v24.2s, v17.2s, v28.s[1] // a2r*w,a2i*w
fmul v25.2s, v19.2s, v28.s[1] // a3r*w,a3i*w
fadd v0.2s, v20.2s, v21.2s
fsub v2.2s, v20.2s, v21.2s
fadd v1.2s, v22.2s, v23.2s
rev64 v26.2s, v26.2s
rev64 v27.2s, v27.2s
fsub v3.2s, v22.2s, v23.2s
fsub v6.2s, v6.2s, v7.2s
fadd v24.2s, v24.2s, v26.2s // a2r+a2i,a2i-a2r t1,t2
fadd v25.2s, v25.2s, v27.2s // a3r-a3i,a3i+a3r t5,t6
fadd v7.2s, v4.2s, v5.2s
fsub v18.2s, v2.2s, v6.2s
// load the last 8 complex values (z[8..15]) while the first half finishes
ld1 {v20.4s,v21.4s}, [x0], #32
ld1 {v22.4s,v23.4s}, [x0], #32
ext v26.8b, v24.8b, v25.8b, #4
ext v27.8b, v25.8b, v24.8b, #4
fadd v2.2s, v2.2s, v6.2s
fsub v16.2s, v0.2s, v7.2s
fadd v5.2s, v25.2s, v24.2s
fsub v4.2s, v26.2s, v27.2s
transpose v24.2d, v25.2d, v20.2d, v22.2d
transpose v26.2d, v27.2d, v21.2d, v23.2d
fadd v0.2s, v0.2s, v7.2s
fsub v17.2s, v1.2s, v5.2s
fsub v19.2s, v3.2s, v4.2s
fadd v3.2s, v3.2s, v4.2s
fadd v1.2s, v1.2s, v5.2s
ext v20.16b, v21.16b, v21.16b, #4
ext v21.16b, v23.16b, v23.16b, #4
// pack the 8-point results two complex values per 128-bit register
zip1 v0.2d, v0.2d, v1.2d // {z[0], z[1]}
zip1 v1.2d, v2.2d, v3.2d // {z[2], z[3]}
zip1 v2.2d, v16.2d, v17.2d // {z[o1], z[o1+1]}
zip1 v3.2d, v18.2d, v19.2d // {z[o1+2],z[o1+3]}
// 2 x fft4
transpose v22.2d, v23.2d, v20.2d, v21.2d
fadd v4.4s, v24.4s, v25.4s
fadd v5.4s, v26.4s, v27.4s
fsub v6.4s, v24.4s, v25.4s
fsub v7.4s, v22.4s, v23.4s
ld1 {v23.4s}, [x14] // first four floats of ff_cos_16
fadd v24.4s, v4.4s, v5.4s // {z[o2+0],z[o2+1]}
fsub v26.4s, v4.4s, v5.4s // {z[o2+2],z[o2+3]}
fadd v25.4s, v6.4s, v7.4s // {z[o3+0],z[o3+1]}
fsub v27.4s, v6.4s, v7.4s // {z[o3+2],z[o3+3]}
//fft_pass_neon_16
rev64 v7.4s, v25.4s
fmul v25.4s, v25.4s, v23.s[1]
fmul v7.4s, v7.4s, v29.4s // apply the pmmp sign pattern
fmla v25.4s, v7.4s, v23.s[3] // {t1a,t2a,t5a,t6a}
zip1 v20.4s, v24.4s, v25.4s
zip2 v21.4s, v24.4s, v25.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fsub v20.4s, v0.4s, v4.4s // {z[o2],z[o2+1]}
fadd v16.4s, v0.4s, v4.4s // {z[0], z[1]}
fsub v22.4s, v2.4s, v5.4s // {z[o3],z[o3+1]}
fadd v18.4s, v2.4s, v5.4s // {z[o1],z[o1+1]}
//second half
rev64 v6.4s, v26.4s
fmul v26.4s, v26.4s, v23.s[2]
rev64 v7.4s, v27.4s
fmul v27.4s, v27.4s, v23.s[3]
fmul v6.4s, v6.4s, v29.4s
fmul v7.4s, v7.4s, v29.4s
fmla v26.4s, v6.4s, v23.s[2] // {t1,t2,t5,t6}
fmla v27.4s, v7.4s, v23.s[1] // {t1a,t2a,t5a,t6a}
zip1 v24.4s, v26.4s, v27.4s
zip2 v25.4s, v26.4s, v27.4s
fneg v26.4s, v24.4s
fadd v4.4s, v25.4s, v24.4s
fsub v6.4s, v24.4s, v25.4s // just the second half
fadd v5.4s, v25.4s, v26.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v17.4s, v1.4s, v4.4s // {z[2], z[3]}
fsub v21.4s, v1.4s, v4.4s // {z[o2+2],z[o2+3]}
fadd v19.4s, v3.4s, v5.4s // {z[o1+2],z[o1+3]}
fsub v23.4s, v3.4s, v5.4s // {z[o3+2],z[o3+3]}
// write all 16 results back in order through x1 (the original pointer)
st1 {v16.4s,v17.4s}, [x1], #32
st1 {v18.4s,v19.4s}, [x1], #32
st1 {v20.4s,v21.4s}, [x1], #32
st1 {v22.4s,v23.4s}, [x1], #32
ret
endfunc
// Byte-index table for a one-register tbl: picks the 32-bit lanes
// 0, 2, 1, 3 of the source, i.e. swaps the two middle floats.
const trans4_float, align=4
.byte 0, 1, 2, 3
.byte 8, 9, 10, 11
.byte 4, 5, 6, 7
.byte 12, 13, 14, 15
endconst
// Byte-index table for a two-register tbl: the result lanes are
// {second[2], first[0], second[3], first[1]} (indices 16-31 address the
// second source register).
const trans8_float, align=4
.byte 24, 25, 26, 27
.byte 0, 1, 2, 3
.byte 28, 29, 30, 31
.byte 4, 5, 6, 7
endconst
// fft_pass_neon: combining pass over four equally spaced quarters of z.
//   x0: z, x2: n, x4: pointer to the wre table
//   x7: post-index step used when reading wim (set to -8 in
//       ff_fft_calc_neon, so wim is walked backwards)
//   v29 = pmmp, v30 = trans4_float, v31 = trans8_float
// Two complex values per quarter are handled per step: once before the loop
// and then n - 1 times inside it. x0-x6 are modified.
function fft_pass_neon
sub x6, x2, #1 // n - 1, loop counter
lsl x5, x2, #3 // 2 * n * sizeof FFTSample
lsl x1, x2, #4 // 2 * n * sizeof FFTComplex
add x5, x4, x5 // wim
add x3, x1, x2, lsl #5 // 16n + 32n bytes = 6 * n * sizeof FFTComplex (offset of z[o3])
add x2, x0, x2, lsl #5 // &z[o2]
add x3, x0, x3 // &z[o3]
add x1, x0, x1 // &z[o1]
// first step: only v25 (the odd pair) is multiplied by a twiddle,
// v24 is used as loaded
ld1 {v20.4s},[x2] // {z[o2],z[o2+1]}
ld1 {v22.4s},[x3] // {z[o3],z[o3+1]}
ld1 {v4.2s}, [x4], #8 // {wre[0],wre[1]}
trn2 v25.2d, v20.2d, v22.2d
sub x5, x5, #4 // wim--
trn1 v24.2d, v20.2d, v22.2d
ld1 {v5.s}[0], [x5], x7 // d5[0] = wim[-1]
rev64 v7.4s, v25.4s
fmul v25.4s, v25.4s, v4.s[1]
ld1 {v16.4s}, [x0] // {z[0],z[1]}
fmul v7.4s, v7.4s, v29.4s
ld1 {v17.4s}, [x1] // {z[o1],z[o1+1]}
prfm pldl1keep, [x2, #16]
prfm pldl1keep, [x3, #16]
fmla v25.4s, v7.4s, v5.s[0] // {t1a,t2a,t5a,t6a}
prfm pldl1keep, [x0, #16]
prfm pldl1keep, [x1, #16]
zip1 v20.4s, v24.4s, v25.4s
zip2 v21.4s, v24.4s, v25.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v20.4s, v16.4s, v4.4s
fsub v22.4s, v16.4s, v4.4s
fadd v21.4s, v17.4s, v5.4s
st1 {v20.4s}, [x0], #16 // {z[0], z[1]}
fsub v23.4s, v17.4s, v5.4s
st1 {v21.4s}, [x1], #16 // {z[o1],z[o1+1]}
st1 {v22.4s}, [x2], #16 // {z[o2],z[o2+1]}
st1 {v23.4s}, [x3], #16 // {z[o3],z[o3+1]}
// main loop: both pairs get a twiddle; wre advances, wim steps by x7
1:
ld1 {v20.4s},[x2] // {z[o2],z[o2+1]}
ld1 {v22.4s},[x3] // {z[o3],z[o3+1]}
ld1 {v4.2s}, [x4], #8 // {wre[0],wre[1]}
transpose v26.2d, v27.2d, v20.2d, v22.2d
ld1 {v5.2s}, [x5], x7 // {wim[-1],wim[0]}
rev64 v6.4s, v26.4s
fmul v26.4s, v26.4s, v4.s[0]
rev64 v7.4s, v27.4s
fmul v27.4s, v27.4s, v4.s[1]
fmul v6.4s, v6.4s, v29.4s
fmul v7.4s, v7.4s, v29.4s
ld1 {v16.4s},[x0] // {z[0],z[1]}
fmla v26.4s, v6.4s, v5.s[1] // {t1,t2,t5,t6}
fmla v27.4s, v7.4s, v5.s[0] // {t1a,t2a,t5a,t6a}
ld1 {v17.4s},[x1] // {z[o1],z[o1+1]}
subs x6, x6, #1 // n--
zip1 v20.4s, v26.4s, v27.4s
zip2 v21.4s, v26.4s, v27.4s
fneg v22.4s, v20.4s
fadd v4.4s, v21.4s, v20.4s
fsub v6.4s, v20.4s, v21.4s // just the second half
fadd v5.4s, v21.4s, v22.4s // just the first half
tbl v4.16b, {v4.16b}, v30.16b // trans4_float
tbl v5.16b, {v5.16b,v6.16b}, v31.16b // trans8_float
fadd v20.4s, v16.4s, v4.4s
fsub v22.4s, v16.4s, v4.4s
fadd v21.4s, v17.4s, v5.4s
st1 {v20.4s}, [x0], #16 // {z[0], z[1]}
fsub v23.4s, v17.4s, v5.4s
st1 {v21.4s}, [x1], #16 // {z[o1],z[o1+1]}
st1 {v22.4s}, [x2], #16 // {z[o2],z[o2+1]}
st1 {v23.4s}, [x3], #16 // {z[o3],z[o3+1]}
b.ne 1b // flags come from the subs above; the NEON ops leave them alone
ret
endfunc
// def_fft n, n2, n4: emit fft<n>_neon out of smaller transforms.
//   x0: z (n complex floats)
// Steps: fft<n2> on the first half, fft<n4> on the third and on the fourth
// quarter, then a tail call to fft_pass_neon with x0 = z,
// x4 = address of ff_cos_<n> and x2 = n4 / 2.
// x28 holds &z[n/2] across the calls; x28 and x30 are saved on the stack
// and restored before the tail call.
.macro def_fft n, n2, n4
function fft\n\()_neon, align=6
sub sp, sp, #16
stp x28, x30, [sp]
add x28, x0, #\n4*2*8 // x28 = z + 2 * n4 complex values (8 bytes each)
bl fft\n2\()_neon
mov x0, x28
bl fft\n4\()_neon
add x0, x28, #\n4*1*8 // start of the last quarter
bl fft\n4\()_neon
sub x0, x28, #\n4*2*8 // back to z
ldp x28, x30, [sp], #16
movrel x4, X(ff_cos_\n)
mov x2, #\n4>>1
b fft_pass_neon
endfunc
.endm
// Instantiate every size from 32 to 65536; each one uses the two sizes
// below it (fft16_neon and fft8_neon are hand written above).
def_fft 32, 16, 8
def_fft 64, 32, 16
def_fft 128, 64, 32
def_fft 256, 128, 64
def_fft 512, 256, 128
def_fft 1024, 512, 256
def_fft 2048, 1024, 512
def_fft 4096, 2048, 1024
def_fft 8192, 4096, 2048
def_fft 16384, 8192, 4096
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
// void ff_fft_calc_neon(FFTContext *s, FFTComplex *z)
//   x0: s, x1: z
// Loads the shared constants used by the fftN_neon routines and jumps to
// the routine selected by the first 32-bit field of s (the same field is
// read as nbits in ff_fft_permute_neon), via fft_tab_neon[value - 2].
// Register setup handed to the transform:
//   x0 = z, x7 = -8, x14 = address of ff_cos_16,
//   v28 = mppm, v29 = pmmp, v30 = trans4_float, v31 = trans8_float
function ff_fft_calc_neon, export=1
prfm pldl1keep, [x1]
movrel x10, trans4_float
ldr w2, [x0]
movrel x11, trans8_float
sub w2, w2, #2 // table index: entry 0 is fft4_neon
movrel x3, fft_tab_neon
ld1 {v30.16b}, [x10]
mov x7, #-8
movrel x12, pmmp
ldr x3, [x3, x2, lsl #3]
movrel x13, mppm
movrel x14, X(ff_cos_16)
ld1 {v31.16b}, [x11]
mov x0, x1
ld1 {v29.4s}, [x12] // pmmp
ld1 {v28.4s}, [x13] // mppm
br x3 // tail jump: the selected routine returns to our caller
endfunc
// void ff_fft_permute_neon(FFTContext *s, FFTComplex *z)
//   x0: s, x1: z
// Scatters the n = 1 << nbits complex values of z into tmp_buf at the
// positions given by revtab (16-bit indices), then copies tmp_buf back
// over z.
function ff_fft_permute_neon, export=1
mov x6, #1
ldr w2, [x0] // nbits
ldr x3, [x0, #16] // tmp_buf
ldr x0, [x0, #8] // revtab
lsl x6, x6, x2 // n = 1 << nbits
mov x2, x6
// scatter loop: two complex values and two revtab entries per iteration
1:
ld1 {v0.2s,v1.2s}, [x1], #16
ldr w4, [x0], #4 // two 16-bit revtab entries in one load
uxth w5, w4 // first index (low half)
lsr w4, w4, #16 // second index (high half)
add x5, x3, x5, lsl #3
add x4, x3, x4, lsl #3
st1 {v0.2s}, [x5]
st1 {v1.2s}, [x4]
subs x6, x6, #2
b.gt 1b
sub x1, x1, x2, lsl #3 // rewind x1 to the start of z
// copy loop: four complex values (32 bytes) per iteration
1:
ld1 {v0.4s,v1.4s}, [x3], #32
st1 {v0.4s,v1.4s}, [x1], #32
subs x2, x2, #4
b.gt 1b
ret
endfunc
// Dispatch table used by ff_fft_calc_neon; entry i is the routine for
// 1 << (i + 2) points.
const fft_tab_neon, relocate=1
.quad fft4_neon
.quad fft8_neon
.quad fft16_neon
.quad fft32_neon
.quad fft64_neon
.quad fft128_neon
.quad fft256_neon
.quad fft512_neon
.quad fft1024_neon
.quad fft2048_neon
.quad fft4096_neon
.quad fft8192_neon
.quad fft16384_neon
.quad fft32768_neon
.quad fft65536_neon
endconst
// Sign pattern {+, -, -, +}; loaded into v29 by ff_fft_calc_neon.
const pmmp, align=4
.float +1.0, -1.0, -1.0, +1.0
endconst
// 1/sqrt(2) with sign pattern {-, +, +, -}; loaded into v28 by
// ff_fft_calc_neon.
const mppm, align=4
.float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
endconst

View File

@@ -0,0 +1,59 @@
/*
* ARM NEON optimised H.264 chroma functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/h264chroma.h"
#include "config.h"
/* NEON chroma motion-compensation routines implemented outside this file.
 * "put" and "avg" variants exist for block widths 8, 4 and 2 (mc8/mc4/mc2);
 * all six share the same signature. */
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
/**
 * Install the AArch64 NEON chroma motion-compensation routines.
 *
 * The context is left untouched when NEON is unavailable or when the bit
 * depth is above 8.
 *
 * @param c         chroma context whose function tables are overridden
 * @param bit_depth sample bit depth of the stream
 */
av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
{
    const int flags = av_get_cpu_flags();

    /* Only the 8-bit case is covered by the NEON routines. */
    if (bit_depth > 8 || !have_neon(flags))
        return;

    /* Table slots 0, 1, 2 hold the mc8, mc4 and mc2 routines. */
    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;

    c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
    c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;

    c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
    c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
}

View File

@@ -0,0 +1,453 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
// Macro parameters:
//   type:  put (store the result) or avg (rounding average with dst)
//   codec: h264 (default), rv40 or vc1; selects the rounding
// Registers: x0 dst, x1 src, w2 stride, w3 h, w4 x, w5 y.
// Bilinear weights computed below:
//   A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y
// Each loop iteration produces two rows of 8 pixels.
// Rounding: h264 uses a rounding narrowing shift by 6; rv40 and vc1 add the
// bias in v22 and use a truncating shift by 6.
.macro h264_chroma_mc8 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
sxtw x2, w2
.ifc \type,avg
mov x8, x0 // second dst pointer, used to read the rows to average with
.endif
prfm pldl1strm, [x1]
prfm pldl1strm, [x1, x2]
.ifc \codec,rv40
// bias = rv40bias[(y >> 1) * 4 + (x >> 1)], replicated into v22
movrel x6, rv40bias
lsr w9, w5, #1
lsr w10, w4, #1
lsl w9, w9, #3
lsl w10, w10, #1
add w9, w9, w10
add x6, x6, w9, UXTW
ld1r {v22.8H}, [x6]
.endif
.ifc \codec,vc1
movi v22.8H, #28 // constant bias for vc1
.endif
mul w7, w4, w5 // D = x*y
lsl w14, w5, #3 // 8*y
lsl w13, w4, #3 // 8*x
cmp w7, #0
sub w6, w14, w7 // C = 8*y - x*y
sub w12, w13, w7 // B = 8*x - x*y
sub w4, w7, w13
sub w4, w4, w14
add w4, w4, #64 // A = x*y - 8*x - 8*y + 64
b.eq 2f // x*y == 0: use one of the reduced paths
// general case: all four weights
dup v0.8B, w4
dup v1.8B, w12
ld1 {v4.8B, v5.8B}, [x1], x2
dup v2.8B, w6
dup v3.8B, w7
ext v5.8B, v4.8B, v5.8B, #1 // row shifted by one pixel
1: ld1 {v6.8B, v7.8B}, [x1], x2
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
ext v7.8B, v6.8B, v7.8B, #1
ld1 {v4.8B, v5.8B}, [x1], x2
umlal v16.8H, v6.8B, v2.8B
prfm pldl1strm, [x1]
ext v5.8B, v4.8B, v5.8B, #1
umlal v16.8H, v7.8B, v3.8B
umull v17.8H, v6.8B, v0.8B
subs w3, w3, #2
umlal v17.8H, v7.8B, v1.8B
umlal v17.8H, v4.8B, v2.8B
umlal v17.8H, v5.8B, v3.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 1b
ret
// x*y == 0: w12 + w6 is the single remaining second weight
2: adds w12, w12, w6
dup v0.8B, w4
b.eq 5f // both B and C are zero: weight A only
tst w6, w6
dup v1.8B, w12
b.eq 4f // C == 0: horizontal filter only
// vertical filter only (B == 0): weights A and C on consecutive rows
ld1 {v4.8B}, [x1], x2
3: ld1 {v6.8B}, [x1], x2
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v6.8B, v1.8B
ld1 {v4.8B}, [x1], x2
umull v17.8H, v6.8B, v0.8B
umlal v17.8H, v4.8B, v1.8B
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
prfm pldl1strm, [x1, x2]
.ifc \type,avg
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
subs w3, w3, #2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 3b
ret
// horizontal filter only: weights A and B on a row and its shifted copy
4: ld1 {v4.8B, v5.8B}, [x1], x2
ld1 {v6.8B, v7.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
ext v7.8B, v6.8B, v7.8B, #1
prfm pldl1strm, [x1]
subs w3, w3, #2
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
umull v17.8H, v6.8B, v0.8B
umlal v17.8H, v7.8B, v1.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 4b
ret
// no interpolation: each pixel is scaled by A and shifted back down
5: ld1 {v4.8B}, [x1], x2
ld1 {v5.8B}, [x1], x2
prfm pldl1strm, [x1]
subs w3, w3, #2
umull v16.8H, v4.8B, v0.8B
umull v17.8H, v5.8B, v0.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 5b
ret
endfunc
.endm
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
// Same parameters, register use, weights and rounding as h264_chroma_mc8,
// but for rows of 4 pixels. A row and its one-pixel-shifted copy are packed
// into the two 32-bit halves of one 64-bit register, so a single widening
// multiply applies two weights; the two product halves are then summed.
// Each loop iteration produces two rows.
.macro h264_chroma_mc4 type, codec=h264
function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
sxtw x2, w2
.ifc \type,avg
mov x8, x0 // second dst pointer, used to read the rows to average with
.endif
prfm pldl1strm, [x1]
prfm pldl1strm, [x1, x2]
.ifc \codec,rv40
// bias = rv40bias[(y >> 1) * 4 + (x >> 1)], replicated into v22
movrel x6, rv40bias
lsr w9, w5, #1
lsr w10, w4, #1
lsl w9, w9, #3
lsl w10, w10, #1
add w9, w9, w10
add x6, x6, w9, UXTW
ld1r {v22.8H}, [x6]
.endif
.ifc \codec,vc1
movi v22.8H, #28 // constant bias for vc1
.endif
mul w7, w4, w5 // D = x*y
lsl w14, w5, #3 // 8*y
lsl w13, w4, #3 // 8*x
cmp w7, #0
sub w6, w14, w7 // C = 8*y - x*y
sub w12, w13, w7 // B = 8*x - x*y
sub w4, w7, w13
sub w4, w4, w14
add w4, w4, #64 // A = x*y - 8*x - 8*y + 64
b.eq 2f // x*y == 0: use one of the reduced paths
// general case: v0 = {A x4, B x4}, v2 = {C x4, D x4}
dup v24.8B, w4
dup v25.8B, w12
ld1 {v4.8B}, [x1], x2
dup v26.8B, w6
dup v27.8B, w7
ext v5.8B, v4.8B, v5.8B, #1
trn1 v0.2S, v24.2S, v25.2S
trn1 v2.2S, v26.2S, v27.2S
trn1 v4.2S, v4.2S, v5.2S // {row[0..3], row[1..4]}
1: ld1 {v6.8B}, [x1], x2
ext v7.8B, v6.8B, v7.8B, #1
trn1 v6.2S, v6.2S, v7.2S
umull v18.8H, v4.8B, v0.8B
umlal v18.8H, v6.8B, v2.8B
ld1 {v4.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
trn1 v4.2S, v4.2S, v5.2S
prfm pldl1strm, [x1]
umull v19.8H, v6.8B, v0.8B
umlal v19.8H, v4.8B, v2.8B
// gather the low halves and the high halves of both rows, then sum them
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
.ifc \codec,h264
rshrn v16.8B, v18.8H, #6
.else
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
subs w3, w3, #2
prfm pldl1strm, [x1, x2]
.ifc \type,avg
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 1b
ret
// x*y == 0: w12 + w6 is the single remaining second weight
2: adds w12, w12, w6
dup v30.8B, w4
b.eq 5f // both B and C are zero: weight A only
tst w6, w6
dup v31.8B, w12
trn1 v0.2S, v30.2S, v31.2S // {A x4, second weight x4}
trn2 v1.2S, v30.2S, v31.2S
b.eq 4f // C == 0: horizontal filter only
// vertical filter only: v1 becomes {second weight x4, A x4} so that the
// two rows held in v4 can swap roles between the two multiplies
ext v1.8B, v0.8B, v1.8B, #4
ld1 {v4.S}[0], [x1], x2
3: ld1 {v4.S}[1], [x1], x2
umull v18.8H, v4.8B, v0.8B
ld1 {v4.S}[0], [x1], x2
umull v19.8H, v4.8B, v1.8B
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8B, v18.8H, #6
.else
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
subs w3, w3, #2
prfm pldl1strm, [x1, x2]
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 3b
ret
// horizontal filter only: each row is packed with its shifted copy
4: ld1 {v4.8B}, [x1], x2
ld1 {v6.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
ext v7.8B, v6.8B, v7.8B, #1
trn1 v4.2S, v4.2S, v5.2S
trn1 v6.2S, v6.2S, v7.2S
umull v18.8H, v4.8B, v0.8B
umull v19.8H, v6.8B, v0.8B
subs w3, w3, #2
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8B, v18.8H, #6
.else
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
prfm pldl1strm, [x1]
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 4b
ret
// no interpolation: two rows in one register, scaled by A (v30)
5: ld1 {v4.S}[0], [x1], x2
ld1 {v4.S}[1], [x1], x2
umull v18.8H, v4.8B, v30.8B
subs w3, w3, #2
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8B, v18.8H, #6
.else
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
prfm pldl1strm, [x1]
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 5b
ret
endfunc
.endm
// h264_chroma_mc2 type: 2-pixel-wide chroma motion compensation.
//   type: put (store the result) or avg (rounding average with dst)
// Registers: x0 dst, x1 src, w2 stride, w3 h, w4 x, w5 y.
// Only the h264 rounding (rounding shift by 6) is implemented; the macro
// has no codec parameter. Two rows are produced per loop iteration.
.macro h264_chroma_mc2 type
function ff_\type\()_h264_chroma_mc2_neon, export=1
sxtw x2, w2
prfm pldl1strm, [x1]
prfm pldl1strm, [x1, x2]
orr w7, w4, w5
cbz w7, 2f // x == 0 and y == 0: plain copy (or average)
// weights: w4 = (8-x)*(8-y), w12 = x*(8-y), w6 = (8-x)*y, w7 = x*y
mul w7, w4, w5
lsl w14, w5, #3
lsl w13, w4, #3
sub w6, w14, w7
sub w12, w13, w7
sub w4, w7, w13
sub w4, w4, w14
add w4, w4, #64
dup v0.8B, w4
dup v2.8B, w12
dup v1.8B, w6
dup v3.8B, w7
// interleave the weight pairs to match the pixel layout built in the loop
trn1 v0.4H, v0.4H, v2.4H
trn1 v1.4H, v1.4H, v3.4H
1:
// v4 = {row n, row n+1}; v5 = {row n+1, row n+2}. The third load does not
// advance x1, so row n+2 is read again as row n of the next iteration.
ld1 {v4.S}[0], [x1], x2
ld1 {v4.S}[1], [x1], x2
rev64 v5.2S, v4.2S
ld1 {v5.S}[1], [x1]
ext v6.8B, v4.8B, v5.8B, #1
ext v7.8B, v5.8B, v4.8B, #1
trn1 v4.4H, v4.4H, v6.4H
trn1 v5.4H, v5.4H, v7.4H
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
.ifc \type,avg
// read the two dst rows, then step x0 back to the first of them
ld1 {v18.H}[0], [x0], x2
ld1 {v18.H}[2], [x0]
sub x0, x0, x2
.endif
rev64 v17.4S, v16.4S
add v16.8H, v16.8H, v17.8H
rshrn v16.8B, v16.8H, #6
.ifc \type,avg
urhadd v16.8B, v16.8B, v18.8B
.endif
st1 {v16.H}[0], [x0], x2
st1 {v16.H}[2], [x0], x2
subs w3, w3, #2
b.gt 1b
ret
// copy path: two rows of two bytes each per iteration
2:
ld1 {v16.H}[0], [x1], x2
ld1 {v16.H}[1], [x1], x2
.ifc \type,avg
ld1 {v18.H}[0], [x0], x2
ld1 {v18.H}[1], [x0]
sub x0, x0, x2
urhadd v16.8B, v16.8B, v18.8B
.endif
st1 {v16.H}[0], [x0], x2
st1 {v16.H}[1], [x0], x2
subs w3, w3, #2
b.gt 2b
ret
endfunc
.endm
// Instantiate the put and avg chroma motion compensation functions for
// block widths 8, 4 and 2.
h264_chroma_mc8 put
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 avg
h264_chroma_mc2 put
h264_chroma_mc2 avg
// Extra width 8 and 4 variants, built only when the RV40 decoder is enabled.
#if CONFIG_RV40_DECODER
// Table of 16 halfword constants; presumably the rounding bias picked up by
// the rv40 variants (the code that loads it is not in view - confirm).
const rv40bias
.short 0, 16, 32, 16
.short 32, 28, 32, 28
.short 0, 32, 16, 32
.short 32, 28, 32, 28
endconst
h264_chroma_mc8 put, rv40
h264_chroma_mc8 avg, rv40
h264_chroma_mc4 put, rv40
h264_chroma_mc4 avg, rv40
#endif
// Extra width 8 and 4 variants, built only when the VC-1 decoder is enabled.
#if CONFIG_VC1_DECODER
h264_chroma_mc8 put, vc1
h264_chroma_mc8 avg, vc1
h264_chroma_mc4 put, vc1
h264_chroma_mc4 avg, vc1
#endif

View File

@@ -0,0 +1,102 @@
/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/h264dsp.h"
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
int16_t *block, int stride,
const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
int16_t *block, int stride,
const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
int16_t *block, int stride,
const uint8_t nnzc[6*8]);
void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
int16_t *block, int stride,
const uint8_t nnzc[6*8]);
/**
 * Install the AArch64 NEON routines into an H.264 DSP context.
 *
 * Nothing is changed unless the CPU reports NEON and the stream is 8-bit.
 * The horizontal chroma loop filter and the chroma IDCT (add8) are only
 * installed when chroma_format_idc is 0 or 1.
 *
 * @param c                 context whose function pointers are filled in
 * @param bit_depth         sample bit depth of the stream
 * @param chroma_format_idc chroma format indicator of the stream
 */
av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
                                     const int chroma_format_idc)
{
    const int cpu_flags      = av_get_cpu_flags();
    const int chroma_idc_le1 = chroma_format_idc <= 1;

    if (!(have_neon(cpu_flags) && bit_depth == 8))
        return;

    /* In-loop deblocking filters. */
    c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
    c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
    c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
    if (chroma_idc_le1)
        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;

    /* Weighted prediction, indexed by block width 16 / 8 / 4. */
    c->weight_h264_pixels_tab[0]   = ff_weight_h264_pixels_16_neon;
    c->weight_h264_pixels_tab[1]   = ff_weight_h264_pixels_8_neon;
    c->weight_h264_pixels_tab[2]   = ff_weight_h264_pixels_4_neon;
    c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
    c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
    c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;

    /* 4x4 inverse transforms. */
    c->h264_idct_add        = ff_h264_idct_add_neon;
    c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
    c->h264_idct_add16      = ff_h264_idct_add16_neon;
    c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
    if (chroma_idc_le1)
        c->h264_idct_add8 = ff_h264_idct_add8_neon;

    /* 8x8 inverse transforms. */
    c->h264_idct8_add    = ff_h264_idct8_add_neon;
    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
    c->h264_idct8_add4   = ff_h264_idct8_add4_neon;
}

View File

@@ -0,0 +1,498 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#include "neon.S"
// Common prologue of the loop filter functions. Reads w2 (alpha), w3 (beta)
// and the four tc0 bytes at x4; clobbers w6 and leaves the four tc0 bytes in
// v24.S[0]. The macro contains a ret, so it returns from the enclosing
// function when no filtering is required; otherwise execution continues
// after label 2.
.macro h264_loop_filter_start
cmp w2, #0
ldr w6, [x4]
// If w2 is nonzero compare w3 with 0, otherwise force flags to "not equal".
ccmp w3, #0, #0, ne
mov v24.S[0], w6
and w6, w6, w6, lsl #16
// Taken when w2 is nonzero and w3 is zero (flags still come from ccmp).
b.eq 1f
// Bit 31 of the result is the AND of the top bits of all four tc0 bytes,
// so the N flag is set only when every tc0 value is negative.
ands w6, w6, w6, lsl #8
b.ge 2f
1:
ret
2:
.endm
// Luma deblocking core for 16 pixel positions at once.
// Inputs: v20 = p2, v18 = p1, v16 = p0, v0 = q0, v2 = q1, v4 = q2,
// w2 = alpha, w3 = beta, v24.S[0] = four tc0 bytes.
// Outputs (as stored by the callers): v17 = new p1, v16 = new p0,
// v0 = new q0, v19 = new q1. Clobbers v4 and v20 to v30.
.macro h264_loop_filter_luma
dup v22.16B, w2 // alpha
// The uxtl/sli sequence spreads each tc0 byte over four consecutive lanes.
uxtl v24.8H, v24.8B
uabd v21.16B, v16.16B, v0.16B // abs(p0 - q0)
uxtl v24.4S, v24.4H
uabd v28.16B, v18.16B, v16.16B // abs(p1 - p0)
sli v24.8H, v24.8H, #8
uabd v30.16B, v2.16B, v0.16B // abs(q1 - q0)
sli v24.4S, v24.4S, #16
cmhi v21.16B, v22.16B, v21.16B // < alpha
dup v22.16B, w3 // beta
// v23 = lanes whose tc0 is negative; they are removed from the filter mask.
cmlt v23.16B, v24.16B, #0
cmhi v28.16B, v22.16B, v28.16B // < beta
cmhi v30.16B, v22.16B, v30.16B // < beta
bic v21.16B, v21.16B, v23.16B
uabd v17.16B, v20.16B, v16.16B // abs(p2 - p0)
and v21.16B, v21.16B, v28.16B
uabd v19.16B, v4.16B, v0.16B // abs(q2 - q0)
cmhi v17.16B, v22.16B, v17.16B // < beta
and v21.16B, v21.16B, v30.16B
cmhi v19.16B, v22.16B, v19.16B // < beta
and v17.16B, v17.16B, v21.16B
and v19.16B, v19.16B, v21.16B
and v24.16B, v24.16B, v21.16B
urhadd v28.16B, v16.16B, v0.16B
// v17 and v19 are 0 or -1 per lane, so after the two subtractions
// v21 = tc0 + (p2 condition) + (q2 condition).
sub v21.16B, v24.16B, v17.16B
uqadd v23.16B, v18.16B, v24.16B
uhadd v20.16B, v20.16B, v28.16B
sub v21.16B, v21.16B, v19.16B
uhadd v28.16B, v4.16B, v28.16B
// Candidate p1 (v23) and q1 (v28) are clamped to +/- tc0 around the old value.
umin v23.16B, v23.16B, v20.16B
uqsub v22.16B, v18.16B, v24.16B
uqadd v4.16B, v2.16B, v24.16B
umax v23.16B, v23.16B, v22.16B
uqsub v22.16B, v2.16B, v24.16B
umin v28.16B, v4.16B, v28.16B
// Delta for p0/q0: (((q0 - p0) << 2) + p1 - q1 + 4) >> 3, in 16-bit lanes.
uxtl v4.8H, v0.8B
umax v28.16B, v28.16B, v22.16B
uxtl2 v20.8H, v0.16B
usubw v4.8H, v4.8H, v16.8B
usubw2 v20.8H, v20.8H, v16.16B
shl v4.8H, v4.8H, #2
shl v20.8H, v20.8H, #2
uaddw v4.8H, v4.8H, v18.8B
uaddw2 v20.8H, v20.8H, v18.16B
usubw v4.8H, v4.8H, v2.8B
usubw2 v20.8H, v20.8H, v2.16B
rshrn v4.8B, v4.8H, #3
rshrn2 v4.16B, v20.8H, #3
// Select the new p1/q1 only where the p2/q2 masks are set.
bsl v17.16B, v23.16B, v18.16B
bsl v19.16B, v28.16B, v2.16B
// Clamp the delta to [-v21, v21], then p0 += delta and q0 -= delta with
// unsigned saturation back to bytes.
neg v23.16B, v21.16B
uxtl v28.8H, v16.8B
smin v4.16B, v4.16B, v21.16B
uxtl2 v21.8H, v16.16B
smax v4.16B, v4.16B, v23.16B
uxtl v22.8H, v0.8B
uxtl2 v24.8H, v0.16B
saddw v28.8H, v28.8H, v4.8B
saddw2 v21.8H, v21.8H, v4.16B
ssubw v22.8H, v22.8H, v4.8B
ssubw2 v24.8H, v24.8H, v4.16B
sqxtun v16.8B, v28.8H
sqxtun2 v16.16B, v21.8H
sqxtun v0.8B, v22.8H
sqxtun2 v0.16B, v24.8H
.endm
// Luma loop filter across the boundary between rows: x0 = pix, w1 = stride,
// w2 = alpha, w3 = beta, x4 = tc0. Reads three 16-byte rows on each side of
// pix and rewrites the two rows on each side closest to the boundary.
function ff_h264_v_loop_filter_luma_neon, export=1
h264_loop_filter_start
sxtw x1, w1
// q0, q1, q2 from pix, pix + stride, pix + 2 * stride.
ld1 {v0.16B}, [x0], x1
ld1 {v2.16B}, [x0], x1
ld1 {v4.16B}, [x0], x1
// Step back six rows to pix - 3 * stride and read p2, p1, p0.
sub x0, x0, x1, lsl #2
sub x0, x0, x1, lsl #1
ld1 {v20.16B}, [x0], x1
ld1 {v18.16B}, [x0], x1
ld1 {v16.16B}, [x0], x1
h264_loop_filter_luma
// x0 is back at pix; store new p1, p0, q0, q1 starting at pix - 2 * stride.
sub x0, x0, x1, lsl #1
st1 {v17.16B}, [x0], x1
st1 {v16.16B}, [x0], x1
st1 {v0.16B}, [x0], x1
st1 {v19.16B}, [x0]
ret
endfunc
// Luma loop filter across the boundary between columns: x0 = pix,
// w1 = stride, w2 = alpha, w3 = beta, x4 = tc0. Reads 8 bytes from each of
// 16 rows starting at pix - 4, transposes them so the shared filter macro
// can be used, and writes back the 4 middle bytes of every row.
function ff_h264_h_loop_filter_luma_neon, export=1
h264_loop_filter_start
// stride is a 32-bit int in the C prototype, so the upper half of x1 is
// unspecified on entry; sign-extend it before x1 is used as a 64-bit
// post-increment (same as ff_h264_v_loop_filter_luma_neon).
sxtw x1, w1
sub x0, x0, #4
// Rows 0-7 go to the low halves, rows 8-15 to the high halves.
ld1 {v6.8B}, [x0], x1
ld1 {v20.8B}, [x0], x1
ld1 {v18.8B}, [x0], x1
ld1 {v16.8B}, [x0], x1
ld1 {v0.8B}, [x0], x1
ld1 {v2.8B}, [x0], x1
ld1 {v4.8B}, [x0], x1
ld1 {v26.8B}, [x0], x1
ld1 {v6.D}[1], [x0], x1
ld1 {v20.D}[1], [x0], x1
ld1 {v18.D}[1], [x0], x1
ld1 {v16.D}[1], [x0], x1
ld1 {v0.D}[1], [x0], x1
ld1 {v2.D}[1], [x0], x1
ld1 {v4.D}[1], [x0], x1
ld1 {v26.D}[1], [x0], x1
transpose_8x16B v6, v20, v18, v16, v0, v2, v4, v26, v21, v23
h264_loop_filter_luma
transpose_4x16B v17, v16, v0, v19, v21, v23, v25, v27
// Rewind 16 rows and move from pix - 4 to pix - 2 (the first of the four
// modified columns).
sub x0, x0, x1, lsl #4
add x0, x0, #2
st1 {v17.S}[0], [x0], x1
st1 {v16.S}[0], [x0], x1
st1 {v0.S}[0], [x0], x1
st1 {v19.S}[0], [x0], x1
st1 {v17.S}[1], [x0], x1
st1 {v16.S}[1], [x0], x1
st1 {v0.S}[1], [x0], x1
st1 {v19.S}[1], [x0], x1
st1 {v17.S}[2], [x0], x1
st1 {v16.S}[2], [x0], x1
st1 {v0.S}[2], [x0], x1
st1 {v19.S}[2], [x0], x1
st1 {v17.S}[3], [x0], x1
st1 {v16.S}[3], [x0], x1
st1 {v0.S}[3], [x0], x1
st1 {v19.S}[3], [x0], x1
ret
endfunc
// Chroma deblocking core for 8 pixel positions at once.
// Inputs: v18 = p1, v16 = p0, v0 = q0, v2 = q1, w2 = alpha, w3 = beta,
// v24.S[0] = four tc0 bytes. Outputs: v16 = new p0, v0 = new q0.
// Clobbers v4, v22, v24 to v30.
.macro h264_loop_filter_chroma
dup v22.8B, w2 // alpha
// uxtl + sli spreads each tc0 byte over two consecutive lanes.
uxtl v24.8H, v24.8B
uabd v26.8B, v16.8B, v0.8B // abs(p0 - q0)
uxtl v4.8H, v0.8B
uabd v28.8B, v18.8B, v16.8B // abs(p1 - p0)
usubw v4.8H, v4.8H, v16.8B
sli v24.8H, v24.8H, #8
shl v4.8H, v4.8H, #2
uabd v30.8B, v2.8B, v0.8B // abs(q1 - q0)
uaddw v4.8H, v4.8H, v18.8B
cmhi v26.8B, v22.8B, v26.8B // < alpha
usubw v4.8H, v4.8H, v2.8B
dup v22.8B, w3 // beta
// v4 = (((q0 - p0) << 2) + p1 - q1 + 4) >> 3
rshrn v4.8B, v4.8H, #3
cmhi v28.8B, v22.8B, v28.8B // < beta
cmhi v30.8B, v22.8B, v30.8B // < beta
// Clamp the delta with smin against v24 and smax against its negation.
smin v4.8B, v4.8B, v24.8B
neg v25.8B, v24.8B
and v26.8B, v26.8B, v28.8B
smax v4.8B, v4.8B, v25.8B
and v26.8B, v26.8B, v30.8B
uxtl v22.8H, v0.8B
// Zero the delta in lanes where any of the three threshold tests failed.
and v4.8B, v4.8B, v26.8B
uxtl v28.8H, v16.8B
saddw v28.8H, v28.8H, v4.8B
ssubw v22.8H, v22.8H, v4.8B
sqxtun v16.8B, v28.8H
sqxtun v0.8B, v22.8H
.endm
// Chroma loop filter across the boundary between rows: x0 = pix,
// w1 = stride, w2 = alpha, w3 = beta, x4 = tc0. Reads two 8-byte rows on
// each side of pix and rewrites the row directly above and the row at pix.
function ff_h264_v_loop_filter_chroma_neon, export=1
h264_loop_filter_start
// stride is a 32-bit int in the C prototype, so the upper half of x1 is
// unspecified on entry; sign-extend it before it is used in 64-bit
// address arithmetic (same as ff_h264_v_loop_filter_luma_neon).
sxtw x1, w1
sub x0, x0, x1, lsl #1
ld1 {v18.8B}, [x0], x1
ld1 {v16.8B}, [x0], x1
ld1 {v0.8B}, [x0], x1
ld1 {v2.8B}, [x0]
h264_loop_filter_chroma
// x0 points at pix + stride here; go back to pix - stride for the stores.
sub x0, x0, x1, lsl #1
st1 {v16.8B}, [x0], x1
st1 {v0.8B}, [x0], x1
ret
endfunc
// Chroma loop filter across the boundary between columns: x0 = pix,
// w1 = stride, w2 = alpha, w3 = beta, x4 = tc0. Reads 4 bytes from each of
// 8 rows starting at pix - 2, transposes, filters, transposes back and
// stores the same 4 bytes per row.
function ff_h264_h_loop_filter_chroma_neon, export=1
h264_loop_filter_start
// stride is a 32-bit int in the C prototype, so the upper half of x1 is
// unspecified on entry; sign-extend it before x1 is used as a 64-bit
// post-increment (same as ff_h264_v_loop_filter_luma_neon).
sxtw x1, w1
sub x0, x0, #2
ld1 {v18.S}[0], [x0], x1
ld1 {v16.S}[0], [x0], x1
ld1 {v0.S}[0], [x0], x1
ld1 {v2.S}[0], [x0], x1
ld1 {v18.S}[1], [x0], x1
ld1 {v16.S}[1], [x0], x1
ld1 {v0.S}[1], [x0], x1
ld1 {v2.S}[1], [x0], x1
transpose_4x8B v18, v16, v0, v2, v28, v29, v30, v31
h264_loop_filter_chroma
transpose_4x8B v18, v16, v0, v2, v28, v29, v30, v31
// Rewind the 8 rows that were read.
sub x0, x0, x1, lsl #3
st1 {v18.S}[0], [x0], x1
st1 {v16.S}[0], [x0], x1
st1 {v0.S}[0], [x0], x1
st1 {v2.S}[0], [x0], x1
st1 {v18.S}[1], [x0], x1
st1 {v16.S}[1], [x0], x1
st1 {v0.S}[1], [x0], x1
st1 {v2.S}[1], [x0], x1
ret
endfunc
// Bi-weighted prediction body for 16-pixel-wide rows, two rows per pass.
// macd / macs are the widening multiply-accumulate instructions applied to
// the rows read from x0 and x1 respectively. Expects, as set up by
// biweight_func: w5 / w6 = weight magnitudes, v16 = accumulator start value,
// v18 = per-lane shift amount, x7 = write pointer, x2 = stride, w3 = rows.
// Ends with ret, so it terminates the enclosing function.
.macro biweight_16 macs, macd
dup v0.16B, w5
dup v1.16B, w6
mov v4.16B, v16.16B
mov v6.16B, v16.16B
1: subs w3, w3, #2
ld1 {v20.16B}, [x0], x2
\macd v4.8H, v0.8B, v20.8B
\macd\()2 v6.8H, v0.16B, v20.16B
ld1 {v22.16B}, [x1], x2
\macs v4.8H, v1.8B, v22.8B
\macs\()2 v6.8H, v1.16B, v22.16B
mov v24.16B, v16.16B
ld1 {v28.16B}, [x0], x2
mov v26.16B, v16.16B
\macd v24.8H, v0.8B, v28.8B
\macd\()2 v26.8H, v0.16B, v28.16B
ld1 {v30.16B}, [x1], x2
\macs v24.8H, v1.8B, v30.8B
\macs\()2 v26.8H, v1.16B, v30.16B
// Shift by v18 and narrow with unsigned saturation.
sshl v4.8H, v4.8H, v18.8H
sshl v6.8H, v6.8H, v18.8H
sqxtun v4.8B, v4.8H
sqxtun2 v4.16B, v6.8H
sshl v24.8H, v24.8H, v18.8H
sshl v26.8H, v26.8H, v18.8H
sqxtun v24.8B, v24.8H
sqxtun2 v24.16B, v26.8H
// Re-seed the first-row accumulators for the next pass while storing.
mov v6.16B, v16.16B
st1 {v4.16B}, [x7], x2
mov v4.16B, v16.16B
st1 {v24.16B}, [x7], x2
b.ne 1b
ret
.endm
// Bi-weighted prediction body for 8-pixel-wide rows, two rows per pass.
// Same register contract as biweight_16: x0 / x1 are the two inputs, x7 the
// write pointer, v16 the accumulator start value, v18 the shift amount.
// Ends with ret.
.macro biweight_8 macs, macd
dup v0.8B, w5
dup v1.8B, w6
mov v2.16B, v16.16B
mov v20.16B, v16.16B
1: subs w3, w3, #2
ld1 {v4.8B}, [x0], x2
\macd v2.8H, v0.8B, v4.8B
ld1 {v5.8B}, [x1], x2
\macs v2.8H, v1.8B, v5.8B
ld1 {v6.8B}, [x0], x2
\macd v20.8H, v0.8B, v6.8B
ld1 {v7.8B}, [x1], x2
\macs v20.8H, v1.8B, v7.8B
sshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
sshl v20.8H, v20.8H, v18.8H
sqxtun v4.8B, v20.8H
// Re-seed both accumulators for the next pass.
mov v20.16B, v16.16B
st1 {v2.8B}, [x7], x2
mov v2.16B, v16.16B
st1 {v4.8B}, [x7], x2
b.ne 1b
ret
.endm
// Bi-weighted prediction body for 4-pixel-wide rows. Each pass handles four
// rows (two rows packed per 8-byte vector); when the row counter goes
// negative after the first pair, the tail at label 2 writes only two rows.
// Same register contract as biweight_16. Ends with ret on both paths.
.macro biweight_4 macs, macd
dup v0.8B, w5
dup v1.8B, w6
mov v2.16B, v16.16B
mov v20.16B,v16.16B
1: subs w3, w3, #4
ld1 {v4.S}[0], [x0], x2
ld1 {v4.S}[1], [x0], x2
\macd v2.8H, v0.8B, v4.8B
ld1 {v5.S}[0], [x1], x2
ld1 {v5.S}[1], [x1], x2
\macs v2.8H, v1.8B, v5.8B
// Fewer than four rows were left: finish with the two rows already summed.
b.lt 2f
ld1 {v6.S}[0], [x0], x2
ld1 {v6.S}[1], [x0], x2
\macd v20.8H, v0.8B, v6.8B
ld1 {v7.S}[0], [x1], x2
ld1 {v7.S}[1], [x1], x2
\macs v20.8H, v1.8B, v7.8B
sshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
sshl v20.8H, v20.8H, v18.8H
sqxtun v4.8B, v20.8H
mov v20.16B, v16.16B
st1 {v2.S}[0], [x7], x2
st1 {v2.S}[1], [x7], x2
mov v2.16B, v16.16B
st1 {v4.S}[0], [x7], x2
st1 {v4.S}[1], [x7], x2
b.ne 1b
ret
2: sshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
st1 {v2.S}[0], [x7], x2
st1 {v2.S}[1], [x7], x2
ret
.endm
// Emits ff_biweight_h264_pixels_<w>_neon. Register use on entry, following
// the argument order of the C prototype (dst, src, stride, height, log2_den,
// weightd, weights, offset): x0, x1, w2, w3, w4, w5, w6, w7.
// The multiply-accumulate instructions are unsigned, so the function
// dispatches on the signs of the two weights, negates the negative ones and
// picks umlal (add) or umlsl (subtract) accordingly. Each biweight_<w>
// expansion ends in ret, so the four variants do not fall through.
.macro biweight_func w
function ff_biweight_h264_pixels_\w\()_neon, export=1
sxtw x2, w2
// w8 = (sign bit of w5) xor (w6 logically shifted right by 30); the dispatch
// below treats 0, 1, 2 and anything else as the four sign combinations.
lsr w8, w5, #31
add w7, w7, #1
eor w8, w8, w6, lsr #30
// v16 = ((offset + 1) | 1) << log2_den, the accumulator start value.
orr w7, w7, #1
dup v18.8H, w4
lsl w7, w7, w4
// v18 = bitwise NOT of log2_den, i.e. -(log2_den + 1); used with sshl this
// is an arithmetic shift right by log2_den + 1.
not v18.16B, v18.16B
dup v16.8H, w7
// From here on x7 is the write pointer (x0 keeps advancing as a read pointer).
mov x7, x0
cbz w8, 10f
subs w8, w8, #1
b.eq 20f
subs w8, w8, #1
b.eq 30f
b 40f
10: biweight_\w umlal, umlal
20: neg w5, w5
biweight_\w umlal, umlsl
30: neg w5, w5
neg w6, w6
biweight_\w umlsl, umlsl
40: neg w6, w6
biweight_\w umlsl, umlal
endfunc
.endm
biweight_func 16
biweight_func 8
biweight_func 4
// Weighted prediction body for 16-pixel-wide rows, two rows per pass.
// The add parameter is the instruction that combines v16 (offset term) with
// the product weight * pixel. Expects, as set up by weight_func: w4 = weight
// magnitude, v16 = offset term, v18 = rounding shift amount for srshl,
// x0 = read pointer, x5 = write pointer, x1 = stride, w2 = rows.
// Ends with ret.
.macro weight_16 add
dup v0.16B, w4
1: subs w2, w2, #2
ld1 {v20.16B}, [x0], x1
umull v4.8H, v0.8B, v20.8B
umull2 v6.8H, v0.16B, v20.16B
ld1 {v28.16B}, [x0], x1
umull v24.8H, v0.8B, v28.8B
umull2 v26.8H, v0.16B, v28.16B
\add v4.8H, v16.8H, v4.8H
srshl v4.8H, v4.8H, v18.8H
\add v6.8H, v16.8H, v6.8H
srshl v6.8H, v6.8H, v18.8H
sqxtun v4.8B, v4.8H
sqxtun2 v4.16B, v6.8H
\add v24.8H, v16.8H, v24.8H
srshl v24.8H, v24.8H, v18.8H
\add v26.8H, v16.8H, v26.8H
srshl v26.8H, v26.8H, v18.8H
sqxtun v24.8B, v24.8H
sqxtun2 v24.16B, v26.8H
st1 {v4.16B}, [x5], x1
st1 {v24.16B}, [x5], x1
b.ne 1b
ret
.endm
// Weighted prediction body for 8-pixel-wide rows, two rows per pass.
// Same register contract as weight_16 (x0 read pointer, x5 write pointer,
// v16 offset term, v18 shift amount, w4 weight magnitude). Ends with ret.
.macro weight_8 add
dup v0.8B, w4
1: subs w2, w2, #2
ld1 {v4.8B}, [x0], x1
umull v2.8H, v0.8B, v4.8B
ld1 {v6.8B}, [x0], x1
umull v20.8H, v0.8B, v6.8B
\add v2.8H, v16.8H, v2.8H
srshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
\add v20.8H, v16.8H, v20.8H
srshl v20.8H, v20.8H, v18.8H
sqxtun v4.8B, v20.8H
st1 {v2.8B}, [x5], x1
st1 {v4.8B}, [x5], x1
b.ne 1b
ret
.endm
// Weighted prediction body for 4-pixel-wide rows. Each pass handles four
// rows (two rows packed per 8-byte vector); when the row counter goes
// negative after the first pair, the tail at label 2 writes only two rows.
// Same register contract as weight_16. Ends with ret on both paths.
.macro weight_4 add
dup v0.8B, w4
1: subs w2, w2, #4
ld1 {v4.S}[0], [x0], x1
ld1 {v4.S}[1], [x0], x1
umull v2.8H, v0.8B, v4.8B
// Fewer than four rows were left: finish with the two rows already loaded.
b.lt 2f
ld1 {v6.S}[0], [x0], x1
ld1 {v6.S}[1], [x0], x1
umull v20.8H, v0.8B, v6.8B
\add v2.8H, v16.8H, v2.8H
srshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
\add v20.8H, v16.8H, v20.8H
srshl v20.8H, v20.8h, v18.8H
sqxtun v4.8B, v20.8H
st1 {v2.S}[0], [x5], x1
st1 {v2.S}[1], [x5], x1
st1 {v4.S}[0], [x5], x1
st1 {v4.S}[1], [x5], x1
b.ne 1b
ret
2: \add v2.8H, v16.8H, v2.8H
srshl v2.8H, v2.8H, v18.8H
sqxtun v2.8B, v2.8H
st1 {v2.S}[0], [x5], x1
st1 {v2.S}[1], [x5], x1
ret
.endm
// Emits ff_weight_h264_pixels_<w>_neon. Register use on entry, following the
// argument order of the C prototype (dst, stride, height, log2_den, weight,
// offset): x0, w1, w2, w3, w4, w5.
// Four variants are generated, selected by log2_den > 1 and by the sign of
// the weight; each weight_<w> expansion ends in ret. The label 10 is defined
// twice on purpose: each b.lt 10f binds to the next following definition.
.macro weight_func w
function ff_weight_h264_pixels_\w\()_neon, export=1
sxtw x1, w1
cmp w3, #1
mov w6, #1
// v16 = offset << log2_den
lsl w5, w5, w3
dup v16.8H, w5
// From here on x5 is the write pointer (x0 keeps advancing as a read pointer).
mov x5, x0
// Flags still come from the cmp above: log2_den <= 1 takes the plain path.
b.le 20f
// log2_den > 1: halving add/sub plus a rounding shift right by log2_den - 1
// (v18 holds 1 - log2_den).
sub w6, w6, w3
dup v18.8H, w6
cmp w4, #0
b.lt 10f
weight_\w shadd
10: neg w4, w4
weight_\w shsub
// log2_den <= 1: plain add/sub plus a rounding shift right by log2_den.
20: neg w6, w3
dup v18.8H, w6
cmp w4, #0
b.lt 10f
weight_\w add
10: neg w4, w4
weight_\w sub
endfunc
.endm
weight_func 16
weight_func 8
weight_func 4

View File

@@ -0,0 +1,408 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#include "neon.S"
// 4x4 inverse transform added to the destination: x0 = dst, x1 = block
// (16 coefficients of 16 bits), w2 = stride. The 32-byte block is zeroed
// while it is being processed and x1 is restored before returning, so x0
// (advanced by four rows) is the only pointer argument that changes.
function ff_h264_idct_add_neon, export=1
ld1 {v0.4H, v1.4H, v2.4H, v3.4H}, [x1]
sxtw x2, w2
movi v30.8H, #0
// First 1-D pass, interleaved with two 16-byte stores that clear the block.
add v4.4H, v0.4H, v2.4H
sshr v16.4H, v1.4H, #1
st1 {v30.8H}, [x1], #16
sshr v17.4H, v3.4H, #1
st1 {v30.8H}, [x1], #16
sub v5.4H, v0.4H, v2.4H
add v6.4H, v1.4H, v17.4H
sub v7.4H, v16.4H, v3.4H
add v0.4H, v4.4H, v6.4H
add v1.4H, v5.4H, v7.4H
sub v2.4H, v4.4H, v6.4H
sub v3.4H, v5.4H, v7.4H
// NOTE(review): the register roles in the second pass depend on the output
// order of transpose_4x4H, which is defined in neon.S and not visible here.
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
// Second 1-D pass, interleaved with the destination loads.
// Load order gives v18 = rows 0 and 2, v19 = rows 3 and 1.
add v4.4H, v0.4H, v3.4H
ld1 {v18.S}[0], [x0], x2
sshr v16.4H, v2.4H, #1
sshr v17.4H, v1.4H, #1
ld1 {v19.S}[1], [x0], x2
sub v5.4H, v0.4H, v3.4H
ld1 {v18.S}[1], [x0], x2
add v6.4H, v16.4H, v1.4H
ins v4.D[1], v5.D[0]
sub v7.4H, v2.4H, v17.4H
ld1 {v19.S}[0], [x0], x2
ins v6.D[1], v7.D[0]
sub x0, x0, x2, lsl #2
add v0.8H, v4.8H, v6.8H
sub v1.8H, v4.8H, v6.8H
// Rounding shift right by 6, add the destination pixels, saturate to bytes.
srshr v0.8H, v0.8H, #6
srshr v1.8H, v1.8H, #6
uaddw v0.8H, v0.8H, v18.8B
uaddw v1.8H, v1.8H, v19.8B
sqxtun v0.8B, v0.8H
sqxtun v1.8B, v1.8H
// Store order mirrors the load order above.
st1 {v0.S}[0], [x0], x2
st1 {v1.S}[1], [x0], x2
st1 {v0.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
// Undo the two post-increments from the block-clearing stores.
sub x1, x1, #32
ret
endfunc
// DC-only 4x4 case: x0 = dst, x1 = block, w2 = stride. Adds the rounded
// value (block[0] + 32) >> 6 to all 16 destination pixels with saturation
// and clears block[0]. Clobbers w3; x1 is left unchanged.
function ff_h264_idct_dc_add_neon, export=1
sxtw x2, w2
mov w3, #0
// Broadcast block[0] to all lanes, then zero it in memory.
ld1r {v2.8H}, [x1]
strh w3, [x1]
srshr v2.8H, v2.8H, #6
ld1 {v0.S}[0], [x0], x2
ld1 {v0.S}[1], [x0], x2
uaddw v3.8H, v2.8H, v0.8B
ld1 {v1.S}[0], [x0], x2
ld1 {v1.S}[1], [x0], x2
uaddw v4.8H, v2.8H, v1.8B
sqxtun v0.8B, v3.8H
sqxtun v1.8B, v4.8H
// Rewind the four rows that were read before storing.
sub x0, x0, x2, lsl #2
st1 {v0.S}[0], [x0], x2
st1 {v0.S}[1], [x0], x2
st1 {v1.S}[0], [x0], x2
st1 {v1.S}[1], [x0], x2
ret
endfunc
// Runs the 4x4 transform over 16 consecutive blocks.
// Entry: x0 = dest, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc.
// For block i the count nnzc[scan8[i]] decides what is called:
//   0                          -> nothing
//   1 and block[0] is nonzero  -> ff_h264_idct_dc_add_neon
//   otherwise                  -> ff_h264_idct_add_neon
// The return address is kept in x12 because blr overwrites x30.
function ff_h264_idct_add16_neon, export=1
mov x12, x30
mov x6, x0 // dest
mov x5, x1 // block_offset
mov x1, x2 // block
mov w9, w3 // stride
movrel x7, scan8
mov x10, #16
movrel x13, X(ff_h264_idct_dc_add_neon)
movrel x14, X(ff_h264_idct_add_neon)
// w2 is reloaded every pass; the callees sign-extend it themselves.
1: mov w2, w9
ldrb w3, [x7], #1
ldrsw x0, [x5], #4
ldrb w3, [x4, w3, uxtw]
subs w3, w3, #1
b.lt 2f
ldrsh w3, [x1]
add x0, x0, x6
// If the count was exactly 1, compare block[0] with 0; otherwise force
// "equal" so that the full transform is selected.
ccmp w3, #0, #4, eq
csel x15, x13, x14, ne
blr x15
// Advance to the next block (16 coefficients = 32 bytes).
2: subs x10, x10, #1
add x1, x1, #32
b.ne 1b
ret x12
endfunc
// Intra variant of the 16-block loop.
// Entry: x0 = dest, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc.
// For block i:
//   nnzc[scan8[i]] nonzero         -> ff_h264_idct_add_neon
//   zero but block[0] is nonzero   -> ff_h264_idct_dc_add_neon
//   zero and block[0] is zero      -> nothing
// The return address is kept in x12 because blr overwrites x30.
function ff_h264_idct_add16intra_neon, export=1
mov x12, x30
mov x6, x0 // dest
mov x5, x1 // block_offset
mov x1, x2 // block
mov w9, w3 // stride
movrel x7, scan8
mov x10, #16
movrel x13, X(ff_h264_idct_dc_add_neon)
movrel x14, X(ff_h264_idct_add_neon)
1: mov w2, w9
ldrb w3, [x7], #1
ldrsw x0, [x5], #4
ldrb w3, [x4, w3, uxtw]
add x0, x0, x6
cmp w3, #0
ldrsh w3, [x1]
// Count zero selects the DC routine, anything else the full transform.
csel x15, x13, x14, eq
// Only when the count is zero does block[0] decide whether to skip.
ccmp w3, #0, #0, eq
b.eq 2f
blr x15
// Advance to the next block (16 coefficients = 32 bytes).
2: subs x10, x10, #1
add x1, x1, #32
b.ne 1b
ret x12
endfunc
// Chroma 4x4 transforms for two planes.
// Entry: x0 = pointer to two destination pointers, x1 = block_offset,
// x2 = block, w3 = stride, x4 = nnzc. All tables are biased by 16 entries,
// then the loop index x10 runs over 0..3 using the first destination and
// over 16..19 using the second one. The block selection rule is the same as
// in ff_h264_idct_add16intra_neon.
// x19 and x20 are callee-saved and therefore spilled; 0x40 bytes of stack
// are reserved although only 16 are written.
function ff_h264_idct_add8_neon, export=1
sub sp, sp, #0x40
stp x19, x20, [sp]
mov x12, x30
ldp x6, x15, [x0] // dest[0], dest[1]
add x5, x1, #16*4 // block_offset
add x9, x2, #16*32 // block
mov w19, w3 // stride
movrel x13, X(ff_h264_idct_dc_add_neon)
movrel x14, X(ff_h264_idct_add_neon)
movrel x7, scan8+16
mov x10, #0
mov x11, #16
1: mov w2, w19
ldrb w3, [x7, x10] // scan8[i]
ldrsw x0, [x5, x10, lsl #2] // block_offset[i]
ldrb w3, [x4, w3, uxtw] // nnzc[ scan8[i] ]
add x0, x0, x6 // block_offset[i] + dst[j-1]
add x1, x9, x10, lsl #5 // block + i * 16
cmp w3, #0
ldrsh w3, [x1] // block[i*16]
csel x20, x13, x14, eq
ccmp w3, #0, #0, eq
b.eq 2f
blr x20
2: add x10, x10, #1
// After the fourth block jump the index to 16 and switch to dest[1].
cmp x10, #4
csel x10, x11, x10, eq // mov x10, #16
csel x6, x15, x6, eq
cmp x10, #20
b.lt 1b
ldp x19, x20, [sp]
add sp, sp, #0x40
ret x12
endfunc
// One 1-D pass of the 8x8 inverse transform over eight 8x16-bit vectors.
// The pass argument must be 0 or 1:
//   pass 0: inputs are v24-v29 plus two vectors that this macro itself loads
//           from x1 into v30/v31; that memory is then cleared with two
//           stores of v19, so v19 must be zero on entry and x1 advances by
//           32 bytes. Results are left in v24-v29, v18 and v31.
//   pass 1: inputs are v24-v29, v18 and v31; results are left in v24-v31.
// va and vb are temporary register aliases whose meaning swaps between the
// passes; they are released again at the end. v16, v17 and v19 are scratch.
.macro idct8x8_cols pass
.if \pass == 0
va .req v18
vb .req v30
sshr v18.8H, v26.8H, #1
add v16.8H, v24.8H, v28.8H
ld1 {v30.8H, v31.8H}, [x1]
st1 {v19.8H}, [x1], #16
st1 {v19.8H}, [x1], #16
sub v17.8H, v24.8H, v28.8H
sshr v19.8H, v30.8H, #1
sub v18.8H, v18.8H, v30.8H
add v19.8H, v19.8H, v26.8H
.else
va .req v30
vb .req v18
sshr v30.8H, v26.8H, #1
sshr v19.8H, v18.8H, #1
add v16.8H, v24.8H, v28.8H
sub v17.8H, v24.8H, v28.8H
sub v30.8H, v30.8H, v18.8H
add v19.8H, v19.8H, v26.8H
.endif
// Butterflies on the terms prepared in the pass-specific code above.
add v26.8H, v17.8H, va.8H
sub v28.8H, v17.8H, va.8H
add v24.8H, v16.8H, v19.8H
sub vb.8H, v16.8H, v19.8H
// Terms built from v25, v27, v29 and v31.
sub v16.8H, v29.8H, v27.8H
add v17.8H, v31.8H, v25.8H
sub va.8H, v31.8H, v25.8H
add v19.8H, v29.8H, v27.8H
sub v16.8H, v16.8H, v31.8H
sub v17.8H, v17.8H, v27.8H
add va.8H, va.8H, v29.8H
add v19.8H, v19.8H, v25.8H
sshr v25.8H, v25.8H, #1
sshr v27.8H, v27.8H, #1
sshr v29.8H, v29.8H, #1
sshr v31.8H, v31.8H, #1
sub v16.8H, v16.8H, v31.8H
sub v17.8H, v17.8H, v27.8H
add va.8H, va.8H, v29.8H
add v19.8H, v19.8H, v25.8H
sshr v25.8H, v16.8H, #2
sshr v27.8H, v17.8H, #2
sshr v29.8H, va.8H, #2
sshr v31.8H, v19.8H, #2
sub v19.8H, v19.8H, v25.8H
sub va.8H, v27.8H, va.8H
add v17.8H, v17.8H, v29.8H
add v16.8H, v16.8H, v31.8H
// Final combination; identical arithmetic in both passes, but written with
// explicit registers because va and vb differ between them.
.if \pass == 0
sub v31.8H, v24.8H, v19.8H
add v24.8H, v24.8H, v19.8H
add v25.8H, v26.8H, v18.8H
sub v18.8H, v26.8H, v18.8H
add v26.8H, v28.8H, v17.8H
add v27.8H, v30.8H, v16.8H
sub v29.8H, v28.8H, v17.8H
sub v28.8H, v30.8H, v16.8H
.else
sub v31.8H, v24.8H, v19.8H
add v24.8H, v24.8H, v19.8H
add v25.8H, v26.8H, v30.8H
sub v30.8H, v26.8H, v30.8H
add v26.8H, v28.8H, v17.8H
sub v29.8H, v28.8H, v17.8H
add v27.8H, v18.8H, v16.8H
sub v28.8H, v18.8H, v16.8H
.endif
.unreq va
.unreq vb
.endm
// 8x8 inverse transform added to the destination: x0 = dst, x1 = block
// (64 coefficients of 16 bits), w2 = stride. The 128-byte block is zeroed
// while it is being read and x1 is restored before returning. x0 ends up
// eight rows further on; x3 is used as the write pointer.
function ff_h264_idct8_add_neon, export=1
// stride is a 32-bit int in the C prototype, and ff_h264_idct8_add4_neon
// hands it over with a 32-bit move (which zero-extends), so sign-extend it
// before x2 is used as a 64-bit post-increment. Without this a negative
// stride produces wrong addresses.
sxtw x2, w2
movi v19.8H, #0
// Load rows 0-5 and clear them; rows 6-7 are loaded and cleared inside the
// first idct8x8_cols pass.
ld1 {v24.8H, v25.8H}, [x1]
st1 {v19.8H}, [x1], #16
st1 {v19.8H}, [x1], #16
ld1 {v26.8H, v27.8H}, [x1]
st1 {v19.8H}, [x1], #16
st1 {v19.8H}, [x1], #16
ld1 {v28.8H, v29.8H}, [x1]
st1 {v19.8H}, [x1], #16
st1 {v19.8H}, [x1], #16
idct8x8_cols 0
transpose_8x8H v24, v25, v26, v27, v28, v29, v18, v31, v6, v7
idct8x8_cols 1
mov x3, x0
// Rounding shift right by 6, interleaved with the destination loads.
srshr v24.8H, v24.8H, #6
ld1 {v0.8B}, [x0], x2
srshr v25.8H, v25.8H, #6
ld1 {v1.8B}, [x0], x2
srshr v26.8H, v26.8H, #6
ld1 {v2.8B}, [x0], x2
srshr v27.8H, v27.8H, #6
ld1 {v3.8B}, [x0], x2
srshr v28.8H, v28.8H, #6
ld1 {v4.8B}, [x0], x2
srshr v29.8H, v29.8H, #6
ld1 {v5.8B}, [x0], x2
srshr v30.8H, v30.8H, #6
ld1 {v6.8B}, [x0], x2
srshr v31.8H, v31.8H, #6
ld1 {v7.8B}, [x0], x2
// Add the destination pixels, saturate to bytes and store through x3.
uaddw v24.8H, v24.8H, v0.8B
uaddw v25.8H, v25.8H, v1.8B
uaddw v26.8H, v26.8H, v2.8B
sqxtun v0.8B, v24.8H
uaddw v27.8H, v27.8H, v3.8B
sqxtun v1.8B, v25.8H
uaddw v28.8H, v28.8H, v4.8B
sqxtun v2.8B, v26.8H
st1 {v0.8B}, [x3], x2
uaddw v29.8H, v29.8H, v5.8B
sqxtun v3.8B, v27.8H
st1 {v1.8B}, [x3], x2
uaddw v30.8H, v30.8H, v6.8B
sqxtun v4.8B, v28.8H
st1 {v2.8B}, [x3], x2
uaddw v31.8H, v31.8H, v7.8B
sqxtun v5.8B, v29.8H
st1 {v3.8B}, [x3], x2
sqxtun v6.8B, v30.8H
sqxtun v7.8B, v31.8H
st1 {v4.8B}, [x3], x2
st1 {v5.8B}, [x3], x2
st1 {v6.8B}, [x3], x2
st1 {v7.8B}, [x3], x2
// Undo the eight 16-byte post-increments from the block-clearing stores.
sub x1, x1, #128
ret
endfunc
// DC-only 8x8 case: x0 = dst, x1 = block, w2 = stride. Adds the rounded
// value (block[0] + 32) >> 6 to all 64 destination pixels with saturation
// and clears block[0]. Clobbers w3; x0 ends up eight rows further on.
function ff_h264_idct8_dc_add_neon, export=1
mov w3, #0
sxtw x2, w2
// Broadcast block[0] to all lanes, then zero it in memory.
ld1r {v31.8H}, [x1]
strh w3, [x1]
ld1 {v0.8B}, [x0], x2
srshr v31.8H, v31.8H, #6
ld1 {v1.8B}, [x0], x2
ld1 {v2.8B}, [x0], x2
uaddw v24.8H, v31.8H, v0.8B
ld1 {v3.8B}, [x0], x2
uaddw v25.8H, v31.8H, v1.8B
ld1 {v4.8B}, [x0], x2
uaddw v26.8H, v31.8H, v2.8B
ld1 {v5.8B}, [x0], x2
uaddw v27.8H, v31.8H, v3.8B
ld1 {v6.8B}, [x0], x2
uaddw v28.8H, v31.8H, v4.8B
ld1 {v7.8B}, [x0], x2
uaddw v29.8H, v31.8H, v5.8B
uaddw v30.8H, v31.8H, v6.8B
uaddw v31.8H, v31.8H, v7.8B
sqxtun v0.8B, v24.8H
sqxtun v1.8B, v25.8H
sqxtun v2.8B, v26.8H
sqxtun v3.8B, v27.8H
// Rewind the eight rows that were read before storing.
sub x0, x0, x2, lsl #3
st1 {v0.8B}, [x0], x2
sqxtun v4.8B, v28.8H
st1 {v1.8B}, [x0], x2
sqxtun v5.8B, v29.8H
st1 {v2.8B}, [x0], x2
sqxtun v6.8B, v30.8H
st1 {v3.8B}, [x0], x2
sqxtun v7.8B, v31.8H
st1 {v4.8B}, [x0], x2
st1 {v5.8B}, [x0], x2
st1 {v6.8B}, [x0], x2
st1 {v7.8B}, [x0], x2
ret
endfunc
// Runs the 8x8 transform over four blocks.
// Entry: x0 = dest, x1 = block_offset, x2 = block, w3 = stride, x4 = nnzc.
// Every fourth entry of scan8 and of block_offset is used (post-increments
// of 4 bytes and 16 bytes), and the block pointer advances by 128 bytes.
// Selection rule per block, based on nnzc[scan8[i]]:
//   0                          -> nothing
//   1 and block[0] is nonzero  -> ff_h264_idct8_dc_add_neon
//   otherwise                  -> ff_h264_idct8_add_neon
// The return address is kept in x12 because blr overwrites x30.
function ff_h264_idct8_add4_neon, export=1
mov x12, x30
mov x6, x0
mov x5, x1
mov x1, x2
// 32-bit move: the upper half of x2 is zero here, not sign-extended.
mov w2, w3
movrel x7, scan8
mov w10, #16
movrel x13, X(ff_h264_idct8_dc_add_neon)
movrel x14, X(ff_h264_idct8_add_neon)
1: ldrb w9, [x7], #4
ldrsw x0, [x5], #16
ldrb w9, [x4, w9, UXTW]
subs w9, w9, #1
b.lt 2f
ldrsh w11, [x1]
add x0, x6, x0
// If the count was exactly 1, compare block[0] with 0; otherwise force
// "equal" so that the full transform is selected.
ccmp w11, #0, #4, eq
csel x15, x13, x14, ne
blr x15
2: subs w10, w10, #4
add x1, x1, #128
b.ne 1b
ret x12
endfunc
// Byte lookup table with 48 entries, each of the form column + row * 8.
// The idct add loops in this file use it to turn a block index into an
// index into the nnzc array.
const scan8
.byte 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
.byte 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
.byte 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
.byte 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
.byte 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
.byte 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
.byte 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
.byte 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
.byte 4+11*8, 5+11*8, 4+12*8, 5+12*8
.byte 6+11*8, 7+11*8, 6+12*8, 7+12*8
.byte 4+13*8, 5+13*8, 4+14*8, 5+14*8
.byte 6+13*8, 7+13*8, 6+14*8, 7+14*8
endconst

View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/h264pred.h"
void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
/**
 * Fill an H264PredContext with the NEON intra prediction routines.
 *
 * Streams deeper than 8 bits are left untouched. The 8x8 chroma predictors
 * are only installed for chroma_format_idc 0 or 1, and several predictors
 * are skipped for codecs that share this context (RV40, SVQ3, VP7, VP8).
 *
 * @param h                 prediction context to update
 * @param codec_id          codec that owns the context
 * @param bit_depth         sample bit depth
 * @param chroma_format_idc chroma format indicator
 */
static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
                                        const int bit_depth,
                                        const int chroma_format_idc)
{
    const int is_vp7_or_vp8 = codec_id == AV_CODEC_ID_VP7 ||
                              codec_id == AV_CODEC_ID_VP8;
    const int is_rv40       = codec_id == AV_CODEC_ID_RV40;
    const int is_svq3       = codec_id == AV_CODEC_ID_SVQ3;

    if (bit_depth > 8)
        return;

    if (chroma_format_idc <= 1) {
        h->pred8x8[VERT_PRED8x8   ] = ff_pred8x8_vert_neon;
        h->pred8x8[HOR_PRED8x8    ] = ff_pred8x8_hor_neon;
        if (!is_vp7_or_vp8)
            h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
        h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon;

        if (!is_rv40 && !is_vp7_or_vp8) {
            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
            h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
        }
    }

    h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
    h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
    h->pred16x16[HOR_PRED8x8    ] = ff_pred16x16_hor_neon;
    h->pred16x16[LEFT_DC_PRED8x8] = ff_pred16x16_left_dc_neon;
    h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon;
    h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_neon;
    if (!is_svq3 && !is_rv40 && !is_vp7_or_vp8)
        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_neon;
}
/**
 * AArch64 entry point for intra prediction setup: installs the NEON
 * predictors when the running CPU reports NEON support, otherwise leaves
 * the context as it is.
 */
av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
                                       int bit_depth, const int chroma_format_idc)
{
    const int cpu_flags = av_get_cpu_flags();

    if (!(have_neon(cpu_flags)))
        return;

    h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
}

View File

@@ -0,0 +1,361 @@
/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
// Gathers single bytes into consecutive lanes of vector rd. Each load reads
// one byte at rs and then advances rs by rt, so with rt = stride this reads
// a column of pixels.
//   n >= 8 or hi == 0 : lanes 0-3 are loaded
//   n >= 8 or hi == 1 : lanes 4-7 are loaded
//   n == 16           : lanes 8-15 are loaded as well
// Lanes that are not loaded keep their previous contents.
.macro ldcol.8 rd, rs, rt, n=8, hi=0
.if \n >= 8 || \hi == 0
ld1 {\rd\().b}[0], [\rs], \rt
ld1 {\rd\().b}[1], [\rs], \rt
ld1 {\rd\().b}[2], [\rs], \rt
ld1 {\rd\().b}[3], [\rs], \rt
.endif
.if \n >= 8 || \hi == 1
ld1 {\rd\().b}[4], [\rs], \rt
ld1 {\rd\().b}[5], [\rs], \rt
ld1 {\rd\().b}[6], [\rs], \rt
ld1 {\rd\().b}[7], [\rs], \rt
.endif
.if \n == 16
ld1 {\rd\().b}[8], [\rs], \rt
ld1 {\rd\().b}[9], [\rs], \rt
ld1 {\rd\().b}[10], [\rs], \rt
ld1 {\rd\().b}[11], [\rs], \rt
ld1 {\rd\().b}[12], [\rs], \rt
ld1 {\rd\().b}[13], [\rs], \rt
ld1 {\rd\().b}[14], [\rs], \rt
ld1 {\rd\().b}[15], [\rs], \rt
.endif
.endm
// Fills the 16x16 block at x0 (stride x1) with the constant 128 by jumping
// into the shared store loop inside ff_pred16x16_dc_neon.
function ff_pred16x16_128_dc_neon, export=1
movi v0.16b, #128
b .L_pred16x16_dc_end
endfunc
// Fills the 16x16 block at x0 (stride x1) with the rounded mean of the 16
// pixels in the row directly above it, using the shared store loop inside
// ff_pred16x16_dc_neon.
function ff_pred16x16_top_dc_neon, export=1
sub x2, x0, x1
ld1 {v0.16b}, [x2]
// Sum the 16 bytes, then (sum + 8) >> 4.
uaddlv h0, v0.16b
rshrn v0.8b, v0.8h, #4
dup v0.16b, v0.b[0]
b .L_pred16x16_dc_end
endfunc
// Fills the 16x16 block at x0 (stride x1) with the rounded mean of the 16
// pixels in the column directly to its left, using the shared store loop
// inside ff_pred16x16_dc_neon.
function ff_pred16x16_left_dc_neon, export=1
sub x2, x0, #1
ldcol.8 v0, x2, x1, 16
// Sum the 16 bytes, then (sum + 8) >> 4.
uaddlv h0, v0.16b
rshrn v0.8b, v0.8h, #4
dup v0.16b, v0.b[0]
b .L_pred16x16_dc_end
endfunc
// Fills the 16x16 block at x0 (stride x1) with the rounded mean of the 16
// pixels above and the 16 pixels to the left. The tail starting at
// .L_pred16x16_dc_end is also the common store loop for the other 16x16 DC
// variants: it writes v0 to 16 rows, two rows per iteration.
function ff_pred16x16_dc_neon, export=1
sub x2, x0, x1
sub x3, x0, #1
ld1 {v0.16b}, [x2]
ldcol.8 v1, x3, x1, 16
uaddlv h0, v0.16b
uaddlv h1, v1.16b
// (top sum + left sum + 16) >> 5
add v0.4h, v0.4h, v1.4h
rshrn v0.8b, v0.8h, #5
dup v0.16b, v0.b[0]
.L_pred16x16_dc_end:
mov w3, #8
6: st1 {v0.16b}, [x0], x1
st1 {v0.16b}, [x0], x1
subs w3, w3, #1
b.ne 6b
ret
endfunc
// Horizontal prediction for a 16x16 block at x0 (stride x1): every row is
// filled with the pixel immediately to its left.
function ff_pred16x16_hor_neon, export=1
sub x2, x0, #1
mov w3, #16
// ld1r replicates the single left-neighbour byte across the whole vector.
1: ld1r {v0.16b}, [x2], x1
st1 {v0.16b}, [x0], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// 16x16 vertical prediction: every row is a copy of the row above the block.
// x0: address of the top-left sample of the block, x1: byte step between rows
// (x1 is doubled inside the function).
function ff_pred16x16_vert_neon, export=1
// x2 = row above the block
sub x2, x0, x1
// Step two rows at a time so x0 and x2 can interleave even and odd rows.
add x1, x1, x1
// Load the top row. The post-increment by two rows leaves x2 at row 1.
ld1 {v0.16b}, [x2], x1
mov w3, #8
// x0 writes rows 0, 2, 4, ... and x2 writes rows 1, 3, 5, ...
1: st1 {v0.16b}, [x0], x1
st1 {v0.16b}, [x2], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// 16x16 plane prediction.
// x0: address of the top-left sample of the block, x1: byte step between rows.
// Reads the row above (x0 - x1), the column to the left (x0 - 1) and the
// corner sample (x0 - x1 - 1), then writes 16 rows of 16 bytes computed as
//   pred(x, y) = sat_u8((a + b * (x - 7) + c * (y - 7) + 16) >> 5)
// with a = 16 * (top[15] + left[15]), b = (5 * H + 32) >> 6 and
// c = (5 * V + 32) >> 6, where H and V are the weighted difference sums
// built below from the top row and the left column respectively.
function ff_pred16x16_plane_neon, export=1
sub x3, x0, x1
movrel x4, p16weight
// x2 = top[8], x3 = corner sample (one left of the row above)
add x2, x3, #8
sub x3, x3, #1
// v0 = top[-1..6], v2 = top[8..15]
ld1 {v0.8b}, [x3]
ld1 {v2.8b}, [x2], x1
// v1 = left[-1..6] (starting at the corner). x3 ends up on row 7.
ldcol.8 v1, x3, x1
// Skip row 7 so that v3 = left[8..15].
add x3, x3, x1
ldcol.8 v3, x3, x1
// Reverse so that lane i holds sample 6 - i.
rev64 v0.8b, v0.8b
rev64 v1.8b, v1.8b
// v7 lane 7 = top[15] + left[15]
uaddl v7.8h, v2.8b, v3.8b
// Lane i = sample[8 + i] - sample[6 - i], for the top row and left column.
usubl v2.8h, v2.8b, v0.8b
usubl v3.8h, v3.8b, v1.8b
// Weight the differences by 1..8.
ld1 {v0.8h}, [x4]
mul v2.8h, v2.8h, v0.8h
mul v3.8h, v3.8h, v0.8h
// Horizontal reduction: v2 lane 0 = H, lane 1 = V.
addp v2.8h, v2.8h, v3.8h
addp v2.8h, v2.8h, v2.8h
addp v2.4h, v2.4h, v2.4h
// v4 lane 0 = b = (5 * H + 32) >> 6, lane 1 = c = (5 * V + 32) >> 6
sshll v3.4s, v2.4h, #2
saddw v2.4s, v3.4s, v2.4h
rshrn v4.4h, v2.4s, #6
// v5 lane 0 = c, v2 lane 0 = b + c
trn2 v5.4h, v4.4h, v4.4h
add v2.4h, v4.4h, v5.4h
shl v3.4h, v2.4h, #3
// Rotate so that v7 lane 0 = top[15] + left[15].
ext v7.16b, v7.16b, v7.16b, #14
sub v3.4h, v3.4h, v2.4h // 7 * (b + c)
// v0 lane 0 is the weight 1, giving top[15] + left[15] + 1.
add v7.4h, v7.4h, v0.4h
// v2 lane 0 = 16 * (top[15] + left[15] + 1) - 7 * (b + c): value at (0, 0)
shl v2.4h, v7.4h, #4
sub v2.4h, v2.4h, v3.4h
// v3 lane 0 = 16 * b
shl v3.4h, v4.4h, #4
// Turn the weights 1..8 into the ramp 0..7.
ext v0.16b, v0.16b, v0.16b, #14
// v6 lane 0 = c - 16 * b
sub v6.4h, v5.4h, v3.4h
mov v0.h[0], wzr
// v0 = {0, b, 2b, ..., 7b}
mul v0.8h, v0.8h, v4.h[0]
dup v1.8h, v2.h[0]
dup v2.8h, v4.h[0]
dup v3.8h, v6.h[0]
// v2 = 8 * b: step from the left half of a row to its right half
shl v2.8h, v2.8h, #3
// v1 = unshifted values for columns 0..7 of row 0
add v1.8h, v1.8h, v0.8h
// v3 = c - 8 * b: step from the right half of a row to the next row start
add v3.8h, v3.8h, v2.8h
mov w3, #16
1:
// Columns 0-7: shift right by 5 and saturate to unsigned bytes.
sqshrun v0.8b, v1.8h, #5
add v1.8h, v1.8h, v2.8h
// Columns 8-15.
sqshrun2 v0.16b, v1.8h, #5
add v1.8h, v1.8h, v3.8h
st1 {v0.16b}, [x0], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// Weights 1..8 as 16-bit values. ff_pred16x16_plane_neon multiplies its
// neighbour differences by them, and both plane predictors also derive the
// column ramp 0..7 from this table.
const p16weight, align=4
.short 1,2,3,4,5,6,7,8
endconst
// Weights 1..4 repeated twice as 16-bit values. ff_pred8x8_plane_neon uses
// the first four for the top-row differences and the last four for the
// left-column differences.
const p8weight, align=4
.short 1,2,3,4,1,2,3,4
endconst
// 8x8 horizontal prediction: every row is filled with its left neighbour.
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_hor_neon, export=1
// x2 walks down the column to the left of the block
sub x2, x0, #1
mov w3, #8
// Per row: replicate the left neighbour into all 8 lanes and store it.
1: ld1r {v0.8b}, [x2], x1
st1 {v0.8b}, [x0], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// 8x8 vertical prediction: every row is a copy of the row above the block.
// x0: address of the top-left sample of the block, x1: byte step between rows
// (x1 is doubled inside the function).
function ff_pred8x8_vert_neon, export=1
// x2 = row above the block
sub x2, x0, x1
// Step two rows at a time so x0 and x2 can interleave even and odd rows.
lsl x1, x1, #1
// Load the top row. The post-increment by two rows leaves x2 at row 1.
ld1 {v0.8b}, [x2], x1
mov w3, #4
// x0 writes rows 0, 2, 4, 6 and x2 writes rows 1, 3, 5, 7.
1: st1 {v0.8b}, [x0], x1
st1 {v0.8b}, [x2], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// 8x8 plane prediction.
// x0: address of the top-left sample of the block, x1: byte step between rows.
// Reads the row above, the column to the left and the corner sample, then
// writes 8 rows of 8 bytes computed as
//   pred(x, y) = sat_u8((a + b * (x - 3) + c * (y - 3) + 16) >> 5)
// with a = 16 * (top[7] + left[7]), b = (17 * H + 16) >> 5 and
// c = (17 * V + 16) >> 5, where H and V are the weighted difference sums
// built below from the top row and the left column respectively.
function ff_pred8x8_plane_neon, export=1
sub x3, x0, x1
movrel x4, p8weight
movrel x5, p16weight
// x2 = top[4], x3 = corner sample (one left of the row above)
add x2, x3, #4
sub x3, x3, #1
// v0 lanes 0-3 = top[-1..2], v2 lanes 0-3 = top[4..7]
ld1 {v0.s}[0], [x3]
ld1 {v2.s}[0], [x2], x1
// v0 lanes 4-7 = left[-1..2] (starting at the corner). x3 ends on row 3.
ldcol.8 v0, x3, x1, 4, hi=1
// Skip row 3 so that v3 lanes 0-3 = left[4..7].
add x3, x3, x1
ldcol.8 v3, x3, x1, 4
// v7 lane 3 = top[7] + left[7]
uaddl v7.8h, v2.8b, v3.8b
// Reverse each group of four so that lane i holds sample 2 - i.
rev32 v0.8b, v0.8b
// v2 = {top[4..7], left[4..7]}
trn1 v2.2s, v2.2s, v3.2s
// Lane i = sample[4 + i] - sample[2 - i], top in lanes 0-3, left in 4-7.
usubl v2.8h, v2.8b, v0.8b
// Weight each group of differences by 1..4.
ld1 {v6.8h}, [x4]
mul v2.8h, v2.8h, v6.8h
ld1 {v0.8h}, [x5]
// Horizontal reduction: v2 lane 0 = H, lane 1 = V (32-bit).
saddlp v2.4s, v2.8h
addp v2.4s, v2.4s, v2.4s
// v5 lane 0 = b = (17 * H + 16) >> 5, lane 1 = c = (17 * V + 16) >> 5
shl v3.4s, v2.4s, #4
add v2.4s, v3.4s, v2.4s
rshrn v5.4h, v2.4s, #5
// v2 lane 0 = b + c, v3 lane 0 = 3 * (b + c)
addp v2.4h, v5.4h, v5.4h
shl v3.4h, v2.4h, #1
add v3.4h, v3.4h, v2.4h
// Bring top[7] + left[7] into lane 0 and add 1 (v0 lane 0 is the weight 1).
rev64 v7.4h, v7.4h
add v7.4h, v7.4h, v0.4h
// v2 lane 0 = 16 * (top[7] + left[7] + 1) - 3 * (b + c): value at (0, 0)
shl v2.4h, v7.4h, #4
sub v2.4h, v2.4h, v3.4h
// Turn the weights 1..8 into the ramp 0..7, then scale it by b.
ext v0.16b, v0.16b, v0.16b, #14
mov v0.h[0], wzr
mul v0.8h, v0.8h, v5.h[0]
dup v1.8h, v2.h[0]
// v2 = c: increment applied between rows
dup v2.8h, v5.h[1]
// v1 = unshifted values for columns 0..7 of row 0
add v1.8h, v1.8h, v0.8h
mov w3, #8
1:
// Shift right by 5, saturate to unsigned bytes, store, step to next row.
sqshrun v0.8b, v1.8h, #5
add v1.8h, v1.8h, v2.8h
st1 {v0.8b}, [x0], x1
subs w3, w3, #1
b.ne 1b
ret
endfunc
// 8x8 DC prediction without neighbours: fill the block with the value 128.
// x0: address of the first row to write, x1: byte step between rows (both are
// consumed by the shared store loop this function branches to).
function ff_pred8x8_128_dc_neon, export=1
// v0 is the pattern for rows 0-3, v1 the pattern for rows 4-7.
movi v0.8b, #128
movi v1.8b, #128
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction from the row above only. Columns 0-3 of every row get
// the rounded mean of top[0..3], columns 4-7 the rounded mean of top[4..7].
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_top_dc_neon, export=1
// x2 = row above the block
sub x2, x0, x1
ld1 {v0.8b}, [x2]
// v0 lane 0 = sum of top[0..3], lane 1 = sum of top[4..7]
uaddlp v0.4h, v0.8b
addp v0.4h, v0.4h, v0.4h
// Duplicate each sum so that the narrowed bytes come in pairs.
zip1 v0.8h, v0.8h, v0.8h
// Divide by 4 with rounding.
rshrn v2.8b, v0.8h, #2
// Expand to {dc0 x4, dc1 x4} for both the upper and the lower four rows.
zip1 v0.8b, v2.8b, v2.8b
zip1 v1.8b, v2.8b, v2.8b
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction from the left column only. Rows 0-3 get the rounded mean
// of left[0..3], rows 4-7 the rounded mean of left[4..7].
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_left_dc_neon, export=1
// x2 = sample left of the first row
sub x2, x0, #1
ldcol.8 v0, x2, x1
// v0 lane 0 = sum of left[0..3], lane 1 = sum of left[4..7]
uaddlp v0.4h, v0.8b
addp v0.4h, v0.4h, v0.4h
// Divide by 4 with rounding: byte 0 = upper DC, byte 1 = lower DC.
rshrn v2.8b, v0.8h, #2
// v1 = pattern for rows 4-7, v0 = pattern for rows 0-3
dup v1.8b, v2.b[1]
dup v0.8b, v2.b[0]
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction from the row above and the left column, computed per
// 4x4 quadrant. With T0/T1 = sums of top[0..3]/top[4..7] and L0/L1 = sums of
// left[0..3]/left[4..7]:
//   top-left     = (T0 + L0 + 4) >> 3     top-right    = (T1 + 2) >> 2
//   bottom-left  = (L1 + 2) >> 2          bottom-right = (T1 + L1 + 4) >> 3
// x0: address of the top-left sample of the block, x1: byte step between rows.
// The label .L_pred8x8_dc_end below is the store loop shared with the other
// 8x8 DC predictors: it expects the row pattern for rows 0-3 in v0, the
// pattern for rows 4-7 in v1, and x0/x1 as above.
function ff_pred8x8_dc_neon, export=1
// x2 = row above, x3 = column to the left
sub x2, x0, x1
sub x3, x0, #1
ld1 {v0.8b}, [x2]
ldcol.8 v1, x3, x1
// Pairwise sums of the top row (v0) and the left column (v1).
uaddlp v0.4h, v0.8b
uaddlp v1.4h, v1.8b
// v2 = pair sums of top[0..3] and left[0..3], v3 = those of the second halves
trn1 v2.2s, v0.2s, v1.2s
trn2 v3.2s, v0.2s, v1.2s
// v4 = {T0, L0, T1, L1}
addp v4.4h, v2.4h, v3.4h
// v5 = {T0 + L0, T1 + L1, ...}
addp v5.4h, v4.4h, v4.4h
// v6: sums of eight samples divided by 8, v7: sums of four divided by 4
rshrn v6.8b, v5.8h, #3
rshrn v7.8b, v4.8h, #2
// v0 = top-left DC, v2 = top-right DC, v1 = bottom-left DC, v3 = bottom-right
dup v0.8b, v6.b[0]
dup v2.8b, v7.b[2]
dup v1.8b, v7.b[3]
dup v3.8b, v6.b[1]
// Assemble the two 8-byte row patterns: left quadrant then right quadrant.
zip1 v0.2s, v0.2s, v2.2s
zip1 v1.2s, v1.2s, v3.2s
.L_pred8x8_dc_end:
mov w3, #4
// x2 = address of row 4
add x2, x0, x1, lsl #2
// Each iteration stores one upper row (v0) and one lower row (v1).
6: st1 {v0.8b}, [x0], x1
st1 {v1.8b}, [x2], x1
subs w3, w3, #1
b.ne 6b
ret
endfunc
// 8x8 DC prediction using the row above and only rows 0-3 of the left column.
// With T0/T1 = sums of top[0..3]/top[4..7] and L0 = sum of left[0..3]:
//   top-left    = (T0 + L0 + 4) >> 3     top-right    = (T1 + 2) >> 2
//   bottom-left = (T0 + 2) >> 2          bottom-right = (T1 + 2) >> 2
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_l0t_dc_neon, export=1
// x2 = row above, x3 = column to the left
sub x2, x0, x1
sub x3, x0, #1
ld1 {v0.8b}, [x2]
// Only lanes 0-3 of v1 are loaded (left[0..3]).
ldcol.8 v1, x3, x1, 4
// v0 = {top[0..3], left[0..3], top[4..7], lanes 4-7 of v1 (not loaded)}
zip1 v0.4s, v0.4s, v1.4s
uaddlp v0.8h, v0.16b
// v0 lanes 0..2 = T0, L0, T1 (lane 3 comes from the unloaded lanes, unused)
addp v0.8h, v0.8h, v0.8h
// v1 lane 0 = T0 + L0
addp v1.4h, v0.4h, v0.4h
rshrn v2.8b, v0.8h, #2
rshrn v3.8b, v1.8h, #3
// v4 = top-left DC, v6 = DC from T1, v5 = DC from T0
dup v4.8b, v3.b[0]
dup v6.8b, v2.b[2]
dup v5.8b, v2.b[0]
// v0 = pattern for rows 0-3, v1 = pattern for rows 4-7
zip1 v0.2s, v4.2s, v6.2s
zip1 v1.2s, v5.2s, v6.2s
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction using only rows 0-3 of the left column: rows 0-3 get the
// rounded mean of left[0..3], rows 4-7 are filled with 128.
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_l00_dc_neon, export=1
// x2 = sample left of the first row
sub x2, x0, #1
ldcol.8 v0, x2, x1, 4
// v0 lane 0 = sum of left[0..3]
uaddlp v0.4h, v0.8b
addp v0.4h, v0.4h, v0.4h
// Divide by 4 with rounding.
rshrn v0.8b, v0.8h, #2
// v1 = pattern for rows 4-7, v0 = pattern for rows 0-3
movi v1.8b, #128
dup v0.8b, v0.b[0]
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction using the row above and only rows 4-7 of the left column.
// With T0/T1 = sums of top[0..3]/top[4..7] and L1 = sum of left[4..7]:
//   top-left    = (T0 + 2) >> 2          top-right    = (T1 + 2) >> 2
//   bottom-left = (L1 + 2) >> 2          bottom-right = (T1 + L1 + 4) >> 3
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_0lt_dc_neon, export=1
// x3 = sample left of row 4, x2 = row above
add x3, x0, x1, lsl #2
sub x2, x0, x1
sub x3, x3, #1
ld1 {v0.8b}, [x2]
// Only lanes 4-7 of v1 are loaded (left[4..7]).
ldcol.8 v1, x3, x1, 4, hi=1
// v0 = {top[0..3], lanes 0-3 of v1 (not loaded), top[4..7], left[4..7]}
zip1 v0.4s, v0.4s, v1.4s
uaddlp v0.8h, v0.16b
// v0 lanes 0, 2, 3 = T0, T1, L1 (lane 1 comes from the unloaded lanes)
addp v0.8h, v0.8h, v0.8h
// v1 lane 1 = T1 + L1
addp v1.4h, v0.4h, v0.4h
rshrn v2.8b, v0.8h, #2
rshrn v3.8b, v1.8h, #3
// v4 = DC from T0, v5 = DC from L1, v6 = DC from T1, v7 = bottom-right DC
dup v4.8b, v2.b[0]
dup v5.8b, v2.b[3]
dup v6.8b, v2.b[2]
dup v7.8b, v3.b[1]
// v0 = pattern for rows 0-3, v1 = pattern for rows 4-7
zip1 v0.2s, v4.2s, v6.2s
zip1 v1.2s, v5.2s, v7.2s
b .L_pred8x8_dc_end
endfunc
// 8x8 DC prediction using only rows 4-7 of the left column: rows 0-3 are
// filled with 128, rows 4-7 get the rounded mean of left[4..7].
// x0: address of the top-left sample of the block, x1: byte step between rows.
function ff_pred8x8_0l0_dc_neon, export=1
// x2 = sample left of row 4
add x2, x0, x1, lsl #2
sub x2, x2, #1
ldcol.8 v1, x2, x1, 4
// v2 lane 0 = sum of left[4..7]
uaddlp v2.4h, v1.8b
addp v2.4h, v2.4h, v2.4h
// Divide by 4 with rounding.
rshrn v1.8b, v2.8h, #2
// v0 = pattern for rows 0-3, v1 = pattern for rows 4-7
movi v0.8b, #128
dup v1.8b, v1.b[0]
b .L_pred8x8_dc_end
endfunc

View File

@@ -0,0 +1,172 @@
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/h264qpel.h"
/*
 * Prototypes of the NEON quarter-pel routines, which are implemented in
 * assembly. Naming: ff_{put,avg}_h264_qpel{16,8}_mcXY_neon.
 * ff_h264qpel_init_aarch64() stores the mcXY variant at table index
 * X + 4 * Y of the matching put/avg table (row 0 for qpel16, row 1 for qpel8).
 * NOTE(review): X and Y look like the horizontal and vertical quarter-sample
 * offsets (0..3) - confirm against libavcodec/h264qpel.h.
 */
void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc31_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc02_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc12_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc22_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc32_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc03_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc13_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc23_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc33_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
/**
 * Install the AArch64 NEON quarter-pel routines into a qpel context.
 *
 * The tables are only touched when the runtime CPU flags report NEON and
 * bit_depth is at most 8; otherwise the context is left unchanged.
 *
 * @param c         context whose put/avg qpel tables are filled
 * @param bit_depth sample bit depth of the stream being decoded
 */
av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    /* The NEON routines are installed for 8-bit content only. */
    if (bit_depth > 8 || !have_neon(cpu_flags))
        return;

/* Fill one 16-entry table row: the mcXY routine goes to index X + 4 * Y. */
#define SET_QPEL_ROW(OP, ROW, SIZE)                                                               \
    do {                                                                                          \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 0] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc00_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 1] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc10_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 2] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc20_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 3] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc30_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 4] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc01_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 5] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc11_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 6] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc21_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 7] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc31_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 8] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc02_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][ 9] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc12_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][10] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc22_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][11] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc32_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][12] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc03_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][13] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc13_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][14] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc23_neon;  \
        c->OP ## _h264_qpel_pixels_tab[ROW][15] = ff_ ## OP ## _h264_qpel ## SIZE ## _mc33_neon;  \
    } while (0)

    /* Row 0 holds the 16x16 routines, row 1 the 8x8 routines. */
    SET_QPEL_ROW(put, 0, 16);
    SET_QPEL_ROW(put, 1, 8);
    SET_QPEL_ROW(avg, 0, 16);
    SET_QPEL_ROW(avg, 1, 8);

#undef SET_QPEL_ROW
}

View File

@@ -0,0 +1,934 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#include "neon.S"
/* H.264 qpel MC */
// Load the filter multipliers used by the lowpass macros into v6:
// v6.h[0] = 5 and v6.h[1] = 20.
//   r: 32-bit general purpose scratch register (its old value is lost)
.macro lowpass_const r
// r = (20 << 16) | 5
movz \r, #20, lsl #16
movk \r, #5
mov v6.S[0], \r
.endm
// Apply the 6-tap filter (1, -5, 20, 20, -5, 1) along two rows of 8-bit
// samples, producing 8 outputs per row.
//   r0, r1 - first row as two consecutive 8-byte vectors (13 bytes are used)
//   r2, r3 - second row, same layout
//   d0, d1 - result registers for the first and the second row
//   narrow - when non-zero the 16-bit sums are rounded, shifted right by 5
//            and saturated to unsigned bytes, otherwise they stay 16-bit
// Expects v6.h[0] = 5 and v6.h[1] = 20, as set up by lowpass_const.
//trashes v0-v5
.macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
// Row 1: v2 = sum of the two centre taps (offsets 2 and 3)
ext v2.8B, \r0\().8B, \r1\().8B, #2
ext v3.8B, \r0\().8B, \r1\().8B, #3
uaddl v2.8H, v2.8B, v3.8B
// Row 1: v4 = sum of the taps at offsets 1 and 4
ext v4.8B, \r0\().8B, \r1\().8B, #1
ext v5.8B, \r0\().8B, \r1\().8B, #4
uaddl v4.8H, v4.8B, v5.8B
// Row 1: d0 = sum of the outer taps (offsets 0 and 5)
ext v1.8B, \r0\().8B, \r1\().8B, #5
uaddl \d0\().8H, \r0\().8B, v1.8B
// The second row is interleaved with the multiply-accumulates of the first.
ext v0.8B, \r2\().8B, \r3\().8B, #2
// d0 += 20 * centre taps
mla \d0\().8H, v2.8H, v6.H[1]
ext v1.8B, \r2\().8B, \r3\().8B, #3
uaddl v0.8H, v0.8B, v1.8B
ext v1.8B, \r2\().8B, \r3\().8B, #1
// d0 -= 5 * (offset 1 + offset 4)
mls \d0\().8H, v4.8H, v6.H[0]
ext v3.8B, \r2\().8B, \r3\().8B, #4
uaddl v1.8H, v1.8B, v3.8B
ext v2.8B, \r2\().8B, \r3\().8B, #5
uaddl \d1\().8H, \r2\().8B, v2.8B
mla \d1\().8H, v0.8H, v6.H[1]
mls \d1\().8H, v1.8H, v6.H[0]
.if \narrow
// (sum + 16) >> 5, saturated to 0..255
sqrshrun \d0\().8B, \d0\().8H, #5
sqrshrun \d1\().8B, \d1\().8H, #5
.endif
.endm
// Apply the 6-tap filter (1, -5, 20, 20, -5, 1) along two rows of 8-bit
// samples and keep the unrounded 16-bit sums.
//   r0, r1 - each holds one row as a 16-byte vector (13 bytes are used) on
//            entry and the 8 filtered 16-bit values of that row on exit
// Expects v6.h[0] = 5 and v6.h[1] = 20, as set up by lowpass_const.
//trashes v0-v5, v7, v30-v31
.macro lowpass_8H r0, r1
// Row r0: ext of a register with itself rotates it, bringing the sample at
// the given offset into lane 0. Only the low 8 bytes are consumed.
ext v0.16B, \r0\().16B, \r0\().16B, #2
ext v1.16B, \r0\().16B, \r0\().16B, #3
uaddl v0.8H, v0.8B, v1.8B
ext v2.16B, \r0\().16B, \r0\().16B, #1
ext v3.16B, \r0\().16B, \r0\().16B, #4
uaddl v2.8H, v2.8B, v3.8B
ext v30.16B, \r0\().16B, \r0\().16B, #5
// r0 = outer taps (offsets 0 and 5), overwriting the input row
uaddl \r0\().8H, \r0\().8B, v30.8B
ext v4.16B, \r1\().16B, \r1\().16B, #2
// r0 += 20 * centre taps
mla \r0\().8H, v0.8H, v6.H[1]
ext v5.16B, \r1\().16B, \r1\().16B, #3
uaddl v4.8H, v4.8B, v5.8B
ext v7.16B, \r1\().16B, \r1\().16B, #1
// r0 -= 5 * (offset 1 + offset 4)
mls \r0\().8H, v2.8H, v6.H[0]
ext v0.16B, \r1\().16B, \r1\().16B, #4
uaddl v7.8H, v7.8B, v0.8B
ext v31.16B, \r1\().16B, \r1\().16B, #5
// Same accumulation for row r1.
uaddl \r1\().8H, \r1\().8B, v31.8B
mla \r1\().8H, v4.8H, v6.H[1]
mls \r1\().8H, v7.8H, v6.H[0]
.endm
// Single-row variant of lowpass_8: apply the 6-tap filter
// (1, -5, 20, 20, -5, 1) along one row of 8-bit samples.
//   r0, r1 - the row as two consecutive 8-byte vectors (13 bytes are used)
//   d0     - result register
//   narrow - when non-zero the 16-bit sums are rounded, shifted right by 5
//            and saturated to unsigned bytes, otherwise they stay 16-bit
// Expects v6.h[0] = 5 and v6.h[1] = 20, as set up by lowpass_const.
// trashes v2-v5, v30
.macro lowpass_8_1 r0, r1, d0, narrow=1
// v2 = sum of the two centre taps (offsets 2 and 3)
ext v2.8B, \r0\().8B, \r1\().8B, #2
ext v3.8B, \r0\().8B, \r1\().8B, #3
uaddl v2.8H, v2.8B, v3.8B
// v4 = sum of the taps at offsets 1 and 4
ext v4.8B, \r0\().8B, \r1\().8B, #1
ext v5.8B, \r0\().8B, \r1\().8B, #4
uaddl v4.8H, v4.8B, v5.8B
// d0 = outer taps + 20 * centre taps - 5 * (offset 1 + offset 4)
ext v30.8B, \r0\().8B, \r1\().8B, #5
uaddl \d0\().8H, \r0\().8B, v30.8B
mla \d0\().8H, v2.8H, v6.H[1]
mls \d0\().8H, v4.8H, v6.H[0]
.if \narrow
// (sum + 16) >> 5, saturated to 0..255
sqrshrun \d0\().8B, \d0\().8H, #5
.endif
.endm
// Second-pass 6-tap filter (1, -5, 20, 20, -5, 1) over signed 16-bit input,
// evaluated in 32-bit precision.
//   r0, r1 - 16 consecutive 16-bit values (13 are used). r1 is overwritten.
//   r2     - receives 8 bytes: the sums are rounded and shifted right by 10,
//            then saturated to unsigned bytes
// The multiplications by 20 and 5 are done with shifts and adds, so v6 is
// not read here. It is overwritten, which destroys the lowpass_const values.
// trashes v0-v7
.macro lowpass_8.16 r0, r1, r2
// v5 / v1 = centre taps (elements 2 and 3), low / high four lanes
ext v1.16B, \r0\().16B, \r1\().16B, #4
ext v0.16B, \r0\().16B, \r1\().16B, #6
saddl v5.4S, v1.4H, v0.4H
ext v2.16B, \r0\().16B, \r1\().16B, #2
saddl2 v1.4S, v1.8H, v0.8H
// v6 / v2 = elements 1 and 4, low / high four lanes
ext v3.16B, \r0\().16B, \r1\().16B, #8
saddl v6.4S, v2.4H, v3.4H
// r1 now holds the input shifted by five elements
ext \r1\().16B, \r0\().16B, \r1\().16B, #10
saddl2 v2.4S, v2.8H, v3.8H
// v0 / v4 = outer taps (elements 0 and 5), low / high four lanes
saddl v0.4S, \r0\().4H, \r1\().4H
saddl2 v4.4S, \r0\().8H, \r1\().8H
// Low half: v5 = 20 * centre (16x + 4x), v6 = 5 * inner (1x + 4x)
shl v3.4S, v5.4S, #4
shl v5.4S, v5.4S, #2
shl v7.4S, v6.4S, #2
add v5.4S, v5.4S, v3.4S
add v6.4S, v6.4S, v7.4S
// High half: v1 = 20 * centre, v2 = 5 * inner
shl v3.4S, v1.4S, #4
shl v1.4S, v1.4S, #2
shl v7.4S, v2.4S, #2
add v1.4S, v1.4S, v3.4S
add v2.4S, v2.4S, v7.4S
// result = 20 * centre + outer - 5 * inner
add v5.4S, v5.4S, v0.4S
sub v5.4S, v5.4S, v6.4S
add v1.4S, v1.4S, v4.4S
sub v1.4S, v1.4S, v2.4S
// (result + 512) >> 10, narrowed to 16 bits, then saturated to 0..255
rshrn v5.4H, v5.4S, #10
rshrn2 v5.8H, v1.4S, #10
sqxtun \r2\().8B, v5.8H
.endm
// Horizontal lowpass of a 16x16 block, written out with a row step of 8
// bytes: the 16 rows of the left 8 columns first, immediately followed by the
// 16 rows of the right 8 columns (x0 is not rewound between the two passes).
// Internal register interface (not the C ABI):
//   x0 - destination buffer, x1 - source, x2 - source row step
// Clobbers x3, x4 and x12. Returns through the second 8-wide pass.
function put_h264_qpel16_h_lowpass_neon_packed
// Keep the return address across the bl below.
mov x4, x30
// 16 rows, destination row step of 8 bytes
mov x12, #16
mov x3, #8
bl put_h264_qpel8_h_lowpass_neon
// Rewind the source by 16 rows and move 8 columns to the right.
sub x1, x1, x2, lsl #4
add x1, x1, #8
mov x12, #16
// Restore the caller return address and tail-call the right-half pass.
mov x30, x4
b put_h264_qpel8_h_lowpass_neon
endfunc
// Horizontal 6-tap lowpass, instantiated below for type = put and type = avg.
// Internal register interface (not the C ABI):
//   x0  - destination, x3 - destination row step
//   x1  - source, pointing at the first filter tap, x2 - source row step
//   x12 - row count for the 8-wide routine (two rows per iteration). The
//         16-wide routine sets it to 16 itself.
// Expects the v6 constants from lowpass_const.
// The avg variant averages (urhadd) the result with the bytes already in the
// destination. The put variant overwrites the destination.
.macro h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
// Keep the caller return address in x13 across the bl.
mov x13, x30
mov x12, #16
// Left 8 columns, 16 rows.
bl \type\()_h264_qpel8_h_lowpass_neon
// Rewind both pointers by 16 rows and move 8 columns to the right.
sub x0, x0, x3, lsl #4
sub x1, x1, x2, lsl #4
add x0, x0, #8
add x1, x1, #8
mov x12, #16
mov x30, x13
// No ret or branch here: execution continues into the 8-wide routine that
// follows, which handles the right half and returns to the caller.
endfunc
function \type\()_h264_qpel8_h_lowpass_neon
// Load 16 bytes from each of two source rows.
1: ld1 {v28.8B, v29.8B}, [x1], x2
ld1 {v16.8B, v17.8B}, [x1], x2
// Sets the flags tested by b.ne at the end of the loop. Nothing in between
// modifies them.
subs x12, x12, #2
lowpass_8 v28, v29, v16, v17, v28, v16
.ifc \type,avg
// Average with the two destination rows, then step x0 back to the first.
ld1 {v2.8B}, [x0], x3
urhadd v28.8B, v28.8B, v2.8B
ld1 {v3.8B}, [x0]
urhadd v16.8B, v16.8B, v3.8B
sub x0, x0, x3
.endif
st1 {v28.8B}, [x0], x3
st1 {v16.8B}, [x0], x3
b.ne 1b
ret
endfunc
.endm
h264_qpel_h_lowpass put
h264_qpel_h_lowpass avg
// Horizontal 6-tap lowpass averaged with a second source, instantiated below
// for type = put and type = avg.
// Internal register interface (not the C ABI):
//   x0  - destination
//   x1  - source, pointing at the first filter tap
//   x2  - row step used for x0, x1 and x3 alike
//   x3  - second source, averaged (urhadd) with the filter output
//   x12 - row count for the 8-wide routine (two rows per iteration). The
//         16-wide routine sets it to 16 itself.
// Expects the v6 constants from lowpass_const.
// The avg variant additionally averages with the bytes already in the
// destination.
.macro h264_qpel_h_lowpass_l2 type
function \type\()_h264_qpel16_h_lowpass_l2_neon
// Keep the caller return address in x13 across the bl.
mov x13, x30
mov x12, #16
// Left 8 columns, 16 rows.
bl \type\()_h264_qpel8_h_lowpass_l2_neon
// Rewind all three pointers by 16 rows and move 8 columns to the right.
sub x0, x0, x2, lsl #4
sub x1, x1, x2, lsl #4
sub x3, x3, x2, lsl #4
add x0, x0, #8
add x1, x1, #8
add x3, x3, #8
mov x12, #16
mov x30, x13
// No ret or branch here: execution continues into the 8-wide routine that
// follows, which handles the right half and returns to the caller.
endfunc
function \type\()_h264_qpel8_h_lowpass_l2_neon
// Two rows of filter input (16 bytes each) and two rows of second source.
1: ld1 {v26.8B, v27.8B}, [x1], x2
ld1 {v16.8B, v17.8B}, [x1], x2
ld1 {v28.8B}, [x3], x2
ld1 {v29.8B}, [x3], x2
// Sets the flags tested by b.ne at the end of the loop. Nothing in between
// modifies them.
subs x12, x12, #2
// Filtered rows end up in v26 and v27.
lowpass_8 v26, v27, v16, v17, v26, v27
urhadd v26.8B, v26.8B, v28.8B
urhadd v27.8B, v27.8B, v29.8B
.ifc \type,avg
// Average with the two destination rows, then step x0 back to the first.
ld1 {v2.8B}, [x0], x2
urhadd v26.8B, v26.8B, v2.8B
ld1 {v3.8B}, [x0]
urhadd v27.8B, v27.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v26.8B}, [x0], x2
st1 {v27.8B}, [x0], x2
b.ne 1b
ret
endfunc
.endm
h264_qpel_h_lowpass_l2 put
h264_qpel_h_lowpass_l2 avg
// Vertical lowpass of a 16x16 block, written out with a row step of 8 bytes
// as four consecutive 8x8 blocks in the order top-left, bottom-left,
// top-right, bottom-right (x0 is never rewound).
// Internal register interface (not the C ABI):
//   x0 - destination buffer, x1 - source (first filter row), x3 - source row
//   step
// Clobbers x2 and x4. Returns through the last 8x8 pass.
function put_h264_qpel16_v_lowpass_neon_packed
// Keep the return address across the bl instructions below.
mov x4, x30
// Destination row step of 8 bytes
mov x2, #8
bl put_h264_qpel8_v_lowpass_neon
// Each 8x8 pass leaves x1 twelve rows further down. Stepping back four rows
// puts it eight rows below its previous start.
sub x1, x1, x3, lsl #2
bl put_h264_qpel8_v_lowpass_neon
// Back to the original start row (20 rows up), then 8 columns to the right.
sub x1, x1, x3, lsl #4
sub x1, x1, x3, lsl #2
add x1, x1, #8
bl put_h264_qpel8_v_lowpass_neon
sub x1, x1, x3, lsl #2
// Restore the caller return address and tail-call the last 8x8 pass.
mov x30, x4
b put_h264_qpel8_v_lowpass_neon
endfunc
// Vertical 6-tap lowpass, instantiated below for type = put and type = avg.
// Internal register interface (not the C ABI):
//   x0 - destination, x2 - destination row step
//   x1 - source, pointing at the first filter row, x3 - source row step
// Expects the v6 constants from lowpass_const.
// The avg variant averages (urhadd) the result with the bytes already in the
// destination. The put variant overwrites the destination.
// NOTE(review): transpose_8x8B is not defined in this file, presumably it
// comes from neon.S - the comments below assume it transposes an 8x8 byte
// matrix held in its first eight register arguments.
.macro h264_qpel_v_lowpass type
function \type\()_h264_qpel16_v_lowpass_neon
// Keep the caller return address in x4 across the bl instructions.
mov x4, x30
// Top-left 8x8.
bl \type\()_h264_qpel8_v_lowpass_neon
// The 8x8 routine leaves x1 twelve rows down. Step back four rows so the
// next pass starts eight rows below the previous one (bottom-left 8x8).
sub x1, x1, x3, lsl #2
bl \type\()_h264_qpel8_v_lowpass_neon
// Destination: back up 16 rows and move 8 columns to the right.
sub x0, x0, x2, lsl #4
add x0, x0, #8
// Source: back up 20 rows to the original start, then 8 columns right.
sub x1, x1, x3, lsl #4
sub x1, x1, x3, lsl #2
add x1, x1, #8
// Top-right 8x8.
bl \type\()_h264_qpel8_v_lowpass_neon
sub x1, x1, x3, lsl #2
mov x30, x4
// No ret or branch here: execution continues into the 8x8 routine that
// follows, which handles the bottom-right block and returns to the caller.
endfunc
function \type\()_h264_qpel8_v_lowpass_neon
// Load 13 source rows of 8 bytes: 8 output rows plus 5 extra filter taps.
ld1 {v16.8B}, [x1], x3
ld1 {v18.8B}, [x1], x3
ld1 {v20.8B}, [x1], x3
ld1 {v22.8B}, [x1], x3
ld1 {v24.8B}, [x1], x3
ld1 {v26.8B}, [x1], x3
ld1 {v28.8B}, [x1], x3
ld1 {v30.8B}, [x1], x3
ld1 {v17.8B}, [x1], x3
ld1 {v19.8B}, [x1], x3
ld1 {v21.8B}, [x1], x3
ld1 {v23.8B}, [x1], x3
// The last load does not advance x1, so x1 ends 12 rows past its start.
ld1 {v25.8B}, [x1]
// Transpose so that each register pair (v16:v17, v18:v19, ...) holds one
// source column laid out as a row. v27, v29 and v31 are not loaded above.
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
// Filter the eight columns with the row filter, results in v16-v23.
lowpass_8 v16, v17, v18, v19, v16, v17
lowpass_8 v20, v21, v22, v23, v18, v19
lowpass_8 v24, v25, v26, v27, v20, v21
lowpass_8 v28, v29, v30, v31, v22, v23
// Transpose back so that v16-v23 are the eight output rows.
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
.ifc \type,avg
// Average with the eight destination rows, then rewind x0 by eight rows.
ld1 {v24.8B}, [x0], x2
urhadd v16.8B, v16.8B, v24.8B
ld1 {v25.8B}, [x0], x2
urhadd v17.8B, v17.8B, v25.8B
ld1 {v26.8B}, [x0], x2
urhadd v18.8B, v18.8B, v26.8B
ld1 {v27.8B}, [x0], x2
urhadd v19.8B, v19.8B, v27.8B
ld1 {v28.8B}, [x0], x2
urhadd v20.8B, v20.8B, v28.8B
ld1 {v29.8B}, [x0], x2
urhadd v21.8B, v21.8B, v29.8B
ld1 {v30.8B}, [x0], x2
urhadd v22.8B, v22.8B, v30.8B
ld1 {v31.8B}, [x0], x2
urhadd v23.8B, v23.8B, v31.8B
sub x0, x0, x2, lsl #3
.endif
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
st1 {v18.8B}, [x0], x2
st1 {v19.8B}, [x0], x2
st1 {v20.8B}, [x0], x2
st1 {v21.8B}, [x0], x2
st1 {v22.8B}, [x0], x2
st1 {v23.8B}, [x0], x2
ret
endfunc
.endm
h264_qpel_v_lowpass put
h264_qpel_v_lowpass avg
// Vertical 6-tap lowpass averaged with a second source, instantiated below
// for type = put and type = avg.
// Internal register interface (not the C ABI):
//   x0  - destination, stepped by x3
//   x1  - source, pointing at the first filter row, stepped by x3
//   x12 - second source, stepped by x2, averaged (urhadd) with the filter
//         output
// Expects the v6 constants from lowpass_const.
// The avg variant additionally averages with the bytes already in the
// destination.
// NOTE(review): transpose_8x8B is not defined in this file, presumably it
// comes from neon.S - the comments below assume it transposes an 8x8 byte
// matrix held in its first eight register arguments.
.macro h264_qpel_v_lowpass_l2 type
function \type\()_h264_qpel16_v_lowpass_l2_neon
// Keep the caller return address in x4 across the bl instructions.
mov x4, x30
// Top-left 8x8.
bl \type\()_h264_qpel8_v_lowpass_l2_neon
// The 8x8 routine leaves x1 twelve rows down. Step back four rows so the
// next pass starts eight rows below the previous one (bottom-left 8x8).
sub x1, x1, x3, lsl #2
bl \type\()_h264_qpel8_v_lowpass_l2_neon
// Destination and second source: back up 16 rows, 8 columns to the right.
sub x0, x0, x3, lsl #4
sub x12, x12, x2, lsl #4
add x0, x0, #8
add x12, x12, #8
// Source: back up 20 rows to the original start, then 8 columns right.
sub x1, x1, x3, lsl #4
sub x1, x1, x3, lsl #2
add x1, x1, #8
// Top-right 8x8.
bl \type\()_h264_qpel8_v_lowpass_l2_neon
sub x1, x1, x3, lsl #2
mov x30, x4
// No ret or branch here: execution continues into the 8x8 routine that
// follows, which handles the bottom-right block and returns to the caller.
endfunc
function \type\()_h264_qpel8_v_lowpass_l2_neon
// Load 13 source rows of 8 bytes: 8 output rows plus 5 extra filter taps.
ld1 {v16.8B}, [x1], x3
ld1 {v18.8B}, [x1], x3
ld1 {v20.8B}, [x1], x3
ld1 {v22.8B}, [x1], x3
ld1 {v24.8B}, [x1], x3
ld1 {v26.8B}, [x1], x3
ld1 {v28.8B}, [x1], x3
ld1 {v30.8B}, [x1], x3
ld1 {v17.8B}, [x1], x3
ld1 {v19.8B}, [x1], x3
ld1 {v21.8B}, [x1], x3
ld1 {v23.8B}, [x1], x3
// The last load does not advance x1, so x1 ends 12 rows past its start.
ld1 {v25.8B}, [x1]
// Transpose so that each register pair (v16:v17, v18:v19, ...) holds one
// source column laid out as a row. v27, v29 and v31 are not loaded above.
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
// Filter the eight columns with the row filter, results in v16-v23.
lowpass_8 v16, v17, v18, v19, v16, v17
lowpass_8 v20, v21, v22, v23, v18, v19
lowpass_8 v24, v25, v26, v27, v20, v21
lowpass_8 v28, v29, v30, v31, v22, v23
// Transpose back so that v16-v23 are the eight output rows.
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
// Average each output row with the matching row of the second source.
ld1 {v24.8B}, [x12], x2
ld1 {v25.8B}, [x12], x2
ld1 {v26.8B}, [x12], x2
ld1 {v27.8B}, [x12], x2
ld1 {v28.8B}, [x12], x2
urhadd v16.8B, v24.8B, v16.8B
urhadd v17.8B, v25.8B, v17.8B
ld1 {v29.8B}, [x12], x2
urhadd v18.8B, v26.8B, v18.8B
urhadd v19.8B, v27.8B, v19.8B
ld1 {v30.8B}, [x12], x2
urhadd v20.8B, v28.8B, v20.8B
urhadd v21.8B, v29.8B, v21.8B
ld1 {v31.8B}, [x12], x2
urhadd v22.8B, v30.8B, v22.8B
urhadd v23.8B, v31.8B, v23.8B
.ifc \type,avg
// Average with the eight destination rows, then rewind x0 by eight rows.
ld1 {v24.8B}, [x0], x3
urhadd v16.8B, v16.8B, v24.8B
ld1 {v25.8B}, [x0], x3
urhadd v17.8B, v17.8B, v25.8B
ld1 {v26.8B}, [x0], x3
urhadd v18.8B, v18.8B, v26.8B
ld1 {v27.8B}, [x0], x3
urhadd v19.8B, v19.8B, v27.8B
ld1 {v28.8B}, [x0], x3
urhadd v20.8B, v20.8B, v28.8B
ld1 {v29.8B}, [x0], x3
urhadd v21.8B, v21.8B, v29.8B
ld1 {v30.8B}, [x0], x3
urhadd v22.8B, v22.8B, v30.8B
ld1 {v31.8B}, [x0], x3
urhadd v23.8B, v23.8B, v31.8B
sub x0, x0, x3, lsl #3
.endif
st1 {v16.8B}, [x0], x3
st1 {v17.8B}, [x0], x3
st1 {v18.8B}, [x0], x3
st1 {v19.8B}, [x0], x3
st1 {v20.8B}, [x0], x3
st1 {v21.8B}, [x0], x3
st1 {v22.8B}, [x0], x3
st1 {v23.8B}, [x0], x3
ret
endfunc
.endm
h264_qpel_v_lowpass_l2 put
h264_qpel_v_lowpass_l2 avg
// Shared core of the 8x8 two-dimensional (horizontal then vertical) lowpass.
// Internal register interface (not the C ABI):
//   x1 - source, pointing at the first filter row and first filter tap
//   x3 - source row step
// On return v16-v23 hold the eight output rows as 8 bytes each. Nothing is
// stored to memory here. x1 ends 12 rows past its start. Clobbers w12 and
// the vector registers used by the lowpass and transpose macros.
// NOTE(review): transpose_8x8H and transpose_8x8B are not defined in this
// file, presumably they come from neon.S - the comments below assume they
// transpose 8x8 matrices of 16-bit and 8-bit elements respectively.
function put_h264_qpel8_hv_lowpass_neon_top
lowpass_const w12
// Load 13 source rows of 16 bytes each.
ld1 {v16.8H}, [x1], x3
ld1 {v17.8H}, [x1], x3
ld1 {v18.8H}, [x1], x3
ld1 {v19.8H}, [x1], x3
ld1 {v20.8H}, [x1], x3
ld1 {v21.8H}, [x1], x3
ld1 {v22.8H}, [x1], x3
ld1 {v23.8H}, [x1], x3
ld1 {v24.8H}, [x1], x3
ld1 {v25.8H}, [x1], x3
ld1 {v26.8H}, [x1], x3
ld1 {v27.8H}, [x1], x3
ld1 {v28.8H}, [x1]
// Horizontal pass: each row register is replaced by its 8 unrounded 16-bit
// filter sums. v29 is not loaded above and only rides along with v28.
lowpass_8H v16, v17
lowpass_8H v18, v19
lowpass_8H v20, v21
lowpass_8H v22, v23
lowpass_8H v24, v25
lowpass_8H v26, v27
lowpass_8H v28, v29
// Transpose so that each pair (v16:v24, v17:v25, ...) holds one column of
// intermediate values laid out as a row.
transpose_8x8H v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
transpose_8x8H v24, v25, v26, v27, v28, v29, v30, v31, v0, v1
// Vertical pass over the 16-bit intermediates, narrowing to bytes.
lowpass_8.16 v16, v24, v16
lowpass_8.16 v17, v25, v17
lowpass_8.16 v18, v26, v18
lowpass_8.16 v19, v27, v19
lowpass_8.16 v20, v28, v20
lowpass_8.16 v21, v29, v21
lowpass_8.16 v22, v30, v22
lowpass_8.16 v23, v31, v23
// Transpose back so that v16-v23 are the eight output rows.
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
ret
endfunc
// 8x8 two-dimensional lowpass, instantiated below for type = put and
// type = avg.
// Internal register interface (not the C ABI):
//   x0 - destination, x2 - destination row step
//   x1 - source (first filter row and tap), x3 - source row step
// Uses x10 for the return address and returns with ret x10. x0 ends eight
// rows past its start.
// The avg variant averages (urhadd) the result with the bytes already in the
// destination. The put variant overwrites the destination.
.macro h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
// Keep the return address in x10 across the bl.
mov x10, x30
// Filter result arrives in v16-v23.
bl put_h264_qpel8_hv_lowpass_neon_top
.ifc \type,avg
// Average with the eight destination rows, then rewind x0 by eight rows.
ld1 {v0.8B}, [x0], x2
urhadd v16.8B, v16.8B, v0.8B
ld1 {v1.8B}, [x0], x2
urhadd v17.8B, v17.8B, v1.8B
ld1 {v2.8B}, [x0], x2
urhadd v18.8B, v18.8B, v2.8B
ld1 {v3.8B}, [x0], x2
urhadd v19.8B, v19.8B, v3.8B
ld1 {v4.8B}, [x0], x2
urhadd v20.8B, v20.8B, v4.8B
ld1 {v5.8B}, [x0], x2
urhadd v21.8B, v21.8B, v5.8B
ld1 {v6.8B}, [x0], x2
urhadd v22.8B, v22.8B, v6.8B
ld1 {v7.8B}, [x0], x2
urhadd v23.8B, v23.8B, v7.8B
sub x0, x0, x2, lsl #3
.endif
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
st1 {v18.8B}, [x0], x2
st1 {v19.8B}, [x0], x2
st1 {v20.8B}, [x0], x2
st1 {v21.8B}, [x0], x2
st1 {v22.8B}, [x0], x2
st1 {v23.8B}, [x0], x2
ret x10
endfunc
.endm
h264_qpel8_hv_lowpass put
h264_qpel8_hv_lowpass avg
// 8x8 two-dimensional lowpass averaged with a second source, instantiated
// below for type = put and type = avg.
// Internal register interface (not the C ABI):
//   x0 - destination, stepped by x3
//   x1 - source (first filter row and tap), stepped by x3
//   x2 - second source: 64 consecutive bytes (eight rows of 8 bytes). x2 is
//        advanced by 64.
// Uses x10 for the return address and returns with ret x10. x0 ends eight
// rows past its start.
// The avg variant additionally averages with the bytes already in the
// destination.
.macro h264_qpel8_hv_lowpass_l2 type
function \type\()_h264_qpel8_hv_lowpass_l2_neon
// Keep the return address in x10 across the bl.
mov x10, x30
// Filter result arrives in v16-v23.
bl put_h264_qpel8_hv_lowpass_neon_top
// Load the second source two rows at a time and average it with the filter
// output. The results are collected in v0-v7.
ld1 {v0.8B, v1.8B}, [x2], #16
ld1 {v2.8B, v3.8B}, [x2], #16
urhadd v0.8B, v0.8B, v16.8B
urhadd v1.8B, v1.8B, v17.8B
ld1 {v4.8B, v5.8B}, [x2], #16
urhadd v2.8B, v2.8B, v18.8B
urhadd v3.8B, v3.8B, v19.8B
ld1 {v6.8B, v7.8B}, [x2], #16
urhadd v4.8B, v4.8B, v20.8B
urhadd v5.8B, v5.8B, v21.8B
urhadd v6.8B, v6.8B, v22.8B
urhadd v7.8B, v7.8B, v23.8B
.ifc \type,avg
// Average with the eight destination rows, then rewind x0 by eight rows.
ld1 {v16.8B}, [x0], x3
urhadd v0.8B, v0.8B, v16.8B
ld1 {v17.8B}, [x0], x3
urhadd v1.8B, v1.8B, v17.8B
ld1 {v18.8B}, [x0], x3
urhadd v2.8B, v2.8B, v18.8B
ld1 {v19.8B}, [x0], x3
urhadd v3.8B, v3.8B, v19.8B
ld1 {v20.8B}, [x0], x3
urhadd v4.8B, v4.8B, v20.8B
ld1 {v21.8B}, [x0], x3
urhadd v5.8B, v5.8B, v21.8B
ld1 {v22.8B}, [x0], x3
urhadd v6.8B, v6.8B, v22.8B
ld1 {v23.8B}, [x0], x3
urhadd v7.8B, v7.8B, v23.8B
sub x0, x0, x3, lsl #3
.endif
st1 {v0.8B}, [x0], x3
st1 {v1.8B}, [x0], x3
st1 {v2.8B}, [x0], x3
st1 {v3.8B}, [x0], x3
st1 {v4.8B}, [x0], x3
st1 {v5.8B}, [x0], x3
st1 {v6.8B}, [x0], x3
st1 {v7.8B}, [x0], x3
ret x10
endfunc
.endm
h264_qpel8_hv_lowpass_l2 put
h264_qpel8_hv_lowpass_l2 avg
// 16x16 two-dimensional lowpass built from four 8x8 passes in the order
// top-left, bottom-left, top-right, bottom-right. Instantiated below for
// type = put and type = avg. Both functions keep the caller return address
// in x13 and return through a tail call to the last 8x8 pass.
.macro h264_qpel16_hv type
// Internal register interface (not the C ABI):
//   x0 - destination, x2 - destination row step
//   x1 - source (first filter row and tap), x3 - source row step
function \type\()_h264_qpel16_hv_lowpass_neon
mov x13, x30
// Top-left 8x8.
bl \type\()_h264_qpel8_hv_lowpass_neon
// Each 8x8 pass leaves x1 twelve rows down. Step back four rows so the next
// pass starts eight rows below the previous one (bottom-left 8x8).
sub x1, x1, x3, lsl #2
bl \type\()_h264_qpel8_hv_lowpass_neon
// Source: back up 20 rows to the original start, then 8 columns right.
sub x1, x1, x3, lsl #4
sub x1, x1, x3, lsl #2
add x1, x1, #8
// Destination: back up 16 rows, then 8 columns right.
sub x0, x0, x2, lsl #4
add x0, x0, #8
// Top-right 8x8.
bl \type\()_h264_qpel8_hv_lowpass_neon
sub x1, x1, x3, lsl #2
// Restore the caller return address and tail-call the bottom-right pass.
mov x30, x13
b \type\()_h264_qpel8_hv_lowpass_neon
endfunc
// Variant averaged with a second source.
//   x0 - destination, stepped by x3
//   x1 - source (first filter row and tap), stepped by x3
//   x4 - address 256 bytes past the start of the second source. The second
//        source is read as four consecutive 64-byte 8x8 blocks.
// NOTE(review): the callers that set up x4 are outside this excerpt -
// presumably x4 is the end of a 256-byte buffer filled by one of the
// packed 16x16 lowpass routines. Confirm.
function \type\()_h264_qpel16_hv_lowpass_l2_neon
mov x13, x30
// x2 = start of the second source
sub x2, x4, #256
// Top-left 8x8. Every pass advances x2 by 64 bytes.
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
sub x1, x1, x3, lsl #2
// Bottom-left 8x8.
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
// Source: back up 20 rows to the original start, then 8 columns right.
sub x1, x1, x3, lsl #4
sub x1, x1, x3, lsl #2
add x1, x1, #8
// Destination: back up 16 rows, then 8 columns right.
sub x0, x0, x3, lsl #4
add x0, x0, #8
// Top-right 8x8.
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
sub x1, x1, x3, lsl #2
// Restore the caller return address and tail-call the bottom-right pass.
mov x30, x13
b \type\()_h264_qpel8_hv_lowpass_l2_neon
endfunc
.endm
h264_qpel16_hv put
h264_qpel16_hv avg
.macro h264_qpel8 type
function ff_\type\()_h264_qpel8_mc10_neon, export=1
lowpass_const w3
mov x3, x1
sub x1, x1, #2
mov x12, #8
b \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc
function ff_\type\()_h264_qpel8_mc20_neon, export=1
lowpass_const w3
sub x1, x1, #2
mov x3, x2
mov x12, #8
b \type\()_h264_qpel8_h_lowpass_neon
endfunc
function ff_\type\()_h264_qpel8_mc30_neon, export=1
lowpass_const w3
add x3, x1, #1
sub x1, x1, #2
mov x12, #8
b \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc
function ff_\type\()_h264_qpel8_mc01_neon, export=1
mov x14, x30
mov x12, x1
\type\()_h264_qpel8_mc01:
lowpass_const w3
mov x3, x2
sub x1, x1, x2, lsl #1
bl \type\()_h264_qpel8_v_lowpass_l2_neon
ret x14
endfunc
function ff_\type\()_h264_qpel8_mc11_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
\type\()_h264_qpel8_mc11:
lowpass_const w3
mov x11, sp
sub sp, sp, #64
mov x0, sp
sub x1, x1, #2
mov x3, #8
mov x12, #8
bl put_h264_qpel8_h_lowpass_neon
mov x0, x8
mov x3, x2
mov x12, sp
sub x1, x9, x2, lsl #1
mov x2, #8
bl \type\()_h264_qpel8_v_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
function ff_\type\()_h264_qpel8_mc21_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
\type\()_h264_qpel8_mc21:
lowpass_const w3
mov x11, sp
sub sp, sp, #(8*8+16*12)
sub x1, x1, #2
mov x3, #8
mov x0, sp
mov x12, #8
bl put_h264_qpel8_h_lowpass_neon
mov x4, x0
mov x0, x8
sub x1, x9, x2, lsl #1
sub x1, x1, #2
mov x3, x2
sub x2, x4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
function ff_\type\()_h264_qpel8_mc31_neon, export=1
add x1, x1, #1
mov x14, x30
mov x8, x0
mov x9, x1
sub x1, x1, #1
b \type\()_h264_qpel8_mc11
endfunc
function ff_\type\()_h264_qpel8_mc02_neon, export=1
mov x14, x30
lowpass_const w3
sub x1, x1, x2, lsl #1
mov x3, x2
bl \type\()_h264_qpel8_v_lowpass_neon
ret x14
endfunc
function ff_\type\()_h264_qpel8_mc12_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
\type\()_h264_qpel8_mc12:
lowpass_const w3
mov x11, sp
sub sp, sp, #(8*8+16*12)
sub x1, x1, x2, lsl #1
mov x3, x2
mov x2, #8
mov x0, sp
bl put_h264_qpel8_v_lowpass_neon
mov x4, x0
mov x0, x8
sub x1, x9, x3, lsl #1
sub x1, x1, #2
sub x2, x4, #64
bl \type\()_h264_qpel8_hv_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
// qpel8 mc22 entry point: calls the hv_lowpass helper (defined outside this
// view) with x1 = x1 - 2 * x2 - 2 and x3 = x2.
// sp is saved in x11 and restored after the call.
// NOTE(review): presumably the helper moves sp for its own scratch space and
// relies on the caller to restore it -- confirm in the helper.
function ff_\type\()_h264_qpel8_mc22_neon, export=1
mov x14, x30
mov x11, sp
sub x1, x1, x2, lsl #1
sub x1, x1, #2
mov x3, x2
bl \type\()_h264_qpel8_hv_lowpass_neon
mov sp, x11
ret x14
endfunc
// qpel8 mc32 entry point: reuses the mc12 tail with x9 = original x1 and
// x1 advanced by one byte for the first pass.
function ff_\type\()_h264_qpel8_mc32_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, #1
b \type\()_h264_qpel8_mc12
endfunc
// qpel8 mc03 entry point: reuses the mc01 tail, but with x12 = x1 + x2
// instead of x12 = x1.
function ff_\type\()_h264_qpel8_mc03_neon, export=1
mov x14, x30
add x12, x1, x2
b \type\()_h264_qpel8_mc01
endfunc
// qpel8 mc13 entry point: reuses the mc11 tail with x9 = original x1 and
// x1 advanced by one stride (x2) for the first pass.
function ff_\type\()_h264_qpel8_mc13_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, x2
b \type\()_h264_qpel8_mc11
endfunc
// qpel8 mc23 entry point: reuses the mc21 tail with x9 = original x1 and
// x1 advanced by one stride (x2) for the first pass.
function ff_\type\()_h264_qpel8_mc23_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, x2
b \type\()_h264_qpel8_mc21
endfunc
// qpel8 mc33 entry point: reuses the mc11 tail with x9 = original x1 + 1
// and x1 = original x1 + x2 for the first pass.
function ff_\type\()_h264_qpel8_mc33_neon, export=1
add x1, x1, #1
mov x14, x30
mov x8, x0
// x9 captures x1 + 1 for the second pass of the mc11 tail.
mov x9, x1
// Net effect of the next two lines: x1 = original x1 + x2.
add x1, x1, x2
sub x1, x1, #1
b \type\()_h264_qpel8_mc11
endfunc
.endm
// Expand the qpel8 entry points twice: once with type = put, once with type = avg.
h264_qpel8 put
h264_qpel8 avg
// Generates the exported qpel16 mcXY entry points for one variant
// (type is put or avg, see the two expansions after the macro).
// Every entry point only prepares registers and then calls or branches to
// lowpass helpers that are defined outside this view.
// NOTE(review): presumably x0 = dst, x1 = src, x2 = stride on entry and XY is
// the quarter-sample offset -- confirm against the C prototypes.
// Register conventions visible in this macro:
//   x14 = saved return address (bl overwrites x30)
//   x8  = saved x0, x9 = saved x1
//   x11 = saved sp while a stack scratch buffer is in use
.macro h264_qpel16 type
// mc10: h_lowpass_l2 with x3 = original x1 and x1 moved back by 2 bytes.
function ff_\type\()_h264_qpel16_mc10_neon, export=1
lowpass_const w3
mov x3, x1
sub x1, x1, #2
b \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc
// mc20: h_lowpass with x1 moved back by 2 bytes and x3 = x2.
function ff_\type\()_h264_qpel16_mc20_neon, export=1
lowpass_const w3
sub x1, x1, #2
mov x3, x2
b \type\()_h264_qpel16_h_lowpass_neon
endfunc
// mc30: h_lowpass_l2 with x3 = original x1 + 1 and x1 moved back by 2 bytes.
function ff_\type\()_h264_qpel16_mc30_neon, export=1
lowpass_const w3
add x3, x1, #1
sub x1, x1, #2
b \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc
// mc01: v_lowpass_l2 with x12 = original x1 and x1 moved up by two strides.
function ff_\type\()_h264_qpel16_mc01_neon, export=1
mov x14, x30
mov x12, x1
// Shared tail, also entered from mc03 (with x12 = x1 + x2).
\type\()_h264_qpel16_mc01:
lowpass_const w3
mov x3, x2
sub x1, x1, x2, lsl #1
bl \type\()_h264_qpel16_v_lowpass_l2_neon
ret x14
endfunc
// mc11: put h_lowpass into a 256-byte stack buffer, then v_lowpass_l2 with
// x12 pointing at that buffer.
function ff_\type\()_h264_qpel16_mc11_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
// Shared tail, also entered from mc31, mc13 and mc33 with x8/x9/x14 set and
// x1 pointing at the source for the first pass.
\type\()_h264_qpel16_mc11:
lowpass_const w3
mov x11, sp
sub sp, sp, #256
mov x0, sp
sub x1, x1, #2
mov x3, #16
bl put_h264_qpel16_h_lowpass_neon
// Second pass: x0 = original x0, x1 = x9 - 2 * x2, x12 = stack buffer.
mov x0, x8
mov x3, x2
mov x12, sp
sub x1, x9, x2, lsl #1
mov x2, #16
bl \type\()_h264_qpel16_v_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
// mc21: packed put h_lowpass into the stack, then hv_lowpass_l2.
function ff_\type\()_h264_qpel16_mc21_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
// Shared tail, also entered from mc23 (with x1 = x9 + x2).
\type\()_h264_qpel16_mc21:
lowpass_const w3
mov x11, sp
// 16*16 + 16*12 = 448 bytes of scratch space.
sub sp, sp, #(16*16+16*12)
sub x1, x1, #2
mov x0, sp
bl put_h264_qpel16_h_lowpass_neon_packed
// x4 = value of x0 after the helper returns; passed on to the hv helper.
mov x4, x0
mov x0, x8
// x1 = x9 - 2 * x2 - 2
sub x1, x9, x2, lsl #1
sub x1, x1, #2
mov x3, x2
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
// mc31: mc11 tail with x9 = original x1 + 1 and x1 restored to its
// original value for the first pass.
function ff_\type\()_h264_qpel16_mc31_neon, export=1
add x1, x1, #1
mov x14, x30
mov x8, x0
mov x9, x1
sub x1, x1, #1
b \type\()_h264_qpel16_mc11
endfunc
// mc02: v_lowpass with x1 moved up by two strides and x3 = x2.
function ff_\type\()_h264_qpel16_mc02_neon, export=1
mov x14, x30
lowpass_const w3
sub x1, x1, x2, lsl #1
mov x3, x2
bl \type\()_h264_qpel16_v_lowpass_neon
ret x14
endfunc
// mc12: packed put v_lowpass into the stack, then hv_lowpass_l2.
function ff_\type\()_h264_qpel16_mc12_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
// Shared tail, also entered from mc32 (with x1 = x9 + 1).
\type\()_h264_qpel16_mc12:
lowpass_const w3
mov x11, sp
// 16*16 + 16*12 = 448 bytes of scratch space.
sub sp, sp, #(16*16+16*12)
sub x1, x1, x2, lsl #1
mov x0, sp
mov x3, x2
bl put_h264_qpel16_v_lowpass_neon_packed
// x4 = value of x0 after the helper returns; passed on to the hv helper.
mov x4, x0
mov x0, x8
// x1 = x9 - 2 * x3 - 2, then x2 = x3 (x3 was set to the original x2 above,
// assuming the helper preserved it -- TODO confirm).
sub x1, x9, x3, lsl #1
sub x1, x1, #2
mov x2, x3
bl \type\()_h264_qpel16_hv_lowpass_l2_neon
mov sp, x11
ret x14
endfunc
// mc22: hv_lowpass with x1 = x1 - 2 * x2 - 2 and x3 = x2; sp is saved in
// x11 around the call.
function ff_\type\()_h264_qpel16_mc22_neon, export=1
mov x14, x30
lowpass_const w3
mov x11, sp
sub x1, x1, x2, lsl #1
sub x1, x1, #2
mov x3, x2
bl \type\()_h264_qpel16_hv_lowpass_neon
mov sp, x11 // restore stack
ret x14
endfunc
// mc32: mc12 tail with x9 = original x1 and x1 advanced by one byte.
function ff_\type\()_h264_qpel16_mc32_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, #1
b \type\()_h264_qpel16_mc12
endfunc
// mc03: mc01 tail with x12 = x1 + x2.
function ff_\type\()_h264_qpel16_mc03_neon, export=1
mov x14, x30
add x12, x1, x2
b \type\()_h264_qpel16_mc01
endfunc
// mc13: mc11 tail with x9 = original x1 and x1 advanced by one stride.
function ff_\type\()_h264_qpel16_mc13_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, x2
b \type\()_h264_qpel16_mc11
endfunc
// mc23: mc21 tail with x9 = original x1 and x1 advanced by one stride.
function ff_\type\()_h264_qpel16_mc23_neon, export=1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, x2
b \type\()_h264_qpel16_mc21
endfunc
// mc33: mc11 tail with x9 = original x1 + 1 and x1 = original x1 + x2.
function ff_\type\()_h264_qpel16_mc33_neon, export=1
add x1, x1, #1
mov x14, x30
mov x8, x0
mov x9, x1
add x1, x1, x2
sub x1, x1, #1
b \type\()_h264_qpel16_mc11
endfunc
.endm
// Expand the qpel16 entry points for both variants.
h264_qpel16 put
h264_qpel16 avg

View File

@@ -0,0 +1,123 @@
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/hpeldsp.h"
/*
 * Prototypes of the NEON half-pel routines that ff_hpeldsp_init_aarch64()
 * installs. All share one signature: destination block, source pixels,
 * line size and height h.
 * Name parts: put/avg, block width 16 or 8, optional _x2/_y2/_xy2 suffix,
 * optional _no_rnd suffix.
 */
void ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/* put, no-rounding variants */
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/* avg variants */
void ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/* avg, no-rounding variants (16-wide only) */
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/**
 * Install the AArch64 NEON half-pel routines into a HpelDSPContext.
 *
 * When the running CPU does not report NEON support, the context is left
 * exactly as the caller set it up.
 *
 * @param c     context whose function tables are overwritten
 * @param flags not read by this function
 */
av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
{
    const int cpu_caps = av_get_cpu_flags();

    if (!have_neon(cpu_caps))
        return;

    /* put: first index 0 = 16-wide, 1 = 8-wide; second index = plain, x2, y2, xy2 */
    c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
    c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;

    /* put without rounding: the plain copy entries reuse the rounding routines */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;

    /* avg */
    c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
    c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
    c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
    c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;

    /* avg without rounding: a single row of 16-wide routines */
    c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
    c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
    c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
}

View File

@@ -0,0 +1,397 @@
/*
* ARM NEON optimised DSP functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
// Body of the 16-byte-wide copy (avg=0) or average-into-destination (avg=1)
// routine. Four rows are handled per loop iteration.
// Registers, following the C prototypes (block, pixels, line_size, h):
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The loop only exits when w3 reaches exactly zero, so the row count is
// expected to be a nonzero multiple of 4.
// The rnd argument is not referenced in this body.
.macro pixels16 rnd=1, avg=0
.if \avg
// Separate read pointer for the existing destination rows, so x0 can keep
// advancing with the stores.
mov x12, x0
.endif
1: ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
ld1 {v2.16B}, [x1], x2
ld1 {v3.16B}, [x1], x2
.if \avg
// Rounding halving add of each source row with the current destination row.
ld1 {v4.16B}, [x12], x2
urhadd v0.16B, v0.16B, v4.16B
ld1 {v5.16B}, [x12], x2
urhadd v1.16B, v1.16B, v5.16B
ld1 {v6.16B}, [x12], x2
urhadd v2.16B, v2.16B, v6.16B
ld1 {v7.16B}, [x12], x2
urhadd v3.16B, v3.16B, v7.16B
.endif
subs w3, w3, #4
st1 {v0.16B}, [x0], x2
st1 {v1.16B}, [x0], x2
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
b.ne 1b
ret
.endm
// Body of the 16-byte-wide routine that averages every source byte with its
// right-hand neighbour. Two rows are handled per loop iteration.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The avg instruction used here is a helper macro that must be defined by
// the expanding context (urhadd or uhadd, selected by the pixfunc macro).
// Each row load fetches 32 bytes, of which bytes 0..16 are used.
.macro pixels16_x2 rnd=1, avg=0
1: ld1 {v0.16B, v1.16B}, [x1], x2
ld1 {v2.16B, v3.16B}, [x1], x2
subs w3, w3, #2
// v1 = the row shifted left by one byte (bytes 1..16).
ext v1.16B, v0.16B, v1.16B, #1
avg v0.16B, v0.16B, v1.16B
ext v3.16B, v2.16B, v3.16B, #1
avg v2.16B, v2.16B, v3.16B
.if \avg
// Average with the two existing destination rows, then step x0 back to the
// first of them for the stores below.
ld1 {v1.16B}, [x0], x2
ld1 {v3.16B}, [x0]
urhadd v0.16B, v0.16B, v1.16B
urhadd v2.16B, v2.16B, v3.16B
sub x0, x0, x2
.endif
st1 {v0.16B}, [x0], x2
st1 {v2.16B}, [x0], x2
b.ne 1b
ret
.endm
// Body of the 16-byte-wide routine that averages every source row with the
// row below it. Two output rows are produced per loop iteration and the
// last two are produced after the loop, so h + 1 source rows are read.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The avg instruction used here is a helper macro that must be defined by
// the expanding context (urhadd or uhadd, selected by the pixfunc macro).
.macro pixels16_y2 rnd=1, avg=0
// The final two rows are handled by the epilogue after the loop.
sub w3, w3, #2
ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
1: subs w3, w3, #2
// v0 and v1 alternate as the older and newer row.
avg v2.16B, v0.16B, v1.16B
ld1 {v0.16B}, [x1], x2
avg v3.16B, v0.16B, v1.16B
ld1 {v1.16B}, [x1], x2
.if \avg
// Average with the existing destination rows, then rewind x0 by one row.
ld1 {v4.16B}, [x0], x2
ld1 {v5.16B}, [x0]
urhadd v2.16B, v2.16B, v4.16B
urhadd v3.16B, v3.16B, v5.16B
sub x0, x0, x2
.endif
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
b.ne 1b
// Epilogue: same as one loop iteration but without the trailing row load.
avg v2.16B, v0.16B, v1.16B
ld1 {v0.16B}, [x1], x2
avg v3.16B, v0.16B, v1.16B
.if \avg
ld1 {v4.16B}, [x0], x2
ld1 {v5.16B}, [x0]
urhadd v2.16B, v2.16B, v4.16B
urhadd v3.16B, v3.16B, v5.16B
sub x0, x0, x2
.endif
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
ret
.endm
// Body of the 16-byte-wide routine that combines each source byte with its
// right, lower and lower-right neighbours: the four bytes are summed in
// 16-bit lanes and narrowed with a shift right by 2.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// mshrn, mshrn2 and NRND are helper macros that must be defined by the
// expanding context (see the pixfunc macro): with rounding they are
// rshrn/rshrn2 and NRND drops its argument; without rounding they are
// shrn/shrn2 and NRND emits its argument, which adds the constant 1 (v26)
// before the truncating shift.
// Register roles in the loop:
//   v16/v20 = horizontal pair sums (low/high 8 pixels) of one row
//   v18/v22 = horizontal pair sums of the other row
// Two output rows are produced per iteration; the last two come from the
// epilogue after the loop, so h + 1 source rows are read.
.macro pixels16_xy2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.16B, v1.16B}, [x1], x2
ld1 {v4.16B, v5.16B}, [x1], x2
NRND movi v26.8H, #1
// Shift each row left by one byte to get the right-hand neighbours.
ext v1.16B, v0.16B, v1.16B, #1
ext v5.16B, v4.16B, v5.16B, #1
uaddl v16.8H, v0.8B, v1.8B
uaddl2 v20.8H, v0.16B, v1.16B
uaddl v18.8H, v4.8B, v5.8B
uaddl2 v22.8H, v4.16B, v5.16B
1: subs w3, w3, #2
ld1 {v0.16B, v1.16B}, [x1], x2
// First output row of this iteration: sum of the two stored row sums.
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v30.16B, v0.16B, v1.16B, #1
add v1.8H, v20.8H, v22.8H
mshrn v28.8B, v24.8H, #2
NRND add v1.8H, v1.8H, v26.8H
mshrn2 v28.16B, v1.8H, #2
.if \avg
// v16 is free as a temporary here; it is recomputed right below.
ld1 {v16.16B}, [x0]
urhadd v28.16B, v28.16B, v16.16B
.endif
uaddl v16.8H, v0.8B, v30.8B
ld1 {v2.16B, v3.16B}, [x1], x2
uaddl2 v20.8H, v0.16B, v30.16B
st1 {v28.16B}, [x0], x2
// Second output row of this iteration.
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v3.16B, v2.16B, v3.16B, #1
add v0.8H, v20.8H, v22.8H
mshrn v30.8B, v24.8H, #2
NRND add v0.8H, v0.8H, v26.8H
mshrn2 v30.16B, v0.8H, #2
.if \avg
// v18 is free as a temporary here; it is recomputed right below.
ld1 {v18.16B}, [x0]
urhadd v30.16B, v30.16B, v18.16B
.endif
uaddl v18.8H, v2.8B, v3.8B
uaddl2 v22.8H, v2.16B, v3.16B
st1 {v30.16B}, [x0], x2
b.gt 1b
// Epilogue: the last two output rows; only one more source row is loaded.
ld1 {v0.16B, v1.16B}, [x1], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v30.16B, v0.16B, v1.16B, #1
add v1.8H, v20.8H, v22.8H
mshrn v28.8B, v24.8H, #2
NRND add v1.8H, v1.8H, v26.8H
mshrn2 v28.16B, v1.8H, #2
.if \avg
ld1 {v16.16B}, [x0]
urhadd v28.16B, v28.16B, v16.16B
.endif
uaddl v16.8H, v0.8B, v30.8B
uaddl2 v20.8H, v0.16B, v30.16B
st1 {v28.16B}, [x0], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
add v0.8H, v20.8H, v22.8H
mshrn v30.8B, v24.8H, #2
NRND add v0.8H, v0.8H, v26.8H
mshrn2 v30.16B, v0.8H, #2
.if \avg
ld1 {v18.16B}, [x0]
urhadd v30.16B, v30.16B, v18.16B
.endif
st1 {v30.16B}, [x0], x2
ret
.endm
// Body of the 8-byte-wide copy (avg=0) or average-into-destination (avg=1)
// routine. Four rows are handled per loop iteration.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The loop only exits when w3 reaches exactly zero, so the row count is
// expected to be a nonzero multiple of 4.
// The rnd argument is not referenced in this body.
.macro pixels8 rnd=1, avg=0
1: ld1 {v0.8B}, [x1], x2
ld1 {v1.8B}, [x1], x2
ld1 {v2.8B}, [x1], x2
ld1 {v3.8B}, [x1], x2
.if \avg
// Read the four existing destination rows through x0 itself, average, and
// then rewind x0 by four rows (x2 << 2) for the stores below.
ld1 {v4.8B}, [x0], x2
urhadd v0.8B, v0.8B, v4.8B
ld1 {v5.8B}, [x0], x2
urhadd v1.8B, v1.8B, v5.8B
ld1 {v6.8B}, [x0], x2
urhadd v2.8B, v2.8B, v6.8B
ld1 {v7.8B}, [x0], x2
urhadd v3.8B, v3.8B, v7.8B
sub x0, x0, x2, lsl #2
.endif
subs w3, w3, #4
st1 {v0.8B}, [x0], x2
st1 {v1.8B}, [x0], x2
st1 {v2.8B}, [x0], x2
st1 {v3.8B}, [x0], x2
b.ne 1b
ret
.endm
// Body of the 8-byte-wide routine that averages every source byte with its
// right-hand neighbour. Two rows are handled per loop iteration.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The avg instruction used here is a helper macro that must be defined by
// the expanding context (urhadd or uhadd, selected by the pixfunc macro).
// Each row load fetches 16 bytes, of which bytes 0..8 are used.
.macro pixels8_x2 rnd=1, avg=0
1: ld1 {v0.8B, v1.8B}, [x1], x2
// v1 = the row shifted left by one byte (bytes 1..8).
ext v1.8B, v0.8B, v1.8B, #1
ld1 {v2.8B, v3.8B}, [x1], x2
ext v3.8B, v2.8B, v3.8B, #1
subs w3, w3, #2
avg v0.8B, v0.8B, v1.8B
avg v2.8B, v2.8B, v3.8B
.if \avg
// Average with the two existing destination rows, then step x0 back to the
// first of them for the stores below.
ld1 {v4.8B}, [x0], x2
ld1 {v5.8B}, [x0]
urhadd v0.8B, v0.8B, v4.8B
urhadd v2.8B, v2.8B, v5.8B
sub x0, x0, x2
.endif
st1 {v0.8B}, [x0], x2
st1 {v2.8B}, [x0], x2
b.ne 1b
ret
.endm
// Body of the 8-byte-wide routine that averages every source row with the
// row below it. Two output rows are produced per loop iteration and the
// last two are produced after the loop, so h + 1 source rows are read.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// The avg instruction used here is a helper macro that must be defined by
// the expanding context (urhadd or uhadd, selected by the pixfunc macro).
.macro pixels8_y2 rnd=1, avg=0
// The final two rows are handled by the epilogue after the loop.
sub w3, w3, #2
ld1 {v0.8B}, [x1], x2
ld1 {v1.8B}, [x1], x2
1: subs w3, w3, #2
// v0 and v1 alternate as the older and newer row.
avg v4.8B, v0.8B, v1.8B
ld1 {v0.8B}, [x1], x2
avg v5.8B, v0.8B, v1.8B
ld1 {v1.8B}, [x1], x2
.if \avg
// Average with the existing destination rows, then rewind x0 by one row.
ld1 {v2.8B}, [x0], x2
ld1 {v3.8B}, [x0]
urhadd v4.8B, v4.8B, v2.8B
urhadd v5.8B, v5.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v4.8B}, [x0], x2
st1 {v5.8B}, [x0], x2
b.ne 1b
// Epilogue: same as one loop iteration but without the trailing row load.
avg v4.8B, v0.8B, v1.8B
ld1 {v0.8B}, [x1], x2
avg v5.8B, v0.8B, v1.8B
.if \avg
ld1 {v2.8B}, [x0], x2
ld1 {v3.8B}, [x0]
urhadd v4.8B, v4.8B, v2.8B
urhadd v5.8B, v5.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v4.8B}, [x0], x2
st1 {v5.8B}, [x0], x2
ret
.endm
// Body of the 8-byte-wide routine that combines each source byte with its
// right, lower and lower-right neighbours: the four bytes are summed in
// 16-bit lanes and narrowed with a shift right by 2.
//   x0 = destination, x1 = source, x2 = line size, w3 = row count.
// mshrn and NRND are helper macros that must be defined by the expanding
// context (see the pixfunc macro): with rounding mshrn is rshrn and NRND
// drops its argument; without rounding mshrn is shrn and NRND emits its
// argument, which adds the constant 1 (v19) before the truncating shift.
// Register roles: v16 and v17 hold the horizontal pair sums of the two most
// recent rows. Each row load fetches 16 bytes, of which bytes 0..8 are used.
// Two output rows are produced per iteration; the last two come from the
// epilogue after the loop, so h + 1 source rows are read.
.macro pixels8_xy2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
NRND movi v19.8H, #1
// ext with #1 yields bytes 1..15 of the first operand followed by byte 0 of
// the second; only the low 8 bytes are consumed by uaddl below, so the old
// contents of v4 and v6 never reach the result.
ext v4.16B, v0.16B, v4.16B, #1
ext v6.16B, v1.16B, v6.16B, #1
uaddl v16.8H, v0.8B, v4.8B
uaddl v17.8H, v1.8B, v6.8B
1: subs w3, w3, #2
ld1 {v0.16B}, [x1], x2
// First output row of this iteration.
add v18.8H, v16.8H, v17.8H
ext v4.16B, v0.16B, v4.16B, #1
NRND add v18.8H, v18.8H, v19.8H
uaddl v16.8H, v0.8B, v4.8B
mshrn v5.8B, v18.8H, #2
ld1 {v1.16B}, [x1], x2
// Second output row of this iteration.
add v18.8H, v16.8H, v17.8H
.if \avg
ld1 {v7.8B}, [x0]
urhadd v5.8B, v5.8B, v7.8B
.endif
NRND add v18.8H, v18.8H, v19.8H
st1 {v5.8B}, [x0], x2
mshrn v7.8B, v18.8H, #2
.if \avg
ld1 {v5.8B}, [x0]
urhadd v7.8B, v7.8B, v5.8B
.endif
ext v6.16B, v1.16B, v6.16B, #1
uaddl v17.8H, v1.8B, v6.8B
st1 {v7.8B}, [x0], x2
b.gt 1b
// Epilogue: the last two output rows; only one more source row is loaded.
ld1 {v0.16B}, [x1], x2
add v18.8H, v16.8H, v17.8H
ext v4.16B, v0.16B, v4.16B, #1
NRND add v18.8H, v18.8H, v19.8H
uaddl v16.8H, v0.8B, v4.8B
mshrn v5.8B, v18.8H, #2
add v18.8H, v16.8H, v17.8H
.if \avg
ld1 {v7.8B}, [x0]
urhadd v5.8B, v5.8B, v7.8B
.endif
NRND add v18.8H, v18.8H, v19.8H
st1 {v5.8B}, [x0], x2
mshrn v7.8B, v18.8H, #2
.if \avg
ld1 {v5.8B}, [x0]
urhadd v7.8B, v7.8B, v5.8B
.endif
st1 {v7.8B}, [x0], x2
ret
.endm
// Emits one exported function named ff_<pfx><name><suf>_neon whose body is
// the expansion of the macro called <name>, passing rnd and avg through.
// Before expanding the body it defines four temporary helper macros that the
// body macros use, and removes them again afterwards so the next expansion
// can redefine them:
//   avg    -> urhadd (rnd=1) or uhadd (rnd=0)
//   mshrn  -> rshrn  (rnd=1) or shrn  (rnd=0)
//   mshrn2 -> rshrn2 (rnd=1) or shrn2 (rnd=0)
//   NRND   -> drops its argument (rnd=1) or emits it verbatim (rnd=0)
.macro pixfunc pfx, name, suf, rnd=1, avg=0
.if \rnd
.macro avg rd, rn, rm
urhadd \rd, \rn, \rm
.endm
.macro mshrn rd, rn, rm
rshrn \rd, \rn, \rm
.endm
.macro mshrn2 rd, rn, rm
rshrn2 \rd, \rn, \rm
.endm
// Rounding variant: instructions prefixed with NRND are omitted.
.macro NRND insn:vararg
.endm
.else
.macro avg rd, rn, rm
uhadd \rd, \rn, \rm
.endm
.macro mshrn rd, rn, rm
shrn \rd, \rn, \rm
.endm
.macro mshrn2 rd, rn, rm
shrn2 \rd, \rn, \rm
.endm
// No-rounding variant: instructions prefixed with NRND are kept.
.macro NRND insn:vararg
\insn
.endm
.endif
function ff_\pfx\name\suf\()_neon, export=1
\name \rnd, \avg
endfunc
.purgem avg
.purgem mshrn
.purgem mshrn2
.purgem NRND
.endm
// Emits two functions for one body macro: the rounding variant with an empty
// suffix and the non-rounding variant with the _no_rnd suffix.
.macro pixfunc2 pfx, name, avg=0
pixfunc \pfx, \name, rnd=1, avg=\avg
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm
// Function instantiations.
// Each h264 qpel mc00 stub below only loads the row count into w3 and has no
// ret or branch of its own, so execution continues into the function that is
// emitted directly after it (assuming endfunc emits no instructions -- its
// definition is not visible here). The order of these lines therefore matters.
function ff_put_h264_qpel16_mc00_neon, export=1
mov w3, #16
endfunc
// Must stay directly after the stub above: this is its fall-through target.
pixfunc put_, pixels16, avg=0
pixfunc2 put_, pixels16_x2, avg=0
pixfunc2 put_, pixels16_y2, avg=0
pixfunc2 put_, pixels16_xy2, avg=0
function ff_avg_h264_qpel16_mc00_neon, export=1
mov w3, #16
endfunc
// Must stay directly after the stub above: this is its fall-through target.
pixfunc avg_, pixels16, avg=1
pixfunc2 avg_, pixels16_x2, avg=1
pixfunc2 avg_, pixels16_y2, avg=1
pixfunc2 avg_, pixels16_xy2, avg=1
function ff_put_h264_qpel8_mc00_neon, export=1
mov w3, #8
endfunc
// Must stay directly after the stub above: this is its fall-through target.
pixfunc put_, pixels8, avg=0
pixfunc2 put_, pixels8_x2, avg=0
pixfunc2 put_, pixels8_y2, avg=0
pixfunc2 put_, pixels8_xy2, avg=0
function ff_avg_h264_qpel8_mc00_neon, export=1
mov w3, #8
endfunc
// Must stay directly after the stub above: this is its fall-through target.
// The 8-wide avg routines are emitted in the rounding variant only.
pixfunc avg_, pixels8, avg=1
pixfunc avg_, pixels8_x2, avg=1
pixfunc avg_, pixels8_y2, avg=1
pixfunc avg_, pixels8_xy2, avg=1

View File

@@ -0,0 +1,46 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavutil/internal.h"
#include "libavcodec/imdct15.h"
#include "asm-offsets.h"
/*
 * Tie the IMDCT15Context field offsets to the CELT_* constants from
 * asm-offsets.h.
 * NOTE(review): AV_CHECK_OFFSET is defined elsewhere; presumably it is a
 * compile-time check that fails the build when a field offset and its
 * constant disagree -- confirm in libavutil/internal.h.
 */
AV_CHECK_OFFSET(IMDCT15Context, exptab, CELT_EXPTAB);
AV_CHECK_OFFSET(IMDCT15Context, fft_n, CELT_FFT_N);
AV_CHECK_OFFSET(IMDCT15Context, len2, CELT_LEN2);
AV_CHECK_OFFSET(IMDCT15Context, len4, CELT_LEN4);
AV_CHECK_OFFSET(IMDCT15Context, tmp, CELT_TMP);
AV_CHECK_OFFSET(IMDCT15Context, twiddle_exptab, CELT_TWIDDLE);
/* NEON half-IMDCT routine installed by ff_imdct15_init_aarch64(). */
void ff_celt_imdct_half_neon(IMDCT15Context *s, float *dst, const float *src,
ptrdiff_t stride, float scale);
/**
 * Select the NEON half-IMDCT implementation when the running CPU supports it.
 *
 * @param s context whose imdct_half pointer is replaced; it is left untouched
 *          when NEON is not available
 */
void ff_imdct15_init_aarch64(IMDCT15Context *s)
{
    const int cpu_caps = av_get_cpu_flags();

    if (!have_neon(cpu_caps))
        return;

    s->imdct_half = ff_celt_imdct_half_neon;
}

View File

@@ -0,0 +1,647 @@
/*
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#include "asm-offsets.h"
// Defines a 16-byte constant named shuffle_<a><b><c><d>. Each row below lists
// the four byte indices of one 32-bit lane, so the constant used as a tbl
// index vector reorders the 32-bit lanes of a register into a, b, c, d.
.macro shuffle a, b, c, d
const shuffle_\a\b\c\d, align=4
.byte (\a * 4), (\a * 4 + 1), (\a * 4 + 2), (\a * 4 + 3)
.byte (\b * 4), (\b * 4 + 1), (\b * 4 + 2), (\b * 4 + 3)
.byte (\c * 4), (\c * 4 + 1), (\c * 4 + 2), (\c * 4 + 3)
.byte (\d * 4), (\d * 4 + 1), (\d * 4 + 2), (\d * 4 + 3)
endconst
.endm
// The four lane orders referenced by fft_b15_calc_neon.
shuffle 0, 2, 1, 3
shuffle 1, 0, 3, 2
shuffle 2, 3, 0, 1
shuffle 3, 1, 2, 0
// 5-point complex FFT building block (internal, register-based interface).
// Inputs:
//   x1        = pointer to the first input element
//   x2        = distance between input elements, in 8-byte units; it is
//               converted to bytes in place, so callers must reload it
//               before every call
//   v15       = the four constants of the fact5 table
//   x11..x14  = addresses of the shuffle_0213/1032/2301/3120 constants
// Outputs:
//   v0.2s = out[0] (sum of all five inputs)
//   v6    = accumulated real parts, v7 = accumulated imaginary parts
//           NOTE(review): presumably these are out[1..4] -- confirm with the
//           C reference implementation.
// Clobbers x1, x2 and v16..v31.
function fft5_neon
lsl x2, x2, #3
// v24 = in[0] (re, im); v25 = real parts and v26 = imaginary parts of in[1..4].
ld1 {v24.2s}, [x1], x2
ld2 {v25.s,v26.s}[0], [x1], x2
ld2 {v25.s,v26.s}[1], [x1], x2
ld2 {v25.s,v26.s}[2], [x1], x2
ld2 {v25.s,v26.s}[3], [x1]
// Broadcast in[0].re and in[0].im as the starting values of the accumulators.
dup v6.4s, v24.s[0]
dup v7.4s, v24.s[1]
// Two pairwise-add steps reduce in[1..4] to a single (re, im) sum.
faddp v0.4s, v25.4s, v26.4s
// z[][0], z[][3]
fmul v16.4s, v25.4s, v15.s[0] // rr
fmul v17.4s, v25.4s, v15.s[1] // ri
fmul v18.4s, v26.4s, v15.s[0] // ir
fmul v19.4s, v26.4s, v15.s[1] // ii
faddp v0.4s, v0.4s, v0.4s
// z[][1], z[][2]
fmul v20.4s, v25.4s, v15.s[2] // rr
fmul v21.4s, v25.4s, v15.s[3] // ri
fmul v22.4s, v26.4s, v15.s[2] // ir
fmul v23.4s, v26.4s, v15.s[3] // ii
fadd v0.2s, v24.2s, v0.2s // out[0]
// z[0123][0], z[0123][3]
fsub v24.4s, v16.4s, v19.4s // (c).re = rr - ii;
fadd v27.4s, v16.4s, v19.4s // (d).re = rr + ii;
// The shuffle tables are loaded into registers whose products were just consumed.
ld1 {v16.16b}, [x11]
ld1 {v19.16b}, [x14]
fadd v28.4s, v17.4s, v18.4s // (c).im = ri + ir;
fsub v31.4s, v18.4s, v17.4s // (d).im = -ri + ir;
ld1 {v17.16b}, [x12]
// z[0123][1], z[0123][2]
fsub v25.4s, v20.4s, v23.4s // (c).re = rr - ii;
fadd v26.4s, v20.4s, v23.4s // (d).re = rr + ii;
ld1 {v18.16b}, [x13]
fadd v29.4s, v21.4s, v22.4s // (c).im = ri + ir;
fsub v30.4s, v22.4s, v21.4s // (d).im = -ri + ir;
// Reorder the lanes of every product vector with its shuffle table.
//real
tbl v20.16b, {v24.16b}, v16.16b
tbl v21.16b, {v25.16b}, v17.16b
tbl v22.16b, {v26.16b}, v18.16b
tbl v23.16b, {v27.16b}, v19.16b
//imag
tbl v16.16b, {v28.16b}, v16.16b
tbl v17.16b, {v29.16b}, v17.16b
tbl v18.16b, {v30.16b}, v18.16b
tbl v19.16b, {v31.16b}, v19.16b
// Sum the four shuffled terms onto the broadcast in[0] value.
fadd v6.4s, v6.4s, v20.4s
fadd v22.4s, v22.4s, v23.4s
fadd v7.4s, v7.4s, v16.4s
fadd v18.4s, v18.4s, v19.4s
fadd v21.4s, v21.4s, v22.4s
fadd v17.4s, v17.4s, v18.4s
fadd v6.4s, v6.4s, v21.4s
fadd v7.4s, v7.4s, v17.4s
ret
endfunc
// 15-point complex FFT built from three fft5_neon calls (internal,
// register-based interface).
// Inputs:
//   x0 = output pointer (15 complex values, 8 bytes each, written in order)
//   x1 = input pointer
//   x3 = input stride, in 8-byte units
//   v8..v14 = twiddle factors prepared by fft_b15_calc_neon:
//             v8/v9 = exp[1-4] re/im, v10/v11 = exp[6-9] re/im,
//             v12/v13 = exp[11-14] re/im, v14 = exp[5], exp[10]
//   v15, x11..x14 = as required by fft5_neon
// x8 keeps the input pointer and x9 the return address across the calls;
// x0 is advanced past the output.
function fft15_neon
mov x8, x1
mov x9, x30
// Sub-FFT over in[1], in[4], in[7], ...; x2 must be rebuilt before each call
// because fft5_neon scales it in place.
add x2, x3, x3, lsl #1 // 3 * stride
add x1, x8, x3, lsl #3 // in + 1 * stride
bl fft5_neon
// Park the results: v1 = first output, v2/v3 = re/im accumulators.
mov v1.8b, v0.8b
mov v2.16b, v6.16b
mov v3.16b, v7.16b
// Sub-FFT over in[2], in[5], in[8], ...
add x1, x8, x3, lsl #4 // in + 2 * stride
add x2, x3, x3, lsl #1 // 3 * stride
bl fft5_neon
// v1 = first outputs of both sub-FFTs interleaved; v4/v5 = re/im accumulators.
zip1 v1.4s, v1.4s, v0.4s
mov v4.16b, v6.16b
mov v5.16b, v7.16b
// Sub-FFT over in[0], in[3], in[6], ...; its results stay in v0, v6, v7.
mov x1, x8 // in + 0 * stride
add x2, x3, x3, lsl #1 // 3 * stride
bl fft5_neon
// out[0] = sum of the first outputs of the three sub-FFTs.
faddp v20.4s, v1.4s, v1.4s
ext v18.16b, v8.16b, v8.16b, #4
ext v19.16b, v9.16b, v9.16b, #4
mov v16.16b, v6.16b
mov v17.16b, v7.16b
fadd v20.2s, v20.2s, v0.2s
uzp1 v18.4s, v18.4s, v10.4s // exp[2,4,6,8].re
uzp1 v19.4s, v19.4s, v11.4s // exp[2,4,6,8].im
st1 {v20.2s}, [x0], #8 // out[0]
// out[1-4]: third sub-FFT result plus the other two results, each multiplied
// (complex) by its twiddle vector.
fmla v16.4s, v2.4s, v8.4s
fmls v16.4s, v3.4s, v9.4s
fmla v17.4s, v2.4s, v9.4s
fmla v17.4s, v3.4s, v8.4s
fmla v16.4s, v4.4s, v18.4s
fmls v16.4s, v5.4s, v19.4s
fmla v17.4s, v4.4s, v19.4s
fmla v17.4s, v5.4s, v18.4s
// Re-interleave re/im for storing.
zip1 v18.4s, v16.4s, v17.4s
zip2 v19.4s, v16.4s, v17.4s
// out[5] and out[10]: combine the two parked first outputs (v1) with the
// exp[5]/exp[10] twiddles held in v14.
rev64 v31.4s, v14.4s
trn1 v28.2d, v1.2d, v1.2d
trn2 v29.2d, v1.2d, v1.2d
zip1 v30.2d, v14.2d, v31.2d
zip2 v31.2d, v14.2d, v31.2d
st1 {v18.4s,v19.4s}, [x0], #32 // out[1-4]
fmul v16.4s, v28.4s, v30.4s
fmul v17.4s, v29.4s, v30.4s
fmls v16.4s, v29.4s, v31.4s
fmla v17.4s, v28.4s, v31.4s
faddp v16.4s, v16.4s, v16.4s
faddp v17.4s, v17.4s, v17.4s
zip1 v18.2s, v16.2s, v17.2s
zip2 v19.2s, v16.2s, v17.2s
fadd v18.2s, v18.2s, v0.2s
// v0 now holds out[10]; it is stored further below.
fadd v0.2s, v19.2s, v0.2s
// out[6-9]: same pattern as out[1-4] with a different twiddle selection.
ext v30.16b, v12.16b, v12.16b, #4
ext v31.16b, v13.16b, v13.16b, #4
mov v16.16b, v6.16b
mov v17.16b, v7.16b
uzp1 v30.4s, v30.4s, v8.4s
uzp1 v31.4s, v31.4s, v9.4s
st1 {v18.2s}, [x0], #8 // out[5]
fmla v16.4s, v2.4s, v10.4s
fmls v16.4s, v3.4s, v11.4s
fmla v17.4s, v2.4s, v11.4s
fmla v17.4s, v3.4s, v10.4s
fmla v16.4s, v4.4s, v30.4s
fmls v16.4s, v5.4s, v31.4s
fmla v17.4s, v4.4s, v31.4s
fmla v17.4s, v5.4s, v30.4s
zip1 v18.4s, v16.4s, v17.4s
zip2 v19.4s, v16.4s, v17.4s
// out[11-14]: accumulated directly in v6/v7, which are no longer needed.
ext v30.16b, v10.16b, v10.16b, #4
ext v31.16b, v11.16b, v11.16b, #4
fmla v6.4s, v2.4s, v12.4s
fmls v6.4s, v3.4s, v13.4s
st1 {v18.4s,v19.4s}, [x0], #32 // out[6-9]
uzp1 v30.4s, v30.4s, v12.4s
uzp1 v31.4s, v31.4s, v13.4s
fmla v7.4s, v2.4s, v13.4s
fmla v7.4s, v3.4s, v12.4s
st1 {v0.2s}, [x0], #8 // out[10]
fmla v6.4s, v4.4s, v30.4s
fmls v6.4s, v5.4s, v31.4s
fmla v7.4s, v4.4s, v31.4s
fmla v7.4s, v5.4s, v30.4s
zip1 v18.4s, v6.4s, v7.4s
zip2 v19.4s, v6.4s, v7.4s
st1 {v18.4s,v19.4s}, [x0], #32 // out[11-14]
// Return through the address saved in x9 (x30 was overwritten by the calls).
ret x9
endfunc
// Butterfly pass combining two half-size transforms in place.
// x0: out, x1: out+len2, x2: exptab, x3: len2
// For every index i it computes t = out[len2 + i] * exptab[i] (complex) and
// stores out[i] + t to the lower half and out[i] - t to the upper half.
// x4 and x5 are the write pointers for the lower and upper half; x0 and x1
// are the read pointers. Elements are complex floats (8 bytes).
// Structure:
//   - if len2 is not a multiple of 4, the first len2 & 3 elements are done
//     one at a time (element 0 without a twiddle multiply)
//   - the remainder is done four elements at a time in a software-pipelined
//     loop (label 8) with two alternative tails (labels 4 and 0)
function fft15_pass
ands x6, x3, #3
mov x4, x0
mov x5, x1
b.eq 9f
// Element 0 of the scalar prologue: plain sum and difference; the exptab
// pointer is advanced past its first entry without reading it.
ld1 {v0.2s}, [x0], #8
ld1 {v1.2s}, [x1], #8
sub x3, x3, x6
subs x6, x6, #1
fadd v2.2s, v0.2s, v1.2s
fsub v3.2s, v0.2s, v1.2s
add x2, x2, #8
st1 {v2.2s}, [x4], #8
st1 {v3.2s}, [x5], #8
b.eq 9f
1:
// Remaining scalar elements: s4/s5 = twiddle re/im, s2/s3 = upper re/im,
// s0/s1 = lower re/im.
subs x6, x6, #1
ldp s4, s5, [x2], #8
ldp s2, s3, [x1], #8
ldp s0, s1, [x0], #8
// (s6, s7) = (s2, s3) * (s4, s5) as a complex product.
fmul s6, s2, s4
fmul s7, s2, s5
fmls s6, s3, v5.s[0]
fmla s7, s3, v4.s[0]
fsub s2, s0, s6
fsub s3, s1, s7
fadd s0, s0, s6
fadd s1, s1, s7
stp s2, s3, [x5], #8
stp s0, s1, [x4], #8
b.gt 1b
9:
// Vector part, pipeline fill: v6/v7 = twiddle re/im, v2/v3 = upper re/im,
// v0/v1 = lower re/im for the first group of four elements.
ld1 {v4.4s,v5.4s}, [x2], #32
ld2 {v2.4s,v3.4s}, [x1], #32
uzp1 v6.4s, v4.4s, v5.4s
uzp2 v7.4s, v4.4s, v5.4s
ld2 {v0.4s,v1.4s}, [x0], #32
8:
// Each trip through this loop handles two groups of four elements; x3 counts
// the elements still to be started.
subs x3, x3, #8
fmul v4.4s, v2.4s, v6.4s
fmul v5.4s, v2.4s, v7.4s
// Fewer than 8 elements were left: finish the current group at label 4.
b.lt 4f
// Finish group A while loading group B into v16..v23.
ld1 {v18.4s,v19.4s}, [x2], #32
fmls v4.4s, v3.4s, v7.4s
fmla v5.4s, v3.4s, v6.4s
ld2 {v22.4s,v23.4s}, [x1], #32
fsub v2.4s, v0.4s, v4.4s
fadd v0.4s, v0.4s, v4.4s
fsub v3.4s, v1.4s, v5.4s
fadd v1.4s, v1.4s, v5.4s
uzp1 v16.4s, v18.4s, v19.4s
uzp2 v17.4s, v18.4s, v19.4s
st2 {v2.4s,v3.4s}, [x5], #32
st2 {v0.4s,v1.4s}, [x4], #32
ld2 {v20.4s,v21.4s}, [x0], #32
fmul v18.4s, v22.4s, v16.4s
fmul v19.4s, v22.4s, v17.4s
// Exactly 8 elements were left: finish group B at label 0 without loading more.
b.eq 0f
// Finish group B while loading the next group A.
ld1 {v4.4s,v5.4s}, [x2], #32
fmls v18.4s, v23.4s, v17.4s
fmla v19.4s, v23.4s, v16.4s
ld2 {v2.4s,v3.4s}, [x1], #32
fsub v22.4s, v20.4s, v18.4s
fadd v20.4s, v20.4s, v18.4s
fsub v23.4s, v21.4s, v19.4s
fadd v21.4s, v21.4s, v19.4s
uzp1 v6.4s, v4.4s, v5.4s
uzp2 v7.4s, v4.4s, v5.4s
st2 {v22.4s,v23.4s}, [x5], #32
st2 {v20.4s,v21.4s}, [x4], #32
ld2 {v0.4s,v1.4s}, [x0], #32
b 8b
4:
// Tail for group A (registers v0..v7).
fmls v4.4s, v3.4s, v7.4s
fmla v5.4s, v3.4s, v6.4s
fsub v2.4s, v0.4s, v4.4s
fadd v0.4s, v0.4s, v4.4s
fsub v3.4s, v1.4s, v5.4s
fadd v1.4s, v1.4s, v5.4s
st2 {v2.4s,v3.4s}, [x5], #32
st2 {v0.4s,v1.4s}, [x4], #32
ret
0:
// Tail for group B (registers v16..v23).
fmls v18.4s, v23.4s, v17.4s
fmla v19.4s, v23.4s, v16.4s
fsub v22.4s, v20.4s, v18.4s
fadd v20.4s, v20.4s, v18.4s
fsub v23.4s, v21.4s, v19.4s
fadd v21.4s, v21.4s, v19.4s
st2 {v22.4s,v23.4s}, [x5], #32
st2 {v20.4s,v21.4s}, [x4], #32
ret
endfunc
// 30-point complex FFT: two fft15_neon calls followed by a fft15_pass
// butterfly (internal, register-based interface).
// Inputs:
//   x1  = output pointer
//   x2  = input pointer
//   x4  = input stride, in 8-byte units
//   x10 = context pointer (used to fetch s->exptab[1])
// The value of x0 on entry is not used.
function fft30_neon, align=6
sub sp, sp, #0x20
stp x20, x21, [sp]
stp x22, x30, [sp, #0x10]
// Keep the arguments in callee-saved registers across the calls.
mov x21, x1
mov x22, x2
mov x20, x4
// First half: inputs 0, 2, 4, ... into out[0..14].
mov x0, x21
mov x1, x22
lsl x3, x20, #1
bl fft15_neon
// Second half: inputs 1, 3, 5, ... into out[15..29].
add x0, x21, #15*8
add x1, x22, x20, lsl #3
lsl x3, x20, #1
bl fft15_neon
// Set up the fft15_pass arguments, restore the saved registers (including
// x30) and tail-branch so that fft15_pass returns to our caller.
ldr x2, [x10, #(CELT_EXPTAB + 8)] // s->exptab[1]
add x0, x21, #0
add x1, x21, #15*8
mov x3, #15
ldp x20, x21, [sp]
ldp x22, x30, [sp, #0x10]
add sp, sp, #0x20
b fft15_pass
endfunc
// Defines fft<n>_neon in terms of the half-size transform fft<n2>_neon:
// two half-size calls followed by a fft15_pass butterfly.
// Inputs of the generated function:
//   x1  = output pointer
//   x2  = input pointer
//   x3  = index N into s->exptab for this size; N - 1 is passed to the
//         half-size calls
//   x4  = input stride, in 8-byte units
//   x10 = context pointer
.macro def_fft n, n2
function fft\n\()_neon, align=6
sub sp, sp, #0x30
stp x20, x21, [sp]
stp x22, x30, [sp, #0x10]
stp x23, x24, [sp, #0x20]
// Keep the arguments in callee-saved registers across the calls.
mov x21, x1
mov x22, x2
mov x23, x3
mov x20, x4
// First half: x1/x2 unchanged, index N - 1, stride doubled.
sub x3, x3, #1
lsl x4, x4, #1
bl fft\n2\()_neon
// Second half: output offset by n2 elements, input offset by one stride.
add x1, x21, #(\n2 * 8)
add x2, x22, x20, lsl #3
sub x3, x23, #1
lsl x4, x20, #1
bl fft\n2\()_neon
// Set up the fft15_pass arguments, restore the saved registers (including
// x30) and tail-branch so that fft15_pass returns to our caller.
add x5, x10, #CELT_EXPTAB
mov x0, x21
ldr x2, [x5, x23, lsl #3] // s->exptab[N]
add x1, x21, #(\n2 * 8)
mov x3, #\n2
ldp x20, x21, [sp]
ldp x22, x30, [sp, #0x10]
ldp x23, x24, [sp, #0x20]
add sp, sp, #0x30
b fft15_pass
endfunc
.endm
// Each size is built on the previous one, starting from fft30_neon.
def_fft 60, 30
def_fft 120, 60
def_fft 240, 120
def_fft 480, 240
def_fft 960, 480
// Common FFT entry: loads the constants shared by all transform sizes into
// registers and dispatches through fft_tab_neon.
// Inputs:
//   x0 = context pointer
//   x3 = index into fft_tab_neon
//   x1, x2, x4 are passed through unchanged to the selected routine
// Set up for the callee:
//   v15       = fact5 constants
//   x11..x14  = addresses of the shuffle constants
//   v8..v14   = de-interleaved entries 1..14 of s->exptab[0]
//   x10       = context pointer
// d8..d15 are saved and restored around the call.
function fft_b15_calc_neon
sub sp, sp, #0x50
ldr x8, [x0, #CELT_EXPTAB] // s->exptab[0]
movrel x6, fact5
movrel x11, shuffle_0213
movrel x12, shuffle_1032
movrel x13, shuffle_2301
movrel x14, shuffle_3120
// Skip the first 8-byte entry of the table.
add x8, x8, #8
movrel x5, fft_tab_neon
stp x20, x30, [sp]
stp d8, d9, [sp, #0x10]
stp d10, d11, [sp, #0x20]
stp d12, d13, [sp, #0x30]
stp d14, d15, [sp, #0x40]
ld1 {v15.4s}, [x6]
// Entries 1-4, 5, 6-9, 10 and 11-14, in table order.
ld1 {v0.4s,v1.4s}, [x8], #32
ld1 {v6.2s}, [x8], #8
ld1 {v2.4s,v3.4s}, [x8], #32
ld1 {v7.2s}, [x8], #8
ld1 {v4.4s,v5.4s}, [x8], #32
uzp1 v8.4s, v0.4s, v1.4s // exp[ 1 - 4].re
uzp2 v9.4s, v0.4s, v1.4s // exp[ 1 - 4].im
uzp1 v10.4s, v2.4s, v3.4s // exp[ 6 - 9].re
uzp2 v11.4s, v2.4s, v3.4s // exp[ 6 - 9].im
uzp1 v12.4s, v4.4s, v5.4s // exp[11 - 14].re
uzp2 v13.4s, v4.4s, v5.4s // exp[11 - 14].im
zip1 v14.4s, v6.4s, v7.4s // exp[5,10].re/exp[5,10].im
// Fetch the routine address from fft_tab_neon[x3] and call it.
add x5, x5, x3, lsl #3
ldr x5, [x5]
mov x10, x0
blr x5
ldp x20, x30, [sp]
ldp d8, d9, [sp, #0x10]
ldp d10, d11, [sp, #0x20]
ldp d12, d13, [sp, #0x30]
ldp d14, d15, [sp, #0x40]
add sp, sp, #0x50
ret
endfunc
// Table of FFT routine addresses, indexed by the value fft_b15_calc_neon
// receives in x3 (index 0 = 15 points, each following entry doubles the size).
// NOTE(review): fft15_neon reads out/in/stride from x0/x1/x3, while every
// other entry reads them from x1/x2/x4 -- confirm that index 0 is never
// dispatched through this table.
const fft_tab_neon, relocate=1
.quad fft15_neon
.quad fft30_neon
.quad fft60_neon
.quad fft120_neon
.quad fft240_neon
.quad fft480_neon
.quad fft960_neon
endconst
// Half IMDCT. Arguments, following the C prototype
// (IMDCT15Context *s, float *dst, const float *src, ptrdiff_t stride, float scale):
//   x0 = s, x1 = dst, x2 = src, x3 = stride (in floats), s0 = scale.
// Three stages:
//   1. pre-rotation: src is read from both ends, multiplied by the twiddle
//      table and written as complex values to the tmp buffer of the context
//   2. complex FFT from tmp into dst via fft_b15_calc_neon
//   3. post-rotation: dst is multiplied by the twiddle table and by scale in
//      place, working from the middle of the buffer outwards
function ff_celt_imdct_half_neon, export=1
sub sp, sp, #0x20
stp x21, x30, [sp]
// scale is kept on the stack because the vector registers are used below.
str s0, [sp, #0x10]
// One ldp fetches two adjacent int fields: w5 = len2, w6 = len4.
ldp w5, w6, [x0, #CELT_LEN2] // CELT_LEN4
// Note: this x10 value is replaced by the ldp into x9/x10 below.
mov x10, x0
mov x21, x1
sub w5, w5, #1
lsl x7, x3, #3 // 2 * stride * sizeof(float)
sub x8, xzr, x3, lsl #3 // -2 * stride * sizeof(float)
mul x5, x5, x3
// One ldp fetches two adjacent pointer fields: x9 = tmp, x10 = twiddle table.
ldp x9, x10, [x0, #CELT_TMP] // CELT_TWIDDLE
ldr w3, [x0, #CELT_FFT_N]
// x5 = address of src[(len2 - 1) * stride], the start of the backward walk.
add x5, x2, x5, lsl #2
mov x11, x9
sub w6, w6, #4
// Stage 1 pipeline fill: v0 = four samples walking backward from the end,
// v1 = four samples walking forward from the start, v2/v3 = twiddle re/im.
ld1 {v0.s}[0], [x5], x8
ld1 {v1.s}[0], [x2], x7
ld1 {v4.4s,v5.4s}, [x10], #32
ld1 {v0.s}[1], [x5], x8
ld1 {v1.s}[1], [x2], x7
uzp1 v2.4s, v4.4s, v5.4s
ld1 {v0.s}[2], [x5], x8
ld1 {v1.s}[2], [x2], x7
uzp2 v3.4s, v4.4s, v5.4s
ld1 {v0.s}[3], [x5], x8
ld1 {v1.s}[3], [x2], x7
1:
// Stage 1 loop: each trip handles two groups of four complex outputs, using
// v0/v1 and v20/v21 alternately as the sample registers.
subs w6, w6, #4
ld1 {v20.s}[0], [x5], x8
ld1 {v21.s}[0], [x2], x7
ld1 {v4.4s,v5.4s}, [x10], #32
// (v6, v7) = (v0, v1) * (v2, v3) as a complex product.
fmul v6.4s, v0.4s, v2.4s
fmul v7.4s, v0.4s, v3.4s
ld1 {v20.s}[1], [x5], x8
ld1 {v21.s}[1], [x2], x7
fmls v6.4s, v1.4s, v3.4s
fmla v7.4s, v1.4s, v2.4s
ld1 {v20.s}[2], [x5], x8
ld1 {v21.s}[2], [x2], x7
uzp1 v2.4s, v4.4s, v5.4s
uzp2 v3.4s, v4.4s, v5.4s
ld1 {v20.s}[3], [x5], x8
ld1 {v21.s}[3], [x2], x7
// Interleave re/im and store to tmp.
zip1 v4.4s, v6.4s, v7.4s
zip2 v5.4s, v6.4s, v7.4s
fmul v6.4s, v20.4s, v2.4s
fmul v7.4s, v20.4s, v3.4s
st1 {v4.4s,v5.4s}, [x9], #32
fmls v6.4s, v21.4s, v3.4s
fmla v7.4s, v21.4s, v2.4s
// The second group was the last one: store it at label 3.
b.eq 3f
subs w6, w6, #4
ld1 {v4.4s,v5.4s}, [x10], #32
ld1 {v0.s}[0], [x5], x8
ld1 {v1.s}[0], [x2], x7
uzp1 v2.4s, v4.4s, v5.4s
ld1 {v0.s}[1], [x5], x8
ld1 {v1.s}[1], [x2], x7
uzp2 v3.4s, v4.4s, v5.4s
ld1 {v0.s}[2], [x5], x8
ld1 {v1.s}[2], [x2], x7
zip1 v4.4s, v6.4s, v7.4s
zip2 v5.4s, v6.4s, v7.4s
ld1 {v0.s}[3], [x5], x8
ld1 {v1.s}[3], [x2], x7
st1 {v4.4s,v5.4s}, [x9], #32
b.gt 1b
// Loop left with one group already loaded in v0/v1: compute it here and
// fall through to the common store.
fmul v6.4s, v0.4s, v2.4s
fmul v7.4s, v0.4s, v3.4s
fmls v6.4s, v1.4s, v3.4s
fmla v7.4s, v1.4s, v2.4s
3:
zip1 v4.4s, v6.4s, v7.4s
zip2 v5.4s, v6.4s, v7.4s
st1 {v4.4s,v5.4s}, [x9], #32
// Stage 2: FFT with x0 = s, x1 = dst (unchanged since entry), x2 = tmp,
// w3 = fft_n (loaded above), x4 = 1.
mov x2, x11
mov x4, #1
bl fft_b15_calc_neon
// x10 holds the context pointer again here: fft_b15_calc_neon copies x0 into it.
ldr w5, [x10, #CELT_LEN4]
ldr x6, [x10, #CELT_TWIDDLE]
ldr s31, [sp, #0x10]
// Stage 3 setup: x1/x3 walk upward from dst + len4 floats and twiddle + len4
// floats; x0/x2 walk downward from 16 bytes below those addresses.
add x1, x21, x5, lsl #2
add x3, x6, x5, lsl #2
sub x0, x1, #16
sub x2, x3, #16
mov x8, #-16
mov x7, #16
// x10/x11 are the matching write pointers (results overwrite dst in place).
mov x10, x0
mov x11, x1
sub w5, w5, #4
ld1 {v0.4s}, [x0], x8
ld1 {v1.4s}, [x1], x7
ld1 {v2.4s}, [x2], x8
ld1 {v3.4s}, [x3], x7
uzp1 v4.4s, v0.4s, v1.4s // z[-i-2, -i-1, +i, i+1].re
uzp2 v6.4s, v0.4s, v1.4s // z[-i-2, -i-1, +i, i+1].im
uzp1 v5.4s, v2.4s, v3.4s // twiddle_exptab[-i-2, -i-1, +i, i+1].re
uzp2 v7.4s, v2.4s, v3.4s // twiddle_exptab[-i-2, -i-1, +i, i+1].im
fmul v1.4s, v6.4s, v5.4s
fmul v0.4s, v6.4s, v7.4s
2:
// Stage 3 loop: finish the products started in the previous iteration while
// loading the next four complex values from each side.
subs w5, w5, #4
ld1 {v20.4s}, [x0], x8
fmla v1.4s, v4.4s, v7.4s
fmls v0.4s, v4.4s, v5.4s
ld1 {v21.4s}, [x1], x7
// Reverse the four lanes of v1 (ext by 8 bytes, then rev64) and apply scale
// to both result vectors.
ext v1.16b, v1.16b, v1.16b, #8
fmul v0.4s, v0.4s, v31.s[0]
ld1 {v2.4s}, [x2], x8
rev64 v1.4s, v1.4s
fmul v1.4s, v1.4s, v31.s[0]
ld1 {v3.4s}, [x3], x7
zip1 v5.4s, v0.4s, v1.4s
zip2 v7.4s, v0.4s, v1.4s
uzp1 v4.4s, v20.4s, v21.4s // z[-i-2, -i-1, +i, i+1].re
uzp2 v6.4s, v20.4s, v21.4s // z[-i-2, -i-1, +i, i+1].im
st1 {v5.4s}, [x10], x8
st1 {v7.4s}, [x11], x7
uzp1 v5.4s, v2.4s, v3.4s // twiddle_exptab[-i-2, -i-1, +i, i+1].re
uzp2 v7.4s, v2.4s, v3.4s // twiddle_exptab[-i-2, -i-1, +i, i+1].im
fmul v1.4s, v6.4s, v5.4s
fmul v0.4s, v6.4s, v7.4s
b.gt 2b
// Epilogue: finish and store the last group without loading further data.
fmla v1.4s, v4.4s, v7.4s
fmls v0.4s, v4.4s, v5.4s
ext v1.16b, v1.16b, v1.16b, #8
fmul v0.4s, v0.4s, v31.s[0]
rev64 v1.4s, v1.4s
fmul v1.4s, v1.4s, v31.s[0]
zip1 v5.4s, v0.4s, v1.4s
zip2 v7.4s, v0.4s, v1.4s
st1 {v5.4s}, [x10], x8
st1 {v7.4s}, [x11], x7
ldp x21, x30, [sp]
add sp, sp, #0x20
ret
endfunc
// Two fifth roots of unity, each stored as a (real, imaginary) float pair:
// [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
const fact5, align=4
.float 0.30901699437494745, 0.95105651629515353 // cos(2*pi/5), sin(2*pi/5)
.float -0.80901699437494734, 0.58778525229247325 // cos(4*pi/5), sin(4*pi/5)
endconst

View File

@@ -0,0 +1,323 @@
/*
* AArch64 NEON optimised MDCT
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
/*
 * ff_imdct_half_neon
 *
 * Registers on entry, as used by the code below:
 *   x0  context pointer: mdct_bits is read from offset 28, tcos from
 *       offset 32 and revtab from offset 8
 *   x1  output buffer (base address for the scattered stores, and the data
 *       pointer handed to ff_fft_calc_neon)
 *   x2  input buffer
 *
 * Three phases:
 *   1. pre-rotation: input samples taken from both ends of the buffer are
 *      multiplied with the tcos table and scattered to the output at the
 *      positions given by revtab
 *   2. in-place FFT through ff_fft_calc_neon
 *   3. post-rotation: the FFT result is rotated again with tcos, working
 *      from the middle of the buffer outwards in both directions
 *
 * x19, x20 and x30 are saved on the stack because the function makes a call.
 */
function ff_imdct_half_neon, export=1
sub sp, sp, #32
stp x19, x20, [sp]
str x30, [sp, #16]
mov x12, #1
ldr w14, [x0, #28] // mdct_bits
ldr x4, [x0, #32] // tcos
ldr x3, [x0, #8] // revtab
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #2 // n4 = n >> 2
// x7 = input + 2*n bytes - 16: the last 16 bytes of the n/2 input floats
add x7, x2, x12, lsl #1
mov x12, #-16
sub x7, x7, #16
// prime the software pipeline: first load and first two products
ld2 {v16.2s,v17.2s}, [x7], x12 // d16=x,n1 d17=x,n0
ld2 {v0.2s,v1.2s}, [x2], #16 // d0 =m0,x d1 =m1,x
rev64 v17.2s, v17.2s
ld2 {v2.2s,v3.2s}, [x4], #16 // d2=c0,c1 d3=s0,s1
fmul v6.2s, v17.2s, v2.2s
fmul v7.2s, v0.2s, v2.2s
// pre-rotation loop: two complex values per iteration, n4 values in total
1:
subs x14, x14, #2
ldr w6, [x3], #4 // one 32-bit load yields two 16-bit revtab entries
fmul v4.2s, v0.2s, v3.2s
fmul v5.2s, v17.2s, v3.2s
fsub v4.2s, v6.2s, v4.2s
fadd v5.2s, v5.2s, v7.2s
ubfm x8, x6, #16, #31 // upper revtab entry (bits 16..31)
ubfm x6, x6, #0, #15 // lower revtab entry (bits 0..15)
add x8, x1, x8, lsl #3 // index * 8 bytes: one complex float pair
add x6, x1, x6, lsl #3
b.eq 2f
// not the last iteration: load the next inputs before storing the results
ld2 {v16.2s,v17.2s}, [x7], x12
ld2 {v0.2s,v1.2s}, [x2], #16
rev64 v17.2s, v17.2s
ld2 {v2.2s,v3.2s}, [x4], #16 // d2=c0,c1 d3=s0,s1
fmul v6.2s, v17.2s, v2.2s
fmul v7.2s, v0.2s, v2.2s
st2 {v4.s,v5.s}[0], [x6]
st2 {v4.s,v5.s}[1], [x8]
b 1b
2:
st2 {v4.s,v5.s}[0], [x6]
st2 {v4.s,v5.s}[1], [x8]
// keep context and output pointers in callee-saved registers across the call;
// x0 and x1 still hold their entry values and are passed on unchanged
mov x19, x0
mov x20, x1
bl X(ff_fft_calc_neon)
mov x12, #1
ldr w14, [x19, #28] // mdct_bits
ldr x4, [x19, #32] // tcos
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #3 // n8 = n >> 3
// x4/x6 walk upwards from the middle of tcos/output, x1/x3 walk downwards;
// x8 and x0 are the matching store pointers
add x4, x4, x14, lsl #3
add x6, x20, x14, lsl #3
sub x1, x4, #16
sub x3, x6, #16
mov x7, #-16
mov x8, x6
mov x0, x3
ld2 {v0.2s,v1.2s}, [x3], x7 // d0 =i1,r1 d1 =i0,r0
ld2 {v20.2s,v21.2s},[x6], #16 // d20=i2,r2 d21=i3,r3
ld2 {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d17=s1,s0
// post-rotation loop: two values from each direction per iteration
3:
subs x14, x14, #2
fmul v7.2s, v0.2s, v17.2s
ld2 {v18.2s,v19.2s},[x4], #16 // d18=c2,c3 d19=s2,s3
fmul v4.2s, v1.2s, v17.2s
fmul v6.2s, v21.2s, v19.2s
fmul v5.2s, v20.2s, v19.2s
fmul v22.2s, v1.2s, v16.2s
fmul v23.2s, v21.2s, v18.2s
fmul v24.2s, v0.2s, v16.2s
fmul v25.2s, v20.2s, v18.2s
fadd v7.2s, v7.2s, v22.2s
fadd v5.2s, v5.2s, v23.2s
fsub v4.2s, v4.2s, v24.2s
fsub v6.2s, v6.2s, v25.2s
b.eq 4f
ld2 {v0.2s,v1.2s}, [x3], x7
ld2 {v20.2s,v21.2s},[x6], #16
ld2 {v16.2s,v17.2s},[x1], x7 // d16=c1,c0 d17=s1,s0
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0], x7
st2 {v6.2s,v7.2s}, [x8], #16
b 3b
4:
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
// restore callee-saved registers and release the 32-byte frame
ldp x19, x20, [sp]
ldr x30, [sp, #16]
add sp, sp, #32
ret
endfunc
/*
 * ff_imdct_calc_neon
 *
 * Registers on entry, as used by the code below:
 *   x0  context pointer (mdct_bits is read from offset 28)
 *   x1  output buffer
 *   x2  input buffer (not touched here, passed straight to the half IMDCT)
 *
 * With n = 1 << mdct_bits, ff_imdct_half_neon is called with its output
 * pointer advanced by n bytes. The loop afterwards fills the remaining
 * n bytes at the start and n bytes at the end of the buffer from that
 * result:
 *   - the start receives the negated, order-reversed copy of the floats
 *     that lie just below output + 2*n bytes
 *   - the end receives the order-reversed copy of the floats that lie
 *     just above output + 2*n bytes
 */
function ff_imdct_calc_neon, export=1
sub sp, sp, #32
stp x19, x20, [sp]
str x30, [sp, #16]
ldr w3, [x0, #28] // mdct_bits
mov x19, #1
mov x20, x1 // keep the output base across the call
lsl x19, x19, x3 // n = 1 << mdct_bits
add x1, x1, x19 // half IMDCT writes at output + n bytes
bl X(ff_imdct_half_neon)
add x0, x20, x19, lsl #2 // output + 4*n bytes
add x1, x20, x19, lsl #1 // output + 2*n bytes, read upwards
sub x0, x0, #8 // last 8 bytes before output + 4*n, written downwards
sub x2, x1, #16 // 16 bytes below output + 2*n, read downwards
mov x3, #-16
mov x6, #-8
// each iteration produces 16 bytes at the front and 16 bytes at the back
1:
ld1 {v0.4s}, [x2], x3
prfum pldl1keep, [x0, #-16]
rev64 v0.4s, v0.4s // rev64 plus the ext below reverse all four lanes
ld1 {v2.2s,v3.2s}, [x1], #16
fneg v4.4s, v0.4s
prfum pldl1keep, [x2, #-16]
rev64 v2.2s, v2.2s
rev64 v3.2s, v3.2s
ext v4.16b, v4.16b, v4.16b, #8
st1 {v2.2s}, [x0], x6
st1 {v3.2s}, [x0], x6
st1 {v4.4s}, [x20], #16
subs x19, x19, #16
b.gt 1b
// the two post-indexed loads together pop the 32-byte frame
ldp x19, x20, [sp], #16
ldr x30, [sp], #16
ret
endfunc
/*
 * ff_mdct_calc_neon
 *
 * Registers on entry, as used by the code below:
 *   x0  context pointer: mdct_bits is read from offset 28, tcos from
 *       offset 32 and revtab from offset 8
 *   x1  output buffer (base address for the scattered stores, and the data
 *       pointer handed to ff_fft_calc_neon)
 *   x2  input buffer
 *
 * Three phases:
 *   1. the input is folded (sums and differences of samples read from four
 *      moving pointers), rotated with tcos and scattered to the output at
 *      the positions given by revtab
 *   2. in-place FFT through ff_fft_calc_neon
 *   3. post-rotation with tcos, working from the middle of the buffer
 *      outwards in both directions
 *
 * x19, x20 and x30 are saved on the stack because the function makes a call.
 */
function ff_mdct_calc_neon, export=1
sub sp, sp, #32
stp x19, x20, [sp]
str x30, [sp, #16]
mov x12, #1
ldr w14, [x0, #28] // mdct_bits
ldr x4, [x0, #32] // tcos
ldr x3, [x0, #8] // revtab
lsl x14, x12, x14 // n = 1 << nbits
add x7, x2, x14 // in4u
sub x9, x7, #16 // in4d
add x2, x7, x14, lsl #1 // in3u
add x8, x9, x14, lsl #1 // in3d
// x4 reads tcos upwards from its start, x5 downwards from tcos + 2*n - 16
add x5, x4, x14, lsl #1
sub x5, x5, #16
// compensate for the pre-incrementing revtab load inside the loop
sub x3, x3, #4
mov x12, #-16
lsr x13, x14, #1 // byte offset of the second revtab read, also the loop counter
ld2 {v16.2s,v17.2s}, [x9], x12 // in0u0,in0u1 in4d1,in4d0
ld2 {v18.2s,v19.2s}, [x8], x12 // in2u0,in2u1 in3d1,in3d0
ld2 {v0.2s, v1.2s}, [x7], #16 // in4u0,in4u1 in2d1,in2d0
rev64 v17.2s, v17.2s // in4d0,in4d1 in3d0,in3d1
rev64 v19.2s, v19.2s // in4d0,in4d1 in3d0,in3d1
ld2 {v2.2s, v3.2s}, [x2], #16 // in3u0,in3u1 in1d1,in1d0
fsub v0.2s, v17.2s, v0.2s // in4d-in4u I
ld2 {v20.2s,v21.2s}, [x4], #16 // c0,c1 s0,s1
rev64 v1.2s, v1.2s // in2d0,in2d1 in1d0,in1d1
rev64 v3.2s, v3.2s // in2d0,in2d1 in1d0,in1d1
ld2 {v30.2s,v31.2s}, [x5], x12 // c2,c3 s2,s3
fadd v2.2s, v2.2s, v19.2s // in3u+in3d -R
fsub v16.2s, v16.2s, v1.2s // in0u-in2d R
fadd v18.2s, v18.2s, v3.2s // in2u+in1d -I
// pre-rotation loop: four complex values are produced per iteration
1:
fmul v7.2s, v0.2s, v21.2s // I*s
// x3 advances by 4 and x13 shrinks by 8 per iteration, so this read walks
// downwards through revtab while the next one walks upwards
ldr w10, [x3, x13]
fmul v6.2s, v2.2s, v20.2s // -R*c
ldr w6, [x3, #4]!
fmul v4.2s, v2.2s, v21.2s // -R*s
fmul v5.2s, v0.2s, v20.2s // I*c
fmul v24.2s, v16.2s, v30.2s // R*c
fmul v25.2s, v18.2s, v31.2s // -I*s
fmul v22.2s, v16.2s, v31.2s // R*s
fmul v23.2s, v18.2s, v30.2s // I*c
subs x14, x14, #16
subs x13, x13, #8 // the flags of this subs decide the b.eq below
fsub v6.2s, v6.2s, v7.2s // -R*c-I*s
fadd v7.2s, v4.2s, v5.2s // -R*s+I*c
fsub v24.2s, v25.2s, v24.2s // I*s-R*c
fadd v25.2s, v22.2s, v23.2s // R*s-I*c
b.eq 1f
// not the last iteration: fetch and fold the next inputs, then store
mov x12, #-16
ld2 {v16.2s,v17.2s}, [x9], x12 // in0u0,in0u1 in4d1,in4d0
ld2 {v18.2s,v19.2s}, [x8], x12 // in2u0,in2u1 in3d1,in3d0
fneg v7.2s, v7.2s // R*s-I*c
ld2 {v0.2s, v1.2s}, [x7], #16 // in4u0,in4u1 in2d1,in2d0
rev64 v17.2s, v17.2s // in4d0,in4d1 in3d0,in3d1
rev64 v19.2s, v19.2s // in4d0,in4d1 in3d0,in3d1
ld2 {v2.2s, v3.2s}, [x2], #16 // in3u0,in3u1 in1d1,in1d0
fsub v0.2s, v17.2s, v0.2s // in4d-in4u I
ld2 {v20.2s,v21.2s}, [x4], #16 // c0,c1 s0,s1
rev64 v1.2s, v1.2s // in2d0,in2d1 in1d0,in1d1
rev64 v3.2s, v3.2s // in2d0,in2d1 in1d0,in1d1
ld2 {v30.2s,v31.2s}, [x5], x12 // c2,c3 s2,s3
fadd v2.2s, v2.2s, v19.2s // in3u+in3d -R
fsub v16.2s, v16.2s, v1.2s // in0u-in2d R
fadd v18.2s, v18.2s, v3.2s // in2u+in1d -I
// x12 is reused as a scratch address here, hence the mov x12, #-16 above;
// each 32-bit revtab word holds two 16-bit output indices
ubfm x12, x6, #16, #31
ubfm x6, x6, #0, #15
add x12, x1, x12, lsl #3
add x6, x1, x6, lsl #3
st2 {v6.s,v7.s}[0], [x6]
st2 {v6.s,v7.s}[1], [x12]
ubfm x6, x10, #16, #31
ubfm x10, x10, #0, #15
add x6 , x1, x6, lsl #3
add x10, x1, x10, lsl #3
st2 {v24.s,v25.s}[0], [x10]
st2 {v24.s,v25.s}[1], [x6]
b 1b
// last iteration: store the final four values without loading more input
1:
fneg v7.2s, v7.2s // R*s-I*c
ubfm x12, x6, #16, #31
ubfm x6, x6, #0, #15
add x12, x1, x12, lsl #3
add x6, x1, x6, lsl #3
st2 {v6.s,v7.s}[0], [x6]
st2 {v6.s,v7.s}[1], [x12]
ubfm x6, x10, #16, #31
ubfm x10, x10, #0, #15
add x6 , x1, x6, lsl #3
add x10, x1, x10, lsl #3
st2 {v24.s,v25.s}[0], [x10]
st2 {v24.s,v25.s}[1], [x6]
// keep context and output pointers in callee-saved registers across the call
mov x19, x0
mov x20, x1
bl X(ff_fft_calc_neon)
mov x12, #1
ldr w14, [x19, #28] // mdct_bits
ldr x4, [x19, #32] // tcos
lsl x12, x12, x14 // n = 1 << nbits
lsr x14, x12, #3 // n8 = n >> 3
// x4/x6 walk upwards from the middle of tcos/output, x1/x3 walk downwards;
// x8 and x0 are the matching store pointers
add x4, x4, x14, lsl #3
add x6, x20, x14, lsl #3
sub x1, x4, #16
sub x3, x6, #16
mov x7, #-16
mov x8, x6
mov x0, x3
ld2 {v0.2s,v1.2s}, [x3], x7 // d0 =r1,i1 d1 =r0,i0
ld2 {v20.2s,v21.2s}, [x6], #16 // d20=r2,i2 d21=r3,i3
ld2 {v16.2s,v17.2s}, [x1], x7 // c1,c0 s1,s0
// post-rotation loop: two values from each direction per iteration
1:
subs x14, x14, #2
fmul v7.2s, v0.2s, v17.2s // r1*s1,r0*s0
ld2 {v18.2s,v19.2s}, [x4], #16 // c2,c3 s2,s3
fmul v4.2s, v1.2s, v17.2s // i1*s1,i0*s0
fmul v6.2s, v21.2s, v19.2s // i2*s2,i3*s3
fmul v5.2s, v20.2s, v19.2s // r2*s2,r3*s3
fmul v24.2s, v0.2s, v16.2s // r1*c1,r0*c0
fmul v25.2s, v20.2s, v18.2s // r2*c2,r3*c3
fmul v22.2s, v21.2s, v18.2s // i2*c2,i3*c3
fmul v23.2s, v1.2s, v16.2s // i1*c1,i0*c0
fadd v4.2s, v4.2s, v24.2s // i1*s1+r1*c1,i0*s0+r0*c0
fadd v6.2s, v6.2s, v25.2s // i2*s2+r2*c2,i3*s3+r3*c3
fsub v5.2s, v22.2s, v5.2s // i2*c2-r2*s2,i3*c3-r3*s3
fsub v7.2s, v23.2s, v7.2s // i1*c1-r1*s1,i0*c0-r0*s0
fneg v4.2s, v4.2s
fneg v6.2s, v6.2s
b.eq 1f
ld2 {v0.2s, v1.2s}, [x3], x7
ld2 {v20.2s,v21.2s}, [x6], #16
ld2 {v16.2s,v17.2s}, [x1], x7 // c1,c0 s1,s0
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0], x7
st2 {v6.2s,v7.2s}, [x8], #16
b 1b
1:
rev64 v5.2s, v5.2s
rev64 v7.2s, v7.2s
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
// the two post-indexed loads together pop the 32-byte frame
ldp x19, x20, [sp], #16
ldr x30, [sp], #16
ret
endfunc

View File

@@ -0,0 +1,39 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/mpegaudiodsp.h"
#include "config.h"
/* NEON window/synthesis routines, implemented in assembly. */
void ff_mpadsp_apply_window_fixed_neon(int32_t *synth_buf, int32_t *window,
                                       int *dither, int16_t *samples,
                                       int incr);
void ff_mpadsp_apply_window_float_neon(float *synth_buf, float *window,
                                       int *dither, float *samples,
                                       int incr);

/**
 * Hook the NEON apply_window implementations into the MPEG audio DSP
 * context when the CPU flags report NEON; otherwise leave @p s untouched.
 */
av_cold void ff_mpadsp_init_aarch64(MPADSPContext *s)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    s->apply_window_float = ff_mpadsp_apply_window_float_neon;
    s->apply_window_fixed = ff_mpadsp_apply_window_fixed_neon;
}

View File

@@ -0,0 +1,226 @@
/*
* Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
#define FRAC_BITS 23 // fractional bits for sb_samples and dct
#define WFRAC_BITS 16 // fractional bits for window
// right shift applied (shrn/shrn2) when the 64-bit accumulators of the
// fixed-point path are narrowed to output samples
#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
// Byte-index table for the tbl instruction: selects the four 32-bit words of
// a 128-bit vector in reverse order while keeping the bytes of each word in
// their original order.
const tbl_rev128.s, align=4
.byte 12, 13, 14, 15
.byte 8, 9, 10, 11
.byte 4, 5, 6, 7
.byte 0, 1, 2, 3
endconst
/*
 * apply_window type, st
 *
 * Emits the function ff_mpadsp_apply_window_<type>_neon.
 *   type  "fixed" or "float"; selects the .ifc branches below
 *   st    element size suffix used for the output stores (h or s)
 *
 * The arithmetic is written with the helper macros MLA, MLA2, MLS, MLS2 and
 * round_sample, which must be defined for the wanted sample type before
 * this macro is invoked. They are purged at the end of the expansion so
 * that they can be redefined for the next instantiation.
 *
 * Registers on entry of the generated function, as used below:
 *   x0  synth_buf
 *   x1  window
 *   x2  pointer to the dither state (only accessed for type fixed)
 *   x3  samples (output)
 *   w4  incr, output stride in samples
 */
.macro apply_window type, st
function ff_mpadsp_apply_window_\type\()_neon, export=1
mov x7, x0
sxtw x4, w4 // incr
add x8, x0, #512<<2
// copy the first 32 words of synth_buf to synth_buf + 512 words
ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x7], #64
ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x7], #64
st1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x8], #64
st1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x8], #64
movrel x15, tbl_rev128.s
ld1 {v27.4s}, [x15] // v27 = word-reversal indices for tbl
// turn incr into a byte stride: 2 bytes per fixed sample, 4 per float sample
.ifc \type, fixed
lsl x4, x4, #1
.else
lsl x4, x4, #2
.endif
add x10, x0, #45<<2
add x0, x0, #16<<2
add x1, x1, #16<<2
// x5 = samples + 31 * stride, written backwards with x13 = -stride
add x5, x3, x4, lsl #5
sub x5, x5, x4 // samples2
neg x13, x4 // -incr
mov x9, #64<<2 // step of 64 words used by all inner-loop loads
.ifc \type, fixed
ld1r {v16.2s}, [x2] // dither_state
sxtl v16.2d, v16.2s
// v29 = 0; v30 keeps the low OUT_SHIFT bits of the lower 64-bit lane,
// v31 those of the upper lane
movi v29.2d, #0
movi v30.2d, #(1<<OUT_SHIFT)-1
trn1 v31.2d, v29.2d, v30.2d
trn2 v30.2d, v30.2d, v29.2d
trn1 v16.2d, v16.2d, v29.2d
.else
movi v16.4s, #0
movi v28.4s, #0
.endif
mov x14, #4 // outer loop counter
// outer loop, 4 passes: each pass writes samples forwards through x3 and
// backwards through x5
1:
mov x8, x0
sub x7, x1, #3<<2
sub x6, x1, x14, lsl #4
add x7, x7, x14, lsl #4
add x11, x6, #(32)<<2 // w + 32
add x12, x7, #(32)<<2 // w2 + 32
mov x15, #8 // inner loop counter
movi v17.2d, #0
movi v18.2d, #0
movi v19.2d, #0
// inner loop, 8 passes: multiply-accumulate window and synth_buf vectors
// that lie 64 words apart
2:
subs x15, x15, #1
ld1 {v0.4s}, [x8], x9
ld1 {v1.4s}, [x10], x9
ld1 {v2.4s}, [x6], x9
ld1 {v3.4s}, [x7], x9
tbl v6.16b, {v0.16b}, v27.16b // v0 with its four words reversed
tbl v7.16b, {v1.16b}, v27.16b // v1 with its four words reversed
ld1 {v4.4s}, [x11], x9
ld1 {v5.4s}, [x12], x9
MLA v16, v2, v0
MLA2 v17, v2, v0
MLS v18, v3, v6
MLS2 v19, v3, v6
MLS v16, v4, v7
MLS2 v17, v4, v7
MLS v18, v5, v1
MLS2 v19, v5, v1
b.gt 2b
// the flags of this cmp feed both b.eq 4f branches below: some work is
// skipped on the first outer pass (x14 == 4). The instructions in between
// are vector operations or sub without the s suffix, which leave the
// flags untouched.
cmp x14, #4
sub x10, x10, #64<<5 // 64 * 8 * sizeof(int32_t)
.ifc \type, fixed
and v28.16b, v16.16b, v30.16b
ext v28.16b, v29.16b, v28.16b, #8
b.eq 4f
round_sample v19, 1, 1
4:
round_sample v16, 1, 0
shrn v16.2s, v16.2d, #OUT_SHIFT
round_sample v19, 0, 0
shrn v19.2s, v19.2d, #OUT_SHIFT
round_sample v17, 0, 1
round_sample v18, 1, 1
round_sample v17, 1, 0
shrn2 v16.4s, v17.2d, #OUT_SHIFT
round_sample v18, 0, 0
shrn2 v19.4s, v18.2d, #OUT_SHIFT
sqxtn v16.4h, v16.4s
sqxtn v18.4h, v19.4s
.else
ext v18.16b, v18.16b, v18.16b, #8
.endif
// forward results from v16 go to x3, backward results from v18 to x5
st1 {v16.\st\()}[0], [x3], x4
b.eq 4f
st1 {v18.\st\()}[1], [x5], x13
4:
st1 {v16.\st\()}[1], [x3], x4
st1 {v18.\st\()}[0], [x5], x13
st1 {v16.\st\()}[2], [x3], x4
st1 {v18.\st\()}[3], [x5], x13
st1 {v16.\st\()}[3], [x3], x4
st1 {v18.\st\()}[2], [x5], x13
mov v16.16b, v28.16b // next pass starts from the value kept in v28
subs x14, x14, #1
add x0, x0, #4<<2
sub x10, x10, #4<<2
b.gt 1b
// computing samples[16]
add x6, x1, #32<<2
ld1 {v0.2s}, [x6], x9
ld1 {v1.2s}, [x0], x9
.rept 3
ld1 {v2.2s}, [x6], x9
ld1 {v3.2s}, [x0], x9
MLS v16, v0, v1
ld1 {v0.2s}, [x6], x9
ld1 {v1.2s}, [x0], x9
MLS v16, v2, v3
.endr
ld1 {v2.2s}, [x6], x9
ld1 {v3.2s}, [x0], x9
MLS v16, v0, v1
MLS v16, v2, v3
.ifc \type, fixed
and v28.16b, v16.16b, v30.16b
shrn v20.2s, v16.2d, #OUT_SHIFT
xtn v28.2s, v28.2d
sqxtn v20.4h, v20.4s
st1 {v28.s}[0], [x2] // save dither_state
st1 {v20.h}[0], [x3]
.else
st1 {v16.s}[0], [x3]
.endif
ret
endfunc
// drop the type-specific helpers so the next instantiation can redefine them
.purgem round_sample
.purgem MLA
.purgem MLA2
.purgem MLS
.purgem MLS2
.endm
/*
 * round_sample r, idx, next   (fixed-point variant)
 *
 * Adds the carried remainder held in v28 to the 64-bit accumulators in r,
 * then extracts the new remainder into v28.
 *   idx   lane of r whose low bits become the new remainder: 0 masks with
 *         v30 (lower 64-bit lane), anything else masks with v31 (upper lane)
 *   next  lane in which the remainder is needed by the following use; when
 *         it differs from idx the value is moved to the other lane with ext,
 *         shifting in zeros from v29
 * Relies on v29, v30 and v31 having been set up by the caller.
 */
.macro round_sample r, idx, next
add \r\().2d, \r\().2d, v28.2d
.if \idx == 0
and v28.16b, \r\().16b, v30.16b
.else // \idx == 1
and v28.16b, \r\().16b, v31.16b
.endif
.if \idx != \next
.if \next == 0
ext v28.16b, v28.16b, v29.16b, #8
.else
ext v28.16b, v29.16b, v28.16b, #8
.endif
.endif
.endm
// Fixed-point helpers for apply_window: signed widening 32x32 -> 64-bit
// multiply-accumulate (MLA*) and multiply-subtract (MLS*). The plain forms
// use the lower two 32-bit lanes of the sources, the *2 forms the upper two.
.macro MLA d, s1, s2
smlal \d\().2d, \s1\().2s, \s2\().2s
.endm
.macro MLA2 d, s1, s2
smlal2 \d\().2d, \s1\().4s, \s2\().4s
.endm
.macro MLS d, s1, s2
smlsl \d\().2d, \s1\().2s, \s2\().2s
.endm
.macro MLS2 d, s1, s2
smlsl2 \d\().2d, \s1\().4s, \s2\().4s
.endm
// instantiate ff_mpadsp_apply_window_fixed_neon; output stores use h elements
apply_window fixed, h
// Float helpers for apply_window.
// nothing to do for round_sample and ML{A,S}2: a single 4-lane fmla/fmls
// already covers all four floats, so the upper-half forms expand to nothing
.macro round_sample r, idx, next
.endm
.macro MLA2 d, s1, s2
.endm
.macro MLS2 d, s1, s2
.endm
.macro MLA d, s1, s2
fmla \d\().4s, \s1\().4s, \s2\().4s
.endm
.macro MLS d, s1, s2
fmls \d\().4s, \s1\().4s, \s2\().4s
.endm
// instantiate ff_mpadsp_apply_window_float_neon; output stores use s elements
apply_window float, s

View File

@@ -0,0 +1,149 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * transpose_8x8B: in-place transpose of an 8x8 byte matrix.
 *   r0-r7  the eight rows (lower 64 bits of each register); on exit r0-r7
 *          hold columns 0-7 of the input, in that order
 *   r8,r9  scratch registers, clobbered
 * Three rounds of trn1/trn2 at byte, halfword and word granularity.
 */
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
// round 1: interleave bytes of neighbouring rows
trn1 \r8\().8B, \r0\().8B, \r1\().8B
trn2 \r9\().8B, \r0\().8B, \r1\().8B
trn1 \r1\().8B, \r2\().8B, \r3\().8B
trn2 \r3\().8B, \r2\().8B, \r3\().8B
trn1 \r0\().8B, \r4\().8B, \r5\().8B
trn2 \r5\().8B, \r4\().8B, \r5\().8B
trn1 \r2\().8B, \r6\().8B, \r7\().8B
trn2 \r7\().8B, \r6\().8B, \r7\().8B
// round 2: interleave halfwords, combining groups of four rows
trn1 \r4\().4H, \r0\().4H, \r2\().4H
trn2 \r2\().4H, \r0\().4H, \r2\().4H
trn1 \r6\().4H, \r5\().4H, \r7\().4H
trn2 \r7\().4H, \r5\().4H, \r7\().4H
trn1 \r5\().4H, \r9\().4H, \r3\().4H
trn2 \r9\().4H, \r9\().4H, \r3\().4H
trn1 \r3\().4H, \r8\().4H, \r1\().4H
trn2 \r8\().4H, \r8\().4H, \r1\().4H
// round 3: interleave words, joining rows 0-3 with rows 4-7
trn1 \r0\().2S, \r3\().2S, \r4\().2S
trn2 \r4\().2S, \r3\().2S, \r4\().2S
trn1 \r1\().2S, \r5\().2S, \r6\().2S
trn2 \r5\().2S, \r5\().2S, \r6\().2S
trn2 \r6\().2S, \r8\().2S, \r2\().2S
trn1 \r2\().2S, \r8\().2S, \r2\().2S
trn1 \r3\().2S, \r9\().2S, \r7\().2S
trn2 \r7\().2S, \r9\().2S, \r7\().2S
.endm
/*
 * transpose_8x16B: the same three trn1/trn2 rounds as transpose_8x8B, but
 * on full 128-bit registers. Because byte, halfword and word trn1/trn2
 * never mix the two 64-bit halves of a register, the lower halves and the
 * upper halves of r0-r7 are transposed as two independent 8x8 byte blocks.
 *   r0-r7  eight rows of 16 bytes, transposed in place
 *   t0,t1  scratch registers, clobbered
 */
.macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
// round 1: bytes
trn1 \t0\().16B, \r0\().16B, \r1\().16B
trn2 \t1\().16B, \r0\().16B, \r1\().16B
trn1 \r1\().16B, \r2\().16B, \r3\().16B
trn2 \r3\().16B, \r2\().16B, \r3\().16B
trn1 \r0\().16B, \r4\().16B, \r5\().16B
trn2 \r5\().16B, \r4\().16B, \r5\().16B
trn1 \r2\().16B, \r6\().16B, \r7\().16B
trn2 \r7\().16B, \r6\().16B, \r7\().16B
// round 2: halfwords
trn1 \r4\().8H, \r0\().8H, \r2\().8H
trn2 \r2\().8H, \r0\().8H, \r2\().8H
trn1 \r6\().8H, \r5\().8H, \r7\().8H
trn2 \r7\().8H, \r5\().8H, \r7\().8H
trn1 \r5\().8H, \t1\().8H, \r3\().8H
trn2 \t1\().8H, \t1\().8H, \r3\().8H
trn1 \r3\().8H, \t0\().8H, \r1\().8H
trn2 \t0\().8H, \t0\().8H, \r1\().8H
// round 3: words
trn1 \r0\().4S, \r3\().4S, \r4\().4S
trn2 \r4\().4S, \r3\().4S, \r4\().4S
trn1 \r1\().4S, \r5\().4S, \r6\().4S
trn2 \r5\().4S, \r5\().4S, \r6\().4S
trn2 \r6\().4S, \t0\().4S, \r2\().4S
trn1 \r2\().4S, \t0\().4S, \r2\().4S
trn1 \r3\().4S, \t1\().4S, \r7\().4S
trn2 \r7\().4S, \t1\().4S, \r7\().4S
.endm
/*
 * transpose_4x16B: transposes, in place, each of the four 4x4 byte blocks
 * formed by the same 32-bit column group of r0-r3 (two rounds of trn1/trn2
 * at byte and halfword granularity).
 *   r0-r3  four rows of 16 bytes; receive the transposed blocks
 *   t4-t7  scratch registers, clobbered
 */
.macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
trn1 \t4\().16B, \r0\().16B, \r1\().16B
trn2 \t5\().16B, \r0\().16B, \r1\().16B
trn1 \t6\().16B, \r2\().16B, \r3\().16B
trn2 \t7\().16B, \r2\().16B, \r3\().16B
trn1 \r0\().8H, \t4\().8H, \t6\().8H
trn2 \r2\().8H, \t4\().8H, \t6\().8H
trn1 \r1\().8H, \t5\().8H, \t7\().8H
trn2 \r3\().8H, \t5\().8H, \t7\().8H
.endm
/*
 * transpose_4x8B: 64-bit variant of transpose_4x16B. Transposes, in place,
 * each of the two 4x4 byte blocks formed by the same 32-bit column group of
 * r0-r3.
 *   r0-r3  four rows of 8 bytes; receive the transposed blocks
 *   t4-t7  scratch registers, clobbered
 */
.macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7
trn1 \t4\().8B, \r0\().8B, \r1\().8B
trn2 \t5\().8B, \r0\().8B, \r1\().8B
trn1 \t6\().8B, \r2\().8B, \r3\().8B
trn2 \t7\().8B, \r2\().8B, \r3\().8B
trn1 \r0\().4H, \t4\().4H, \t6\().4H
trn2 \r2\().4H, \t4\().4H, \t6\().4H
trn1 \r1\().4H, \t5\().4H, \t7\().4H
trn2 \r3\().4H, \t5\().4H, \t7\().4H
.endm
/*
 * transpose_4x4H: transpose-like shuffle of a 4x4 halfword matrix.
 *   r0-r3  rows in, result out
 *   r4-r7  scratch registers, clobbered
 *
 * NOTE: this is not a plain in-order transpose. The second pair of rows is
 * read as (r3, r2) and the results for input columns 2 and 3 are written to
 * r3 and r2 respectively. With input rows R0-R3 the outputs are:
 *   r0 = R0[0], R1[0], R3[0], R2[0]
 *   r1 = R0[1], R1[1], R3[1], R2[1]
 *   r2 = R0[3], R1[3], R3[3], R2[3]
 *   r3 = R0[2], R1[2], R3[2], R2[2]
 * In other words it is an exact transpose when rows 2 and 3 are considered
 * swapped on both input and output.
 * NOTE(review): presumably the callers rely on this ordering - confirm
 * before changing it.
 */
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
trn1 \r4\().4H, \r0\().4H, \r1\().4H
trn2 \r5\().4H, \r0\().4H, \r1\().4H
trn1 \r7\().4H, \r3\().4H, \r2\().4H
trn2 \r6\().4H, \r3\().4H, \r2\().4H
trn1 \r0\().2S, \r4\().2S, \r7\().2S
trn2 \r3\().2S, \r4\().2S, \r7\().2S
trn1 \r1\().2S, \r5\().2S, \r6\().2S
trn2 \r2\().2S, \r5\().2S, \r6\().2S
.endm
/*
 * transpose_8x8H: in-place transpose of an 8x8 halfword matrix.
 *   r0-r7  the eight rows (128 bits each); on exit r0-r7 hold columns 0-7
 *          of the input, in that order
 *   r8,r9  scratch registers, clobbered
 * Same structure as transpose_8x8B, with the three trn1/trn2 rounds working
 * on halfwords, words and doublewords.
 */
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
// round 1: halfwords
trn1 \r8\().8H, \r0\().8H, \r1\().8H
trn2 \r9\().8H, \r0\().8H, \r1\().8H
trn1 \r1\().8H, \r2\().8H, \r3\().8H
trn2 \r3\().8H, \r2\().8H, \r3\().8H
trn1 \r0\().8H, \r4\().8H, \r5\().8H
trn2 \r5\().8H, \r4\().8H, \r5\().8H
trn1 \r2\().8H, \r6\().8H, \r7\().8H
trn2 \r7\().8H, \r6\().8H, \r7\().8H
// round 2: words
trn1 \r4\().4S, \r0\().4S, \r2\().4S
trn2 \r2\().4S, \r0\().4S, \r2\().4S
trn1 \r6\().4S, \r5\().4S, \r7\().4S
trn2 \r7\().4S, \r5\().4S, \r7\().4S
trn1 \r5\().4S, \r9\().4S, \r3\().4S
trn2 \r9\().4S, \r9\().4S, \r3\().4S
trn1 \r3\().4S, \r8\().4S, \r1\().4S
trn2 \r8\().4S, \r8\().4S, \r1\().4S
// round 3: doublewords
trn1 \r0\().2D, \r3\().2D, \r4\().2D
trn2 \r4\().2D, \r3\().2D, \r4\().2D
trn1 \r1\().2D, \r5\().2D, \r6\().2D
trn2 \r5\().2D, \r5\().2D, \r6\().2D
trn2 \r6\().2D, \r8\().2D, \r2\().2D
trn1 \r2\().2D, \r8\().2D, \r2\().2D
trn1 \r3\().2D, \r9\().2D, \r7\().2D
trn2 \r7\().2D, \r9\().2D, \r7\().2D
.endm

View File

@@ -0,0 +1,79 @@
/*
* check NEON registers for clobbers
* Copyright (c) 2013 Martin Storsjo
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/avcodec.h"
#include "libavutil/aarch64/neontest.h"
/*
 * NEON clobber-test wrapper for avcodec_open2(): hands the function name and
 * all arguments, unchanged, to testneonclobbers().
 * NOTE(review): wrap() and testneonclobbers() are presumably macros from
 * libavutil/aarch64/neontest.h; their expansion is not visible here.
 */
wrap(avcodec_open2(AVCodecContext *avctx,
AVCodec *codec,
AVDictionary **options))
{
testneonclobbers(avcodec_open2, avctx, codec, options);
}
/*
 * NEON clobber-test wrapper for avcodec_decode_audio4(): hands the function
 * name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_decode_audio4(AVCodecContext *avctx,
AVFrame *frame,
int *got_frame_ptr,
AVPacket *avpkt))
{
testneonclobbers(avcodec_decode_audio4, avctx, frame,
got_frame_ptr, avpkt);
}
/*
 * NEON clobber-test wrapper for avcodec_decode_video2(): hands the function
 * name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_decode_video2(AVCodecContext *avctx,
AVFrame *picture,
int *got_picture_ptr,
AVPacket *avpkt))
{
testneonclobbers(avcodec_decode_video2, avctx, picture,
got_picture_ptr, avpkt);
}
/*
 * NEON clobber-test wrapper for avcodec_decode_subtitle2(): hands the
 * function name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_decode_subtitle2(AVCodecContext *avctx,
AVSubtitle *sub,
int *got_sub_ptr,
AVPacket *avpkt))
{
testneonclobbers(avcodec_decode_subtitle2, avctx, sub,
got_sub_ptr, avpkt);
}
/*
 * NEON clobber-test wrapper for avcodec_encode_audio2(): hands the function
 * name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_encode_audio2(AVCodecContext *avctx,
AVPacket *avpkt,
const AVFrame *frame,
int *got_packet_ptr))
{
testneonclobbers(avcodec_encode_audio2, avctx, avpkt, frame,
got_packet_ptr);
}
/*
 * NEON clobber-test wrapper for avcodec_encode_subtitle(): hands the
 * function name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_encode_subtitle(AVCodecContext *avctx,
uint8_t *buf, int buf_size,
const AVSubtitle *sub))
{
testneonclobbers(avcodec_encode_subtitle, avctx, buf, buf_size, sub);
}
/*
 * NEON clobber-test wrapper for avcodec_encode_video2(): hands the function
 * name and all arguments, unchanged, to testneonclobbers().
 */
wrap(avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt,
const AVFrame *frame, int *got_packet_ptr))
{
testneonclobbers(avcodec_encode_video2, avctx, avpkt, frame, got_packet_ptr);
}

View File

@@ -0,0 +1,48 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/rv34dsp.h"
#include "config.h"
/* NEON RV40 chroma motion compensation routines; defined outside this file. */
void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);

/**
 * Install the NEON chroma MC functions into the RV34/RV40 DSP context when
 * the CPU flags report NEON; otherwise leave @p c untouched.
 * Table slot 0 receives the mc8 variant, slot 1 the mc4 variant.
 */
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
    c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;

    c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
    c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
}

View File

@@ -0,0 +1,47 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/vc1dsp.h"
#include "config.h"
/* NEON VC-1 chroma motion compensation routines; defined outside this file. */
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y);
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y);
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y);
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                int h, int x, int y);

/**
 * Install the NEON chroma MC functions into the no-rounding tables of the
 * VC-1 DSP context when the CPU flags report NEON; otherwise leave @p dsp
 * untouched. Table slot 0 receives the mc8 variant, slot 1 the mc4 variant.
 */
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;

    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
}

View File

@@ -0,0 +1,28 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
/*
 * ff_prefetch_aarch64
 *
 * Registers on entry, as used by the code below:
 *   x0  address of the first row
 *   x1  distance between rows in bytes
 *   w2  number of rows
 *
 * Issues streaming load prefetches for two rows per pass and repeats, by
 * branching back to the function entry, while the remaining row count is
 * positive. At least two rows are always prefetched.
 */
function ff_prefetch_aarch64, export=1
subs w2, w2, #2 // two rows are handled per pass
prfm pldl1strm, [x0]
prfm pldl1strm, [x0, x1]
add x0, x0, x1, lsl #1 // advance by two rows
b.gt X(ff_prefetch_aarch64)
ret
endfunc

View File

@@ -0,0 +1,32 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/videodsp.h"
/* Prefetch routine implemented in assembly. */
void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);

/**
 * Install the AArch64 prefetch routine into the video DSP context when the
 * CPU flags report ARMv8; otherwise leave @p ctx untouched.
 * @p bpc is not used by this initialiser.
 */
av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
{
    const int flags = av_get_cpu_flags();

    if (!have_armv8(flags))
        return;

    ctx->prefetch = ff_prefetch_aarch64;
}

View File

@@ -0,0 +1,34 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/vorbisdsp.h"
/* NEON inverse channel coupling, implemented in assembly. */
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang,
                                     intptr_t blocksize);

/**
 * Install the NEON inverse coupling routine into the Vorbis DSP context
 * when the CPU flags report NEON; otherwise leave @p c untouched.
 */
av_cold void ff_vorbisdsp_init_aarch64(VorbisDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
}

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/aarch64/asm.S"
/*
 * ff_vorbis_inverse_coupling_neon
 *
 * Registers on entry, as used by the code below:
 *   x0  first float array, called mag in the comments here (read and written)
 *   x1  second float array, called ang in the comments here (read and written)
 *   x2  number of floats to process; four are handled per vector
 *
 * For every vector of four floats m (from x0) and a (from x1):
 *   mask = (a <= 0), evaluated as a signed integer compare of the raw bits
 *   a   ^= sign bit of m
 *   value written back to the x0 array = m + (a where mask is set, else 0)
 *   value written back to the x1 array = m - (a where mask is clear, else 0)
 *
 * The main loop is software pipelined and handles two vectors per pass:
 * x0/x1 are the load pointers, x3/x4 the store pointers that trail one
 * vector behind. The code at label 3 handles a single remaining vector,
 * which is also the whole job when x2 is 4.
 */
function ff_vorbis_inverse_coupling_neon, export=1
movi v20.4s, #1<<7, lsl #24 // 0x80000000: float sign bit mask
subs x2, x2, #4
mov x3, x0
mov x4, x1
b.eq 3f
// prologue: load and compute the first vector
ld1 {v7.4s}, [x1], #16
ld1 {v6.4s}, [x0], #16
cmle v4.4s, v7.4s, #0
and v5.16b, v6.16b, v20.16b
eor v7.16b, v7.16b, v5.16b
and v2.16b, v7.16b, v4.16b
bic v3.16b, v7.16b, v4.16b
fadd v7.4s, v6.4s, v2.4s
fsub v6.4s, v6.4s, v3.4s
// first half of the loop: compute in v0/v1 while storing the v6/v7 results
1: ld1 {v1.4s}, [x1], #16
ld1 {v0.4s}, [x0], #16
cmle v4.4s, v1.4s, #0
and v5.16b, v0.16b, v20.16b
eor v1.16b, v1.16b, v5.16b
st1 {v7.4s}, [x3], #16
st1 {v6.4s}, [x4], #16
and v2.16b, v1.16b, v4.16b
bic v3.16b, v1.16b, v4.16b
fadd v1.4s, v0.4s, v2.4s
fsub v0.4s, v0.4s, v3.4s
subs x2, x2, #8
b.le 2f
// second half of the loop: compute in v6/v7 while storing the v0/v1 results
ld1 {v7.4s}, [x1], #16
ld1 {v6.4s}, [x0], #16
cmle v4.4s, v7.4s, #0
and v5.16b, v6.16b, v20.16b
eor v7.16b, v7.16b, v5.16b
st1 {v1.4s}, [x3], #16
st1 {v0.4s}, [x4], #16
and v2.16b, v7.16b, v4.16b
bic v3.16b, v7.16b, v4.16b
fadd v7.4s, v6.4s, v2.4s
fsub v6.4s, v6.4s, v3.4s
b 1b
// drain the pipeline; the flags still come from the subs above: a negative
// count means everything is done, zero means one more vector remains
2: st1 {v1.4s}, [x3], #16
st1 {v0.4s}, [x4], #16
b.lt ret
// single vector, processed in place through the load pointers
3: ld1 {v1.4s}, [x1]
ld1 {v0.4s}, [x0]
cmle v4.4s, v1.4s, #0
and v5.16b, v0.16b, v20.16b
eor v1.16b, v1.16b, v5.16b
and v2.16b, v1.16b, v4.16b
bic v3.16b, v1.16b, v4.16b
fadd v1.4s, v0.4s, v2.4s
fsub v0.4s, v0.4s, v3.4s
st1 {v1.4s}, [x0], #16
st1 {v0.4s}, [x1], #16
ret:
ret
endfunc

View File

@@ -0,0 +1,162 @@
/*
* Autodesk RLE Decoder
* Copyright (c) 2005 The FFmpeg Project
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Autodesk RLE Video Decoder by Konstantin Shishkov
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "avcodec.h"
#include "internal.h"
#include "msrledec.h"
/** Private decoder state of the Autodesk RLE (AASC) decoder. */
typedef struct AascContext {
AVCodecContext *avctx; /* owning codec context, set in aasc_decode_init() */
GetByteContext gb; /* byte reader handed to ff_msrle_decode() */
AVFrame *frame; /* frame allocated at init and passed to ff_reget_buffer() on each decode call */
uint32_t palette[AVPALETTE_COUNT]; /* palette read from extradata, top byte forced to 0xFF */
int palette_size; /* number of palette bytes taken from extradata */
} AascContext;
/**
 * Decoder init callback: pick the output pixel format from
 * bits_per_coded_sample and allocate the persistent frame.
 *
 * For 8 bits per sample the palette is taken from extradata: at most
 * AVPALETTE_SIZE bytes are used, and every little-endian 32-bit entry is
 * stored with its top byte forced to 0xFF.
 *
 * @return 0 on success, -1 for an unsupported bit depth, AVERROR(ENOMEM)
 *         if the frame cannot be allocated
 */
static av_cold int aasc_decode_init(AVCodecContext *avctx)
{
    AascContext *s = avctx->priv_data;

    s->avctx = avctx;

    if (avctx->bits_per_coded_sample == 8) {
        const uint8_t *entry = avctx->extradata;
        int count, idx;

        avctx->pix_fmt  = AV_PIX_FMT_PAL8;
        s->palette_size = FFMIN(avctx->extradata_size, AVPALETTE_SIZE);
        count           = s->palette_size / 4;
        for (idx = 0; idx < count; idx++, entry += 4)
            s->palette[idx] = 0xFFU << 24 | AV_RL32(entry);
    } else if (avctx->bits_per_coded_sample == 16) {
        avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
    } else if (avctx->bits_per_coded_sample == 24) {
        avctx->pix_fmt = AV_PIX_FMT_BGR24;
    } else {
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", avctx->bits_per_coded_sample);
        return -1;
    }

    /* the frame is kept for the lifetime of the decoder instance */
    s->frame = av_frame_alloc();
    return s->frame ? 0 : AVERROR(ENOMEM);
}
/**
 * Decode one packet into the persistent frame and return a new reference to it.
 *
 * The packet starts with a 32-bit little-endian word. For the 'AASC' tag that
 * word selects raw rows (0) or MS-RLE (1); for the 'AAS4' tag the whole
 * packet, including that word, is fed to the MS-RLE decoder.
 *
 * @param avctx     codec context
 * @param data      AVFrame that receives a reference to the decoded picture
 * @param got_frame set to 1 once the picture has been decoded
 * @param avpkt     input packet
 * @return avpkt->size on success, a negative value on error
 */
static int aasc_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_frame,
                             AVPacket *avpkt)
{
    AascContext *ctx   = avctx->priv_data;
    const uint8_t *src = avpkt->data;
    int remaining      = avpkt->size;
    int mode, row, pitch, bpp, err;

    if (remaining < 4) {
        av_log(avctx, AV_LOG_ERROR, "frame too short\n");
        return AVERROR_INVALIDDATA;
    }

    err = ff_reget_buffer(avctx, ctx->frame);
    if (err < 0)
        return err;

    mode       = AV_RL32(src);
    src       += 4;
    remaining -= 4;
    bpp        = avctx->bits_per_coded_sample / 8;

    if (avctx->codec_tag == MKTAG('A', 'A', 'S', '4')) {
        /* rewind over the leading word: it is part of the RLE payload here */
        bytestream2_init(&ctx->gb, src - 4, remaining + 4);
        ff_msrle_decode(avctx, (AVPicture*)ctx->frame, 8, &ctx->gb);
    } else if (avctx->codec_tag == MKTAG('A', 'A', 'S', 'C')) {
        if (mode == 0) {
            /* raw rows, stored bottom-up with a padded pitch */
            pitch = (avctx->width * bpp + bpp) & ~bpp;
            if (remaining < pitch * avctx->height)
                return AVERROR_INVALIDDATA;
            for (row = 0; row < avctx->height; row++) {
                const int dst_row = avctx->height - 1 - row;

                memcpy(ctx->frame->data[0] + dst_row * ctx->frame->linesize[0],
                       src, avctx->width * bpp);
                src       += pitch;
                remaining -= pitch;
            }
        } else if (mode == 1) {
            bytestream2_init(&ctx->gb, src, remaining);
            ff_msrle_decode(avctx, (AVPicture*)ctx->frame, 8, &ctx->gb);
        } else {
            av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", mode);
            return AVERROR_INVALIDDATA;
        }
    } else {
        av_log(avctx, AV_LOG_ERROR, "Unknown FourCC: %X\n", avctx->codec_tag);
        return -1;
    }

    /* paletted output carries its palette in the second data plane */
    if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
        memcpy(ctx->frame->data[1], ctx->palette, ctx->palette_size);

    *got_frame = 1;
    err = av_frame_ref(data, ctx->frame);
    if (err < 0)
        return err;

    /* report that the buffer was completely consumed */
    return avpkt->size;
}
/**
 * Release the persistent frame owned by the decoder.
 *
 * @return always 0
 */
static av_cold int aasc_decode_end(AVCodecContext *avctx)
{
    AascContext *ctx = avctx->priv_data;
    AVFrame **slot   = &ctx->frame;

    av_frame_free(slot);

    return 0;
}
/**
 * Codec registration entry for the Autodesk RLE video decoder.
 * Wires the init/decode/close callbacks defined in this file and sizes the
 * private data area to hold one AascContext.
 */
AVCodec ff_aasc_decoder = {
.name = "aasc",
.long_name = NULL_IF_CONFIG_SMALL("Autodesk RLE"),
.type = AVMEDIA_TYPE_VIDEO,
.id = AV_CODEC_ID_AASC,
.priv_data_size = sizeof(AascContext),
.init = aasc_decode_init,
.close = aasc_decode_end,
.decode = aasc_decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
};

View File

@@ -0,0 +1,234 @@
/*
* Common code between the AC-3 encoder and decoder
* Copyright (c) 2000 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Common code between the AC-3 encoder and decoder.
*/
#include "avcodec.h"
#include "ac3.h"
#include "get_bits.h"
/**
 * Starting frequency coefficient bin for each critical band.
 *
 * Entry i is the first bin of band i. The table has one extra trailing entry
 * (253) that marks the end of the last band, so band i covers the bins
 * [tab[i], tab[i+1]).
 */
const uint8_t ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 31,
34, 37, 40, 43, 46, 49, 55, 61, 67, 73,
79, 85, 97, 109, 121, 133, 157, 181, 205, 229, 253
};
/*
 * ff_ac3_bin_to_band_tab is the inverse lookup of ff_ac3_band_start_tab.
 * With CONFIG_HARDCODED_TABLES it is a constant table; otherwise it is a
 * writable array that ff_ac3_common_init() fills at run time.
 */
#if CONFIG_HARDCODED_TABLES
/**
 * Map each frequency coefficient bin to the critical band that contains it.
 */
const uint8_t ff_ac3_bin_to_band_tab[253] = {
0,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 28, 28, 29, 29, 29, 30, 30, 30,
31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34,
35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36,
37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38,
39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40,
41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49
};
#else /* CONFIG_HARDCODED_TABLES */
/* Filled by ff_ac3_common_init(); zero until that function has run. */
uint8_t ff_ac3_bin_to_band_tab[253];
#endif
/**
 * Update the low-frequency compensation value for one band.
 *
 * @param a  current compensation value
 * @param b0 PSD of the current band
 * @param b1 PSD of the next band
 * @param c  value to use when the next band is exactly 256 above this one
 * @return c if b0 + 256 == b1; a reduced by 64 (floored at 0) if b0 > b1;
 *         otherwise a unchanged
 */
static inline int calc_lowcomp1(int a, int b0, int b1, int c)
{
    if (b0 + 256 == b1)
        return c;
    if (b0 > b1)
        return FFMAX(a - 64, 0);
    return a;
}
/**
 * Band-dependent low-frequency compensation update.
 *
 * Bands below 7 and bands 7..19 use calc_lowcomp1() with reset values 384 and
 * 320 respectively; from band 20 upwards the value simply decays by 128,
 * floored at 0.
 */
static inline int calc_lowcomp(int a, int b0, int b1, int bin)
{
    if (bin >= 20)
        return FFMAX(a - 128, 0);

    return calc_lowcomp1(a, b0, b1, bin < 7 ? 384 : 320);
}
/**
 * Convert exponents to per-bin PSD values and integrate them per critical band.
 *
 * psd[bin] is 3072 - 128 * exp[bin] for every bin in [start, end). The bins of
 * each critical band touched by that range are then combined with a
 * table-driven log-addition and stored in band_psd[band].
 */
void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd,
                               int16_t *band_psd)
{
    int i, pos, cur_band;

    /* exponent mapping to PSD */
    for (i = start; i < end; i++)
        psd[i] = 3072 - (exp[i] << 7);

    /* PSD integration, one critical band per outer iteration */
    pos      = start;
    cur_band = ff_ac3_bin_to_band_tab[start];
    do {
        const int limit = FFMIN(ff_ac3_band_start_tab[cur_band + 1], end);
        int acc         = psd[pos++];

        while (pos < limit) {
            /* logadd: larger term plus a correction looked up by the gap */
            const int hi  = FFMAX(acc, psd[pos]);
            const int idx = FFMIN(hi - ((acc + psd[pos] + 1) >> 1), 255);

            acc = hi + ff_ac3_log_add_tab[idx];
            pos++;
        }
        band_psd[cur_band] = acc;
        cur_band++;
    } while (end > ff_ac3_band_start_tab[cur_band]);
}
/**
 * Compute the masking curve for the critical bands covering bins [start, end).
 *
 * Steps: build the excitation per band from band_psd with fast/slow leak
 * terms, raise it towards s->db_per_bit where the band PSD is lower, clamp
 * from below with the hearing threshold table, then optionally add the delta
 * bit allocation segments.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if end <= 0, -1 if the delta bit
 *         allocation segments are out of range
 */
int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
int start, int end, int fast_gain, int is_lfe,
int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
uint8_t *dba_lengths, uint8_t *dba_values,
int16_t *mask)
{
int16_t excite[AC3_CRITICAL_BANDS]; /* excitation */
int band;
int band_start, band_end, begin, end1;
int lowcomp, fastleak, slowleak;
/* end - 1 is used as a table index below, so end must be at least 1 */
if (end <= 0)
return AVERROR_INVALIDDATA;
/* excitation function */
band_start = ff_ac3_bin_to_band_tab[start];
band_end = ff_ac3_bin_to_band_tab[end-1] + 1;
if (band_start == 0) {
/* range starts at band 0: bands 0 and 1 use only fast gain and lowcomp */
lowcomp = 0;
lowcomp = calc_lowcomp1(lowcomp, band_psd[0], band_psd[1], 384);
excite[0] = band_psd[0] - fast_gain - lowcomp;
lowcomp = calc_lowcomp1(lowcomp, band_psd[1], band_psd[2], 384);
excite[1] = band_psd[1] - fast_gain - lowcomp;
begin = 7;
/* bands 2..6: stop early at the first band whose PSD does not exceed the
 * next one; the leak loop below then resumes from the following band */
for (band = 2; band < 7; band++) {
/* for the LFE channel band 6 neither updates lowcomp nor ends the scan */
if (!(is_lfe && band == 6))
lowcomp = calc_lowcomp1(lowcomp, band_psd[band], band_psd[band+1], 384);
fastleak = band_psd[band] - fast_gain;
slowleak = band_psd[band] - s->slow_gain;
excite[band] = fastleak - lowcomp;
if (!(is_lfe && band == 6)) {
if (band_psd[band] <= band_psd[band+1]) {
begin = band + 1;
break;
}
}
}
/* up to band 21 the fast leak is still reduced by lowcomp */
end1 = FFMIN(band_end, 22);
for (band = begin; band < end1; band++) {
if (!(is_lfe && band == 6))
lowcomp = calc_lowcomp(lowcomp, band_psd[band], band_psd[band+1], band);
fastleak = FFMAX(fastleak - s->fast_decay, band_psd[band] - fast_gain);
slowleak = FFMAX(slowleak - s->slow_decay, band_psd[band] - s->slow_gain);
excite[band] = FFMAX(fastleak - lowcomp, slowleak);
}
begin = 22;
} else {
/* coupling channel */
/* leak state is seeded from the coupling leak parameters instead */
begin = band_start;
fastleak = (s->cpl_fast_leak << 8) + 768;
slowleak = (s->cpl_slow_leak << 8) + 768;
}
/* remaining bands: decaying leaks without low-frequency compensation */
for (band = begin; band < band_end; band++) {
fastleak = FFMAX(fastleak - s->fast_decay, band_psd[band] - fast_gain);
slowleak = FFMAX(slowleak - s->slow_decay, band_psd[band] - s->slow_gain);
excite[band] = FFMAX(fastleak, slowleak);
}
/* compute masking curve */
for (band = band_start; band < band_end; band++) {
/* bands below db_per_bit get a quarter of the shortfall added */
int tmp = s->db_per_bit - band_psd[band];
if (tmp > 0) {
excite[band] += tmp >> 2;
}
/* the mask never drops below the hearing threshold for this band */
mask[band] = FFMAX(ff_ac3_hearing_threshold_tab[band >> s->sr_shift][s->sr_code], excite[band]);
}
/* delta bit allocation */
if (dba_mode == DBA_REUSE || dba_mode == DBA_NEW) {
int i, seg, delta;
if (dba_nsegs > 8)
return -1;
band = band_start;
for (seg = 0; seg < dba_nsegs; seg++) {
/* each offset is relative to the end of the previous segment */
band += dba_offsets[seg];
/* reject segments that would run past the last critical band */
if (band >= AC3_CRITICAL_BANDS || dba_lengths[seg] > AC3_CRITICAL_BANDS-band)
return -1;
/* values 0..3 map to -4..-1 steps and values >= 4 to +1 and up, in units
 * of 128; a zero step cannot be coded */
if (dba_values[seg] >= 4) {
delta = (dba_values[seg] - 3) * 128;
} else {
delta = (dba_values[seg] - 4) * 128;
}
for (i = 0; i < dba_lengths[seg]; i++) {
mask[band++] += delta;
}
}
}
return 0;
}
/**
 * Initialize the tables shared by the AC-3 encoder and decoder.
 *
 * When tables are not hardcoded this derives ff_ac3_bin_to_band_tab from
 * ff_ac3_band_start_tab; with hardcoded tables it does nothing.
 *
 * note: This function must remain thread safe because it is called by the
 *       AVParser init code.
 */
av_cold void ff_ac3_common_init(void)
{
#if !CONFIG_HARDCODED_TABLES
    int band, bin;

    /* every bin below the start of band + 1 that is not yet assigned
     * belongs to the current band */
    for (band = 0, bin = 0; band < AC3_CRITICAL_BANDS; band++) {
        const int stop = ff_ac3_band_start_tab[band + 1];

        for (; bin < stop; bin++)
            ff_ac3_bin_to_band_tab[bin] = band;
    }
#endif /* !CONFIG_HARDCODED_TABLES */
}

View File

@@ -0,0 +1,267 @@
/*
* Common code between the AC-3 encoder and decoder
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Common code between the AC-3 encoder and decoder.
*/
#ifndef AVCODEC_AC3_H
#define AVCODEC_AC3_H
/* Size limits and fixed dimensions used across the AC-3 code. */
#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
#define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */
#define CPL_CH 0 /**< coupling channel index */
/* NOTE(review): presumably the coefficient count per channel per block - confirm */
#define AC3_MAX_COEFS 256
/* 256 matches the per-block sample count the parser uses (num_blocks * 256) */
#define AC3_BLOCK_SIZE 256
/* largest block count per frame; plain AC-3 headers always report 6 */
#define AC3_MAX_BLOCKS 6
/* evaluates to 1536 */
#define AC3_FRAME_SIZE (AC3_MAX_BLOCKS * 256)
/* evaluates to 512 */
#define AC3_WINDOW_SIZE (AC3_BLOCK_SIZE * 2)
/* number of critical bands; sizes the band tables and per-band arrays */
#define AC3_CRITICAL_BANDS 50
#define AC3_MAX_CPL_BANDS 18
#include "libavutil/opt.h"
#include "avcodec.h"
#include "ac3tab.h"
/* exponent encoding strategy
 * Note that EXP_NEW and EXP_D15 deliberately share the value 1: any non-zero
 * strategy means new exponents are sent, and D15 is the first of them. */
#define EXP_REUSE 0
#define EXP_NEW 1
#define EXP_D15 1
#define EXP_D25 2
#define EXP_D45 3
/*
 * Arithmetic abstraction layer: the same AC-3 sources are compiled either for
 * fixed-point (USE_FIXED = 1) or floating-point (USE_FIXED = 0, the default).
 *
 * Every expression-like macro below expands to a fully parenthesized
 * expression so that it keeps its meaning when used as an operand, e.g.
 * "a * ROUND15(b)" or "x / AC3_SPX_BLEND(y)".
 */
#ifndef USE_FIXED
#define USE_FIXED 0
#endif

#if USE_FIXED

#define FFT_FLOAT 0

/* NOTE(review): the scale factor here is 0, so FIXR() evaluates to 0 for any
 * finite argument. Kept as is because callers may rely on it - confirm. */
#define FIXR(a)                 ((int)((a) * 0 + 0.5))
#define FIXR12(a)               ((int)((a) * 4096 + 0.5))
#define FIXR15(a)               ((int)((a) * 32768 + 0.5))
/* round a value scaled by 2^15 back to an integer */
#define ROUND15(x)              (((x) + 16384) >> 15)

#define AC3_RENAME(x)           x ## _fixed
#define AC3_NORM(norm)          ((1<<24)/(norm))
#define AC3_MUL(a,b)            ((((int64_t) (a)) * (b))>>12)
#define AC3_RANGE(x)            ((x)|(((x)&128)<<1))
#define AC3_HEAVY_RANGE(x)      ((x)<<1)
#define AC3_DYNAMIC_RANGE(x)    (x)
#define AC3_SPX_BLEND(x)        (x)
#define AC3_DYNAMIC_RANGE1      0

#define INTFLOAT                int
#define SHORTFLOAT              int16_t

#else /* USE_FIXED */

#define FIXR(x)                 ((float)(x))
#define FIXR12(x)               ((float)(x))
#define FIXR15(x)               ((float)(x))
#define ROUND15(x)              (x)

#define AC3_RENAME(x)           x
#define AC3_NORM(norm)          (1.0f/(norm))
#define AC3_MUL(a,b)            ((a) * (b))
/* the two table lookups expect the tables to be visible at the point of use */
#define AC3_RANGE(x)            (dynamic_range_tab[(x)])
#define AC3_HEAVY_RANGE(x)      (heavy_dynamic_range_tab[(x)])
/* expects a variable named s with a drc_scale member at the point of use */
#define AC3_DYNAMIC_RANGE(x)    (powf((x), s->drc_scale))
#define AC3_SPX_BLEND(x)        ((x) * (1.0f/32))
#define AC3_DYNAMIC_RANGE1      1.0f

#define INTFLOAT                float
#define SHORTFLOAT              float

#endif /* USE_FIXED */

/* scale a level by 1/sqrt(2) in the active arithmetic */
#define AC3_LEVEL(x)            ROUND15((x) * FIXR15(0.7071067811865476))
/* pre-defined gain values
 * Linear gain factors. Each is a power of two with an exponent in quarter
 * steps: +3 dB = 2^(1/2), +1.5 dB = 2^(1/4), -1.5 dB = 2^(-1/4),
 * -3 dB = 2^(-1/2), -4.5 dB = 2^(-3/4), -6 dB = 2^(-1), -9 dB = 2^(-3/2),
 * so the dB figures in the names are nominal. */
#define LEVEL_PLUS_3DB 1.4142135623730950
#define LEVEL_PLUS_1POINT5DB 1.1892071150027209
#define LEVEL_MINUS_1POINT5DB 0.8408964152537145
#define LEVEL_MINUS_3DB 0.7071067811865476
#define LEVEL_MINUS_4POINT5DB 0.5946035575013605
#define LEVEL_MINUS_6DB 0.5000000000000000
#define LEVEL_MINUS_9DB 0.3535533905932738
#define LEVEL_ZERO 0.0000000000000000
#define LEVEL_ONE 1.0000000000000000
/**
 * Delta bit allocation strategy.
 * The mask calculation applies delta segments for DBA_REUSE and DBA_NEW only.
 */
typedef enum {
    DBA_REUSE    = 0,
    DBA_NEW      = 1,
    DBA_NONE     = 2,
    DBA_RESERVED = 3
} AC3DeltaStrategy;
/**
 * Channel mode (audio coding mode).
 * The numeric values matter: the header parser stores the 3-bit coded field
 * directly and tests individual bits of it (bit 0 and bit 2).
 */
typedef enum {
    AC3_CHMODE_DUALMONO = 0,
    AC3_CHMODE_MONO     = 1,
    AC3_CHMODE_STEREO   = 2,
    AC3_CHMODE_3F       = 3,
    AC3_CHMODE_2F1R     = 4,
    AC3_CHMODE_3F1R     = 5,
    AC3_CHMODE_2F2R     = 6,
    AC3_CHMODE_3F2R     = 7
} AC3ChannelMode;
/**
 * Dolby Surround mode.
 * Values equal the 2-bit field the header parser reads for stereo streams.
 */
typedef enum AC3DolbySurroundMode {
    AC3_DSURMOD_NOTINDICATED = 0,
    AC3_DSURMOD_OFF          = 1,
    AC3_DSURMOD_ON           = 2,
    AC3_DSURMOD_RESERVED     = 3
} AC3DolbySurroundMode;
/** Dolby Surround EX mode */
typedef enum AC3DolbySurroundEXMode {
    AC3_DSUREXMOD_NOTINDICATED = 0,
    AC3_DSUREXMOD_OFF          = 1,
    AC3_DSUREXMOD_ON           = 2,
    AC3_DSUREXMOD_PLIIZ        = 3
} AC3DolbySurroundEXMode;
/** Dolby Headphone mode */
typedef enum AC3DolbyHeadphoneMode {
    AC3_DHEADPHONMOD_NOTINDICATED = 0,
    AC3_DHEADPHONMOD_OFF          = 1,
    AC3_DHEADPHONMOD_ON           = 2,
    AC3_DHEADPHONMOD_RESERVED     = 3
} AC3DolbyHeadphoneMode;
/** Preferred Stereo Downmix mode */
typedef enum AC3PreferredStereoDownmixMode {
    AC3_DMIXMOD_NOTINDICATED = 0,
    AC3_DMIXMOD_LTRT         = 1,
    AC3_DMIXMOD_LORO         = 2,
    /* reserved value in A/52, but used by encoders to indicate DPL2 */
    AC3_DMIXMOD_DPLII        = 3
} AC3PreferredStereoDownmixMode;
/**
 * Adjustable parameters of the bit allocation model, as consumed by
 * ff_ac3_bit_alloc_calc_mask().
 */
typedef struct AC3BitAllocParameters {
    int sr_code;        /* second index into the hearing threshold table */
    int sr_shift;       /* right shift applied to the band index for that table */
    int slow_gain;      /* subtracted from the band PSD to form the slow leak */
    int slow_decay;     /* per-band decrement of the slow leak */
    int fast_decay;     /* per-band decrement of the fast leak */
    int db_per_bit;     /* level below which the excitation is raised */
    int floor;
    int cpl_fast_leak;  /* seeds the fast leak for the coupling channel */
    int cpl_slow_leak;  /* seeds the slow leak for the coupling channel */
} AC3BitAllocParameters;
/**
 * @struct AC3HeaderInfo
 * Coded AC-3 header values up to the lfeon element, plus derived values.
 *
 * Member order is significant: avpriv_ac3_parse_header() copies this struct
 * only from its start through the end of channel_layout. The position of
 * dolby_surround_mode depends on AV_HAVE_INCOMPATIBLE_LIBAV_ABI - it sits
 * among the coded elements when that is set and after channel_layout
 * otherwise.
 */
typedef struct AC3HeaderInfo {
/** @name Coded elements
 * @{
 */
uint16_t sync_word;
uint16_t crc1;
uint8_t sr_code;
uint8_t bitstream_id;
uint8_t bitstream_mode;
uint8_t channel_mode;
uint8_t lfe_on;
uint8_t frame_type;
int substreamid; ///< substream identification
int center_mix_level; ///< Center mix level index
int surround_mix_level; ///< Surround mix level index
uint16_t channel_map;
int num_blocks; ///< number of audio blocks
#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
int dolby_surround_mode;
#endif
/** @} */
/** @name Derived values
 * @{
 */
uint8_t sr_shift;
uint16_t sample_rate;
uint32_t bit_rate;
uint8_t channels;
uint16_t frame_size;
uint64_t channel_layout;
/** @} */
#if !AV_HAVE_INCOMPATIBLE_LIBAV_ABI
/* kept last in this configuration, beyond the range of the partial copy */
int dolby_surround_mode;
#endif
} AC3HeaderInfo;
/**
 * Frame type.
 * For E-AC-3 the values equal the 2-bit field read by the header parser;
 * plain AC-3 frames are reported as EAC3_FRAME_TYPE_AC3_CONVERT.
 */
typedef enum {
    EAC3_FRAME_TYPE_INDEPENDENT = 0,
    EAC3_FRAME_TYPE_DEPENDENT   = 1,
    EAC3_FRAME_TYPE_AC3_CONVERT = 2,
    EAC3_FRAME_TYPE_RESERVED    = 3
} EAC3FrameType;
/**
 * Initialize the tables shared by the AC-3 encoder and decoder.
 * Builds the bin-to-band lookup table unless tables are hardcoded at build
 * time, in which case the call has no effect.
 */
void ff_ac3_common_init(void);
/**
 * Calculate the log power-spectral density of the input signal.
 * This gives a rough estimate of signal power in the frequency domain by using
 * the spectral envelope (exponents). The psd is also separately grouped
 * into critical bands for use in calculating the masking curve.
 * 128 units in psd = -6 dB. The reference level is set by the dbknee
 * parameter of the bit allocation model.
 * NOTE(review): AC3BitAllocParameters has no member named dbknee; db_per_bit
 * appears to be the corresponding field - confirm.
 *
 * @param[in] exp frequency coefficient exponents
 * @param[in] start starting bin location
 * @param[in] end ending bin location (exclusive)
 * @param[out] psd signal power for each frequency bin
 * @param[out] band_psd signal power for each critical band
 */
void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd,
int16_t *band_psd);
/**
 * Calculate the masking curve.
 * First, the excitation is calculated using parameters in s and the signal
 * power in each critical band. The excitation is compared with a predefined
 * hearing threshold table to produce the masking curve. If delta bit
 * allocation information is provided, it is used for adjusting the masking
 * curve, usually to give a closer match to a better psychoacoustic model.
 *
 * @param[in] s adjustable bit allocation parameters
 * @param[in] band_psd signal power for each critical band
 * @param[in] start starting bin location
 * @param[in] end ending bin location; must be greater than 0
 * @param[in] fast_gain fast gain (estimated signal-to-mask ratio)
 * @param[in] is_lfe whether or not the channel being processed is the LFE
 * @param[in] dba_mode delta bit allocation mode (none, reuse, or new)
 * @param[in] dba_nsegs number of delta segments (at most 8)
 * @param[in] dba_offsets location offsets for each segment
 * @param[in] dba_lengths length of each segment
 * @param[in] dba_values delta bit allocation for each segment
 * @param[out] mask calculated masking curve
 * @return returns 0 for success, a negative value for error
 *         (AVERROR_INVALIDDATA if end <= 0, -1 for out-of-range delta
 *         segments)
 */
int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
int start, int end, int fast_gain, int is_lfe,
int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
uint8_t *dba_lengths, uint8_t *dba_values,
int16_t *mask);
#endif /* AVCODEC_AC3_H */

View File

@@ -0,0 +1,213 @@
/*
* AC-3 parser
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/channel_layout.h"
#include "parser.h"
#include "ac3_parser.h"
#include "aac_ac3_parser.h"
#include "get_bits.h"
/* Header bytes the parser needs: ac3_sync() parses the last 7 bytes of its
 * 64-bit state, and E-AC-3 frames smaller than this are rejected. */
#define AC3_HEADER_SIZE 7
/**
 * Number of audio blocks per E-AC-3 frame, indexed by the 2-bit code read
 * from the header.
 */
static const uint8_t eac3_blocks[4] = {
    [0] = 1,
    [1] = 2,
    [2] = 3,
    [3] = 6,
};
/**
 * Table for center mix levels, indexed by the 2-bit coded value.
 * Entries are level indices, not gains.
 * reference: Section 5.4.2.4 cmixlev
 */
static const uint8_t center_levels[4] = {
    [0] = 4,
    [1] = 5,
    [2] = 6,
    [3] = 5,
};
/**
 * Table for surround mix levels, indexed by the 2-bit coded value.
 * Entries are level indices, not gains.
 * reference: Section 5.4.2.5 surmixlev
 */
static const uint8_t surround_levels[4] = {
    [0] = 4,
    [1] = 6,
    [2] = 7,
    [3] = 6,
};
/**
 * Parse an AC-3 or E-AC-3 frame header from a bit reader.
 *
 * If *phdr is NULL a zeroed AC3HeaderInfo is allocated and stored there; a
 * struct allocated here is not released when parsing fails, so it stays in
 * *phdr either way. The struct is cleared on every call before parsing.
 *
 * @param gbc  bit reader positioned at the sync word
 * @param phdr in/out pointer to the header struct
 * @return 0 on success, AVERROR(ENOMEM) if allocation fails, or one of the
 *         AAC_AC3_PARSE_ERROR_* codes for an invalid header
 */
int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **phdr)
{
int frame_size_code;
AC3HeaderInfo *hdr;
if (!*phdr)
*phdr = av_mallocz(sizeof(AC3HeaderInfo));
if (!*phdr)
return AVERROR(ENOMEM);
hdr = *phdr;
memset(hdr, 0, sizeof(*hdr));
hdr->sync_word = get_bits(gbc, 16);
if(hdr->sync_word != 0x0B77)
return AAC_AC3_PARSE_ERROR_SYNC;
/* read ahead to bsid to distinguish between AC-3 and E-AC-3 */
/* peeks (without consuming) the 29 bits after the sync word and keeps the
 * last 5 of them */
hdr->bitstream_id = show_bits_long(gbc, 29) & 0x1F;
if(hdr->bitstream_id > 16)
return AAC_AC3_PARSE_ERROR_BSID;
hdr->num_blocks = 6;
/* set default mix levels */
hdr->center_mix_level = 5; // -4.5dB
hdr->surround_mix_level = 6; // -6.0dB
/* set default dolby surround mode */
hdr->dolby_surround_mode = AC3_DSURMOD_NOTINDICATED;
if(hdr->bitstream_id <= 10) {
/* Normal AC-3 */
hdr->crc1 = get_bits(gbc, 16);
hdr->sr_code = get_bits(gbc, 2);
if(hdr->sr_code == 3)
return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
frame_size_code = get_bits(gbc, 6);
if(frame_size_code > 37)
return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
skip_bits(gbc, 5); // skip bsid, already got it
hdr->bitstream_mode = get_bits(gbc, 3);
hdr->channel_mode = get_bits(gbc, 3);
/* exactly one of the following optional 2-bit groups applies per mode:
 * surround mode for stereo, otherwise center and/or surround mix levels */
if(hdr->channel_mode == AC3_CHMODE_STEREO) {
hdr->dolby_surround_mode = get_bits(gbc, 2);
} else {
/* odd channel modes other than mono carry a center mix level */
if((hdr->channel_mode & 1) && hdr->channel_mode != AC3_CHMODE_MONO)
hdr-> center_mix_level = center_levels[get_bits(gbc, 2)];
/* channel modes with bit 2 set carry a surround mix level */
if(hdr->channel_mode & 4)
hdr->surround_mix_level = surround_levels[get_bits(gbc, 2)];
}
hdr->lfe_on = get_bits1(gbc);
/* bsid 9 and 10 shift the sample rate and bit rate down by 1 and 2 bits */
hdr->sr_shift = FFMAX(hdr->bitstream_id, 8) - 8;
hdr->sample_rate = ff_ac3_sample_rate_tab[hdr->sr_code] >> hdr->sr_shift;
hdr->bit_rate = (ff_ac3_bitrate_tab[frame_size_code>>1] * 1000) >> hdr->sr_shift;
hdr->channels = ff_ac3_channels_tab[hdr->channel_mode] + hdr->lfe_on;
/* the table value is doubled to give the size stored in frame_size */
hdr->frame_size = ff_ac3_frame_size_tab[frame_size_code][hdr->sr_code] * 2;
hdr->frame_type = EAC3_FRAME_TYPE_AC3_CONVERT; //EAC3_FRAME_TYPE_INDEPENDENT;
hdr->substreamid = 0;
} else {
/* Enhanced AC-3 */
hdr->crc1 = 0;
hdr->frame_type = get_bits(gbc, 2);
if(hdr->frame_type == EAC3_FRAME_TYPE_RESERVED)
return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
hdr->substreamid = get_bits(gbc, 3);
/* coded as (size / 2) - 1 in 11 bits */
hdr->frame_size = (get_bits(gbc, 11) + 1) << 1;
if(hdr->frame_size < AC3_HEADER_SIZE)
return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
hdr->sr_code = get_bits(gbc, 2);
if (hdr->sr_code == 3) {
/* escape value: a second code selects a halved sample rate and the block
 * count stays at its default of 6 */
int sr_code2 = get_bits(gbc, 2);
if(sr_code2 == 3)
return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
hdr->sample_rate = ff_ac3_sample_rate_tab[sr_code2] / 2;
hdr->sr_shift = 1;
} else {
hdr->num_blocks = eac3_blocks[get_bits(gbc, 2)];
hdr->sample_rate = ff_ac3_sample_rate_tab[hdr->sr_code];
hdr->sr_shift = 0;
}
hdr->channel_mode = get_bits(gbc, 3);
hdr->lfe_on = get_bits1(gbc);
/* bits per frame times frames per second (sample_rate / samples per frame) */
hdr->bit_rate = 8LL * hdr->frame_size * hdr->sample_rate /
(hdr->num_blocks * 256);
hdr->channels = ff_ac3_channels_tab[hdr->channel_mode] + hdr->lfe_on;
}
hdr->channel_layout = avpriv_ac3_channel_layout_tab[hdr->channel_mode];
if (hdr->lfe_on)
hdr->channel_layout |= AV_CH_LOW_FREQUENCY;
return 0;
}
/**
 * Parse a header into a caller-provided struct.
 *
 * Parsing goes into a local AC3HeaderInfo and only the leading part of it, up
 * to and including channel_layout, is copied to hdr. Anything declared after
 * channel_layout is therefore never written through hdr (presumably so that
 * callers built against a shorter struct layout are not overrun - confirm).
 * The copy is made even when parsing fails.
 *
 * @return the result of avpriv_ac3_parse_header2()
 */
int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
{
    AC3HeaderInfo local;
    AC3HeaderInfo *plocal = &local;
    const int status = avpriv_ac3_parse_header2(gbc, &plocal);
    /* byte count from the start of the struct through the end of channel_layout */
    const size_t nbytes = ((intptr_t)&local.channel_layout) - ((intptr_t)&local)
                          + sizeof(local.channel_layout);

    memcpy(hdr, plocal, nbytes);

    return status;
}
/**
 * Sync callback for the shared AAC/AC-3 parser: try to parse a header from
 * the 64-bit state and, on success, publish the stream parameters.
 *
 * @param state            64-bit window; converted from big-endian to native
 *                         order, then its last AC3_HEADER_SIZE bytes are parsed
 * @param hdr_info         parser context that receives the stream parameters
 * @param need_next_header set to non-zero unless the frame type is
 *                         EAC3_FRAME_TYPE_AC3_CONVERT
 * @param new_frame_start  set to non-zero unless the frame is a dependent one
 * @return the frame size from the header, or 0 if no valid header was found
 */
static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
                    int *need_next_header, int *new_frame_start)
{
    /* the union gives the bit reader the padding it may read past the data */
    union {
        uint64_t u64;
        uint8_t u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
    } window = { av_be2ne64(state) };
    AC3HeaderInfo info, *pinfo = &info;
    GetBitContext bits;

    init_get_bits(&bits, window.u8 + 8 - AC3_HEADER_SIZE, 54);
    if (avpriv_ac3_parse_header2(&bits, &pinfo) < 0)
        return 0;

    hdr_info->sample_rate    = info.sample_rate;
    hdr_info->bit_rate       = info.bit_rate;
    hdr_info->channels       = info.channels;
    hdr_info->channel_layout = info.channel_layout;
    hdr_info->samples        = info.num_blocks * 256;

    /* bitstream mode 7 with more than one channel is reported as karaoke */
    if (info.bitstream_mode == 0x7 && info.channels > 1)
        hdr_info->service_type = AV_AUDIO_SERVICE_TYPE_KARAOKE;
    else
        hdr_info->service_type = info.bitstream_mode;

    /* E-AC-3 always overrides; AC-3 is only set when nothing is known yet */
    if (info.bitstream_id > 10)
        hdr_info->codec_id = AV_CODEC_ID_EAC3;
    else if (hdr_info->codec_id == AV_CODEC_ID_NONE)
        hdr_info->codec_id = AV_CODEC_ID_AC3;

    *need_next_header = (info.frame_type != EAC3_FRAME_TYPE_AC3_CONVERT);
    *new_frame_start  = (info.frame_type != EAC3_FRAME_TYPE_DEPENDENT);

    return info.frame_size;
}
/**
 * Configure the shared AAC/AC-3 parser context for AC-3: install the sync
 * callback and the number of header bytes it needs.
 *
 * @return always 0
 */
static av_cold int ac3_parse_init(AVCodecParserContext *s1)
{
    AACAC3ParseContext *ctx = s1->priv_data;

    ctx->sync        = ac3_sync;
    ctx->header_size = AC3_HEADER_SIZE;

    return 0;
}
/**
 * Parser registration entry covering both AC-3 and E-AC-3.
 * Only the init callback is specific to this file; parsing and closing use
 * the ff_aac_ac3_parse and ff_parse_close functions defined elsewhere.
 */
AVCodecParser ff_ac3_parser = {
.codec_ids = { AV_CODEC_ID_AC3, AV_CODEC_ID_EAC3 },
.priv_data_size = sizeof(AACAC3ParseContext),
.parser_init = ac3_parse_init,
.parser_parse = ff_aac_ac3_parse,
.parser_close = ff_parse_close,
};

View File

@@ -0,0 +1,44 @@
/*
* AC-3 parser prototypes
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2003 Michael Niedermayer
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AC3_PARSER_H
#define AVCODEC_AC3_PARSER_H
#include "ac3.h"
#include "get_bits.h"
/**
 * Parse AC-3 frame header.
 * Parse the header up to the lfeon element, which is the first 52 or 54 bits
 * depending on the audio coding mode.
 * @param[in] gbc BitContext containing the first 54 bits of the frame.
 * @param[in,out] hdr Pointer to a pointer to the struct where header info is
 *                    written. If the pointed-to pointer is NULL, a struct is
 *                    allocated and stored there.
 * @return Returns 0 on success, -1 if there is a sync word mismatch,
 * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
 * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
 * Other negative values are possible as well, e.g. for a failed allocation
 * or a reserved E-AC-3 frame type.
 */
int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **hdr);
/**
 * Parse AC-3 frame header into a caller-provided struct.
 * Same parsing as avpriv_ac3_parse_header2(), but the result is written to
 * an existing AC3HeaderInfo instead of an allocated one.
 * @param[in] gbc BitContext containing the first 54 bits of the frame.
 * @param[out] hdr Struct that receives the header info.
 * @return Same return values as avpriv_ac3_parse_header2().
 */
int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr);
#endif /* AVCODEC_AC3_PARSER_H */

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More