Libav
mpegvideoencdsp_init.c
Go to the documentation of this file.
1 /*
2  * This file is part of Libav.
3  *
4  * Libav is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * Libav is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with Libav; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/attributes.h"
20 #include "libavutil/cpu.h"
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/avcodec.h"
24 
/* Routines declared here but not defined in this file.
 * NOTE(review): presumably implemented in external assembly, since the
 * init function guards its external branch with EXTERNAL_MMX() -- confirm.
 * Both take a pointer to pixel data plus a line size and return an int. */
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
27 
28 #if HAVE_INLINE_ASM
29 
/*
 * Add the two 32-bit halves of MMX register a; the sum ends up in the low
 * dword of a. t is a scratch register that is overwritten.
 */
#define PHADDD(a, t) \
    "movq " #a ", " #t " \n\t" \
    "psrlq $32, " #a " \n\t" \
    "paddd " #t ", " #a " \n\t"

/*
 * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
 * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
 */

/* ---- MMX parameter set ---------------------------------------------- */
/* Plain MMX has no rounding multiply, so PMULHRW is built from pmulhw
 * followed by adding the offset o and an arithmetic shift right by one. */
#define PMULHRW(x, y, s, o) \
    "pmulhw " #s ", " #x " \n\t" \
    "pmulhw " #s ", " #y " \n\t" \
    "paddw " #o ", " #x " \n\t" \
    "paddw " #o ", " #y " \n\t" \
    "psraw $1, " #x " \n\t" \
    "psraw $1, " #y " \n\t"
#define DEF(x) x ## _mmx
#define SET_RND MOVQ_WONE
#define SCALE_OFFSET 1

/* NOTE(review): the listing this file was recovered from skips one source
 * line here. Presumably it was the template #include that expands the
 * macros above into try_8x8basis_mmx / add_8x8basis_mmx, which the init
 * function references -- confirm against upstream and restore it. */

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

/* ---- 3DNow! parameter set ------------------------------------------- */
/* pmulhrw rounds by itself, so no rounding constant (SET_RND expands to
 * nothing) and the offset argument o is unused. */
#define DEF(x) x ## _3dnow
#define SET_RND(x)
#define SCALE_OFFSET 0
#define PMULHRW(x, y, s, o) \
    "pmulhrw " #s ", " #x " \n\t" \
    "pmulhrw " #s ", " #y " \n\t"

/* NOTE(review): one source line is missing here as well; presumably the
 * same template #include, producing the *_3dnow functions -- confirm. */

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW

#if HAVE_SSSE3_INLINE
/* ---- SSSE3 parameter set -------------------------------------------- */
#undef PHADDD
#define DEF(x) x ## _ssse3
#define SET_RND(x)
#define SCALE_OFFSET -1

/* Same result as the generic PHADDD, but the upper dword is fetched with
 * a single pshufw instead of movq + psrlq. */
#define PHADDD(a, t) \
    "pshufw $0x0E, " #a ", " #t " \n\t" \
    /* faster than phaddd on core2 */ \
    "paddd " #t ", " #a " \n\t"

#define PMULHRW(x, y, s, o) \
    "pmulhrsw " #s ", " #x " \n\t" \
    "pmulhrsw " #s ", " #y " \n\t"

/* NOTE(review): one source line is missing here too; presumably the
 * template #include producing the *_ssse3 functions -- confirm. */

#undef DEF
#undef SET_RND
#undef SCALE_OFFSET
#undef PMULHRW
#undef PHADDD
#endif /* HAVE_SSSE3_INLINE */
95 
96 /* Draw the edges of width 'w' of an image of size width, height
97  * this MMX version can only handle w == 8 || w == 16. */
98 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
99  int w, int h, int sides)
100 {
101  uint8_t *ptr, *last_line;
102  int i;
103 
104  last_line = buf + (height - 1) * wrap;
105  /* left and right */
106  ptr = buf;
107  if (w == 8) {
108  __asm__ volatile (
109  "1: \n\t"
110  "movd (%0), %%mm0 \n\t"
111  "punpcklbw %%mm0, %%mm0 \n\t"
112  "punpcklwd %%mm0, %%mm0 \n\t"
113  "punpckldq %%mm0, %%mm0 \n\t"
114  "movq %%mm0, -8(%0) \n\t"
115  "movq -8(%0, %2), %%mm1 \n\t"
116  "punpckhbw %%mm1, %%mm1 \n\t"
117  "punpckhwd %%mm1, %%mm1 \n\t"
118  "punpckhdq %%mm1, %%mm1 \n\t"
119  "movq %%mm1, (%0, %2) \n\t"
120  "add %1, %0 \n\t"
121  "cmp %3, %0 \n\t"
122  "jb 1b \n\t"
123  : "+r" (ptr)
124  : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
125  "r" (ptr + wrap * height));
126  } else {
127  __asm__ volatile (
128  "1: \n\t"
129  "movd (%0), %%mm0 \n\t"
130  "punpcklbw %%mm0, %%mm0 \n\t"
131  "punpcklwd %%mm0, %%mm0 \n\t"
132  "punpckldq %%mm0, %%mm0 \n\t"
133  "movq %%mm0, -8(%0) \n\t"
134  "movq %%mm0, -16(%0) \n\t"
135  "movq -8(%0, %2), %%mm1 \n\t"
136  "punpckhbw %%mm1, %%mm1 \n\t"
137  "punpckhwd %%mm1, %%mm1 \n\t"
138  "punpckhdq %%mm1, %%mm1 \n\t"
139  "movq %%mm1, (%0, %2) \n\t"
140  "movq %%mm1, 8(%0, %2) \n\t"
141  "add %1, %0 \n\t"
142  "cmp %3, %0 \n\t"
143  "jb 1b \n\t"
144  : "+r" (ptr)
145  : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
146  "r" (ptr + wrap * height));
147  }
148 
149  /* top and bottom (and hopefully also the corners) */
150  if (sides & EDGE_TOP) {
151  for (i = 0; i < h; i += 4) {
152  ptr = buf - (i + 1) * wrap - w;
153  __asm__ volatile (
154  "1: \n\t"
155  "movq (%1, %0), %%mm0 \n\t"
156  "movq %%mm0, (%0) \n\t"
157  "movq %%mm0, (%0, %2) \n\t"
158  "movq %%mm0, (%0, %2, 2) \n\t"
159  "movq %%mm0, (%0, %3) \n\t"
160  "add $8, %0 \n\t"
161  "cmp %4, %0 \n\t"
162  "jb 1b \n\t"
163  : "+r" (ptr)
164  : "r" ((x86_reg) buf - (x86_reg) ptr - w),
165  "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
166  "r" (ptr + width + 2 * w));
167  }
168  }
169 
170  if (sides & EDGE_BOTTOM) {
171  for (i = 0; i < h; i += 4) {
172  ptr = last_line + (i + 1) * wrap - w;
173  __asm__ volatile (
174  "1: \n\t"
175  "movq (%1, %0), %%mm0 \n\t"
176  "movq %%mm0, (%0) \n\t"
177  "movq %%mm0, (%0, %2) \n\t"
178  "movq %%mm0, (%0, %2, 2) \n\t"
179  "movq %%mm0, (%0, %3) \n\t"
180  "add $8, %0 \n\t"
181  "cmp %4, %0 \n\t"
182  "jb 1b \n\t"
183  : "+r" (ptr)
184  : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
185  "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
186  "r" (ptr + width + 2 * w));
187  }
188  }
189 }
190 
191 #endif /* HAVE_INLINE_ASM */
192 
194  AVCodecContext *avctx)
195 {
196  int cpu_flags = av_get_cpu_flags();
197 
198  if (EXTERNAL_MMX(cpu_flags)) {
201  }
202 
203 #if HAVE_INLINE_ASM
204 
205  if (INLINE_MMX(cpu_flags)) {
206  if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
207  c->try_8x8basis = try_8x8basis_mmx;
208  }
209  c->add_8x8basis = add_8x8basis_mmx;
210 
211  if (avctx->bits_per_raw_sample <= 8) {
212  c->draw_edges = draw_edges_mmx;
213  }
214  }
215 
216  if (INLINE_AMD3DNOW(cpu_flags)) {
217  if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
218  c->try_8x8basis = try_8x8basis_3dnow;
219  }
220  c->add_8x8basis = add_8x8basis_3dnow;
221  }
222 
223 #if HAVE_SSSE3_INLINE
224  if (INLINE_SSSE3(cpu_flags)) {
225  if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
226  c->try_8x8basis = try_8x8basis_ssse3;
227  }
228  c->add_8x8basis = add_8x8basis_ssse3;
229  }
230 #endif /* HAVE_SSSE3_INLINE */
231 
232 #endif /* HAVE_INLINE_ASM */
233 }
#define EXTERNAL_MMX(flags)
Definition: cpu.h:47
int(* try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, AVCodecContext *avctx)
void(* add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
Definition: avcodec.h:2514
#define EDGE_TOP
Macro definitions for various function/variable attributes.
uint8_t
#define av_cold
Definition: attributes.h:66
int x86_reg
Definition: asm.h:70
#define CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:658
#define INLINE_MMX(flags)
Definition: cpu.h:63
int flags
CODEC_FLAG_*.
Definition: avcodec.h:1144
#define wrap(func)
Definition: neontest.h:62
#define INLINE_SSSE3(flags)
Definition: cpu.h:68
#define INLINE_AMD3DNOW(flags)
Definition: cpu.h:61
int(* pix_sum)(uint8_t *pix, int line_size)
static int width
Definition: utils.c:156
Libavcodec external API header.
main external API structure.
Definition: avcodec.h:1050
int(* pix_norm1)(uint8_t *pix, int line_size)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:47
int height
Definition: gxfenc.c:72
int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
void(* draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides)
#define EDGE_BOTTOM