libvisual  0.5.0
lv_cpu.c
1 /* Libvisual - The audio visualisation framework.
2  *
3  * Copyright (C) 2012-2013 Libvisual team
4  * 2004-2006 Dennis Smit
5  *
6  * Authors: Dennis Smit <ds@nerds-incorporated.org>
7  * Chong Kai Xiong <kaixiong@codeleft.sg>
8  * Eric Anholt <anholt@freebsd.org>
9  *
10  * Extra Credits: MPlayer cpudetect hackers.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser General Public License as
14  * published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */
26 
27 /* FIXME: clean this entire file up */
28 
29 #define _POSIX_SOURCE
30 #define _BSD_SOURCE
31 
32 #include "config.h"
33 #include "lv_cpu.h"
34 #include "lv_common.h"
35 #include <unistd.h>
36 
37 #if defined(VISUAL_ARCH_POWERPC)
38 #if defined(VISUAL_OS_DARWIN)
39 #include <sys/sysctl.h>
40 #else
41 #include <signal.h>
42 #include <setjmp.h>
43 #endif
44 #endif
45 
46 #if defined(VISUAL_OS_HPUX)
47 #include <sys/mpctl.h>
48 #endif
49 
50 #if defined(VISUAL_OS_NETBSD) || defined(VISUAL_OS_OPENBSD)
51 #include <sys/param.h>
52 #include <sys/sysctl.h>
53 #include <machine/cpu.h>
54 #endif
55 
56 #if defined(VISUAL_OS_FREEBSD)
57 #include <sys/types.h>
58 #include <sys/sysctl.h>
59 #endif
60 
61 #if defined(VISUAL_OS_LINUX)
62 #include <signal.h>
63 #endif
64 
65 #if defined(VISUAL_OS_WIN32)
66 #include <windows.h>
67 #endif
68 
69 #if defined(VISUAL_OS_ANDROID)
70 #include <cpu-features.h>
71 #endif
72 
/* Evaluates to 1 if bit number `bit` (0 = least significant) of `value` is
 * set, and to 0 otherwise. */
#define TEST_BIT(value, bit) (1 & ((value) >> (bit)))
74 
75 typedef struct {
76  VisCPUType type;
77  int num_cores;
78  int x86cpuType;
79  int cacheline;
80 
81  int hasMMX;
82  int hasMMX2;
83  int hasSSE;
84  int hasSSE2;
85  int has3DNow;
86  int has3DNowExt;
87  int hasAltiVec;
88  int hasARMv7;
89  int hasVFPv3;
90  int hasNeon;
91  int hasLDREX_STREX;
92 } VisCPU;
93 
94 static VisCPU cpu_caps;
95 static int cpu_initialized = FALSE;
96 
97 /* The sigill handlers */
98 #if defined(VISUAL_ARCH_X86) //x86 (linux katmai handler check thing)
99 #if defined(VISUAL_OS_LINUX)
100 static void sigill_handler_sse (int signal, struct sigcontext sc)
101 {
102  /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
103  * instructions are 3 bytes long. We must increment the instruction
104  * pointer manually to avoid repeated execution of the offending
105  * instruction.
106  *
107  * If the SIGILL is caused by a divide-by-zero when unmasked
108  * exceptions aren't supported, the SIMD FPU status and control
109  * word will be restored at the end of the test, so we don't need
110  * to worry about doing it here. Besides, we may not be able to...
111  */
112  sc.eip += 3;
113 
114  cpu_caps.hasSSE = FALSE;
115 }
116 
117 static void sigfpe_handler_sse (int signal, struct sigcontext sc)
118 {
119  if (sc.fpstate->magic != 0xffff) {
120  /* Our signal context has the extended FPU state, so reset the
121  * divide-by-zero exception mask and clear the divide-by-zero
122  * exception bit. */
123  sc.fpstate->mxcsr |= 0x00000200;
124  sc.fpstate->mxcsr &= 0xfffffffb;
125  } else {
126  /* If we ever get here, we're completely hosed. */
127  }
128 }
129 #endif
130 #endif /* VISUAL_OS_LINUX */
131 
132 #if defined(VISUAL_OS_WIN32)
133 LONG CALLBACK win32_sig_handler_sse (EXCEPTION_POINTERS* ep)
134 {
135  if (ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
136  ep->ContextRecord->Eip +=3;
137  cpu_caps.hasSSE = FALSE;
138  return EXCEPTION_CONTINUE_EXECUTION;
139  }
140  return EXCEPTION_CONTINUE_SEARCH;
141 }
142 #endif /* VISUAL_OS_WIN32 */
143 
144 
#if defined(VISUAL_ARCH_POWERPC)

#if !defined(VISUAL_OS_DARWIN)
/* Jump target and "armed" flag shared between the AltiVec probe and its
 * SIGILL handler. */
static sigjmp_buf powerpc_jmpbuf;
static volatile sig_atomic_t powerpc_canjump = 0;

/*
 * SIGILL handler for the brute-force AltiVec probe.
 *
 * While the probe is armed (powerpc_canjump != 0) it jumps back into
 * check_os_altivec_support().  A SIGILL arriving at any other time is not
 * ours: the default action is reinstated and the signal re-raised.
 */
static void sigill_handler (int sig)
{
    if (!powerpc_canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
        /* Never jump through a buffer that has not been armed. */
        return;
    }

    powerpc_canjump = 0;
    siglongjmp (powerpc_jmpbuf, 1);
}
#endif /* !VISUAL_OS_DARWIN */

/*
 * Sets cpu_caps.hasAltiVec to 1 when the vector unit is usable.
 *
 * On Darwin the hw.vectorunit sysctl is queried.  Elsewhere an AltiVec
 * instruction is executed with a temporary SIGILL handler installed; if no
 * signal arrives, AltiVec is available.  The SIGILL disposition that was in
 * effect before the probe is restored afterwards.
 */
static void check_os_altivec_support (void)
{
#if defined(VISUAL_OS_DARWIN)
    int sels[2] = {CTL_HW, HW_VECTORUNIT};
    int has_vu = 0;
    visual_size_t len = sizeof (has_vu);

    if (sysctl (sels, 2, &has_vu, &len, NULL, 0) == 0 && has_vu != 0)
        cpu_caps.hasAltiVec = 1;
#else /* !VISUAL_OS_DARWIN */
    /* no Darwin, do it the brute-force way */
    /* this is borrowed from the libmpeg2 library */
    void (*previous_handler) (int) = signal (SIGILL, sigill_handler);

    /* Without our handler in place the probe could kill the process. */
    if (previous_handler == SIG_ERR)
        return;

    if (sigsetjmp (powerpc_jmpbuf, 1)) {
        /* The probe raised SIGILL: no AltiVec. */
    } else {
        powerpc_canjump = 1;

        __asm __volatile
            ("mtspr 256, %0\n\t"
             "vand %%v0, %%v0, %%v0"
             :
             : "r" (-1));

        powerpc_canjump = 0;
        cpu_caps.hasAltiVec = 1;
    }

    signal (SIGILL, previous_handler);
#endif
}

#endif /* VISUAL_ARCH_POWERPC */
194 
195 /* If we're running on a processor that can do SSE, let's see if we
196  * are allowed to or not. This will catch 2.4.0 or later kernels that
197  * haven't been configured for a Pentium III but are running on one,
198  * and RedHat patched 2.2 kernels that have broken exception handling
199  * support for user space apps that do SSE.
200  */
201 #if defined(VISUAL_ARCH_X86)
202 static void check_os_katmai_support (void)
203 {
204 #if defined(VISUAL_OS_FREEBSD)
205  int has_sse = 0, ret;
206  visual_size_t len = sizeof(has_sse);
207 
208  ret = sysctlbyname ("hw.instruction_sse", &has_sse, &len, NULL, 0);
209  if (ret || !has_sse)
210  cpu_caps.hasSSE = FALSE;
211 
212 #elif defined(VISUAL_OS_NETBSD) || defined(VISUAL_OS_OPENBSD)
213  int has_sse, has_sse2, ret, mib[2];
214  visual_size_t varlen;
215 
216  mib[0] = CTL_MACHDEP;
217  mib[1] = CPU_SSE;
218  varlen = sizeof(has_sse);
219 
220  ret = sysctl (mib, 2, &has_sse, &varlen, NULL, 0);
221  if (ret < 0 || !has_sse) {
222  cpu_caps.hasSSE = FALSE;
223  } else {
224  cpu_caps.hasSSE = TRUE;
225  }
226 
227  mib[1] = CPU_SSE2;
228  varlen = sizeof (has_sse2);
229  ret = sysctl (mib, 2, &has_sse2, &varlen, NULL, 0);
230  if (ret < 0 || !has_sse2) {
231  cpu_caps.hasSSE2 = FALSE;
232  } else {
233  cpu_caps.hasSSE2 = TRUE;
234  }
235  cpu_caps.hasSSE = FALSE; /* FIXME ?!?!? */
236 
237 #elif defined(VISUAL_OS_WIN32)
238  LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
239  if (cpu_caps.hasSSE) {
240  exc_fil = SetUnhandledExceptionFilter (win32_sig_handler_sse);
241  __asm __volatile ("xorps %xmm0, %xmm0");
242  SetUnhandledExceptionFilter (exc_fil);
243  }
244 #elif defined(VISUAL_OS_LINUX)
245  struct sigaction saved_sigill;
246  struct sigaction saved_sigfpe;
247 
248  /* Save the original signal handlers.
249  */
250  sigaction (SIGILL, NULL, &saved_sigill);
251  sigaction (SIGFPE, NULL, &saved_sigfpe);
252 
253  signal (SIGILL, (void (*)(int)) sigill_handler_sse);
254  signal (SIGFPE, (void (*)(int)) sigfpe_handler_sse);
255 
256  /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
257  * supports the extended FPU save and restore required for SSE. If
258  * we execute an SSE instruction on a PIII and get a SIGILL, the OS
259  * doesn't support Streaming SIMD Exceptions, even if the processor
260  * does.
261  */
262  if (cpu_caps.hasSSE) {
263  __asm __volatile ("xorps %xmm1, %xmm0");
264  }
265 
266  /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
267  * it supports unmasked SIMD FPU exceptions. If we unmask the
268  * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
269  * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
270  * as expected, we're okay but we need to clean up after it.
271  *
272  * Are we being too stringent in our requirement that the OS support
273  * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
274  * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
275  * doesn't even support them. We at least know the user-space SSE
276  * support is good in kernels that do support unmasked exceptions,
277  * and therefore to be safe I'm going to leave this test in here.
278  */
279  //if (cpu_caps.hasSSE) {
280  // test_os_katmai_exception_support();
281  //}
282 
283  /* Restore the original signal handlers. */
284  sigaction (SIGILL, &saved_sigill, NULL);
285  sigaction (SIGFPE, &saved_sigfpe, NULL);
286 
287 #else
288  /* We can't use POSIX signal handling to test the availability of
289  * SSE, so we disable it by default.
290  */
291  cpu_caps.hasSSE = FALSE;
292 #endif /* __linux__ */
293 }
294 #endif /* VISUAL_ARCH_X86 */
295 
296 
297 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
298 static int has_cpuid (void)
299 {
300 #if defined(VISUAL_ARCH_X86)
301  int a, c;
302 
303  __asm __volatile
304  ("pushf\n"
305  "popl %0\n"
306  "movl %0, %1\n"
307  "xorl $0x200000, %0\n"
308  "push %0\n"
309  "popf\n"
310  "pushf\n"
311  "popl %0\n"
312  : "=a" (a), "=c" (c)
313  :
314  : "cc");
315 
316  return a != c;
317 #elif defined(VISUAL_ARCH_X86_64)
318  return TRUE;
319 #else
320  return FALSE;
321 #endif
322 }
323 #endif /* defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64) */
324 
325 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/*
 * Executes CPUID for leaf `ax` and stores the resulting register values in
 * p[0..3] in the order EAX, EBX, ECX, EDX.
 *
 * `p` must point to at least four unsigned ints.
 */
static void cpuid (unsigned int ax, unsigned int *p)
{
#if defined(VISUAL_ARCH_X86_64) || defined(__x86_64__)
    /* On x86-64, CPUID zero-extends its EBX result into RBX.  Saving and
     * restoring only the low 32 bits through ESI would leave the upper half
     * of RBX cleared without the compiler knowing, so EBX is declared as a
     * real output here and the compiler does the bookkeeping. */
    __asm __volatile
        ("cpuid"
         : "=a" (p[0]), "=b" (p[1]),
           "=c" (p[2]), "=d" (p[3])
         : "0" (ax));
#else
    /* 32-bit: EBX may be reserved (e.g. as the PIC register), so it is parked
     * in ESI around the instruction and the result is returned through ESI. */
    __asm __volatile
        ("movl %%ebx, %%esi\n\t"
         "cpuid\n\t"
         "xchgl %%ebx, %%esi"
         : "=a" (p[0]), "=S" (p[1]),
           "=c" (p[2]), "=d" (p[3])
         : "0" (ax));
#endif
}
336 #endif
337 
/*
 * Returns the number of processors reported by the operating system.
 *
 * The result is always at least 1: if the platform query fails or reports a
 * non-positive value, or the platform is not recognised, 1 is returned.
 */
static unsigned int get_number_of_cores (void)
{
    /* See: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine */

#if defined(VISUAL_OS_LINUX) || defined(VISUAL_OS_SOLARIS) || defined(VISUAL_OS_AIX)

    long ncpus = sysconf (_SC_NPROCESSORS_ONLN);

    return ncpus >= 1 ? (unsigned int) ncpus : 1;

#elif defined(VISUAL_OS_NETBSD) || defined(VISUAL_OS_FREEBSD) || defined(VISUAL_OS_OPENBSD) || defined(VISUAL_OS_DARWIN)

    int ncpus = 0;
    int mib[2] = { CTL_HW, HW_NCPU };
    visual_size_t len = sizeof (ncpus);

    /* On failure ncpus is not written, so do not look at it. */
    if (sysctl (mib, 2, &ncpus, &len, NULL, 0) != 0)
        return 1;

    return ncpus >= 1 ? (unsigned int) ncpus : 1;

#elif defined(VISUAL_OS_HPUX)

    int ncpus = mpctl (MPC_GETNUMSPUS, NULL, NULL);

    return ncpus >= 1 ? (unsigned int) ncpus : 1;

#elif defined(VISUAL_OS_IRIX)

    long ncpus = sysconf (_SC_NPROC_ONLN);

    return ncpus >= 1 ? (unsigned int) ncpus : 1;

#elif defined(VISUAL_OS_ANDROID)

    int ncpus = android_getCpuCount ();

    return ncpus >= 1 ? (unsigned int) ncpus : 1;

#elif defined(VISUAL_OS_WIN32)

    SYSTEM_INFO system_info;

    GetSystemInfo (&system_info);

    return system_info.dwNumberOfProcessors >= 1 ? (unsigned int) system_info.dwNumberOfProcessors : 1;

#else

    return 1;

#endif
}
387 
388 static VisCPUType get_cpu_type (void)
389 {
390 #if defined(VISUAL_ARCH_MIPS)
391  return VISUAL_CPU_TYPE_MIPS;
392 #elif defined(VISUAL_ARCH_ALPHA)
393  return VISUAL_CPU_TYPE_ALPHA;
394 #elif defined(VISUAL_ARCH_SPARC)
395  return VISUAL_CPU_TYPE_SPARC;
396 #elif defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
397  return VISUAL_CPU_TYPE_X86;
398 #elif defined(VISUAL_ARCH_POWERPC)
400 #elif defined(VISUAL_ARCH_ARM)
401  return VISUAL_CPU_TYPE_ARM;
402 #else
403  return VISUAL_CPU_TYPE_OTHER;
404 #endif
405 }
406 
407 static void print_cpu_info (void)
408 {
409  visual_log (VISUAL_LOG_DEBUG, "CPU: Number of CPUs: %d", cpu_caps.num_cores);
410  visual_log (VISUAL_LOG_DEBUG, "CPU: type %d", cpu_caps.type);
411 
412 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
413  visual_log (VISUAL_LOG_DEBUG, "CPU: X86 type %d", cpu_caps.x86cpuType);
414  visual_log (VISUAL_LOG_DEBUG, "CPU: cacheline %d", cpu_caps.cacheline);
415  visual_log (VISUAL_LOG_DEBUG, "CPU: MMX %d", cpu_caps.hasMMX);
416  visual_log (VISUAL_LOG_DEBUG, "CPU: MMX2 %d", cpu_caps.hasMMX2);
417  visual_log (VISUAL_LOG_DEBUG, "CPU: SSE %d", cpu_caps.hasSSE);
418  visual_log (VISUAL_LOG_DEBUG, "CPU: SSE2 %d", cpu_caps.hasSSE2);
419  visual_log (VISUAL_LOG_DEBUG, "CPU: 3DNow %d", cpu_caps.has3DNow);
420  visual_log (VISUAL_LOG_DEBUG, "CPU: 3DNowExt %d", cpu_caps.has3DNowExt);
421 #elif defined(VISUAL_ARCH_POWERPC)
422  visual_log (VISUAL_LOG_DEBUG, "CPU: AltiVec %d", cpu_caps.hasAltiVec);
423 #elif defined(VISUAL_ARCH_ARM)
424  visual_log (VISUAL_LOG_DEBUG, "CPU: ARM v7 %d", cpu_caps.hasARMv7);
425  visual_log (VISUAL_LOG_DEBUG, "CPU: ARM VFPv3 %d", cpu_caps.hasVFPv3);
426  visual_log (VISUAL_LOG_DEBUG, "CPU: ARM NEON %d", cpu_caps.hasNeon);
427  visual_log (VISUAL_LOG_DEBUG, "CPU: ARM LDREX_STREX %d", cpu_caps.hasLDREX_STREX);
428 #endif /* VISUAL_ARCH_X86 || VISUAL_ARCH_X86_64 */
429 }
430 
431 void visual_cpu_initialize ()
432 {
433  visual_mem_set (&cpu_caps, 0, sizeof (VisCPU));
434 
435  cpu_caps.type = get_cpu_type ();
436  cpu_caps.num_cores = get_number_of_cores ();
437 
438 #if defined(VISUAL_ARCH_ARM)
439 # if defined(VISUAL_OS_ANDROID)
440  if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM) {
441  uint64_t type = android_getCpuFeatures ();
442 
443  if (type & ANDROID_CPU_ARM_FEATURE_ARMv7)
444  cpu_caps.hasARMv7 = TRUE;
445 
446  if (type & ANDROID_CPU_ARM_FEATURE_VFPv3)
447  cpu_caps.hasVFPv3 = TRUE;
448 
449  if (type & ANDROID_CPU_ARM_FEATURE_NEON)
450  cpu_caps.hasNeon = TRUE;
451 
452  if(type & ANDROID_CPU_ARM_FEATURE_LDREX_STREX)
453  cpu_caps.hasLDREX_STREX = TRUE;
454  }
455 # endif /* VISUAL_OS_ANDROID */
456 #endif /* VISUAL_ARCH_ARM */
457 
458 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
459  /* No cpuid, old 486 or lower */
460  if (!has_cpuid ()) {
461  return;
462  }
463 
464  cpu_caps.cacheline = 32;
465 
466  unsigned int regs[4];
467  unsigned int regs2[4];
468 
469  /* Get max cpuid level */
470  cpuid (0x00000000, regs);
471 
472  if (regs[0] >= 0x00000001) {
473  unsigned int cacheline;
474 
475  cpuid (0x00000001, regs2);
476 
477  cpu_caps.x86cpuType = (regs2[0] >> 8) & 0xf;
478  if (cpu_caps.x86cpuType == 0xf)
479  cpu_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
480 
481  /* general feature flags */
482  cpu_caps.hasMMX = TEST_BIT (regs2[3], 23); /* 0x0800000 */
483  cpu_caps.hasSSE = TEST_BIT (regs2[3], 25); /* 0x2000000 */
484  cpu_caps.hasSSE2 = TEST_BIT (regs2[3], 26); /* 0x4000000 */
485  cpu_caps.hasMMX2 = cpu_caps.hasSSE; /* SSE cpus supports mmxext too */
486 
487  cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
488  if (cacheline > 0)
489  cpu_caps.cacheline = cacheline;
490  }
491 
492  cpuid (0x80000000, regs);
493 
494  if (regs[0] >= 0x80000001) {
495 
496  cpuid (0x80000001, regs2);
497 
498  cpu_caps.hasMMX |= TEST_BIT (regs2[3], 23); /* 0x0800000 */
499  cpu_caps.hasMMX2 |= TEST_BIT (regs2[3], 22); /* 0x400000 */
500  cpu_caps.has3DNow = TEST_BIT (regs2[3], 31); /* 0x80000000 */
501  cpu_caps.has3DNowExt = TEST_BIT (regs2[3], 30);
502  }
503 
504  if (regs[0] >= 0x80000006) {
505  cpuid (0x80000006, regs2);
506  cpu_caps.cacheline = regs2[2] & 0xFF;
507  }
508 
509 #if defined(VISUAL_ARCH_X86)
510  if (cpu_caps.hasSSE)
511  check_os_katmai_support ();
512 
513  if (!cpu_caps.hasSSE)
514  cpu_caps.hasSSE2 = FALSE;
515 #endif
516 
517 #endif /* VISUAL_ARCH_X86 || VISUAL_ARCH_X86_64 */
518 
519 #if defined(VISUAL_ARCH_POWERPC)
520  check_os_altivec_support ();
521 #endif /* VISUAL_ARCH_POWERPC */
522 
523  print_cpu_info ();
524 
525  cpu_initialized = TRUE;
526 }
527 
529 {
530  visual_return_val_if_fail (cpu_initialized, VISUAL_CPU_TYPE_OTHER);
531 
532  return cpu_caps.type;
533 }
534 
536 {
537  visual_return_val_if_fail (cpu_initialized, 1);
538 
539  return cpu_caps.num_cores;
540 }
541 
543 {
544  visual_return_val_if_fail (cpu_initialized, FALSE);
545 
546  return cpu_caps.hasMMX;
547 }
548 
550 {
551  visual_return_val_if_fail (cpu_initialized, FALSE);
552 
553  return cpu_caps.hasMMX2;
554 }
555 
557 {
558  visual_return_val_if_fail (cpu_initialized, FALSE);
559 
560  return cpu_caps.hasSSE;
561 }
563 {
564  visual_return_val_if_fail (cpu_initialized, FALSE);
565 
566  return cpu_caps.hasSSE2;
567 }
568 
570 {
571  visual_return_val_if_fail (cpu_initialized, FALSE);
572 
573  return cpu_caps.has3DNow;
574 }
575 
577 {
578  visual_return_val_if_fail (cpu_initialized, FALSE);
579 
580  return cpu_caps.has3DNowExt;
581 }
582 
584 {
585  visual_return_val_if_fail (cpu_initialized, FALSE);
586 
587  return cpu_caps.hasAltiVec;
588 }
589 
591 {
592  visual_return_val_if_fail (cpu_initialized, FALSE);
593 
594  return cpu_caps.hasARMv7;
595 }
596 
598 {
599  visual_return_val_if_fail (cpu_initialized, FALSE);
600 
601  return cpu_caps.hasVFPv3;
602 }
603 
605 {
606  visual_return_val_if_fail (cpu_initialized, FALSE);
607 
608  return cpu_caps.hasNeon;
609 }
610 
612 {
613  visual_return_val_if_fail (cpu_initialized, FALSE);
614 
615  return cpu_caps.hasLDREX_STREX;
616 }