181155 Commits

Author SHA1 Message Date
kib
58f7ae85c7 diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index c253a96..3d8bd30 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -82,7 +82,11 @@ static void tsc_freq_changed(void *arg, const struct cf_level *level,
 static void tsc_freq_changing(void *arg, const struct cf_level *level,
     int *status);
 static unsigned tsc_get_timecount(struct timecounter *tc);
-static unsigned tsc_get_timecount_low(struct timecounter *tc);
+static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
+static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
 static void tsc_levels_changed(void *arg, int unit);

 static struct timecounter tsc_timecounter = {
@@ -262,6 +266,10 @@ probe_tsc_freq(void)
 		    (vm_guest == VM_GUEST_NO &&
 		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
 			tsc_is_invariant = 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =
+			    tsc_get_timecount_mfence;
+		}
 		break;
 	case CPU_VENDOR_INTEL:
 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
@@ -271,6 +279,10 @@ probe_tsc_freq(void)
 		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
 		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
 			tsc_is_invariant = 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =
+			    tsc_get_timecount_lfence;
+		}
 		break;
 	case CPU_VENDOR_CENTAUR:
 		if (vm_guest == VM_GUEST_NO &&
@@ -278,6 +290,10 @@ probe_tsc_freq(void)
 		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
 		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
 			tsc_is_invariant = 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =
+			    tsc_get_timecount_lfence;
+		}
 		break;
 	}

@@ -328,16 +344,31 @@ init_TSC(void)

 #ifdef SMP

-/* rmb is required here because rdtsc is not a serializing instruction. */
-#define	TSC_READ(x)			\
-static void				\
-tsc_read_##x(void *arg)			\
-{					\
-	uint32_t *tsc = arg;		\
-	u_int cpu = PCPU_GET(cpuid);	\
-					\
-	rmb();				\
-	tsc[cpu * 3 + x] = rdtsc32();	\
+/*
+ * RDTSC is not a serializing instruction, and does not drain
+ * instruction stream, so we need to drain the stream before executing
+ * it.  It could be fixed by use of RDTSCP, except the instruction is
+ * not available everywhere.
+ *
+ * Use CPUID for draining in the boot-time SMP constistency test.  The
+ * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
+ * and VIA) when SSE2 is present, and nothing on older machines which
+ * also do not issue RDTSC prematurely.  There, testing for SSE2 and
+ * vendor is too cumbersome, and we learn about TSC presence from
+ * CPUID.
+ *
+ * Do not use do_cpuid(), since we do not need CPUID results, which
+ * have to be written into memory with do_cpuid().
+ */
+#define	TSC_READ(x)							\
+static void								\
+tsc_read_##x(void *arg)							\
+{									\
+	uint32_t *tsc = arg;						\
+	u_int cpu = PCPU_GET(cpuid);					\
+									\
+	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
+	tsc[cpu * 3 + x] = rdtsc32();					\
 }
 TSC_READ(0)
 TSC_READ(1)
@@ -487,7 +518,16 @@ init:
 	for (shift = 0; shift < 31 && (tsc_freq >> shift) > max_freq; shift++)
 		;
 	if (shift > 0) {
-		tsc_timecounter.tc_get_timecount = tsc_get_timecount_low;
+		if (cpu_feature & CPUID_SSE2) {
+			if (cpu_vendor_id == CPU_VENDOR_AMD) {
+				tsc_timecounter.tc_get_timecount =
+				    tsc_get_timecount_low_mfence;
+			} else {
+				tsc_timecounter.tc_get_timecount =
+				    tsc_get_timecount_low_lfence;
+			}
+		} else
+			tsc_timecounter.tc_get_timecount = tsc_get_timecount_low;
 		tsc_timecounter.tc_name = "TSC-low";
 		if (bootverbose)
 			printf("TSC timecounter discards lower %d bit(s)\n",
@@ -599,16 +639,48 @@ tsc_get_timecount(struct timecounter *tc __unused)
 	return (rdtsc32());
 }

-static u_int
+static inline u_int
 tsc_get_timecount_low(struct timecounter *tc)
 {
 	uint32_t rv;

 	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
-	: "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
+	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
 	return (rv);
 }

+static u_int
+tsc_get_timecount_lfence(struct timecounter *tc __unused)
+{
+
+	lfence();
+	return (rdtsc32());
+}
+
+static u_int
+tsc_get_timecount_low_lfence(struct timecounter *tc)
+{
+
+	lfence();
+	return (tsc_get_timecount_low(tc));
+}
+
+static u_int
+tsc_get_timecount_mfence(struct timecounter *tc __unused)
+{
+
+	mfence();
+	return (rdtsc32());
+}
+
+static u_int
+tsc_get_timecount_low_mfence(struct timecounter *tc)
+{
+
+	mfence();
+	return (tsc_get_timecount_low(tc));
+}
+
 uint32_t
 cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
 {
2012-08-01 17:26:22 +00:00
kib
36babd37ca Add lfence().
MFC after:	1 week
2012-08-01 17:24:53 +00:00
alc
9c4b62fad8 Revise pmap_enter()'s handling of mapping updates that change the
PTE's PG_M and PG_RW bits but not the physical page frame.  First,
only perform vm_page_dirty() on a managed vm_page when the PG_M bit is
being cleared.  If the updated PTE continues to have PG_M set, then
there is no requirement to perform vm_page_dirty().  Second, flush the
mapping from the TLB when PG_M alone is cleared, not just when PG_M
and PG_RW are cleared.  Otherwise, a stale TLB entry may stop PG_M
from being set again on the next store to the virtual page.  However,
since the vm_page's dirty field already shows the physical page as
being dirty, no actual harm comes from the PG_M bit not being set.
Nonetheless, it is potentially confusing to someone expecting to see
the PTE change after a store to the virtual page.
2012-08-01 16:04:13 +00:00
mav
e5c6ca6783 Fix kernel panic on camcontrol reset for specific target, caused by
uninitialized cm_targ in mpssas_action_resetdev().

Reviewed by:	Desai, Kashyap <Kashyap.Desai@lsi.com>
Sponsored by:	iXsystems, Inc.
MFC after:	3 days
2012-08-01 12:24:13 +00:00
des
3d0e516e94 Restore a piece of BSD history.
PR:		169127
Submitted by:	Ruben de Groot <ruben@hacktor.com>
MFC after:	1 week
2012-08-01 09:10:21 +00:00
glebius
588de42f27 Some more whitespace cleanup. 2012-08-01 09:00:26 +00:00
imp
a66041e632 Add the chip select glue. 2012-08-01 01:18:36 +00:00
delphij
7f137631a1 Teach md5(1) about sha512.
MFC after:	1 month
2012-08-01 00:36:12 +00:00
delphij
dc04035695 Use calloc(). 2012-08-01 00:21:55 +00:00
adrian
95def5f927 Fix a case of "mis-located braces".
PR:		kern/170302
2012-08-01 00:18:02 +00:00
adrian
01f36653db Allow 802.11n hardware to support multi-rate retry when RTS/CTS is
enabled.

The legacy (pre-802.11n) hardware doesn't support this - although
the AR5212 era hardware supports MRR, it doesn't have all the bits
needed to support MRR + RTS/CTS.  The AR5416 and later support
a packet duration and RTS/CTS flags per rate scenario, so we should
support it.

Tested:

* AR9280, STA

PR:		kern/170302
2012-07-31 23:54:15 +00:00
bz
73fec218b5 In case of IPsec he have to do delayed checksum calculations before
adding any extension header, or rather before calling into IPsec
processing as we may send the packet and not return to IPv6 output
processing here.

PR:		kern/170116
MFC After:	3 days
2012-07-31 23:34:06 +00:00
imp
a95bb8c0ec Prefer ate over macb. macb doesn't work anymore, and ate has more
errata workarounds in it.
2012-07-31 19:41:12 +00:00
imp
bb55180b97 Note about where we can boot this. 2012-07-31 19:39:21 +00:00
imp
e198ea51a0 Allow chip selects other than 0. The SAM9260EK board
has its dataflash on CS1.
2012-07-31 19:14:22 +00:00
adrian
3fbbc135fc Restore the PCI bridge configuration upon resume.
This allows my TI1510 cardbus/PCI bridge to work after a suspend/resume,
without having to unload/reload the cbb driver.

I've also tested this on stable/9.  I'll MFC it shortly.

PR:		kern/170058
Reviewed by:	jhb
MFC after:	1 day
2012-07-31 18:47:17 +00:00
jfv
27e973c324 Clean up some unused leftover code from em
Make IRQ style a tuneable
Fix lock handling in the interrupt handler

MFC after:3 days
2012-07-31 18:44:10 +00:00
jhb
cdbfd348a5 Reorder the managament of advisory locks on open files so that the advisory
lock is obtained before the write count is increased during open() and the
lock is released after the write count is decreased during close().

The first change closes a race where an open() that will block with O_SHLOCK
or O_EXLOCK can increase the write count while it waits.  If the process
holding the current lock on the file then tries to call exec() on the file
it has locked, it can fail with ETXTBUSY even though the advisory lock is
preventing other threads from succesfully completeing a writable open().

The second change closes a race where a read-only open() with O_SHLOCK or
O_EXLOCK may return successfully while the write count is non-zero due to
another descriptor that had the advisory lock and was blocking the open()
still being in the process of closing.  If the process that completed the
open() then attempts to call exec() on the file it locked, it can fail with
ETXTBUSY even though the other process that held a write lock has closed
the file and released the lock.

Reviewed by:	kib
MFC after:	1 month
2012-07-31 18:25:00 +00:00
mm
afdda0ef5d Fix wrong indent according to style(9)
MFC after:	2 weeks
> Description of fields to fill in above:                     76 columns --|
> PR:            If a GNATS PR is affected by the change.
> Submitted by:  If someone else sent in the change.
> Reviewed by:   If someone else reviewed your modification.
> Approved by:   If you needed approval for this commit.
> Obtained from: If the change is from a third party.
> MFC after:     N [day[s]|week[s]|month[s]].  Request a reminder email.
> Security:      Vulnerability reference (one per line) or description.
> Empty fields above will be automatically removed.

M    zpool_main.c
2012-07-31 17:32:28 +00:00
mm
caced023cd Fix reporting of root pool upgrade notice.
MFC after:	2 weeks
2012-07-31 17:28:28 +00:00
adrian
fb83e10b61 Shuffle the call to ath_hal_setuplasttxdesc() to _after_ the rate control
code is called and remove it from ath_buf_set_rate().

For the legacy (non-11n API) TX routines, ath_hal_filltxdesc() takes care
of setting up the intermediary and final descriptors right, complete
with copying the rate control info into the final descriptor so the
rate modules can grab it.

The 11n version doesn't do this - ath_hal_chaintxdesc() doesn't
copy the rate control bits over, nor does it clear isaggr/moreaggr/
pad delimiters.  So the call to setuplasttxdesc() is needed here.

So:

* legacy NICs - never call the 11n rate control stuff, so filltxdesc
  copies the rate control info right;
* 11n NICs transmitting legacy or 11n non-aggregate frames -
  ath_hal_set11nratescenario() is called to setup rate control and
  then ath_hal_filltxdesc() chains them together - so the rate control
  info is right;
* 11n aggregate frames - set11nratescenario() is called, then
  ath_hal_chaintxdesc() is called to chain a list of aggregate and subframes
  together. This requires a call to ath_hal_setuplasttxdesc() to complete
  things.

Tested:

* AR9280 in station mode

TODO:

* I really should make sure that the descriptor contents get blanked
  out correctly or garbage left over from aggregate frames may show
  up in non-aggregate frames, leading to badness.
2012-07-31 17:08:29 +00:00
jilles
7f0e821767 find: Remove unnecessary and inconsistent initialization.
Submitted by:	jhb
2012-07-31 16:55:41 +00:00
adrian
9b725a7ac6 Push the rate control and descriptor chaining into the descriptor "set"
functions, for both legacy and 802.11n.

This will simplify supporting the EDMA chipsets as these two descriptor
setup functions can just be overridden in their entirety, hiding all of
the subtle differences in setting things up.

It's not a permanent solution, as eventually the AR5416 HAL should grow
similar versions of the 11n descriptor functions and then those can be
used.

TODO:

* Push the "clr11naggr" call into the legacy setds, just to ensure
  that retried frames don't end up with the aggregate bits set
  inappropriately;
* Remove the "setlasttxdesc" call from the 11n TX path and push it
  into setds_11n.
* Ensure that setds_11n will work correctly for non-aggregate frames;
* .. and then when it does, just unconditionally call "setds_11n" for
  11n NICs and "setds" for non-11n NICs.
2012-07-31 16:41:09 +00:00
glebius
53cb168f80 Some style(9) and whitespace changes.
Together with:	Andrey Zonov <andrey zonov.org>
2012-07-31 11:31:12 +00:00
mav
50f531dfce Add several performance optimizations to acpi_cpu_idle().
For C1 and C2 states use cpu_ticks() to measure sleep time instead of much
slower ACPI timer. We can't do it for C3, as TSC may stop there. But it is
less important there as wake up latency is high any way.

For C1 and C2 states do not check/clear bus mastering activity status, as
it is important only for C3. As side effect it can make CPU enter C2 instead
of C3 if last BM activity was two sleeps back (unlike one before), but
that may be even good because of collecting more statistics. Premature BM
wakeup from C3, entered because of overestimation, can easily be worse then
entering C2 from both performance and power consumption points of view.

Together on dual Xeon E5645 system on sequential 512 bytes read test this
change makes cpu_idle_acpi() as fast as simplest cpu_idle_hlt() and only
few percents slower then cpu_idle_mwait(), while deeper states are still
actively used during idle periods.

To help with diagnostics, add C-state type into dev.cpu.X.cx_supported.

Sponsored by:	iXsystems, Inc.
2012-07-31 10:58:50 +00:00
monthadar
29285e1a1a Fixed some debug output in hwmp_recv_prep. 2012-07-31 08:05:40 +00:00
luigi
bf32267c00 nobody uses this file except the userspace ipfw code, but the cast
of a pointer to an integer needs a cast to prevent a warning for
size mismatch.

MFC after:	1 week
2012-07-31 08:04:49 +00:00
monthadar
7d32606c1b Fix a PREQ comparison error in 11s HWMP.
* Earlier we compared two not equal metrics, one was what we recevied
in the 'new PREQ' while the other was what we already have saved which
was 'old PREQ' + link metric for the last hop;
* Fixed by adding 'new PREQ' + link metric for the last hop in a
temporary variable;
2012-07-31 07:36:27 +00:00
monthadar
c2d63929b4 Fix bugs in net80211s found with wtap simulator.
For description of the test scripts refer to projects/net80211_testsuite/wtap.

* Test 007 showed a bug in intermediate PREP for a proxy entry. Resolved;
* Test 002 showed a bug in the Addressing Mode flag for a PREQ. Resolved;
2012-07-31 07:31:47 +00:00
monthadar
3e4ae46ebb Fix wtap to not panic in wtap_beacon_intrp.
* Changed KASSERT to be debug printf (DWTAP_PRINTF). If state is not
IEEE80211_S_RUN we return without scheduling a new callout;
* When net80211 stack changes state to IEEE802_11_INIT we stop the
beacon callout task;
2012-07-31 07:22:50 +00:00
luigi
b2b6fd9890 remove a redundant MALLOC_DECLARE 2012-07-31 05:51:48 +00:00
davidxu
c8c77f184e I am comparing current pipe code with the one in 8.3-STABLE r236165,
I found 8.3 is a history BSD version using socket to implement FIFO
pipe, it uses per-file seqcount to compare with writer generation
stored in per-pipe object. The concept is after all writers are gone,
the pipe enters next generation, all old readers have not closed the
pipe should get the indication that the pipe is disconnected, result
is they should get EPIPE, SIGPIPE or get POLLHUP in poll().
But newcomer should not know that previous writters were gone, it
should treat it as a fresh session.
I am trying to bring back FIFO pipe to history behavior. It is still
unclear that if single EOF flag can represent SBS_CANTSENDMORE and
SBS_CANTRCVMORE which socket-based version is using, but I have run
the poll regression test in tool directory, output is same as the one
on 8.3-STABLE now.
I think the output "not ok 18 FIFO state 6b: poll result 0 expected 1.
expected POLLHUP; got 0" might be bogus, because newcomer should not
know that old writers were gone. I got the same behavior on Linux.
Our implementation always return POLLIN for disconnected pipe even it
should return POLLHUP, but I think it is not wise to remove POLLIN for
compatible reason, this is our history behavior.

Regression test: /usr/src/tools/regression/poll
2012-07-31 05:48:35 +00:00
bz
712f939cb3 Properly apply #ifdef INET and leave a comment that we are (will) apply
delayed IPv6 checksum processing in ip6_output.c when doing IPsec.

PR:		kern/170116
MFC after:	3 days
2012-07-31 05:44:03 +00:00
bz
e07aa136d7 Improve the should-never-hit printf to ease debugging in case we'd ever hit
it again when doing the delayed IPv6 checksum calculations.

MFC after:	3 days
2012-07-31 05:34:54 +00:00
fjoe
e4171792ab - Change back "d_ofs" to int8_t to not pessimize padding and size of "struct puc_cfg".
- Use "puc_config_moxa" for Moxa boards that need d_ofs greater than 0x7f

Prodded by:	marcel@, gavin@
MFC after:	3 days
2012-07-31 05:23:23 +00:00
imp
23b9008bcd macb doesn't work, switch to ate. 2012-07-31 04:09:27 +00:00
adrian
c9862ba17f Migrate some more TX side setup routines to be methods. 2012-07-31 03:09:48 +00:00
adrian
22fbf81ca5 Break out the hardware handoff and TX DMA restart code into methods.
These (and a few others) will differ based on the underlying DMA
implementation.

For the EDMA NICs, simply stub them out in a fashion which will let
me focus on implementing the necessary descriptor API changes.
2012-07-31 02:28:32 +00:00
adrian
e538814cc9 Placeholder ioctl for an upcoming rate control statistics API change. 2012-07-31 02:18:10 +00:00
davidxu
d2b97b9193 When a thread is blocked in direct write state, it only sets PIPE_DIRECTW
flag but not PIPE_WANTW, but FIFO pipe code does not understand this internal
state, when a FIFO peer reader closes the pipe, it wants to notify the writer,
it checks PIPE_WANTW, if not set, it skips calling wakeup(), so blocked writer
never noticed the case, but in general, the writer should return from the
syscall with EPIPE error code and may get SIGPIPE signal. Setting the
PIPE_WANTW fixed problem, or you can turn off direct write, it should fix the
problem too. This bug is found by PR/170203.

Another bug in FIFO pipe code is when peer closes the pipe, another end which
is being blocked in select() or poll() is not notified, it missed to call
pipeselwakeup().

Third problem is found in poll regression test, the existing code can not
pass 6b,6c,6d tests, but FreeBSD-4 works. This commit does not fix the
problem, I still need to study more to find the cause.

PR: 170203
Tested by: Garrett Copper &lt; yanegomi at gmail dot com &gt;
2012-07-31 02:00:37 +00:00
mm
cf6d705d5b Partial MFV (illumos-gate 13753:2aba784c276b)
2762 zpool command should have better support for feature flags

References:
https://www.illumos.org/issues/2762

MFC after:	2 weeks
2012-07-30 23:14:24 +00:00
davide
06da175461 Until now KTR_ENTRIES, which defines the size of circular buffer used in
ktr(4), was constrained to be a power of two. Remove this constraint and
update sys/conf/NOTES accordingly.

Reviewed by:		jhb
Approved by:		gnn (mentor)
Sponsored by:		Google Summer of Code 2012
2012-07-30 22:46:42 +00:00
kargl
42e27d2ed6 ieeefp.h is only needed on i386 class hardware.
Submitted by:	bde
Approved by:	das (pre-approved)
2012-07-30 21:58:28 +00:00
kargl
0aadb27b41 Whitespace.
Submitted by:	bde
Approved by:	das (pre-approved)
2012-07-30 21:55:49 +00:00
imp
94d6c8b29d These files will support the whole at91sam9x5 family when done,
so rename them now before they get copied further afield...
2012-07-30 21:30:43 +00:00
imp
299454071c List the members of the AT91SAM9G45 family. 2012-07-30 21:19:19 +00:00
joel
84ed784b36 Remove trailing whitespace. 2012-07-30 21:02:44 +00:00
issyl0
6a683d47fc Add more locale-specific functions to the relevant man pages and Makefile:
- lib/libc/locale/islower.3
  - lib/libc/locale/ispunct.3
  - lib/libc/locale/nl_langinfo.3
  - lib/libc/locale/isgraph.3
  - lib/libc/locale/isspace.3

Reviewed by:	bz
Approved by:	theraven
MFC after:	5 days
2012-07-30 20:56:19 +00:00
jhb
89309bc667 Regen. 2012-07-30 20:45:17 +00:00
jhb
5d07d48bdc The linux_lstat() system call accepts a pointer to a 'struct l_stat', not a
'struct ostat'.
2012-07-30 20:44:45 +00:00