/* * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FOLLY_SMALLLOCKS_H_ #define FOLLY_SMALLLOCKS_H_ /* * This header defines a few very small mutex types. These are useful * in highly memory-constrained environments where contention is * unlikely. * * Note: these locks are for use when you aren't likely to contend on * the critical section, or when the critical section is incredibly * small. Given that, both of the locks defined in this header are * inherently unfair: that is, the longer a thread is waiting, the * longer it waits between attempts to acquire, so newer waiters are * more likely to get the mutex. For the intended use-case this is * fine. * * @author Keith Adams * @author Jordan DeLong */ #include #include #include #include #include #include #include #ifndef __x86_64__ # error "SmallLocks.h is currently x64-only." #endif #include "folly/Portability.h" namespace folly { ////////////////////////////////////////////////////////////////////// namespace detail { /* * A helper object for the condended case. Starts off with eager * spinning, and falls back to sleeping for small quantums. */ class Sleeper { static const uint32_t kMaxActiveSpin = 4000; uint32_t spinCount; public: Sleeper() : spinCount(0) {} void wait() { if (spinCount < kMaxActiveSpin) { ++spinCount; asm volatile("pause"); } else { /* * Always sleep 0.5ms, assuming this will make the kernel put * us down for whatever its minimum timer resolution is (in * linux this varies by kernel version from 1ms to 10ms). */ struct timespec ts = { 0, 500000 }; nanosleep(&ts, NULL); } } }; } ////////////////////////////////////////////////////////////////////// /* * A really, *really* small spinlock for fine-grained locking of lots * of teeny-tiny data. * * Zero initializing these is guaranteed to be as good as calling * init(), since the free state is guaranteed to be all-bits zero. * * This class should be kept a POD, so we can used it in other packed * structs (gcc does not allow __attribute__((packed)) on structs that * contain non-POD data). This means avoid adding a constructor, or * making some members private, etc. */ struct MicroSpinLock { enum { FREE = 0, LOCKED = 1 }; uint8_t lock_; /* * Atomically move lock_ from "compare" to "newval". Return boolean * success. Do not play on or around. */ bool cas(uint8_t compare, uint8_t newVal) { bool out; bool memVal; // only set if the cmpxchg fails asm volatile("lock; cmpxchgb %[newVal], (%[lockPtr]);" "setz %[output];" : [output] "=r" (out), "=a" (memVal) : "a" (compare), // cmpxchgb constrains this to be in %al [newVal] "q" (newVal), // Needs to be byte-accessible [lockPtr] "r" (&lock_) : "memory", "flags"); return out; } // Initialize this MSL. It is unnecessary to call this if you // zero-initialize the MicroSpinLock. void init() { lock_ = FREE; } bool try_lock() { return cas(FREE, LOCKED); } void lock() { detail::Sleeper sleeper; do { while (lock_ != FREE) { asm volatile("" : : : "memory"); sleeper.wait(); } } while (!try_lock()); } void unlock() { asm volatile("" : : : "memory"); lock_ = FREE; // release barrier on x86 } }; ////////////////////////////////////////////////////////////////////// /* * Spin lock on a single bit in an integral type. You can use this * with 16, 32, or 64-bit integral types. * * This is useful if you want a small lock and already have an int * with a bit in it that you aren't using. But note that it can't be * as small as MicroSpinLock (1 byte), if you don't already have a * convenient int with an unused bit lying around to put it on. * * To construct these, either use init() or zero initialize. We don't * have a real constructor because we want this to be a POD type so we * can put it into packed structs. */ template struct PicoSpinLock { // Internally we deal with the unsigned version of the type. typedef typename std::make_unsigned::type UIntType; static_assert(std::is_integral::value, "PicoSpinLock needs an integral type"); static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 || sizeof(IntType) == 8, "PicoSpinLock can't work on integers smaller than 2 bytes"); public: static const UIntType kLockBitMask_ = UIntType(1) << Bit; UIntType lock_; /* * You must call this function before using this class, if you * default constructed it. If you zero-initialized it you can * assume the PicoSpinLock is in a valid unlocked state with * getData() == 0. * * (This doesn't use a constructor because we want to be a POD.) */ void init(IntType initialValue = 0) { lock_ = initialValue; } /* * Returns the value of the integer we using for our lock, except * with the bit we are using as a lock cleared, regardless of * whether the lock is held. * * It is 'safe' to call this without holding the lock. (As in: you * get the same guarantees for simultaneous accesses to an integer * as you normally get.) */ IntType getData() const { return static_cast(lock_ & ~kLockBitMask_); } /* * Set the value of the other bits in our integer. * * Don't use this when you aren't holding the lock, unless it can be * guaranteed that no other threads may be trying to use this. */ void setData(IntType w) { lock_ = (lock_ & kLockBitMask_) | (w & ~kLockBitMask_); } /* * Try to get the lock without blocking: returns whether or not we * got it. */ bool try_lock() const { bool ret = false; #define FB_DOBTS(size) \ asm volatile("lock; bts" #size " %1, (%2); setnc %0" \ : "=r" (ret) \ : "i" (Bit), \ "r" (&lock_) \ : "memory", "flags") switch (sizeof(IntType)) { case 2: FB_DOBTS(w); break; case 4: FB_DOBTS(l); break; case 8: FB_DOBTS(q); break; } #undef FB_DOBTS return ret; } /* * Block until we can acquire the lock. Uses Sleeper to wait. */ void lock() const { detail::Sleeper sleeper; while (!try_lock()) { sleeper.wait(); } } /* * Release the lock, without changing the value of the rest of the * integer. */ void unlock() const { #define FB_DOBTR(size) \ asm volatile("lock; btr" #size " %0, (%1)" \ : \ : "i" (Bit), \ "r" (&lock_) \ : "memory", "flags") // Reads and writes can not be reordered wrt locked instructions, // so we don't need a memory fence here. switch (sizeof(IntType)) { case 2: FB_DOBTR(w); break; case 4: FB_DOBTR(l); break; case 8: FB_DOBTR(q); break; } #undef FB_DOBTR } }; ////////////////////////////////////////////////////////////////////// /** * Array of spinlocks where each one is padded to prevent false sharing. * Useful for shard-based locking implementations in environments where * contention is unlikely. */ // TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`) #define FOLLY_CACHE_LINE_SIZE 64 template struct SpinLockArray { T& operator[](size_t i) { return data_[i].lock; } const T& operator[](size_t i) const { return data_[i].lock; } constexpr size_t size() const { return N; } private: struct PaddedSpinLock { PaddedSpinLock() : lock() { } T lock; char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)]; }; static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE, "Invalid size of PaddedSpinLock"); // Check if T can theoretically cross a cache line. // NOTE: It should be alignof(std::max_align_t), but max_align_t // isn't supported by gcc 4.6.2. static_assert(alignof(MaxAlign) > 0 && FOLLY_CACHE_LINE_SIZE % alignof(MaxAlign) == 0 && sizeof(T) <= alignof(MaxAlign), "T can cross cache line boundaries"); char padding_[FOLLY_CACHE_LINE_SIZE]; std::array data_; } __attribute__((aligned)); ////////////////////////////////////////////////////////////////////// typedef std::lock_guard MSLGuard; ////////////////////////////////////////////////////////////////////// } #endif