在JDK1.6,JDK1.7中,HashMap采用位桶+链表实现,即使用链表处理冲突,同一hash值的链表都存储在一个链表里。但是当位于一个桶中的元素较多,即hash值相等的元素较多时,通过key值依次查找的效率较低。而JDK1.8中,HashMap采用位桶+链表+红黑树实现,当链表长度超过阈值(8)时,将链表转换为红黑树,这样大大减少了查找时间。这里就主要研究一下JDK1.8的HashMap源码。
先简单说下HashMap的实现原理:
首先有一个每个元素都是链表的数组,当添加一个元素(key-value)时,就首先计算元素key的hash值,以此确定插入数组中的位置,但是可能存在同一hash值的元素已经被放在数组同一位置了,这时就添加到同一hash值的元素的后面,他们在数组的同一位置,但是形成了链表,同一各链表上的Hash值是相同的,所以说数组存放的是链表。而当链表长度太长时,链表就转换为红黑树,这样大大提高了查找的效率。
当链表数组的容量超过初始容量的0.75时,再散列将链表数组扩大2倍,把原链表数组的搬移到新的数组中
数据结构 1、 位桶数组
1 transient Node <k ,v>[] table;//存储(位桶)的数组</k,v>
2、 数组元素Node<K,V>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 static class Node <k ,v > implements Map .Entry <k ,v > { final int hash; final K key; V value; Node<k,v> next; Node(int hash, K key, V value, Node<k,v> next) { this .hash = hash; this .key = key; this .value = value; this .next = next; } public final K getKey () { return key; } public final V getValue () { return value; } public final String toString () { return key + = + value; } public final int hashCode () { return Objects.hashCode(key) ^ Objects.hashCode(value); } public final V setValue (V newValue) { V oldValue = value; value = newValue; return oldValue; } public final boolean equals (Object o) { if (o == this ) return true ; if (o instanceof Map.Entry) { Map.Entry<!--?,?--> e = (Map.Entry<!--?,?-->)o; if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue())) return true ; } return false ; }
3、红黑树
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 static final class TreeNode<k ,v> extends LinkedHashMap .Entry<k ,v> { TreeNode <k,v> parent; TreeNode <k,v> left; TreeNode <k,v> right; TreeNode <k,v> prev; boolean red; TreeNode (int hash, K key, V val , Node <k,v> next) { super (hash, key, val , next); } final TreeNode <k,v> root() { for (TreeNode <k,v> r = this , p;;) { if ((p = r.parent) == null ) return r; r = p; } }
数据域 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 -------类常量------------ static final int DEFAULT_INITIAL_CAPACITY = 1 << 4 ; static final int MAXIMUM_CAPACITY = 1 << 30 ;static final float DEFAULT_LOAD_FACTOR = 0.75 f;static final int TREEIFY_THRESHOLD = 8 ;static final int UNTREEIFY_THRESHOLD = 6 ;static final int MIN_TREEIFY_CAPACITY = 64 ;------实例变量--------- transient Node<K,V>[] table; transient int size ;transient int modCount;int threshold;final float loadFactor;
构造函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 public HashMap (int initialCapacity, float loadFactor) { if (initialCapacity < 0 ) throw new IllegalArgumentException(Illegal initial capacity: + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException(Illegal load factor: + loadFactor); this .loadFactor = loadFactor; this .threshold = tableSizeFor(initialCapacity); } public HashMap (int initialCapacity) { this (initialCapacity, DEFAULT_LOAD_FACTOR); } public HashMap () { this .loadFactor = DEFAULT_LOAD_FACTOR; } public HashMap (Map<!--? extends K, ? extends V--> m) { this .loadFactor = DEFAULT_LOAD_FACTOR; putMapEntries(m, false ); }
tableSizeFor(initialCapacity)方法,这个方法的作用是,将你传入的initialCapacity做计算,返回一个大于等于initialCapacity 最小的2的幂次方。所以这个操作保证无论你传入的初始化Hash桶长度参数是多少,最后hash表初始化的长度都是2的幂次方。比如你输入的是6,计算出来结果就是8。
1 2 3 4 5 6 7 8 9 static final int tableSizeFor(int cap) { int n = cap - 1 ; n |= n >>> 1 ; n |= n >>> 2 ; n |= n >>> 4 ; n |= n >>> 8 ; n |= n >>> 16 ; return (n < 0 ) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1 ; }
get 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 public V get (Object key ) { Node<K,V> e; return (e = getNode(hash(key ), key )) == null ? null : e.value; } final Node<K,V> getNode(int hash, Object key ) { Node<K,V>[] tab; Node<K,V> first,e; int n; K k; if ((tab = table) != null && (n = tab.length) > 0 &&(first = tab[(n - 1 ) & hash]) != null ) { if (first.hash == hash && ((k = first.key ) == key || (key != null && key .equals(k)))) return first; if ((e = first.next) != null ) { if (first instanceof TreeNode) return ((TreeNode<K,V>)first).getTreeNode(hash, key ); do { if (e.hash == hash && ((k = e.key ) == key || (key != null && key .equals(k)))) return e; } while ((e = e.next) != null ); } } return null ; }
get(key)方法时获取key的hash值,计算hash&(n-1)得到在链表数组中的位置first=tab[hash&(n-1)],先判断first的key是否与参数key相等,不等就遍历后面的链表找到相同的key值返回对应的Value值即可
put 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 public V put(K key , V value) { return putVal(hash(key ), key , value, false , true ); } final V putVal(int hash, K key , V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; if ((tab = table) == null || (n = tab.length) == 0 ) n = (tab = resize()).length; if ((p = tab[i = (n - 1 ) & hash]) == null ) tab[i] = newNode(hash, key , value, null ); else { Node<K,V> e; K k; if (p.hash == hash && ((k = p.key ) == key || (key != null && key .equals(k)))) e = p; else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this , tab, hash, key , value); else { for (int binCount = 0 ; ; ++binCount) { if ((e = p.next) == null ) { p.next = newNode(hash, key , value, null ); if (binCount >= TREEIFY_THRESHOLD - 1 ) treeifyBin(tab, hash); break ; } if (e.hash == hash && ((k = e.key ) == key || (key != null && key .equals(k)))) break ; p = e; } } if (e != null ) { V oldValue = e.value; if (!onlyIfAbsent || oldValue == null ) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; if (++size > threshold) resize(); afterNodeInsertion(evict); return null ; }
对key的hashCode()进行hash后计算数组下标index;
如果当前数组table为null,进行resize()初始化;
如果没碰撞直接放到对应下标的位置上;
如果碰撞了,且节点已经存在,就替换掉 value;
如果碰撞后发现为树结构,挂载到树上。
如果碰撞后为链表,添加到链表尾,并判断链表如果过长(大于等于TREEIFY_THRESHOLD,默认8),就把链表转换成树结构;
数据 put 后,如果数据量超过threshold,就要resize。
扩容机制resize() 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 final Node<K,V>[] resize() { Node<K,V>[] oldTab = table; int oldCap = (oldTab == null ) ? 0 : oldTab .length; int oldThr = threshold; int new Cap , new Thr = 0 ; if (oldCap > 0 ) { if (oldCap >= MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return oldTab; } else if ((new Cap = oldCap << 1 ) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) new Thr = oldThr << 1 ; } else if (oldThr > 0 ) new Cap = oldThr; else { new Cap = DEFAULT_INITIAL_CAPACITY; new Thr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } if (new Thr == 0 ) { float ft = (float)new Cap * loadFactor; new Thr = (new Cap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ? (int)ft : Integer .MAX_VALUE); } threshold = new Thr ; @SuppressWarnings({"rawtypes" ,"unchecked" }) Node<K,V>[] new Tab = (Node<K,V>[])new Node [new Cap ]; table = new Tab ; if (oldTab != null ) { for (int j = 0 ; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null ) { oldTab[j] = null ; if (e.next == null ) new Tab [e.hash & (new Cap - 1 )] = e; else if (e instanceof TreeNode) ((TreeNode<K,V>)e).split(this , new Tab , j, oldCap); else { Node<K,V> loHead = null , loTail = null ; Node<K,V> hiHead = null , hiTail = null ; Node<K,V> next; do { next = e.next; if ((e.hash & oldCap) == 0 ) { if (loTail == null ) loHead = e; else loTail.next = e; loTail = e; } else { if (hiTail == null ) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null ); if (loTail != null ) { loTail.next = null ; new Tab [j] = loHead; } if (hiTail != null ) { hiTail.next = null ; new Tab [j + oldCap] = hiHead; } } } } } return new Tab ; }
那么什么时候回产生扩容呢?
(1)初始化HashMap时,第一次进行put操作
(2)当键值对的个数大于threshold阀值时产生扩容,threshold=size*loadFactor