HashMap源码解析

    xiaoxiao2023-10-30  153

    最近看了下HashMap的源码,来记录下它的实现原理。 HashMap最一开始是一个基于链表的数组结构,数组的每一个索引位置上都链着具有相同hash计算结果的键值对数据,为了提高查找效率,jdk1.8以后当数组某个索引位置上的链表长度达到8且数组容量不小于64时,会自动转换为红黑树。 首先要了解HashMap的静态内部类Node&lt;K,V&gt;,即数组索引位置上的链表节点,它实现了Map.Entry&lt;K,V&gt;接口,也就是我们经常遍历Map时用到的对象类型,储存了Key和Value的值,存入HashMap里的键值对都是以该对象类型存在。

    /**
     * Basic bin node: one key-value pair in a bucket's linked chain.
     * Implements Map.Entry so iteration code can consume it directly.
     */
    static class Node<K,V> implements Map.Entry<K,V> {
        final int hash;      // cached spread hash of the key
        final K key;
        V value;
        Node<K,V> next;      // next node in the same bucket's chain

        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }

        public final K getKey() {
            return key;
        }

        public final V getValue() {
            return value;
        }

        public final String toString() {
            return key + "=" + value;
        }

        /** Entry hash contract: XOR of the key's and value's hash codes. */
        public final int hashCode() {
            return Objects.hashCode(key) ^ Objects.hashCode(value);
        }

        /** Replaces the value and returns the one it displaced. */
        public final V setValue(V newValue) {
            V previous = value;
            value = newValue;
            return previous;
        }

        /** Entries are equal when both key and value are equal. */
        public final boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Map.Entry)) {
                return false;
            }
            Map.Entry<?,?> other = (Map.Entry<?,?>) o;
            return Objects.equals(key, other.getKey())
                && Objects.equals(value, other.getValue());
        }
    }

    还有树节点TreeNode&lt;K,V&gt;,当数组某索引上的链表长度达到8(且数组容量不小于64)时会转换为红黑树数据结构,里面有很多维持红黑树数据结构的方法,这里不做阐述,想了解的同学可以先去学习红黑树再来看这个结构,会更快理解。

    /**
     * Red-black tree node, used once a bin's chain grows past
     * TREEIFY_THRESHOLD. Extends LinkedHashMap.Entry (which extends
     * Node), so a tree bin can still be walked as a linked list.
     */
    static final class TreeNode<K,V> extends LinkedHashMap.Entry<K,V> {
        TreeNode<K,V> parent;  // red-black tree links
        TreeNode<K,V> left;
        TreeNode<K,V> right;
        TreeNode<K,V> prev;    // needed to unlink next upon deletion
        boolean red;           // node color flag
        TreeNode(int hash, K key, V val, Node<K,V> next) {
            super(hash, key, val, next);
        }
        // ...... many tree-maintenance methods omitted
    }

    接着看看HashMap的成员变量:

    /** Serialization version identifier. */
    private static final long serialVersionUID = 362498820763181265L;
    /** Default table capacity: 16. Written as a shift to emphasize it is a power of two. */
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16
    /** Largest table capacity (2^30). */
    static final int MAXIMUM_CAPACITY = 1 << 30;
    /** Default load factor used when none is given; controls when the table grows. */
    static final float DEFAULT_LOAD_FACTOR = 0.75f;
    /** A bin is converted to a red-black tree when its chain reaches this many
     *  nodes (treeification also requires table capacity >= MIN_TREEIFY_CAPACITY). */
    static final int TREEIFY_THRESHOLD = 8;
    /** A tree bin shrinks back to a linked list when it falls to this many nodes. */
    static final int UNTREEIFY_THRESHOLD = 6;
    /** Minimum table capacity for treeification; below this, treeifyBin
     *  resizes the table instead of building a tree. */
    static final int MIN_TREEIFY_CAPACITY = 64;
    /** The bucket array; length is always a power of two (null until first use). */
    transient Node<K,V>[] table;
    /** Cached entry-set view, used for iteration. */
    transient Set<Map.Entry<K,V>> entrySet;
    /** Number of key-value mappings currently stored. */
    transient int size;
    /** Structural modification count, used for fail-fast iteration. */
    transient int modCount;
    /** Next size value at which the table resizes (capacity * load factor). */
    int threshold;
    /** The load factor for this map. */
    final float loadFactor;

    HashMap的构造方法主要围绕数组容量和加载因子的:

    /** No-arg constructor: default load factor (0.75); table is allocated lazily. */
    public HashMap() {
        this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
    }

    /** Capacity-only constructor: delegates with the default load factor. */
    public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Full constructor: rejects a negative capacity or a non-positive/NaN
     * load factor, clamps the capacity to MAXIMUM_CAPACITY, and stashes the
     * rounded-up power-of-two capacity in threshold until the table is
     * actually allocated on first insert.
     */
    public HashMap(int initialCapacity, float loadFactor) {
        if (initialCapacity < 0) {
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        }
        if (initialCapacity > MAXIMUM_CAPACITY) {
            initialCapacity = MAXIMUM_CAPACITY;
        }
        if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);
        }
        this.loadFactor = loadFactor;
        this.threshold = tableSizeFor(initialCapacity);
    }

    HashMap的散列函数: HashMap在添加元素时,元素会添加在数组的哪个位置是基于拉链法,先计算key的hash值,再和数组大小减一做&amp;运算得出该元素在数组中的索引位置。为什么这样呢?因为这样可以均匀地散列元素在数组的各个位置上,防止元素过多聚集于数组的一个桶位上,导致查找成本增加。有人可能会说增加数组容量就好,但是这样集合的空间就很浪费:每个桶上元素不多,没必要。元素的分布不能太稀疏,也不能太密集,非常考量散列函数的设计。

    /**
     * Spreads the higher bits of the key's hashCode into the low 16 bits,
     * so that table indexing (which only uses the low bits) still reflects
     * high-bit differences. A null key always maps to bucket 0.
     */
    static final int hash(Object key) {
        if (key == null) {
            return 0;
        }
        int h = key.hashCode();
        return h ^ (h >>> 16);
    }

    **HashMap添加元素的方法**

    这里有几点可以讲下。 1.首先是数组索引的确定是通过(n - 1) &amp; hash,即计算出来的hash值与数组大小减1做&amp;操作。HashMap的数组大小总是2的n次方,相比于用hash值对数组大小取模,&amp;操作速度更快;同时n-1的二进制低位全为1,能更均匀地散列加入的键值对元素。如果n-1是偶数的话,&amp;出来的结果用二进制表达最后一位永远是0,那么所有奇数索引位置永远都不可能存值,这是非常大的空间浪费,而且键值对都集中链于少数桶位,查找效率也不高。 2.元素在添加时如果数组位置上没有元素则直接创建新节点,如果有元素就判断是否有相同的key,有则替换value,没有就链于链表尾部;当链表长度达到8且数组容量不小于64时,会转换为红黑树。

    /** Public insert: hashes the key and delegates to putVal. */
    public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }

    /**
     * Core insert routine.
     *
     * @param hash         spread hash of the key
     * @param key          the key
     * @param value        the value to store
     * @param onlyIfAbsent if true, do not overwrite an existing non-null value
     * @param evict        forwarded to afterNodeInsertion (LinkedHashMap hook)
     * @return the previous value for the key, or null if there was none
     */
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        // Lazily allocate the table on first insert.
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;
        // Empty bucket: place a new node directly.
        if ((p = tab[i = (n - 1) & hash]) == null)
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
            // Does the first node already hold this key?
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                e = p;
            else if (p instanceof TreeNode)
                // Tree bin: insert via the red-black tree.
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
                // Walk the chain; append at the tail if the key is absent.
                for (int binCount = 0; ; ++binCount) {
                    if ((e = p.next) == null) {
                        p.next = newNode(hash, key, value, null);
                        // Chain reached TREEIFY_THRESHOLD nodes: try to treeify
                        // (treeifyBin may resize instead if the table is small).
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
                        break;
                    }
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    p = e;
                }
            }
            if (e != null) { // existing mapping for key
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value;
                afterNodeAccess(e); // LinkedHashMap callback
                return oldValue;
            }
        }
        ++modCount;
        // Grow the table once size exceeds the threshold.
        if (++size > threshold)
            resize();
        afterNodeInsertion(evict); // LinkedHashMap callback
        return null;
    }

    涉及到的扩容操作: 扩容其实是个耗时操作。

    /**
     * Allocates or doubles the table and redistributes every entry.
     * Because capacity is always a power of two, each chain splits into a
     * "low" list (same index) and a "high" list (index + oldCap) based on
     * a single hash bit — no per-node rehash is needed.
     *
     * @return the new table
     */
    final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        int oldThr = threshold;
        int newCap, newThr = 0;
        if (oldCap > 0) {
            // Already at the cap: stop growing, just max out the threshold.
            if (oldCap >= MAXIMUM_CAPACITY) {
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)
                newThr = oldThr << 1; // double threshold
        }
        else if (oldThr > 0) // initial capacity was placed in threshold
            newCap = oldThr;
        else {               // zero initial threshold signifies using defaults
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        if (newThr == 0) {
            // Threshold = capacity * loadFactor, clamped to the maximum.
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr;
        @SuppressWarnings({"rawtypes","unchecked"})
        Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
        if (oldTab != null) {
            // Move every bucket from the old table into the new one.
            for (int j = 0; j < oldCap; ++j) {
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    oldTab[j] = null; // drop old reference to help GC
                    if (e.next == null)
                        // Single node: recompute its slot directly.
                        newTab[e.hash & (newCap - 1)] = e;
                    else if (e instanceof TreeNode)
                        // Tree bin: the tree splits itself (may untreeify).
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else { // preserve order
                        // Partition the chain by (hash & oldCap):
                        // bit 0 stays at index j, bit 1 moves to j + oldCap.
                        Node<K,V> loHead = null, loTail = null;
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            if ((e.hash & oldCap) == 0) {
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        if (hiTail != null) {
                            hiTail.next = null;
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        return newTab;
    }

    涉及到的put到红黑树节点: 根据插入的键的hash值,判断是插入在红黑树的哪个位置:待插入hash值小于当前节点时走左子树,大于时走右子树;hash值相等时会先尝试用Comparable比较,仍无法区分时再用tieBreakOrder做最终裁决。

    /**
     * Tree-bin version of putVal: finds or inserts the key in the
     * red-black tree rooted in this bin.
     *
     * Ordering: primarily by hash; on equal hashes by Comparable
     * comparison when the key class supports it, and finally by an
     * identity-hash tie-breaker (tieBreakOrder).
     *
     * @return the existing node for the key, or null if a new node
     *         was inserted
     */
    final TreeNode<K,V> putTreeVal(HashMap<K,V> map, Node<K,V>[] tab,
                                   int h, K k, V v) {
        Class<?> kc = null;
        boolean searched = false;
        // This node may not be the root; climb to it if needed.
        TreeNode<K,V> root = (parent != null) ? root() : this;
        for (TreeNode<K,V> p = root;;) {
            int dir, ph; K pk;
            if ((ph = p.hash) > h)
                dir = -1; // smaller hashes go left
            else if (ph < h)
                dir = 1;  // larger hashes go right
            else if ((pk = p.key) == k || (k != null && k.equals(pk)))
                return p; // key already present
            else if ((kc == null &&
                      (kc = comparableClassFor(k)) == null) ||
                     (dir = compareComparables(kc, k, pk)) == 0) {
                // Equal hashes and incomparable keys: scan both subtrees
                // once for an equal key before falling back to a tie-break.
                if (!searched) {
                    TreeNode<K,V> q, ch;
                    searched = true;
                    if (((ch = p.left) != null &&
                         (q = ch.find(h, k, kc)) != null) ||
                        ((ch = p.right) != null &&
                         (q = ch.find(h, k, kc)) != null))
                        return q;
                }
                dir = tieBreakOrder(k, pk);
            }
            TreeNode<K,V> xp = p;
            if ((p = (dir <= 0) ? p.left : p.right) == null) {
                // Insertion point found: link the new node into both the
                // tree and the bin's doubly-linked node list.
                Node<K,V> xpn = xp.next;
                TreeNode<K,V> x = map.newTreeNode(h, k, v, xpn);
                if (dir <= 0)
                    xp.left = x;
                else
                    xp.right = x;
                xp.next = x;
                x.parent = x.prev = xp;
                if (xpn != null)
                    ((TreeNode<K,V>)xpn).prev = x;
                // Rebalance, then ensure the root is the first bin node.
                moveRootToFront(tab, balanceInsertion(root, x));
                return null;
            }
        }
    }

    涉及到的转换成红黑树操作:

    /**
     * Replaces the linked-list bin for the given hash with a red-black
     * tree — unless the table is still smaller than MIN_TREEIFY_CAPACITY,
     * in which case it resizes instead (growing spreads the chain out).
     */
    final void treeifyBin(Node<K,V>[] tab, int hash) {
        int n, index; Node<K,V> e;
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
            resize();
        else if ((e = tab[index = (n - 1) & hash]) != null) {
            // First pass: convert each Node into a TreeNode, preserving
            // the chain order in a doubly-linked list (prev/next).
            TreeNode<K,V> hd = null, tl = null;
            do {
                TreeNode<K,V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else {
                    p.prev = tl;
                    tl.next = p;
                }
                tl = p;
            } while ((e = e.next) != null);
            // Second pass: build the actual red-black tree from the list.
            if ((tab[index] = hd) != null)
                hd.treeify(tab);
        }
    }

    查找元素

    就是通过hash值找到对应数组上的位置,然后如果是链表就沿着链表节点迭代去查,如果是红黑树,则去红黑树查。链表时的最坏时间复杂度是O(n),红黑树时的时间复杂度是O(log n)。

    /**
     * Returns the value mapped to the key, or null if absent.
     * Note: null is also returned when the key is mapped to null.
     */
    public V get(Object key) {
        Node<K,V> e;
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    }

    /**
     * Looks up the node for a key: checks the bucket's first node, then
     * either descends the tree or walks the chain.
     *
     * @return the node holding the key, or null if not present
     */
    final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        if ((tab = table) != null && (n = tab.length) > 0 &&
            (first = tab[(n - 1) & hash]) != null) {
            if (first.hash == hash && // always check first node
                ((k = first.key) == key || (key != null && key.equals(k))))
                return first;
            if ((e = first.next) != null) {
                if (first instanceof TreeNode)
                    // Tree bin: O(log n) search.
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                // Linked chain: linear scan.
                do {
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
                } while ((e = e.next) != null);
            }
        }
        return null;
    }

    获取map集合内所有键值对数据的方法:

    /** Returns the (lazily created, cached) set view of all entries. */
    public Set<Map.Entry<K,V>> entrySet() {
        Set<Map.Entry<K,V>> es;
        return (es = entrySet) == null ? (entrySet = new EntrySet()) : es;
    }

    /**
     * Live view over the map's entries: changes to the map are visible
     * through the set and vice versa (remove/clear write through).
     */
    final class EntrySet extends AbstractSet<Map.Entry<K,V>> {
        public final int size()                 { return size; }
        public final void clear()               { HashMap.this.clear(); }
        public final Iterator<Map.Entry<K,V>> iterator() {
            return new EntryIterator();
        }
        public final boolean contains(Object o) {
            if (!(o instanceof Map.Entry))
                return false;
            Map.Entry<?,?> e = (Map.Entry<?,?>) o;
            Object key = e.getKey();
            // Delegate to the map's own lookup, then compare full entries.
            Node<K,V> candidate = getNode(hash(key), key);
            return candidate != null && candidate.equals(e);
        }
        public final boolean remove(Object o) {
            if (o instanceof Map.Entry) {
                Map.Entry<?,?> e = (Map.Entry<?,?>) o;
                Object key = e.getKey();
                Object value = e.getValue();
                // matchValue=true: only remove when key AND value match.
                return removeNode(hash(key), key, value, true, true) != null;
            }
            return false;
        }
        public final Spliterator<Map.Entry<K,V>> spliterator() {
            return new EntrySpliterator<>(HashMap.this, 0, -1, 0, 0);
        }
        public final void forEach(Consumer<? super Map.Entry<K,V>> action) {
            Node<K,V>[] tab;
            if (action == null)
                throw new NullPointerException();
            if (size > 0 && (tab = table) != null) {
                int mc = modCount;
                for (int i = 0; i < tab.length; ++i) {
                    for (Node<K,V> e = tab[i]; e != null; e = e.next)
                        action.accept(e);
                }
                // Fail fast if the map was structurally modified mid-loop.
                if (modCount != mc)
                    throw new ConcurrentModificationException();
            }
        }
    }

    最后提到HashMap就不得不提Hashtable,Hashtable是一个过时的遗留类(并未被正式标记为@Deprecated,但新代码中一般不建议使用),这里简单放下它俩的区别:

    HashMap不同步,而Hashtable是同步的,这意味着Hashtable是线程安全的,多个线程可以共享一个Hashtable;而如果没有正确的同步的话,多个线程是不能共享HashMap的。Java 5提供了ConcurrentHashMap,它是HashTable的替代,比HashTable的扩展性更好。不仅如此,Collections类中存在一个静态方法synchronizedMap(),该方法创建了一个线程安全的Map对象,并把它作为一个封装的对象来返回,也可替代Hashtable。另一个区别是HashMap的迭代器(Iterator)是fail-fast迭代器,而Hashtable的enumerator迭代器不是fail-fast的。所以当有其它线程改变了HashMap的结构(增加或者移除元素),将会抛出ConcurrentModificationException,但迭代器本身的remove()方法移除元素则不会抛出ConcurrentModificationException异常。由于Hashtable是线程安全的,所以在单线程环境下它比HashMap要慢。如果你不需要同步,只需要单一线程,那么使用HashMap性能要好过Hashtable。
    最新回复(0)