模仿st

xiaoxiao2024-07-25 139

update1:添加了remove,removeAll()方法以及getSize()方法 update2:添加了keySet()方法用于迭代 update3:经过测试，StTable类在存储Integer类型key时,put的速度比HashMap快了接近3倍，而remove、get却比HashMap慢；而在存储String类型的key时，put比Hashmap慢，但是get、remove却快不少。读ruby hacking guide，其中专门辟了一个章节介绍了st.c中的st_table，这个数据结构也就是类似java中的HashMap，基本原理是利用数组存储，数组的每一个元素是一个单向链表，链表中再存储具体的元素，如下图所示的结构 ruby中利用这个结构来存储对象变量、类方法、常量、全局变量等信息，因为在c ruby中，方法、变量都是用一个整型作为键值来存储在st_table中，因此这个数据结构对于以整性为键值的map类型来说速度非常不错（我没有测试内存的占用情况）。源码如下： // 接口，用于定义hash函数 //HashFunction.java public interface HashFunction < T > { public int hash(T key); } 链表元素类： public class StTableEntry < T, V > { protected int hash; // hash值 protected T key; // 键 protected V value; // 存储值 protected StTableEntry < T, V > next; // 下一节点 public StTableEntry() { } public StTableEntry( int hash, T key, V value, StTableEntry < T, V > next) { super (); this .hash = hash; this .key = key; this .value = value; this .next = next; } public int getHash() { return hash; } public void setHash( int hash) { this .hash = hash; } public T getKey() { return key; } public void setKey(T key) { this .key = key; } public StTableEntry < T, V > getNext() { return next; } public void setNext(StTableEntry < T, V > next) { this .next = next; } public V getValue() { return value; } public void setValue(V value) { this .value = value; } } 完整的StTable实现,没有实现remove，(update:添加了remove,removeAll()方法以及getSize()方法）： public final class StTable < T, V > { private HashFunction < T > hashFunction; private int num_bins; int num_entries; StTableEntry < T, V > [] bins; public static int DEFAULT_SIZE = 11 ; private static int DEFAULT_MAX_DENSITY = 5 ; private static int DEFAULT_MIN_SIZE = 8 ; private static long primes[] = { 8 + 3 , 16 + 3 , 32 + 5 , 64 + 3 , 128 + 3 , 256 + 27 , 512 + 9 , 1024 + 9 , 2048 + 5 , 4096 + 3 , 8192 + 27 , 16384 + 43 , 32768 + 3 , 65536 + 45 , 131072 + 29 , 262144 + 3 , 524288 + 21 , 1048576 + 7 , 2097152 + 17 , 4194304 + 15 , 8388608 + 9 , 16777216 + 43 , 33554432 + 35 , 67108864 + 15 , 134217728 + 29 , 268435456 + 3 , 536870912 + 11 , 1073741824 + 85 , 0 }; public StTable(HashFunction < T > hashFunction) { this .hashFunction = hashFunction; this .num_bins = DEFAULT_SIZE; this .num_entries = 0 ; this .bins = new StTableEntry[ this .num_bins]; } public StTable(HashFunction < T > hashFunction, int size) { this .hashFunction = hashFunction; if (size == 0 ) throw new IllegalArgumentException( " The size could not less than zero: " + size); this .num_bins = size; this .num_entries = 0 ; this .bins = new StTableEntry[ this .num_bins]; } private long newSize( int size) { for ( int i = 0 , newsize = DEFAULT_MIN_SIZE; i < primes.length; i ++ , newsize <<= 1 ) { if (newsize > size) return primes[i]; } /* Ran out of polynomials */ return - 1 ; /* should raise exception */ } public V get(T key) { int hash_val = doHash(key); StTableEntry < T, V > entry = findEntry(hash_val, key); if (entry == null ) return null ; else return entry.getValue(); } public V put(T key, V value) { int hash_val = doHash(key); StTableEntry < T, V > entry = findEntry(hash_val, key); if (entry == null ) { // 未有键值，直接添加 addDirect(key, value); return value; } else { V v = entry.value; entry.value = value; return v; } } public V remove(T key) { int hash_val = doHash(key); int bin_pos = hash_val % this .num_bins; StTableEntry < T, V > entry = this .bins[bin_pos]; // 记录前一节点，考虑修改采用双向链表也可 StTableEntry < T, V > prev = null ; if (entryNotEqual(entry, key, hash_val)) { prev = entry; entry = entry.next; while (entryNotEqual(entry, key, hash_val)) { prev = entry; entry = entry.next; } } if (entry == null ) return null ; else { if (prev != null ) prev.next = entry.next; // 前一节点的next连接到下一节点 else this .bins[bin_pos] = entry.next; // entry恰好是第一个节点，将数组元素设置为next V v = entry.value; entry = null ; // gc友好 return v; } this .num_entries =0; } public void removeAll() { for ( int i = 0 ; i < this .bins.length; i ++ ) { StTableEntry < T, V > entry = this .bins[i]; this .bins[i] = null ; StTableEntry < T, V > temp = entry; if (entry == null ) continue ; while (entry != null ) { entry = null ; this .num_entries -- ; entry = temp.next; temp = entry; } temp = null ; entry = null ; } } public int getSize() { return this .num_entries; } public Set<T> keySet() { Set<T> keys = new HashSet<T>(this.num_entries); for (int i = 0; i < this.bins.length; i++) { StTableEntry<T, V> entry = this.bins[i]; if (entry == null) continue; while (entry != null) { keys.add(entry.key); entry = entry.next; } } return keys; } // hash函数，调用hashFunction的hash方法 private int doHash(T key) { if (hashFunction.hash(key) < 0 ) throw new IllegalArgumentException( " hash value could not less than zero: " + hashFunction.hash(key)); return hashFunction.hash(key); } // 过于拥挤，重新分布 private void reHash() { int new_size = ( int ) newSize( this .num_bins); StTableEntry < T, V > [] new_bins = (StTableEntry < T, V > []) new StTableEntry[new_size]; for ( int i = 0 ; i < this .num_bins; i ++ ) { StTableEntry < T, V > entry = this .bins[i]; while (entry != null ) { StTableEntry < T, V > next = entry.next; int hash_val = entry.hash % new_size; entry.next = new_bins[hash_val]; new_bins[hash_val] = entry; entry = next; } } this .bins = null ; // gc友好 this .num_bins = new_size; this .bins = new_bins; } private void addDirect(T key, V value) { int hash_val = doHash(key); int bin_pos = hash_val % this .num_bins; if (( this .num_entries / this .num_bins) > DEFAULT_MAX_DENSITY) { reHash(); bin_pos = hash_val % this .num_bins; } StTableEntry < T, V > entry = new StTableEntry < T, V > (); entry.setHash(hash_val); entry.setKey(key); entry.setValue(value); entry.setNext( this .bins[bin_pos]); this .bins[bin_pos] = entry; this .num_entries ++ ; } private StTableEntry < T, V > findEntry( int hash_val, T key) { int bin_pos = hash_val % this .num_bins; StTableEntry < T, V > entry = this .bins[bin_pos]; if (entryNotEqual(entry, key, hash_val)) { entry = entry.next; while (entryNotEqual(entry, key, hash_val)) { entry = entry.next; } } return entry; } // 判断元素是否相同 private boolean entryNotEqual(StTableEntry < T, V > entry, T key, int hash_val) { return entry != null && (entry.getHash() != hash_val || ( ! key.equals(entry.getKey()))); } } 单元测试类就不列了，给一个与HashMap的简单性能对比，以整型为键，显然StTable快多了，对于字符串型，关键是HashFunction的定义，我直接调用String的hashCode方法，不知道有没有其他更好的方法让元素分布的更均匀些： import java.util.HashMap; import java.util.Map; public class Benchmark { public static void main(String args[]) { long map_cost = testStringMap(); long table_cost = testStringTable(); if (map_cost <= table_cost) System.out.println( " map is faster than table " ); else System.out.println( " table is faster than map " ); map_cost = testIntegerMap(); table_cost = testIntegerTable(); if (map_cost <= table_cost) System.out.println( " map is faster than table " ); else System.out.println( " table is faster than map " ); } public static long testIntegerMap() { Map < Integer, Integer > map = new HashMap < Integer, Integer > (); long start = System.nanoTime(); for ( int i = 0 ; i < 10000 ; i ++ ) map.put(i, i); long result = 0 ; for ( int i = 0 ; i < 10000 ; i ++ ) result += map.get(i); long end = System.nanoTime(); System.out.println( " result: " + result); System.out.println( " map: " + (end - start)); return (end - start); } public static long testIntegerTable() { HashFunction < Integer > intHash = new HashFunction < Integer > () { public int hash(Integer key) { return key; } }; StTable < Integer, Integer > table = new StTable < Integer, Integer > (intHash); long start = System.nanoTime(); for ( int i = 0 ; i < 10000 ; i ++ ) table.put(i, i); long result = 0 ; for ( int i = 0 ; i < 10000 ; i ++ ) result += table.get(i); long end = System.nanoTime(); System.out.println( " result: " + result); System.out.println( " table: " + (end - start)); return (end - start); } public static long testStringMap() { Map < String, String > map = new HashMap < String, String > (); long start = System.nanoTime(); for ( int i = 0 ; i < 10000 ; i ++ ) map.put(String.valueOf(i), String.valueOf(i)); long result = 0 ; for ( int i = 0 ; i < 10000 ; i ++ ) result += Integer.parseInt(map.get(String.valueOf(i))); long end = System.nanoTime(); System.out.println( " result: " + result); System.out.println( " map: " + (end - start)); return (end - start); } public static long testStringTable() { HashFunction < String > intHash = new HashFunction < String > () { int i = 0 ; public int hash(String key) { int hashCode = key.hashCode(); return hashCode < 0 ? - hashCode : hashCode; } }; StTable < String, String > table = new StTable < String, String > (intHash); long start = System.nanoTime(); for ( int i = 0 ; i < 10000 ; i ++ ) table.put(String.valueOf(i), String.valueOf(i)); long result = 0 ; for ( int i = 0 ; i < 10000 ; i ++ ) result += Integer.parseInt(table.get(String.valueOf(i))); long end = System.nanoTime(); System.out.println( " result: " + result); System.out.println( " table: " + (end - start)); return (end - start); } }

结果为： result:49995000 map:55501468 result:49995000 table:60999652 map is faster than table result:49995000 map:44634444 result:49995000 table:26209477 table is faster than map 将get换成remove方法，结果也与上面的类似。

文章转自庄周梦蝶，原文发布时间2007-09-18