有一段时间没有使用Scala, 我们今天回顾下Scala的常用的函数算子, 并使用一些练习加深理解和印象. 这些函数对于理解Spark的相关算子非常有用. 建议自己练习.
所有的练习代码, 都可以在 https://github.com/SeanYanxml/arsenal/tree/master/arsenal-scala/quick-scala/quick-scala 上找到.
override def foreach[U](f : scala.Function1[A, U]) : scala.Unit = { /* compiled code */ }
遍历数组&集合.
scala> val lst0 = List(1,2,3,4,5,6) lst0: List[Int] = List(1, 2, 3, 4, 5, 6) scala> lst0.foreach(print(_)) 123456 map()final override def map[B, That](f : scala.Function1[A, B])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
用于遍历数组、Map集合、List集合、Tuple元组内的每一个元素. 传入的是一个函数, 该函数会作用于每一个元素, 并将各个结果组成新的集合返回.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2) scala> val lst1 = lst0.map(_*10) lst1: List[Int] = List(10, 70, 90, 80, 0, 30, 50, 40, 60, 20) Tips - foreach()&map() foreach()与map()方法的区别在于, foreach()无返回值, 而map()有返回值. scala> val lst0 = List(1,2,3,4,5) lst0: List[Int] = List(1, 2, 3, 4, 5) scala> val lst1 = lst0.foreach(_*10) lst1: Unit = () # 无输出 scala> lst1 scala> val lst2 = lst0.map(_*10) lst2: List[Int] = List(10, 20, 30, 40, 50) # 含有输出 scala> lst2 res2: List[Int] = List(10, 20, 30, 40, 50) map().flatten/flatMap()def flatten[B](implicit asTraversable : scala.Function1[A, scala.collection.GenTraversableOnce[B]]) : CC[B] = { /* compiled code */ }
final override def flatMap[B, That](f : scala.Function1[A, scala.collection.GenTraversableOnce[B]])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
先进行map()函数操作, 再将其进行压平.
scala> val lines = List("hello tom hello jerry", "hello jerry", "hello kitty") lines: List[String] = List(hello tom hello jerry, hello jerry, hello kitty) # 获得里面是字符串数组的List对象 scala> val linesSplit = lines.map(_.split(" ")) linesSplit: List[Array[String]] = List(Array(hello, tom, hello, jerry), Array(hello, jerry), Array(hello, kitty)) # 将所有的数组都压平,压在一个List内部 scala> val linesSplitFlat = lines.map(_.split(" ")).flatten linesSplitFlat: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty) # 使用flatMap scala> val linesSplitFlat2 = lines.flatMap(_.split(" ")) linesSplitFlat2: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty) filter()def filter(p : scala.Function1[A, scala.Boolean]) : Repr = { /* compiled code */ }
过滤, 过滤出数组或集合内满足筛选条件的数据.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2) scala> val list3 = lst0.filter(_>5) list3: List[Int] = List(7, 9, 8, 6) sorted/sortBy()/sortWith() def sorted[B >: A](implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }def sortBy[B](f : scala.Function1[A, B])(implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }def sortWith(lt : scala.Function2[A, A, scala.Boolean]) : Repr = { /* compiled code */ }三个函数都可以用于排序. 其中sorted是按默认顺序的简单排序, sortBy可以指定以某一个属性进行排序, sortWith可以指定排序的比较函数.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2) scala> val lst3_1 = lst0.sorted lst3_1: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) scala> val lst3_2 = lst0.sortBy(x =>x) lst3_2: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) scala> val lst3_2 = lst0.sortBy(x => (-x)) lst3_2: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0) # 传入一个compare()函数 scala> val lst3_3 = lst0.sortWith((x,y) => (x>y)) lst3_3: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0) reverseoverride def reverse : scala.collection.immutable.List[A] = { /* compiled code */ }
反转.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2) scala> val lst4 = lst0.reverse lst4: List[Int] = List(2, 6, 4, 5, 3, 0, 8, 9, 7, 1) grouped()/groupBy() def grouped(size : scala.Int) : scala.collection.Iterator[Repr] = { /* compiled code */ }def groupBy[K](f : scala.Function1[A, K]) : scala.collection.immutable.Map[K, Repr] = { /* compiled code */ }grouped()是按指定的个数将元素依次分组, 返回的是一个Iterator[List[A]]的迭代器; groupBy()指定分类的函数, 返回的是一个Map[K, List[A]]的Map.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2) #grouped() scala> val lst5 = lst0.grouped(4) lst5: Iterator[List[Int]] = non-empty iterator scala> lst5.toList res1: List[List[Int]] = List(List(1, 7, 9, 8), List(0, 3, 5, 4), List(6, 2)) # groupBy() scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2,3,2,3) lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2, 3, 2, 3) scala> val lst8 = lst0.groupBy(x => (x)) lst8: scala.collection.immutable.Map[Int,List[Int]] = Map(0 -> List(0), 5 -> List(5), 1 -> List(1), 6 -> List(6), 9 -> List(9), 2 -> List(2, 2), 7 -> List(7), 3 -> List(3, 3, 3), 8 -> List(8), 4 -> List(4)) scala> val lst8 = lst0.groupBy(x => (x)).toList lst8: List[(Int, List[Int])] = List((0,List(0)), (5,List(5)), (1,List(1)), (6,List(6)), (9,List(9)), (2,List(2, 2)), (7,List(7)), (3,List(3, 3, 3)), (8,List(8)), (4,List(4))) scala> val lst8 = lst0.groupBy(x => (x%2==1)).toList lst8: List[(Boolean, List[Int])] = List((false,List(8, 0, 4, 6, 2, 2)), (true,List(1, 7, 9, 3, 5, 3, 3))) reduce()def reduce[A1 >: A](op : scala.Function2[A1, A1, A1]) : A1 = { /* compiled code */ }
归约计算: 使用传入的二元函数将集合内的元素依次合并为一个值. 配合par使用时可以并行化计算.
scala> val array = Array(1,2,3,4,5,6,7) array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7) scala> val sum = array.reduce(_+_) sum: Int = 28 scala> array.par.reduce(_+_) res3: Int = 28 fold()def fold[U >: T](z : U)(op : scala.Function2[U, U, U]) : U = { /* compiled code */ }
scala> val array = Array(1,2,3,4,5,6,7) array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7) scala> array.fold(10)(_+_) res4: Int = 38 scala> array.par.fold(10)(_+_) res5: Int = 98 scala> array.par.fold(0)(_+_) res6: Int = 28 aggregate()def aggregate[B](z : => B)(seqop : scala.Function2[B, A, B], combop : scala.Function2[B, B, B]) : B = { /* compiled code */ }
聚合函数.
scala> val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0)) arr: List[List[Int]] = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0)) scala> arr.aggregate(0)(_+_.sum, _+_) res7: Int = 20 交集、并集、差集 (intersect/union/diff) def intersect[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ }override def union[B >: A, That](that : scala.collection.GenSeq[B])(implicit bf : scala.collection.generic.CanBuildFrom[Repr, B, That]) : That = { /* compiled code */ }def diff[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ } scala> val l1 = List(5,6,4,7) l1: List[Int] = List(5, 6, 4, 7) scala> val l2 = List(1,2,3,4) l2: List[Int] = List(1, 2, 3, 4) # 并集 scala> val unionL1 = l1.union(l2) unionL1: List[Int] = List(5, 6, 4, 7, 1, 2, 3, 4) # 交集 scala> val insercetionL1 = l1.intersect(l2) insercetionL1: List[Int] = List(4) # 差集 scala> val differenceL1 = l1.diff(l2) differenceL1: List[Int] = List(5, 6, 7)public String[] split(String regex) { return split(regex, 0); }
分割字符串, 返回一个字符串数组.
scala> val str1 = "a b c d e" str1: String = a b c d e scala> val strArray1 = str1.split(" ") strArray1: Array[String] = Array(a, b, c, d, e) scala> strArray1 res4: Array[String] = Array(a, b, c, d, e)虽然, 有些函数的定义还不能像Java一样完全看懂, 但是记录于此. 多看几次. 此外, 有许多的函数暂没有列举出来. 后续再进行补充.
[1]. Scala 中的foreach和map方法比较