Faster Scanner in Scala

Revision en5, by wrick, 2015-10-26 08:10:34

We all know that java.util.Scanner is slow.

Here is a version in Scala that is idiomatic (it is an Iterator[String], so the whole collections API, e.g. .take, .map, .filter, works on it), supports line numbers, and is much faster than the Java Scanner:

import java.io._
import java.nio.file.{Files, Path}
import java.util.StringTokenizer

import scala.io.Codec

/**
 * Scala implementation of a faster java.util.Scanner
 * See: http://mirror.codeforces.com/blog/entry/7018
 *
 * Input is read one line at a time from `reader` and each line is split into tokens with a
 * java.util.StringTokenizer (default delimiters). The scanner is an `Iterator[String]` over those
 * tokens, so the usual collection combinators (`take`, `map`, `filter`, ...) apply. Lines that
 * contain no token are skipped by the token-level methods.
 *
 * @param reader source of the input; closed by `close()`
 */
class Scanner(reader: LineNumberReader) extends Iterator[String] with AutoCloseable {
  // Convenience constructors: each one wraps its argument until a LineNumberReader is obtained.
  def this(reader: BufferedReader) = this(new LineNumberReader(reader))
  def this(reader: Reader) = this(new BufferedReader(reader))
  def this(inputStream: InputStream)(implicit codec: Codec) = this(new InputStreamReader(inputStream, codec.charSet))
  def this(path: Path)(implicit codec: Codec) = this(Files.newBufferedReader(path, codec.charSet))
  def this(file: File)(implicit codec: Codec) = this(file.toPath)(codec)
  def this(str: String) = this(new StringReader(str))

  /** Tokenizer over the line that tokens are currently being taken from, if any. */
  private[this] var current: Option[StringTokenizer] = None

  /**
   * Lines already pulled from `reader` by `hasNext` but not handed out yet, oldest first.
   * Only the last entry can contain a token; entries before it are token-less lines.
   */
  private[this] var lookahead: Vector[String] = Vector.empty

  /** Set once `reader` reported end of input while searching for a token. */
  private[this] var exhausted = false

  private[this] def hasTokens(line: String): Boolean = new StringTokenizer(line).hasMoreTokens

  /**
   * Reads ahead, without handing anything out, until a line containing a token is buffered.
   * @return false if the input ended before such a line was found
   */
  @scala.annotation.tailrec
  private[this] def peek(): Boolean =
    if (lookahead.lastOption.exists(hasTokens)) true
    else if (exhausted) false
    else reader.readLine() match {
      case null =>
        exhausted = true
        false
      case line =>
        lookahead :+= line
        peek()
    }

  /**
   * Returns the tokenizer to take the next token from, moving on to the next line that has
   * tokens when the current one is used up. Moving on discards any token-less lines that were
   * read on the way.
   */
  private[this] def tokenizer(): Option[StringTokenizer] = current.filter(_.hasMoreTokens) orElse {
    current = if (peek()) Some(new StringTokenizer(lookahead.last)) else None
    lookahead = Vector.empty
    current
  }

  /** Same as `tokenizer()` but fails with a descriptive error when the input has no more tokens. */
  private[this] def tokenizerOrFail(): StringTokenizer =
    tokenizer().getOrElse(throw new NoSuchElementException("Scanner has no more tokens"))

  /**
   * Unlike Java's scanner which returns till end of current line, this actually returns the next line.
   * Whatever is left of the line currently being tokenized is dropped. A line that `hasNext`
   * merely looked at is still returned here, so calling `hasNext` never makes a line disappear.
   *
   * @return the next line, or null at the end of the input
   * @see line() if you want the Java behaviour
   */
  def nextLine(): String = {
    current = None   // reset
    lookahead match {
      case buffered +: rest =>
        // hand out lines that hasNext already read before touching the reader again
        lookahead = rest
        buffered
      case _ => reader.readLine()
    }
  }

  /** Number of lines read from the underlying reader so far (includes lines read ahead by `hasNext`). */
  def lineNumber: Int = reader.getLineNumber

  /**
   * Returns the remainder of the line currently being tokenized, or the whole next line that
   * has tokens when the current one is used up.
   * @throws NoSuchElementException if the input has no more tokens
   */
  def line(): String = tokenizerOrFail().nextToken("\n\r")

  def nextString(): String = next()

  /** Next token as a single character; asserts that the token is exactly one character long. */
  def nextChar(): Char = next().ensuring(_.length == 1).head
  def nextBoolean(): Boolean = next().toBoolean
  def nextByte(radix: Int = 10): Byte = java.lang.Byte.parseByte(next(), radix)
  def nextShort(radix: Int = 10): Short = java.lang.Short.parseShort(next(), radix)
  def nextInt(radix: Int = 10): Int = java.lang.Integer.parseInt(next(), radix)
  def nextLong(radix: Int = 10): Long = java.lang.Long.parseLong(next(), radix)
  def nextBigInt(radix: Int = 10): BigInt = BigInt(next(), radix)
  def nextFloat(): Float = next().toFloat
  def nextDouble(): Double = next().toDouble
  def nextBigDecimal(): BigDecimal = BigDecimal(next())

  /**
   * Returns the next token.
   * @throws NoSuchElementException if the input has no more tokens
   */
  override def next(): String = tokenizerOrFail().nextToken()

  /** True if another token is available. May read ahead, but does not discard any line. */
  override def hasNext: Boolean = current.exists(_.hasMoreTokens) || peek()

  /** Closes the underlying reader. */
  override def close(): Unit = reader.close()
}

Source: https://github.com/pathikrit/ScalaForces/blob/master/src/main/scala/Scanner.scala

Benchmarks: https://github.com/pathikrit/better-files/tree/master/benchmarks

Tags scala, java, fast input, input reading, scanner

History

 
 
 
 
Revisions
 
 
  Rev. Lang. By When Δ Comment
en5 English wrick 2015-10-26 08:10:34 800
en4 English wrick 2015-10-21 10:28:35 356
en3 English wrick 2015-10-21 10:24:45 0 (published)
en2 English wrick 2015-10-20 00:05:47 197 (saved to drafts)
en1 English wrick 2015-10-19 22:32:29 2493 Initial revision (published)