Skip to content

Instantly share code, notes, and snippets.

@spaceCamel
Created May 14, 2010 14:05
Show Gist options
  • Save spaceCamel/401182 to your computer and use it in GitHub Desktop.
Save spaceCamel/401182 to your computer and use it in GitHub Desktop.
import collection.mutable.{ListBuffer, LinkedList}
import io.Source
import scala.collection.mutable.HashMap
/**
* Date: Apr 16, 2010
* Time: 10:44:22 PM
*
* Parses the `Fast500_<digits>.txt` files found in a fixed data directory into
* one attribute map per company and dumps the result to standard output.
*
* Each file is read as a run of entry lines (`<position> <company name>`)
* followed by one column of values per remaining attribute, each column
* holding one line per company in entry order.
*/
object Parser
{
  /** Entry line: a numeric position, one whitespace character, then the company name. */
  private val CompanyEntry = """(\d+)\s(.*)""".r

  /**
   * Reads every matching data file, collects the parsed companies and prints
   * each company map followed by a `SIZE:` line with the total count.
   *
   * A missing or unreadable data directory is reported on standard error and
   * treated as containing no files.
   *
   * @param args command-line arguments (ignored)
   * @throws NoSuchElementException if a file ends before every attribute
   *                                column has been read
   */
  def main(args: Array[String]): Unit = {
    val attributes = List("company", "url", "country", "rate", "field")
    // NOTE(review): presumably the field / country values that occur in the data files - confirm.
    //val all_fields = List("Software", "Telecommunications/Networking", "Media/Entertainment", "Internet", "Biotech/Pharmaceutical/ Medical Equipment", "Other", "Greentech", "internet", "Semiconductor, Components and Electronics", "Computers/Peripherals")
    //val all_countries = List("Turkey", "United Kingdom", "Belgium", "Germany", "Netherlands", "Republic of Ireland", "Poland", "France", "Norway", "Israel", "Northern Ireland", "Sweden", "Russia", "Romania", "Finland", "Bulgaria", "Hungary", "Greece", "Serbia", "Slovakia", "Denmark", "Austria", "Czech Republic", "Portugal", "Croatia", "Estonia")
    val allAttributes = "position" :: attributes

    val dataDir = new java.io.File("/Users/xan/IdeaProjects/ScalaTest/data")
    // File.listFiles() returns null (not an empty array) when the path is not
    // a readable directory, so wrap it before filtering.
    val listed = Option(dataDir.listFiles()).getOrElse {
      System.err.println("Data directory not found or unreadable: %s".format(dataDir))
      Array.empty[java.io.File]
    }
    // The dot is escaped so that only a literal ".txt" suffix is accepted.
    val files = listed.filter(_.getName().matches("Fast500_\\d+\\.txt"))

    val allCompanies = ListBuffer[HashMap[String, String]]()
    for (file <- files) {
      val source = Source.fromFile(file)
      try {
        allCompanies ++= parseCompanies(source.getLines().buffered, file.getName(), allAttributes)
      } finally {
        // Release the file handle even when parsing fails.
        source.close()
      }
    }

    // Each company is printed as the string form of the collection of its
    // "(key,value)\n" entries, followed by a newline.
    allCompanies.foreach { company =>
      print("%s\n".format(company.map(entry => "%s\n".format(entry.toString))))
    }
    println("SIZE: %s".format(allCompanies.size))
  }

  /**
   * Parses one file's lines into company maps.
   *
   * Leading lines matching [[CompanyEntry]] each start a new company, stored
   * under the first two attribute names. The remaining attribute names are
   * then filled column by column: for each attribute, one line is consumed
   * per company, in entry order.
   *
   * @param lines      the file's lines; `head` is used to peek without consuming
   * @param origin     label of the input, used only in error messages
   * @param attributes attribute names; the first two key the entry line's
   *                   position and name, the rest key the value columns
   * @return the companies in entry order (empty when no leading line is an entry)
   * @throws NoSuchElementException if the input ends inside a value column
   */
  private def parseCompanies(lines: scala.collection.BufferedIterator[String],
                             origin: String,
                             attributes: List[String]): ListBuffer[HashMap[String, String]] = {
    val companies = ListBuffer[HashMap[String, String]]()

    // Consume entry lines until the first non-entry line or the end of input;
    // hasNext guards head so an exhausted iterator ends the run cleanly.
    @scala.annotation.tailrec
    def readEntries(): Unit =
      if (lines.hasNext) lines.head match {
        case CompanyEntry(position, name) =>
          lines.next()
          companies += HashMap(attributes(0) -> position, attributes(1) -> name)
          readEntries()
        case _ => ()
      }
    readEntries()

    for (attribute <- attributes.drop(2); company <- companies) {
      if (!lines.hasNext)
        throw new NoSuchElementException(
          "%s ended before a '%s' value was read for every company".format(origin, attribute))
      company(attribute) = lines.next()
    }
    companies
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment