Cloudera CDH/CDP 및 Hadoop EcoSystem, Semantic IoT 등의 개발/운영 기술을 정리합니다. gooper@gooper.com으로 문의 주세요.
import org.apache.spark.sql.SparkSession
/**
 * Spark application that extracts the accounts belonging to a single state.
 *
 * Reads the `accounts` table, keeps the rows whose `state` column equals the
 * state code passed on the command line, and saves them under
 * `/loudacre/accounts_by_state/<state-code>`, overwriting any existing output
 * at that location.
 */
object AccountsByState {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; `args(0)` is the state code to filter
   *             on. When no argument is supplied, a usage message is printed
   *             to stderr and the JVM exits with status 1.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println("Usage: stubs.AccountsByState <state-code>")
      System.exit(1)
    }

    val stateCode = args(0)
    val spark = SparkSession.builder.getOrCreate()

    // Stop the session even when the read or the write fails, so the
    // application never exits this method with the session still running.
    try {
      val accountsDF = spark.read.table("accounts")
      val stateAccountsDF = accountsDF.where(accountsDF("state") === stateCode)

      stateAccountsDF.write
        .mode("overwrite")
        .save(s"/loudacre/accounts_by_state/$stateCode")
    } finally {
      spark.stop()
    }
  }
}