{"id":4678,"date":"2019-06-11T14:24:55","date_gmt":"2019-06-11T12:24:55","guid":{"rendered":"http:\/\/blog.via-internet.de\/?p=4678"},"modified":"2019-06-11T14:24:55","modified_gmt":"2019-06-11T12:24:55","slug":"learning-hadoop","status":"publish","type":"post","link":"https:\/\/via-internet.de\/blog\/2019\/06\/11\/learning-hadoop\/","title":{"rendered":"Hadoop | Getting started"},"content":{"rendered":"\n<figure class=\"wp-block-image is-resized\"><a href=\"https:\/\/hadoop.apache.org\/\"><img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/blog.via-internet.de\/wp-content\/uploads\/2019\/06\/hadoop-logo.jpg\" alt=\"\" class=\"wp-image-4705\" width=\"558\" height=\"141\"\/><\/a><\/figure>\n\n\n\n<h2 class=\"wp-block-heading\">Modules<\/h2>\n\n\n\n<table class=\"wp-block-advgb-table advgb-table-frontend\"><tbody><tr><td><strong>HDFS<\/strong><\/td><td>Hadoop\u2019s File Share which can be local or shared depending on your setup<\/td><\/tr><tr><td><strong>MapReduce<\/strong><\/td><td>Hadoop\u2019s Aggregation\/Synchronization tool enabling highly parallel processing\u2026this is the true \u201cengine\u201d or time saver in Hadoop<\/td><\/tr><tr><td><strong>Hive<\/strong><\/td><td>Hadoop\u2019s SQL query window, equivalent to Microsoft Query Analyzer<\/td><\/tr><tr><td><strong><a href=\"https:\/\/pig.apache.org\/\">Pig<\/a><\/strong><\/td><td>Dataflow scripting tool similar to a Batch job or simplistic ETL processor<\/td><\/tr><tr><td><strong>Flume<\/strong><\/td><td>Collector\/Facilitator of Log file information<\/td><\/tr><tr><td><strong>Ambari<\/strong><\/td><td>Web-based Admin tool utilized for managing, provisioning, and monitoring Hadoop Cluster<\/td><\/tr><tr><td><strong>Cassandra<\/strong><\/td><td>High-Availability, Scalable, Multi-Master database platform\u2026RDBMS on steroids<\/td><\/tr><tr><td><strong>Mahout<\/strong><\/td><td>Machine Learning engine, which translates into, it does complex calculations, algorithmic processing, and 
statistical\/stochastic operations using R and other frameworks\u2026it does serious math!<\/td><\/tr><tr><td><strong>Spark<\/strong><\/td><td>Programmatic based compute engine allowing for ETL, machine learning, stream processing, and graph computation<\/td><\/tr><tr><td><strong>ZooKeeper<\/strong><\/td><td>Coordinator service for all your distributed processing<\/td><\/tr><tr><td><strong>Oozie<\/strong><\/td><td>Workflow scheduler managing Hadoop jobs<\/td><\/tr><\/tbody><\/table>\n\n\n\n<h2 class=\"wp-block-heading\">Links<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">Apache<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/sentry.apache.org\/\">https:\/\/sentry.apache.org\/<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/de.hortonworks.com\/apache\/ranger\/\">https:\/\/de.hortonworks.com\/apache\/ranger\/<\/a>  <\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/mahout.apache.org\/\">https:\/\/mahout.apache.org\/<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/pig.apache.org\/\">https:\/\/pig.apache.org\/<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/zookeeper.apache.org\/\">https:\/\/zookeeper.apache.org\/<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/oozie.apache.org\/\">https:\/\/oozie.apache.org\/<\/a><\/p>\n\n\n\n<h2 class=\"wp-block-heading\">Diverses<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"http:\/\/ercoppa.github.io\/HadoopInternals\/\">http:\/\/ercoppa.github.io\/HadoopInternals\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>Modules Links Apache https:\/\/sentry.apache.org\/ https:\/\/de.hortonworks.com\/apache\/ranger\/ https:\/\/mahout.apache.org\/ https:\/\/pig.apache.org\/ https:\/\/zookeeper.apache.org\/ https:\/\/oozie.apache.org\/ Diverses 
http:\/\/ercoppa.github.io\/HadoopInternals\/<\/p>\n","protected":false},"author":1,"featured_media":4707,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"aside","meta":{"_crdt_document":"","_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[31],"tags":[],"class_list":["post-4678","post","type-post","status-publish","format-aside","has-post-thumbnail","hentry","category-hadoop","post_format-post-format-aside"],"jetpack_featured_media_url":"https:\/\/via-internet.de\/blog\/wp-content\/uploads\/2019\/06\/hadoop-logo-elephant-1.png","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/posts\/4678","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/comments?post=4678"}],"version-history":[{"count":0,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/posts\/4678\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/media\/4707"}],"wp:attachment":[{"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/media?parent=4678"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/categories?post=4678"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/via-internet.de\/blog\/wp-json\/wp\/v2\/tags?post=4678"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}