Monday, February 1, 2010
git basics
git config --global user.name "Andre Quina"
git config --global user.email andre.quina@gmail.com
mkdir testgit
cd testgit/
git init
git add readme
git add README
git commit -m 'first commit'
git remote add origin git@github.com:andrequina/testgit.git
git config --global user.email andre.quina@gmail.com
mkdir testgit
cd testgit/
git init
git add readme
git add README
git commit -m 'first commit'
git remote add origin git@github.com:andrequina/testgit.git
git push origin master
Linux paste and sed -- join and remove blanks
The following commands 1) join the three files, interleaving their lines, and 2) remove any blank lines created when the files are not the same length. Note: redirect the paste output to preQuery.txt (append "> preQuery.txt") so that the sed step has a file to read.
paste one.txt two.txt three.txt -d'\n'
sed '/^$/d' preQuery.txt > no_blanks
paste one.txt two.txt three.txt -d'\n'
sed '/^$/d' preQuery.txt > no_blanks
Linux command - remove lines
The following command finds the lines in file2 that are missing from file1 (whole-line, fixed-string comparison).
fgrep -vxf file1_sorted file2_sorted > removed_ids
fgrep -vxf file1_sorted file2_sorted > removed_ids
SSH without password
the following commands allow an ssh connection without a password
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh-copy-id hadoop@slave01
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh-copy-id hadoop@slave01
Setup Hadoop
# setup hadoop
#sudo adduser --ingroup hadoop hadoop
sudo useradd -d /home/hadoop -m hadoop
sudo usermod -s /bin/bash hadoop
sudo passwd hadoop
su - hadoop
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
sudo cp hadoop-0.20.1.tar.gz /usr/local
cd /usr/local
sudo tar xzf hadoop-0.20.1.tar.gz
sudo chown -R hadoop:hadoop hadoop-0.20.1/
sudo ln -s /usr/local/hadoop-0.20.1/ /usr/local/hadoop
nano /usr/local/hadoop/conf/hadoop-env.sh
set:
export JAVA_HOME=/usr/lib/jvm/java-6-sun
# allows hadoop to bind to 0.0.0.0 without binding to the IPv6 interface
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true
edit:
core-site.xml
hdfs-site.xml
mapred-site.xml
# format name node
./bin/hadoop namenode -format
# start cluster
./bin/start-all.sh
# command should list hadoop processes
jps
#----------------
# Test
./bin/hadoop dfs -copyFromLocal /tmp/davinci/ davinci
./bin/hadoop dfs -ls
./bin/hadoop dfs -ls davinci
#sudo adduser --ingroup hadoop hadoop
sudo useradd -d /home/hadoop -m hadoop
sudo usermod -s /bin/bash hadoop
sudo passwd hadoop
su - hadoop
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
sudo cp hadoop-0.20.1.tar.gz /usr/local
cd /usr/local
sudo tar xzf hadoop-0.20.1.tar.gz
sudo chown -R hadoop:hadoop hadoop-0.20.1/
sudo ln -s /usr/local/hadoop-0.20.1/ /usr/local/hadoop
nano /usr/local/hadoop/conf/hadoop-env.sh
set:
export JAVA_HOME=/usr/lib/jvm/java-6-sun
# allows hadoop to bind to 0.0.0.0 without binding to the IPv6 interface
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true
edit:
core-site.xml
hdfs-site.xml
mapred-site.xml
# format name node
./bin/hadoop namenode -format
# start cluster
./bin/start-all.sh
# command should list hadoop processes
jps
#----------------
# Test
./bin/hadoop dfs -copyFromLocal /tmp/davinci/ davinci
./bin/hadoop dfs -ls
./bin/hadoop dfs -ls davinci
Patch Hadoop
patch is from http://issues.apache.org/jira/browse/MAPREDUCE-375
originally targeted to 0.21.0, but modified to work with 0.20.1 (mostly resolving directory changes made in 0.21.0)
svn co http://svn.apache.org/repos/asf/hadoop/common/tags/release-0.20.1/ hadoop-0.20.1-src
cd hadoop-0.20.1-src
patch -p0 -i /zoom/libraries/apache/hadoop/patch-375-2.txt
there should be one rejection on NLineInputFormat in mapred
take a look at the rejection file; applying the change by hand should be fairly straightforward.
the rejection is caused by a small inconsistency between the NLineInputFormat in the two versions
it looks like a small bug fix present in 0.21.0 but not in 0.20.1... should be fine as the patch just
dumps the code in the method and delegates to the mapreduce NLineInputFormat.
originally targeted to 0.21.0, but modified to work with 0.20.1 (mostly resolving directory changes made in 0.21.0)
svn co http://svn.apache.org/repos/asf/hadoop/common/tags/release-0.20.1/ hadoop-0.20.1-src
cd hadoop-0.20.1-src
patch -p0 -i /zoom/libraries/apache/hadoop/patch-375-2.txt
there should be one rejection on NLineInputFormat in mapred
take a look at the rejection file; applying the change by hand should be fairly straightforward.
the rejection is caused by a small inconsistency between the NLineInputFormat in the two versions
it looks like a small bug fix present in 0.21.0 but not in 0.20.1... should be fine as the patch just
dumps the code in the method and delegates to the mapreduce NLineInputFormat.
Subscribe to:
Posts (Atom)