mirror of
https://github.com/redoules/redoules.github.io.git
synced 2025-12-12 15:59:34 +00:00
324 lines
19 KiB
HTML
324 lines
19 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="fr">
|
|
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
|
|
<meta name="description" content="Data Science for Political and Social Phenomena">
|
|
<meta name="author" content="Guillaume Redoulès">
|
|
<link rel="icon" href="../favicon.ico">
|
|
|
|
<title>Day 9 - Multiple Linear Regression - Blog</title>
|
|
|
|
<!-- JQuery -->
|
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
|
|
<script>
|
|
window.jQuery || document.write('<script src="../theme/js/jquery.min.js"><\/script>')
|
|
</script>
|
|
|
|
<!-- Bootstrap core CSS -->
|
|
<link rel="stylesheet" href="../theme/css/bootstrap.css" />
|
|
<!-- IE10 viewport hack for Surface/desktop Windows 8 bug -->
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/ie10-viewport-bug-workaround.css" />
|
|
<!-- Custom styles for this template -->
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/style.css" />
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/notebooks.css" />
|
|
<link href='https://fonts.googleapis.com/css?family=PT+Serif:400,700|Roboto:400,500,700' rel='stylesheet' type='text/css'>
|
|
|
|
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
|
<!--[if lt IE 9]>
|
|
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
|
|
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
|
<![endif]-->
|
|
|
|
|
|
<meta name="tags" content="Basics" />
|
|
|
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<div class="navbar navbar-fixed-top">
|
|
<div class="container">
|
|
<div class="navbar-header">
|
|
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
|
<span class="icon-bar"></span>
|
|
<span class="icon-bar"></span>
|
|
<span class="icon-bar"></span>
|
|
</button>
|
|
<a class="navbar-brand" href="..">Guillaume Redoulès</a>
|
|
</div>
|
|
<div class="navbar-collapse collapse" id="searchbar">
|
|
|
|
<ul class="nav navbar-nav navbar-right">
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">About<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="../pages/about.html">About Guillaume</a></li>
|
|
<li><a href="https://github.com/redoules">GitHub</a></li>
|
|
<li><a href="https://www.linkedin.com/in/guillaume-redoul%C3%A8s-33923860/">LinkedIn</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Data Science<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="..#Blog">Blog</a></li>
|
|
<li><a href="..#Python">Python</a></li>
|
|
<li><a href="..#Bash">Bash</a></li>
|
|
<li><a href="..#SQL">SQL</a></li>
|
|
<li><a href="..#Mathematics">Mathematics</a></li>
|
|
<li><a href="..#Machine_Learning">Machine Learning</a></li>
|
|
<li><a href="..#Projects">Projects</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Projects<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="https://github.com/redoules/redoules.github.io">Notes (Github)</a></li>
|
|
</ul>
|
|
</li>
|
|
|
|
<!--<li class="dropdown">
|
|
<a href="../feeds/blog.rss.xml">Blog RSS</a>
|
|
</li>-->
|
|
|
|
|
|
</ul>
|
|
|
|
<form class="navbar-form" action="../search.html" onsubmit="return validateForm(this.elements['q'].value);">
|
|
<div class="form-group" style="display:inline;">
|
|
<div class="input-group" style="display:table;">
|
|
<span class="input-group-addon" style="width:1%;"><span class="glyphicon glyphicon-search"></span></span>
|
|
<input class="form-control search-query" name="q" id="tipue_search_input" placeholder="e.g. scikit KNN, pandas merge" required autocomplete="off" type="text">
|
|
</div>
|
|
</div>
|
|
</form>
|
|
|
|
</div>
|
|
<!--/.nav-collapse -->
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<!-- end of header section -->
|
|
<div class="container">
|
|
<!-- <div class="alert alert-warning" role="alert">
|
|
Did you find this page useful? Please do me a quick favor and <a href="#" class="alert-link">endorse me for data science on LinkedIn</a>.
|
|
</div> -->
|
|
<section id="content" class="body">
|
|
<header>
|
|
<h1>
|
|
Day 9 - Multiple Linear Regression
|
|
</h1>
|
|
<ol class="breadcrumb">
|
|
<li>
|
|
<time class="published" datetime="2018-11-16T20:31:00+01:00">
|
|
16 novembre 2018
|
|
</time>
|
|
</li>
|
|
<li>Blog</li>
|
|
<li>Basics</li>
|
|
</ol>
|
|
</header>
|
|
<div class='article_content'>
|
|
<h2>Problem</h2>
|
|
<p>Here is a simple equation:
|
|
</p>
|
|
<div class="math">$$Y=a+b_1\cdot f_1+b_2\cdot f_2+\dots+b_m\cdot f_m$$</div>
|
|
<div class="math">$$Y=a+\sum_{i=1}^m b_i\cdot f_i$$</div>
|
|
<p>for <span class="math">\((m+1)\)</span> real constants <span class="math">\((a, b_1, b_2, \dots, b_m)\)</span>. We can say that the value of <span class="math">\(Y\)</span> depends on <span class="math">\(m\)</span> features. We study this equation for <span class="math">\(n\)</span> different feature sets <span class="math">\((f_1, f_2, \dots, f_m)\)</span> and record each respective value of <span class="math">\(Y\)</span>.</p>
|
|
<p>If we have <span class="math">\(q\)</span> new feature sets, and without accounting for bias and variance trade-offs, what is the value of <span class="math">\(Y\)</span> for each of the sets?</p>
|
|
<h2>Python implementation</h2>
|
|
<div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
<span class="n">m</span> <span class="o">=</span> <span class="mi">2</span>
|
|
<span class="n">n</span> <span class="o">=</span> <span class="mi">7</span>
|
|
<span class="n">x_1</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.18</span><span class="p">,</span> <span class="mf">0.89</span><span class="p">]</span>
|
|
<span class="n">y_1</span> <span class="o">=</span> <span class="mf">109.85</span>
|
|
|
|
<span class="n">x_2</span> <span class="o">=</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">0.26</span><span class="p">]</span>
|
|
<span class="n">y_2</span> <span class="o">=</span> <span class="mf">155.72</span>
|
|
|
|
<span class="n">x_3</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.92</span><span class="p">,</span> <span class="mf">0.11</span><span class="p">]</span>
|
|
<span class="n">y_3</span> <span class="o">=</span> <span class="mf">137.66</span>
|
|
|
|
<span class="n">x_4</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.07</span><span class="p">,</span> <span class="mf">0.37</span><span class="p">]</span>
|
|
<span class="n">y_4</span> <span class="o">=</span> <span class="mf">76.17</span>
|
|
|
|
<span class="n">x_5</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.85</span><span class="p">,</span> <span class="mf">0.16</span><span class="p">]</span>
|
|
<span class="n">y_5</span> <span class="o">=</span> <span class="mf">139.75</span>
|
|
|
|
<span class="n">x_6</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.99</span><span class="p">,</span> <span class="mf">0.41</span><span class="p">]</span>
|
|
<span class="n">y_6</span> <span class="o">=</span> <span class="mf">162.6</span>
|
|
|
|
<span class="n">x_7</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.87</span><span class="p">,</span> <span class="mf">0.47</span><span class="p">]</span>
|
|
<span class="n">y_7</span> <span class="o">=</span> <span class="mf">151.77</span>
|
|
|
|
|
|
<span class="n">q_1</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.49</span><span class="p">,</span> <span class="mf">0.18</span><span class="p">]</span>
|
|
<span class="n">q_2</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.57</span><span class="p">,</span> <span class="mf">0.83</span><span class="p">]</span>
|
|
<span class="n">q_3</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.56</span><span class="p">,</span> <span class="mf">0.64</span><span class="p">]</span>
|
|
<span class="n">q_4</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.76</span><span class="p">,</span> <span class="mf">0.18</span><span class="p">]</span>
|
|
</pre></div>
|
|
|
|
|
|
<p>With scikit-learn</p>
|
|
<div class="highlight"><pre><span></span><span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">x_1</span><span class="p">,</span> <span class="n">x_2</span><span class="p">,</span> <span class="n">x_3</span><span class="p">,</span> <span class="n">x_4</span><span class="p">,</span> <span class="n">x_5</span><span class="p">,</span> <span class="n">x_6</span><span class="p">,</span> <span class="n">x_7</span><span class="p">])</span>
|
|
<span class="n">Y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">y_1</span><span class="p">,</span> <span class="n">y_2</span><span class="p">,</span> <span class="n">y_3</span><span class="p">,</span> <span class="n">y_4</span><span class="p">,</span> <span class="n">y_5</span><span class="p">,</span> <span class="n">y_6</span><span class="p">,</span> <span class="n">y_7</span><span class="p">])</span>
|
|
<span class="n">X_q</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="n">q_1</span><span class="p">,</span> <span class="n">q_2</span><span class="p">,</span> <span class="n">q_3</span><span class="p">,</span> <span class="n">q_4</span><span class="p">])</span>
|
|
|
|
<span class="kn">from</span> <span class="nn">sklearn</span> <span class="kn">import</span> <span class="n">linear_model</span>
|
|
<span class="n">lm</span> <span class="o">=</span> <span class="n">linear_model</span><span class="o">.</span><span class="n">LinearRegression</span><span class="p">()</span>
|
|
<span class="n">lm</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">)</span>
|
|
|
|
<span class="n">lm</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X_q</span><span class="p">)</span>
|
|
</pre></div>
|
|
|
|
|
|
<div class="highlight"><pre><span></span><span class="err">array([105.21455835, 142.67095131, 132.93605469, 129.70175405])</span>
|
|
</pre></div>
|
|
|
|
|
|
<p>Without scikit-learn (but with NumPy)</p>
|
|
<div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">numpy.linalg</span> <span class="kn">import</span> <span class="n">inv</span>
|
|
|
|
<span class="c1">#center</span>
|
|
<span class="n">X_R</span> <span class="o">=</span> <span class="n">X</span><span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X</span><span class="p">,</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
<span class="n">a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">Y</span><span class="p">)</span>
|
|
<span class="n">Y_R</span> <span class="o">=</span> <span class="n">Y</span><span class="o">-</span><span class="n">a</span>
|
|
|
|
<span class="c1">#calculate b</span>
|
|
<span class="n">B</span> <span class="o">=</span> <span class="n">inv</span><span class="p">(</span><span class="n">X_R</span><span class="o">.</span><span class="n">T</span><span class="nd">@X_R</span><span class="p">)</span><span class="nd">@X_R</span><span class="o">.</span><span class="n">T</span><span class="nd">@Y_R</span>
|
|
|
|
|
|
<span class="c1">#predict</span>
|
|
<span class="n">X_new_R</span> <span class="o">=</span> <span class="n">X_q</span><span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X</span><span class="p">,</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
<span class="n">Y_new_R</span> <span class="o">=</span> <span class="n">X_new_R</span><span class="nd">@B</span>
|
|
<span class="n">Y_new</span> <span class="o">=</span> <span class="n">Y_new_R</span> <span class="o">+</span> <span class="n">a</span>
|
|
|
|
<span class="n">Y_new</span>
|
|
</pre></div>
|
|
|
|
|
|
<div class="highlight"><pre><span></span><span class="err">array([105.21455835, 142.67095131, 132.93605469, 129.70175405])</span>
|
|
</pre></div>
|
|
|
|
|
|
<script type="text/javascript">if (!document.getElementById('mathjaxscript_pelican_#%@#$@#')) {
|
|
var align = "center",
|
|
indent = "0em",
|
|
linebreak = "false";
|
|
|
|
if (false) {
|
|
align = (screen.width < 768) ? "left" : align;
|
|
indent = (screen.width < 768) ? "0em" : indent;
|
|
linebreak = (screen.width < 768) ? 'true' : linebreak;
|
|
}
|
|
|
|
var mathjaxscript = document.createElement('script');
|
|
mathjaxscript.id = 'mathjaxscript_pelican_#%@#$@#';
|
|
mathjaxscript.type = 'text/javascript';
|
|
mathjaxscript.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.3/latest.js?config=TeX-AMS-MML_HTMLorMML';
|
|
mathjaxscript[(window.opera ? "innerHTML" : "text")] =
|
|
"MathJax.Hub.Config({" +
|
|
" config: ['MMLorHTML.js']," +
|
|
" TeX: { extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: { autoNumber: 'AMS' } }," +
|
|
" jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
|
|
" extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
|
|
" displayAlign: '"+ align +"'," +
|
|
" displayIndent: '"+ indent +"'," +
|
|
" showMathMenu: true," +
|
|
" messageStyle: 'normal'," +
|
|
" tex2jax: { " +
|
|
" inlineMath: [ ['\\\\(','\\\\)'] ], " +
|
|
" displayMath: [ ['$$','$$'] ]," +
|
|
" processEscapes: true," +
|
|
" preview: 'TeX'," +
|
|
" }, " +
|
|
" 'HTML-CSS': { " +
|
|
" styles: { '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {color: 'inherit ! important'} }," +
|
|
" linebreaks: { automatic: "+ linebreak +", width: '90% container' }," +
|
|
" }, " +
|
|
"}); " +
|
|
"if ('default' !== 'default') {" +
|
|
"MathJax.Hub.Register.StartupHook('HTML-CSS Jax Ready',function () {" +
|
|
"var VARIANT = MathJax.OutputJax['HTML-CSS'].FONTDATA.VARIANT;" +
|
|
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
|
|
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
|
|
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
|
|
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
|
|
"});" +
|
|
"MathJax.Hub.Register.StartupHook('SVG Jax Ready',function () {" +
|
|
"var VARIANT = MathJax.OutputJax.SVG.FONTDATA.VARIANT;" +
|
|
"VARIANT['normal'].fonts.unshift('MathJax_default');" +
|
|
"VARIANT['bold'].fonts.unshift('MathJax_default-bold');" +
|
|
"VARIANT['italic'].fonts.unshift('MathJax_default-italic');" +
|
|
"VARIANT['-tex-mathit'].fonts.unshift('MathJax_default-italic');" +
|
|
"});" +
|
|
"}";
|
|
(document.body || document.getElementsByTagName('head')[0]).appendChild(mathjaxscript);
|
|
}</script>
|
|
</div>
|
|
<aside>
|
|
<div class="bug-reporting__panel">
|
|
<h3>Find an error or bug? Have a suggestion?</h3>
|
|
<p>Everything on this site is available on GitHub. Head on over and <a href='https://github.com/redoules/redoules.github.io/issues/new'>submit an issue.</a> You can also message me directly by <a href='mailto:guillaume.redoules@gadz.org'>email</a>.</p>
|
|
</div>
|
|
</aside>
|
|
</section>
|
|
|
|
</div>
|
|
<!-- start of footer section -->
|
|
<footer class="footer">
|
|
<div class="container">
|
|
<p class="text-muted">
|
|
<center>This project contains 115 pages and is available on <a href="https://github.com/redoules/redoules.github.io">GitHub</a>.
|
|
<br/>
|
|
Copyright © Guillaume Redoulès,
|
|
<time datetime="2018">2018</time>.
|
|
</center>
|
|
</p>
|
|
</div>
|
|
</footer>
|
|
|
|
<!-- This jQuery line finds any span that contains code highlighting classes and then selects the parent <pre> tag and adds a border. This is done as a workaround to visually distinguish the code inputs and outputs -->
|
|
<script>
|
|
$( ".hll, .n, .c, .err, .k, .o, .cm, .cp, .c1, .cs, .gd, .ge, .gr, .gh, .gi, .go, .gp, .gs, .gu, .gt, .kc, .kd, .kn, .kp, .kr, .kt, .m, .s, .na, .nb, .nc, .no, .nd, .ni, .ne, .nf, .nl, .nn, .nt, .nv, .ow, .w, .mf, .mh, .mi, .mo, .sb, .sc, .sd, .s2, .se, .sh, .si, .sx, .sr, .s1, .ss, .bp, .vc, .vg, .vi, .il" ).parent( "pre" ).css( "border", "1px solid #DEDEDE" );
|
|
</script>
|
|
|
|
|
|
<!-- Load Google Analytics -->
|
|
<script>
|
|
/*
|
|
(function(i, s, o, g, r, a, m) {
|
|
i['GoogleAnalyticsObject'] = r;
|
|
i[r] = i[r] || function() {
|
|
(i[r].q = i[r].q || []).push(arguments)
|
|
}, i[r].l = 1 * new Date();
|
|
a = s.createElement(o),
|
|
m = s.getElementsByTagName(o)[0];
|
|
a.async = 1;
|
|
a.src = g;
|
|
m.parentNode.insertBefore(a, m)
|
|
})(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');
|
|
|
|
ga('create', 'UA-66582-32', 'auto');
|
|
ga('send', 'pageview');
|
|
*/
|
|
</script>
|
|
<!-- End of Google Analytics -->
|
|
|
|
<!-- Bootstrap core JavaScript
|
|
================================================== -->
|
|
<!-- Placed at the end of the document so the pages load faster -->
|
|
<script src="../theme/js/bootstrap.min.js"></script>
|
|
<!-- IE10 viewport hack for Surface/desktop Windows 8 bug -->
|
|
<script src="../theme/js/ie10-viewport-bug-workaround.js"></script>
|
|
|
|
|
|
</body>
|
|
|
|
</html> |