{ "cells": [ { "cell_type": "markdown", "id": "uniform-marshall", "metadata": {}, "source": [ "# Dictionaries and functions (24/2-2021)" ] }, { "cell_type": "markdown", "id": "square-franklin", "metadata": {}, "source": [ "## Exercise\n", "\n", "Make a function ``print_words(words)`` that given a list of words ``words``, prints each word on a separate line (in an arbitrary order)." ] }, { "cell_type": "code", "execution_count": 6, "id": "comprehensive-encyclopedia", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "python\n", "zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra']\n", "cobra\n", "zoo=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus'] words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus']\n", "hippopotamus\n", "zoo=['zoo', 'crocodile', 'elephant', 'giraf'] words=['zoo', 'crocodile', 'elephant', 'giraf']\n", "giraf\n", "zoo=['zoo', 'crocodile', 'elephant'] words=['zoo', 'crocodile', 'elephant']\n", "elephant\n", "zoo=['zoo', 'crocodile'] words=['zoo', 'crocodile']\n", "crocodile\n", "zoo=['zoo'] words=['zoo']\n", "zoo\n", "zoo=[]\n" ] } ], "source": [ "# Warning: Dont modify the argument when it is a list\n", "\n", "def print_words(words):\n", " while words:\n", " print(f'{zoo=} {words=}')\n", " word = words.pop()\n", " print(word)\n", "\n", "zoo = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "\n", "print(f'{zoo=}')\n", "print_words(zoo)\n", "print(f'{zoo=}')" ] }, { "cell_type": "code", "execution_count": 48, "id": "directed-nancy", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "zoo\n", "crocodile\n", "elephant\n", "giraf\n", "hippopotamus\n", "cobra\n", "python\n", "words=['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n" ] } ], "source": [ "# Leave the list unchanged\n", "\n", "def print_words(words):\n", " for word in words:\n", " print(word)\n", " \n", "words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "\n", "print(f'{words=}')\n", "print_words(words)\n", "print(f'{words=}')" ] }, { "cell_type": "code", "execution_count": 46, "id": "loose-roads", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zoo\n", "crocodile\n", "elephant\n", "giraf\n", "hippopotamus\n", "cobra\n", "python\n" ] } ], "source": [ "# Use * and keyword argument to print\n", "\n", "words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "\n", "def print_words(words):\n", " print(*words, sep='\\n')\n", " \n", "print_words(words)" ] }, { "cell_type": "markdown", "id": "possible-taste", "metadata": {}, "source": [ "## Exercise\n", "\n", "Create a function ``print_words(words, indent=4)`` that given a list of words ``words``, prints each word on a separate line (in an arbitrary order) with ``indent`` leading spaces. Indent should be an optional keyword argument." 
] },
{ "cell_type": "code", "execution_count": 60, "id": "compact-ensemble", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "zoo\n", "crocodile\n", "elephant\n", "giraf\n", "hippopotamus\n", "cobra\n", "python\n", "..zoo\n", "..crocodile\n", "..elephant\n", "..giraf\n", "..hippopotamus\n", "..cobra\n", "..python\n", "...zoo\n", "...crocodile\n", "...elephant\n", "...giraf\n", "...hippopotamus\n", "...cobra\n", "...python\n", ".......zoo\n", ".......crocodile\n", ".......elephant\n", ".......giraf\n", ".......hippopotamus\n", ".......cobra\n", ".......python\n" ] } ], "source": [ "# Leave the list unchanged\n", "\n", "space = '.'\n", "\n", "#def print_words(words, indent): # indent not optional\n", "def print_words(words, indent=0): # the standard way\n", "#def print_words(words, *, indent=0): # force indent to be given as a keyword argument\n", "    for word in words:\n", "        print(space * indent + word)\n", " \n", "words = ['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']\n", "\n", "print_words(words)\n", "print_words(words, 2) # is it obvious from the code that 2 is the indent?\n", "print_words(words, indent=3)\n", "print_words(words, indent=7)" ] },
{ "cell_type": "markdown", "id": "aquatic-bermuda", "metadata": {}, "source": [ "## Exercise\n", "\n", "Create a function ``longest(words)`` that, given a list of words, returns a longest word (the first one if several words have the maximum length)."
] },
{ "cell_type": "code", "execution_count": 33, "id": "acquired-least", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hippopotamus\n" ] } ], "source": [ "def longest(words):\n", "    long = words[0]\n", "    for word in words[1:]:\n", "        if len(word) > len(long):\n", "            long = word\n", "        # print(f'{word=}, {long=}')\n", "\n", "    return long\n", "\n", "print(longest(['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']))" ] },
{ "cell_type": "code", "execution_count": 2, "id": "becoming-grace", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hippopotamus\n" ] } ], "source": [ "# slightly cheating for now...\n", "\n", "def longest(words):\n", "    return max(words, key=len) # max takes a keyword argument that is a function\n", "\n", "print(longest(['zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python']))" ] },
{ "cell_type": "markdown", "id": "decreased-manufacturer", "metadata": {}, "source": [ "## Exercise\n", "\n", "Create a function ``longest(word1, word2, ...)`` that returns the longest word among the *one or more* arguments provided."
] },
{ "cell_type": "code", "execution_count": 3, "id": "fatal-withdrawal", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hippopotamus\n" ] } ], "source": [ "# Use * argument for zero or more arguments; fails on zero arguments\n", "\n", "def longest(*words):\n", "    long = words[0]\n", "    for word in words[1:]:\n", "        if len(word) > len(long):\n", "            long = word\n", "        # print(f'{word=}, {long=}')\n", "    return long\n", "\n", "print(longest('zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'))\n", "# print(longest()) # IndexError: tuple index out of range" ] },
{ "cell_type": "code", "execution_count": 4, "id": "square-centre", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hippopotamus\n" ] } ], "source": [ "# Better error message, by forcing at least one argument\n", "\n", "def longest(word, *words):\n", "    long = word\n", "    for word in words:\n", "        if len(word) > len(long):\n", "            long = word\n", "    return long\n", "\n", "print(longest('zoo', 'crocodile', 'elephant', 'giraf', 'hippopotamus', 'cobra', 'python'))\n", "# print(longest())" ] },
{ "cell_type": "markdown", "id": "occupational-energy", "metadata": {}, "source": [ "## Exercise\n", "\n", "Create a function ``eliminate(text, words)`` that given a space separated text of words ``text``, and a list of words ``words``, returns the text with all occurrences of words in ``words`` replaced by a corresponding number of ``*``. E.g.\n", "\n", "```\n", "eliminate('Python Java and C are popular languages but Java and C '\n", "          'are statically typed whereas Python is dynamically typed', \n", "          ['Java', 'C', 'statically'])\n", "```\n", "\n", "should return the string\n", "\n", "```\n", "'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'\n", "```" ] },
{ "cell_type": "code", "execution_count": 5, "id": "solar-minority", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Bad version, since many string concatenations\n", "\n", "def eliminate(text, words):\n", "    words = set(words) # 'in' queries are much faster on sets than on lists\n", "    new_string = ''\n", "    for word in text.split():\n", "        new_string += ' '\n", "        if word not in words:\n", "            new_string += word\n", "        else:\n", "            new_string += '*' * len(word)\n", "    return new_string[1:] # drop space\n", "\n", "eliminate(\n", "    'Python Java and C are popular languages but Java and C '\n", "    'are statically typed whereas Python is dynamically typed', \n", "    ['Java', 'C', 'statically']\n", ")" ] },
{ "cell_type": "code", "execution_count": 6, "id": "seasonal-composer", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def eliminate(text, words):\n", "    words = set(words) # 'in' queries are much faster on sets than on lists\n", "    new_content = []\n", "    for word in text.split():\n", "        if word not in words:\n", "            new_content.append(word)\n", "        else:\n", "            new_content.append('*' * len(word))\n", "    return ' '.join(new_content)\n", "\n", "eliminate('Python Java and C are 
popular languages but Java and C '\n", "          'are statically typed whereas Python is dynamically typed', \n", "          ['Java', 'C', 'statically'])" ] },
{ "cell_type": "markdown", "id": "anonymous-choice", "metadata": {}, "source": [ "## Exercise\n", "\n", "Modify the definition of ``eliminate`` so that it can be called as ``eliminate(text, word1, word2, ...)``" ] },
{ "cell_type": "code", "execution_count": 7, "id": "planned-wesley", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def eliminate(text, *words): # * notation\n", "    words = set(words)\n", "    new_content = []\n", "    for word in text.split():\n", "        if word not in words:\n", "            new_content.append(word)\n", "        else:\n", "            new_content.append('*' * len(word))\n", "    return ' '.join(new_content)\n", "\n", "eliminate('Python Java and C are popular languages but Java and C '\n", "          'are statically typed whereas Python is dynamically typed', \n", "          'Java', 'C', 'statically')" ] },
{ "cell_type": "code", "execution_count": 8, "id": "accredited-source", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Python **** and * are popular languages but **** and * are ********** typed whereas Python is dynamically typed'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A list comprehension solution\n", "\n", "def eliminate(text, *words):\n", "    words = set(words)\n", "    new_content = ['*' * len(word) if word in words else word for word in text.split()]\n", " \n", "    return ' '.join(new_content)\n", "\n", "eliminate('Python Java and C are popular languages but Java and C '\n", "          'are statically typed whereas Python is dynamically typed', \n", "          'Java', 'C', 'statically')" ] },
{ "cell_type": "markdown", "id": "overhead-mother", "metadata": {}, "source": [ "## Exercise\n", "\n", "Create a function ``rewrite(text, initial=[word,...], hide=[word,...], upper=[word,...])`` where ``initial``, ``hide``, and ``upper`` are optional keyword arguments each containing a list of words where words in ``initial`` should be replaced by only the first letter followed by ``*``, words in ``hide`` all letters should be replaced by ``*``, and words in ``upper`` should be converted to all upper case. E.g.\n", "\n", "```\n", "rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])\n", "```\n", "\n", "should return \n", "\n", "```\n", "'PYTHON rocks but **** s****'\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "id": "composite-effort", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Let us use frozensets, since the sets are not going to be updated\n", "\n", "def rewrite(text, initial=None, hide=None, upper=None):\n", "    \"\"\"Return ``text`` with the listed words rewritten.\n", "\n", "    Words in ``hide`` become all ``*``, words in ``initial`` keep their first\n", "    letter followed by ``*``, and words in ``upper`` are upper-cased; ``hide``\n", "    takes precedence over ``initial``, which takes precedence over ``upper``.\n", "    \"\"\"\n", "    # None marks an omitted argument; compare by identity (PEP 8) rather than with !=\n", "    initial = frozenset(initial) if initial is not None else frozenset()\n", "    hide = frozenset(hide) if hide is not None else frozenset()\n", "    upper = frozenset(upper) if upper is not None else frozenset()\n", " \n", "    new_content = []\n", "    for word in text.split():\n", "        if word in hide:\n", "            new_content.append('*' * len(word))\n", "        elif word in initial:\n", "            new_content.append(word[0] + '*' * (len(word) - 1))\n", "        elif word in upper:\n", "            new_content.append(word.upper())\n", "        else:\n", "            new_content.append(word) \n", "    return ' '.join(new_content)\n", "\n", "rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])" ] },
{ "cell_type": "code", "execution_count": null, "id": "straight-language", "metadata": {}, "outputs": [], "source": [ "# Let us use frozensets as default values, since the sets are not going to be updated\n", "\n", "def rewrite(text, \n", "            initial=frozenset(), \n", "            hide=frozenset(), \n", "            upper=frozenset()):\n", "    \"\"\"Return ``text`` with the listed words rewritten.\n", "\n", "    Words in ``hide`` become all ``*``, words in ``initial`` keep their first\n", "    letter followed by ``*``, and words in ``upper`` are upper-cased; ``hide``\n", "    takes precedence over ``initial``, which takes precedence over ``upper``.\n", "    \"\"\"\n", " \n", "    initial = frozenset(initial)\n", "    hide = frozenset(hide)\n", "    upper = frozenset(upper)\n", " \n", "    new_content = []\n", "    for word in text.split():\n", "        if word in hide:\n", "            new_content.append('*' * len(word))\n", "        elif word in initial:\n", "            new_content.append(word[0] + '*' * (len(word) - 1))\n", "        elif word in upper:\n", "            new_content.append(word.upper())\n", "        else:\n", "            new_content.append(word) \n", "    return ' '.join(new_content)\n", "\n", "rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])" ] },
{ "cell_type": "code", "execution_count": 12, "id": "first-nursing", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'PYTHON rocks but **** s****'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ... could also use an empty tuple for default value\n", "\n", "def rewrite(text, initial=(), hide=(), upper=()): # <=== \n", "    \"\"\"Return ``text`` with the listed words rewritten.\n", "\n", "    Words in ``hide`` become all ``*``, words in ``initial`` keep their first\n", "    letter followed by ``*``, and words in ``upper`` are upper-cased; ``hide``\n", "    takes precedence over ``initial``, which takes precedence over ``upper``.\n", "    \"\"\"\n", " \n", "    initial = frozenset(initial)\n", "    hide = frozenset(hide)\n", "    upper = frozenset(upper)\n", " \n", "    new_content = []\n", "    for word in text.split():\n", "        if word in hide:\n", "            new_content.append('*' * len(word))\n", "        elif word in initial:\n", "            new_content.append(word[0] + '*' * (len(word) - 1))\n", "        elif word in upper:\n", "            new_content.append(word.upper())\n", "        else:\n", "            new_content.append(word) \n", "    return ' '.join(new_content)\n", "\n", "rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])" ] },
{ "cell_type": "markdown", "id": "attached-conversion", "metadata": {}, "source": [ "## Exercise\n", "\n", "Write a function ``substitute(text, replace)`` where ``replace`` should contain a dictionary of (key, value) items, where keys are words that should be replaced by the corresponding value."
] }, { "cell_type": "code", "execution_count": 14, "id": "sublime-humidity", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'PYTHON rocks but **** sucks'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def substitute(text, replace):\n", " new_content = []\n", " for word in text.split():\n", " if word in replace:\n", " new_content.append(replace[word])\n", " else:\n", " new_content.append(word)\n", " \n", " return ' '.join(new_content)\n", "\n", "substitute('Python rocks but Java sucks', {'Python': 'PYTHON', 'Java':'****'})" ] }, { "cell_type": "code", "execution_count": 18, "id": "naked-joining", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'PYTHON rocks but **** sucks'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def substitute(text, replace):\n", " return ' '.join([replace.get(word, word) for word in text.split()])\n", "\n", "substitute('Python rocks but Java sucks', {'Python': 'PYTHON', 'Java':'****'})" ] }, { "cell_type": "markdown", "id": "regulation-vision", "metadata": {}, "source": [ "## Exercise\n", "\n", "Implement ``rewrite(text, initial=[word,...], hide=[word,...], upper=[word,...])`` using ``substitute(text, replace)``." 
] },
{ "cell_type": "code", "execution_count": 26, "id": "dramatic-camera", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'PYTHON rocks but **** s****'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# |= operator, new in Python 3.9\n", "\n", "def rewrite(text, initial=(), hide=(), upper=()): # <=== \n", "    \"\"\"Return ``text`` rewritten via ``substitute`` and a word -> replacement dict.\n", "\n", "    Words in ``hide`` become all ``*``, words in ``initial`` keep their first\n", "    letter followed by ``*``, and words in ``upper`` are upper-cased.\n", "    \"\"\"\n", "    # On duplicate keys the right-hand dict wins, so merge in increasing priority\n", "    # (upper < initial < hide) to match the if/elif versions of rewrite.\n", "    replace = {word:word.upper() for word in upper}\n", "    # word[:1] instead of word[0]: an empty string in initial must not raise IndexError\n", "    replace |= {word:word[:1] + '*' * (len(word) - 1) for word in initial}\n", "    replace |= {word:'*' * len(word) for word in hide}\n", "    # print(f'{replace=}')\n", "    return substitute(text, replace)\n", "\n", "rewrite('Python rocks but Java sucks', initial=['sucks'], hide=['Java'], upper=['Python'])" ] },
{ "cell_type": "code", "execution_count": null, "id": "duplicate-herald", "metadata": {}, "outputs": [], "source": [ "# Use the | operator on dictionaries, new in Python 3.9\n", "\n", "def rewrite(text, initial=(), hide=(), upper=()):\n", "    \"\"\"Return ``text`` rewritten via ``substitute`` and a word -> replacement dict.\n", "\n", "    Words in ``hide`` become all ``*``, words in ``initial`` keep their first\n", "    letter followed by ``*``, and words in ``upper`` are upper-cased.\n", "    \"\"\"\n", "    # The right-hand operand of | wins on duplicate keys, so hide goes last\n", "    # (priority upper < initial < hide, as in the if/elif versions of rewrite).\n", "    return substitute(text, \n", "        {word:word.upper() for word in upper}\n", "        | {word:word[:1] + '*' * (len(word) - 1) for word in initial}\n", "        | {word:'*' * len(word) for word in hide}\n", "    )\n", "\n", "print(rewrite('Python rocks but Java sucks', \n", "              initial=['sucks'], hide=['Java'], upper=['Python']))\n", "\n", "# Can create a simple eliminate function using rewrite\n", "\n", "def eliminate(text, words):\n", "    \"\"\"Return ``text`` with every word found in ``words`` replaced by ``*`` of the same length.\"\"\"\n", "    return rewrite(text, hide=words)\n", "\n", "print(eliminate(\n", "    'Python Java and C are popular languages but Java and C '\n", "    'are statically typed whereas Python is dynamically typed', \n", "    ['Java', 'C', 'statically']\n", "))" ] },
{ "cell_type": "code", "execution_count": 9, "id": "surrounded-connection", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3.9.1 (tags/v3.9.1:1e5d33e, Dec 7 2020, 17:08:21) [MSC v.1927 64 bit (AMD64)]'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sys\n", "sys.version" ] },
{ "cell_type": "code", "execution_count": null, "id": "potential-demographic", "metadata": {}, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 5 }