{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Code for topic #7:
Sort, search and complexity "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import random"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Search "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Sequential search "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5\n"
]
}
],
"source": [
"def sequential_search(lst, key):\n",
" for i in range(len(lst)):\n",
" if lst[i] == key :\n",
" return i\n",
" # we get here when key is not in list\n",
" return None\n",
"\n",
"print(sequential_search(list(range(10)),5))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5\n"
]
}
],
"source": [
"def sequential_search_back(lst, key):\n",
" lst = lst[::-1]\n",
" for i in range(len(lst)):\n",
" if lst[i] == key :\n",
" return len(lst)-i-1\n",
" # we get here when key is not in list\n",
" return None\n",
"print(sequential_search(list(range(10)),5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Binary search "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8\n",
"None\n"
]
}
],
"source": [
"# Binary search\n",
"def binary_search(lst, key):\n",
" \"\"\" iterative binary search. lst must be sorted \"\"\"\n",
" n= len(lst)\n",
" left = 0\n",
" right = n-1\n",
" \n",
" while left <= right :\n",
" middle = (right + left)//2 # middle rounded down\n",
" if key == lst[middle]: # item found\n",
" return middle\n",
" elif key < lst[middle]: # item not in top half\n",
" right = middle-1\n",
" else: # item not in bottom half\n",
" left = middle+1\n",
" # not found\n",
" return None\n",
"\n",
"lst = [3,4,5,6,9,10,13,16,18,20,22,28,29,31,32,33,40,42,47,48,50,52]\n",
"print(binary_search(lst,18))\n",
"print(binary_search(lst,17))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n= 1000000\n",
"sequential search: 4.321558300000106\n",
"binary search: 0.0002607999999781896\n",
"n= 2000000\n",
"sequential search: 10.07461380000018\n",
"binary search: 0.00025860000005195616\n",
"n= 4000000\n",
"sequential search: 16.15123709999989\n",
"binary search: 0.0004266999999344989\n"
]
}
],
"source": [
"# Execution time comparison: sequential vs. binary search\n",
"import timeit\n",
"\n",
"repeat = 20 #repeat execution several times, for more significant results\n",
"\n",
"for n in [10**6, 2*10**6, 4*10**6]:\n",
" print(\"n=\", n)\n",
" \n",
" L = [i for i in range(n)] \n",
" key = -1 # why?\n",
" \n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" res = sequential_search(L, key)\n",
" t1 = timeit.default_timer()\n",
" print(\"sequential search:\", t1-t0)\n",
"\n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" res = binary_search(L, key)\n",
" t1 = timeit.default_timer()\n",
" print(\"binary search:\", t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8\n",
"None\n"
]
}
],
"source": [
"def rec_binary_search(lst, key, left, right):\n",
" \"\"\" recursive binary search.\n",
" passing lower and upper boundaries\"\"\"\n",
" if left > right:\n",
" return None\n",
"\n",
" middle = (left+right)//2\n",
" if key == lst[middle]:\n",
" return middle \n",
" elif key < lst[middle]: # item cannot be in top half\n",
" return rec_binary_search(lst, key, left, middle-1)\n",
" else: # item cannot be in bottom half\n",
" return rec_binary_search(lst, key, middle+1, right)\n",
"\n",
"lst = [3,4,5,6,9,10,13,16,18,20,22,28,29,31,32,33,40,42,47,48,50,52]\n",
"n = len(lst)\n",
"print(rec_binary_search(lst,18,0,n-1))\n",
"print(rec_binary_search(lst,17,0,n-1))"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"def rec_display_binary_search(lst, key, left, right):\n",
" \"\"\" recursive binary search.\n",
" printing intermediate values to track execution \"\"\"\n",
" if left > right:\n",
" return None\n",
"\n",
" middle = (left+right)//2\n",
"\n",
" print(\"left=\", left, \"middle=\", middle, \"right=\", right)\n",
" print(\"middle element=\", lst[middle])\n",
"\n",
" if key == lst[middle]:\n",
" return middle \n",
" elif key < lst[middle]: # item cannot be in top half\n",
" return rec_display_binary_search(lst, key, left, middle-1)\n",
" else: # item cannot be in bottom half\n",
" return rec_display_binary_search(lst, key, middle+1, right)\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[7, 8, 20, 47, 48, 54, 57, 64, 96, 98, 105, 116, 120, 132, 154, 167, 201, 203, 208, 209, 234, 237, 267, 268, 281, 287, 292, 315, 342, 346, 347, 351, 353, 361, 376, 377, 380, 418, 424, 441, 459, 460, 463, 468, 472, 484, 485, 487, 492, 508, 513, 527, 540, 547, 552, 587, 587, 594, 597, 619, 620, 632, 636, 642, 648, 653, 653, 655, 655, 657, 680, 693, 696, 703, 730, 735, 736, 741, 741, 778, 778, 785, 790, 798, 799, 803, 813, 848, 865, 881, 884, 896, 902, 919, 926, 942, 971, 972, 972, 981]\n"
]
}
],
"source": [
"import random;\n",
"L = [random.randrange(1000) for i in range(100)]\n",
"L = sorted(L)\n",
"print(L)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"left= 0 middle= 49 right= 99\n",
"middle element= 508\n",
"left= 0 middle= 24 right= 48\n",
"middle element= 281\n",
"left= 0 middle= 11 right= 23\n",
"middle element= 116\n",
"left= 12 middle= 17 right= 23\n",
"middle element= 203\n",
"left= 18 middle= 20 right= 23\n",
"middle element= 234\n",
"left= 18 middle= 18 right= 19\n",
"middle element= 208\n",
"left= 19 middle= 19 right= 19\n",
"middle element= 209\n"
]
}
],
"source": [
"rec_display_binary_search(L, 210, 0, len(L)-1)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"iterative binary search: 0.01975062286510365\n",
"recursive binary search: 0.0290216731664259\n"
]
}
],
"source": [
"import timeit\n",
"repeat = 10000\n",
"\n",
"t0 = timeit.default_timer()\n",
"for i in range(repeat):\n",
" res = binary_search(L, 250)\n",
"t1 = timeit.default_timer()\n",
"print(\"iterative binary search:\", t1-t0)\n",
"\n",
"\n",
"# import timeit\n",
"t0 = timeit.default_timer()\n",
"for i in range(repeat):\n",
" res = rec_binary_search(L, 250, 0, len(L)-1)\n",
"t1 = timeit.default_timer()\n",
"print(\"recursive binary search:\", t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"def rec_slice_binary_search(lst, key):\n",
" \"\"\" recursive binary search.\n",
" passing sliced list \"\"\"\n",
" n = len(lst)\n",
" if n<=0:\n",
" return None\n",
" if key == lst[n//2]:\n",
" return n//2 \n",
" elif key < lst[n//2]: # item cannot be in top half\n",
" return rec_slice_binary_search(lst[0:n//2], key)\n",
" else: # item cannot be in bottom half\n",
" tmp = rec_slice_binary_search(lst[n//2+1:n], key)\n",
" if tmp == None:\n",
" return None\n",
" else:\n",
" return tmp + n//2 + 1"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"without slicing: 0.05725237328442745\n",
"with slicing: 3.523330970259849\n"
]
}
],
"source": [
"import timeit\n",
"repeat = 10000\n",
"\n",
"L = [i for i in range(10**5)]\n",
"\n",
"t0 = timeit.default_timer()\n",
"for i in range(repeat):\n",
" rec_binary_search(L, -1, 0, len(L)-1)\n",
"t1 = timeit.default_timer()\n",
"print(\"without slicing:\", t1-t0)\n",
"\n",
"\n",
"# import timeit\n",
"t0 = timeit.default_timer()\n",
"for i in range(repeat):\n",
" res = rec_slice_binary_search(L, -1)\n",
"t1 = timeit.default_timer()\n",
"print(\"with slicing:\", t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"def wrap_binary_search(lst, key):\n",
" \"\"\" calls a recursive binary search\n",
" lst better be sorted for binary search to work\"\"\"\n",
" n = len(lst)\n",
" return rec_binary_search(lst, key, 0, n-1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Sort "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Selection sort "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 44]\n"
]
}
],
"source": [
"def selection_sort(lst):\n",
" ''' sort lst (in-place) '''\n",
" n = len(lst)\n",
" for i in range(n):\n",
" m_index = i #index of minimum\n",
" for j in range(i+1,n):\n",
" if lst[m_index] > lst[j]:\n",
" m_index = j\n",
" swap(lst, i, m_index)\n",
" return None \n",
"\n",
"def swap(lst, i, j):\n",
" tmp = lst[i]\n",
" lst[i] = lst[j]\n",
" lst[j] = tmp\n",
"\n",
"\n",
"lst = [5,2,9,4,3,6,2,6,44,7,2,8]\n",
"selection_sort(lst)\n",
"print(lst)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n= 1000 0.04210536679602228\n",
"n= 2000 0.1216297035352909\n",
"n= 4000 0.47894713426649105\n",
"n= 8000 1.9123299900238635\n"
]
}
],
"source": [
"# Selection sort - actual running time\n",
"import timeit\n",
"import random\n",
"\n",
"for n in [1000,2000,4000,8000]:\n",
" lst = list(range(n)) # [0,1,2,…,n-1]\n",
" random.shuffle(lst) # balagan\n",
"\n",
" t0 = timeit.default_timer() # stopper go!\n",
" selection_sort(lst)\n",
" t1 = timeit.default_timer() # stopper end\n",
"\n",
" print(\"n=\", n, t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def selection_sort2(lst):\n",
" n = len(lst)\n",
" for i in range(n):\n",
" m = min(lst[i:n])\n",
" m_index = lst[i:n].index(m) + i # ind of minimum\n",
" lst[i], lst[m_index] = lst[m_index], lst[i] "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 44]\n"
]
}
],
"source": [
"lst = [5,2,9,4,3,6,2,6,44,7,2,8]\n",
"selection_sort2(lst)\n",
"print(lst)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Bubble sort "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[57, 55, 44, 73, 0, 71, 8, 45, 62, 39, 94, 19, 31, 67, 4, 46, 28, 29, 93, 54, 96, 34, 90, 60, 74, 20, 49, 3, 79, 36, 80, 63, 89, 22, 30, 5, 47, 14, 6, 98, 92, 7, 99, 87, 59, 75, 33, 43, 35, 50, 97, 68, 10, 15, 82, 17, 65, 16, 38, 12, 41, 11, 26, 88, 9, 52, 83, 25, 78, 84, 1, 2, 95, 51, 72, 85, 18, 69, 42, 77, 61, 91, 53, 37, 27, 24, 48, 23, 40, 58, 13, 70, 76, 56, 64, 81, 21, 86, 32, 66]\n",
"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]\n"
]
}
],
"source": [
"# Simplest Bubble sort\n",
"def bub_sort(lst):\n",
" for i in range(len(lst)):\n",
" for j in range(len(lst)-1):\n",
" lst[j], lst[j+1] = min(lst[j], lst[j+1]), max(lst[j], lst[j+1])\n",
"\n",
"# lst = [5,2,9,4,3,6,2,6,44,7,2,8]\n",
"lst = random.sample(range(100),100)\n",
"print(lst) \n",
"bub_sort(lst)\n",
"print(lst) "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2, 2, 2, 3, 4, 5, 6, 6, 7, 8, 9, 44]\n"
]
}
],
"source": [
"# Bubble sort\n",
"def bubble_sort(lst):\n",
" for i in range(len(lst)-1,-1,-1): \n",
" for j in range(0,i):\n",
" if lst[j] > lst[j+1]:\n",
" lst[j], lst[j+1]= lst[j+1], lst[j] # swap\n",
"\n",
"lst = [5,2,9,4,3,6,2,6,44,7,2,8]\n",
"bubble_sort(lst)\n",
"print(lst) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Merge sort "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def merge(A, B):\n",
" ''' Merge list A and list B. Lists must be sorted! '''\n",
" n, m = len(A), len(B)\n",
" C = [0]*(n+m)\n",
"\n",
" a,b,c = 0,0,0\n",
" while a Bucket sort "
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[40, 96, 1, 95, 32, 45, 57, 88, 64, 24, 52, 69, 81, 90, 48, 31, 74, 91, 9, 60, 76, 16, 62, 6, 17, 47, 20, 22, 19, 79]\n",
"[1, 6, 9, 16, 17, 19, 20, 22, 24, 31, 32, 40, 45, 47, 48, 52, 57, 60, 62, 64, 69, 74, 76, 79, 81, 88, 90, 91, 95, 96]\n"
]
}
],
"source": [
"def bucket_sort(lst):\n",
" min_val, max_val, n = min(lst), max(lst), len(lst)\n",
" input_range = max_val-min_val\n",
" buckets = [[] for i in range(n)] \n",
" sorted_lst = [0] * n\n",
" \n",
" for x in lst:\n",
" idx = min(int(n * (x-min_val) / input_range),n-1)\n",
" buckets[idx].append(x)\n",
" \n",
" for i in buckets:\n",
" selection_sort(i)\n",
" \n",
" idx = 0\n",
" for i in buckets:\n",
" for j in i:\n",
" sorted_lst[idx] = j\n",
" idx += 1\n",
" \n",
" return sorted_lst\n",
"\n",
"lst1 = random.sample(range(100),30)\n",
"print(lst1)\n",
"print(bucket_sort(lst1))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n = 1000000\n",
"mergesort:\n",
" 7.53015969999251\n",
"bucket sort:\n",
" 3.2453182000026572\n",
"n = 2000000\n",
"mergesort:\n",
" 16.262934500002302\n",
"bucket sort:\n",
" 7.0175750000053085\n",
"n = 4000000\n",
"mergesort:\n",
" 35.34944900000119\n",
"bucket sort:\n",
" 12.901595700008329\n"
]
}
],
"source": [
"# lst_sorted = [i for i in range(n)] \n",
"# random.shuffle(lst_sorted)\n",
"# lst_bucket = lst_sorted[:]\n",
"# Execution time comparison: merge sort vs. bucket sort\n",
"import timeit\n",
"import random\n",
"\n",
"repeat = 1\n",
"\n",
"for n in [10**6, 2*10**6, 4*10**6]:\n",
" print(\"n =\", n)\n",
" \n",
" lst_msort = [random.random() * n*10 \n",
" for _ in range(n)]\n",
" lst_bucket = lst_msort[:]\n",
" \n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" mergesort(lst_msort)\n",
" t1 = timeit.default_timer()\n",
" print(\"mergesort:\\n \", t1-t0)\n",
"\n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" bucket_sort(lst_bucket)\n",
" t1 = timeit.default_timer()\n",
" print(\"bucket sort:\\n \", t1-t0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Quick sort "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0,\n",
" 1,\n",
" 2,\n",
" 3,\n",
" 4,\n",
" 5,\n",
" 6,\n",
" 7,\n",
" 8,\n",
" 9,\n",
" 10,\n",
" 11,\n",
" 12,\n",
" 13,\n",
" 14,\n",
" 15,\n",
" 16,\n",
" 17,\n",
" 18,\n",
" 19,\n",
" 20,\n",
" 21,\n",
" 22,\n",
" 23,\n",
" 24,\n",
" 25,\n",
" 26,\n",
" 27,\n",
" 28,\n",
" 29,\n",
" 30,\n",
" 31,\n",
" 32,\n",
" 33,\n",
" 34,\n",
" 35,\n",
" 36,\n",
" 37,\n",
" 38,\n",
" 39,\n",
" 40,\n",
" 41,\n",
" 42,\n",
" 43,\n",
" 44,\n",
" 45,\n",
" 46,\n",
" 47,\n",
" 48,\n",
" 49,\n",
" 50,\n",
" 51,\n",
" 52,\n",
" 53,\n",
" 54,\n",
" 55,\n",
" 56,\n",
" 57,\n",
" 58,\n",
" 59,\n",
" 60,\n",
" 61,\n",
" 62,\n",
" 63,\n",
" 64,\n",
" 65,\n",
" 66,\n",
" 67,\n",
" 68,\n",
" 69,\n",
" 70,\n",
" 71,\n",
" 72,\n",
" 73,\n",
" 74,\n",
" 75,\n",
" 76,\n",
" 77,\n",
" 78,\n",
" 79,\n",
" 80,\n",
" 81,\n",
" 82,\n",
" 83,\n",
" 84,\n",
" 85,\n",
" 86,\n",
" 87,\n",
" 88,\n",
" 89,\n",
" 90,\n",
" 91,\n",
" 92,\n",
" 93,\n",
" 94,\n",
" 95,\n",
" 96,\n",
" 97,\n",
" 98,\n",
" 99]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def min_idx(lst,p,q):\n",
" m_idx = q\n",
" for i in range(p,q):\n",
" if lst[i] < lst[m_idx]:\n",
" m_idx = i\n",
" return m_idx\n",
"\n",
"def selection_sort(lst):\n",
" n = len(lst)\n",
" for i in range(n):\n",
" m_index = min_idx(lst,i,n-1) # ind of minimum\n",
" lst[i], lst[m_index] = lst[m_index], lst[i]\n",
"\n",
"lst = random.sample(range(100),100) \n",
"selection_sort(lst)\n",
"lst"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1, 2, 2, 3, 5, 5, 6, 6, 8, 8, 23]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random # package for generating (pseudo) random elements\n",
"\n",
"def quicksort(lst):\n",
" \"\"\" quick sort of lst \"\"\"\n",
" if len(lst) <= 1: \n",
" return lst\n",
" else:\n",
" #pivot = random.choice(lst) # select a random element from list\n",
" pivot = lst[0] #for a deterministic quicksort\n",
" \n",
" smaller = [elem for elem in lst if elem < pivot] \n",
" equal = [elem for elem in lst if elem == pivot] \n",
" greater = [elem for elem in lst if elem > pivot]\n",
" \n",
" #two recursive calls\n",
" return quicksort(smaller) + equal + quicksort(greater)\n",
"\n",
"\n",
"quicksort([1,6,8,3,6,8,2,5,23,5,2]) "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n= 200 0.04594490001909435\n",
"n= 400 0.0846529999980703\n",
"n= 800 0.19342090000282042\n"
]
}
],
"source": [
"def ordlist(n):\n",
" \"\"\" generates an ordered list [0,1,...,(n-2),(n-1)] \"\"\"\n",
" return [i for i in range(0,n)]\n",
"\n",
"def randlist(n):\n",
" \"\"\" generates a list of n random elements from [0,...,n**2] \"\"\"\n",
" return [random.randint(0,n**2) for i in range(0,n)]\n",
"\n",
"\n",
"import timeit\n",
"\n",
"for n in [200, 400, 800]:\n",
" print(\"n=\", n, end=\" \")\n",
" #lst = ordlist(n)\n",
" lst = randlist(n)\n",
" t0 = timeit.default_timer()\n",
" for i in range(100):\n",
" quicksort(lst) #sort is not inplace, so lst remains unchanged !\n",
" t1 = timeit.default_timer()\n",
" print(t1-t0)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" sorted "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"n= 1000000\n",
"sorted: 0.362931200012099\n",
"bucket sort: 3.2321027999860235\n",
"n= 2000000\n",
"sorted: 0.8146689000132028\n",
"bucket sort: 7.468129999993835\n",
"n= 4000000\n",
"sorted: 1.695511500001885\n",
"bucket sort: 13.926820900000166\n"
]
}
],
"source": [
"# Execution time comparison: sorted vs. bucket sort\n",
"import timeit\n",
"import random\n",
"\n",
"repeat = 1\n",
"\n",
"for n in [10**6, 2*10**6, 4*10**6]:\n",
" print(\"n=\", n)\n",
" \n",
" lst_sorted = [i for i in range(n)] \n",
" random.shuffle(lst_sorted)\n",
" lst_bucket = lst_sorted[:]\n",
" \n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" sorted(lst_sorted)\n",
" t1 = timeit.default_timer()\n",
" print(\"sorted:\", t1-t0)\n",
"\n",
" t0 = timeit.default_timer()\n",
" for i in range(repeat):\n",
" bucket_sort(lst_bucket)\n",
" t1 = timeit.default_timer()\n",
" print(\"bucket sort:\", t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def merge_by_python_sort(A,B):\n",
" return sorted(A+B)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"merge_by_sort\n",
"n= 1000 0.2080405999731738\n",
"n= 2000 0.7743075999896973\n",
"n= 4000 3.1058253000082914\n",
"merge\n",
"n= 1000 0.0005711999838240445\n",
"n= 2000 0.001320600014878437\n",
"n= 4000 0.00225349998800084\n",
"merge_by_python_sort\n",
"n= 1000 0.00012849998893216252\n",
"n= 2000 0.0001227000029757619\n",
"n= 4000 0.00018619999173097312\n"
]
}
],
"source": [
"# Merge - actual running times\n",
"import timeit\n",
"import random\n",
"for merge_func in [merge_by_sort, merge, merge_by_python_sort]:\n",
" print(merge_func.__name__)\n",
" for n in [1000,2000,4000]:\n",
" lst1 = [random.choice(range(10000)) for i in range(n)]\n",
" lst1 = sorted(lst1)\n",
" lst2 = [random.choice(range(10000)) for i in range(n)]\n",
" lst2 = sorted(lst2)\n",
"\n",
" t0 = timeit.default_timer()\n",
" merge_func(lst1,lst2)\n",
" t1 = timeit.default_timer()\n",
"\n",
" print(\"n=\", n, t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"def merge_by_python_sort(A,B):\n",
" return sorted(A+B)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"merge_by_sort\n",
"n= 1000 0.19767950000823475\n",
"n= 2000 0.7480166000023019\n",
"n= 4000 3.4631595999817364\n",
"merge_by_python_sort\n",
"n= 1000 4.9700000090524554e-05\n",
"n= 2000 8.620001608505845e-05\n",
"n= 4000 0.00020249999943189323\n",
"merge\n",
"n= 1000 0.0006977999873925\n",
"n= 2000 0.0012566999939735979\n",
"n= 4000 0.002630099974339828\n"
]
}
],
"source": [
"# Merge - actual running times\n",
"import timeit\n",
"import random\n",
"for merge_func in [merge_by_sort, merge_by_python_sort, merge]:\n",
" print(merge_func.__name__)\n",
" for n in [1000,2000,4000]:\n",
" lst1 = [random.choice(range(10000)) for i in range(n)]\n",
" lst1 = sorted(lst1)\n",
" lst2 = [random.choice(range(10000)) for i in range(n)]\n",
" lst2 = sorted(lst2)\n",
"\n",
" t0 = timeit.default_timer()\n",
" merge_func(lst1,lst2)\n",
" t1 = timeit.default_timer()\n",
"\n",
" print(\"n=\", n, t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3 way race\n",
"quicksort\n",
"n= 200 0.06781759997829795\n",
"n= 400 0.09669410000788048\n",
"n= 800 0.23425660000066273\n",
"mergesort\n",
"n= 200 0.06458210002165288\n",
"n= 400 0.13351179999881424\n",
"n= 800 0.29765980000956915\n",
"sorted\n",
"n= 200 0.0007508999842684716\n",
"n= 400 0.001619000016944483\n",
"n= 800 0.004821799986530095\n"
]
}
],
"source": [
"import timeit\n",
"\n",
"print(\"3 way race\")\n",
"for func in [quicksort, mergesort, sorted]:\n",
" print(func.__name__)\n",
"\n",
" for n in [200, 400, 800]:\n",
" print(\"n=\", n, end=\" \")\n",
" rlst = randlist(n)\n",
"\n",
" t0 = timeit.default_timer()\n",
" for i in range(100):\n",
" func(rlst) #sort is not inplace, so lst remains unchanged !\n",
" t1 = timeit.default_timer()\n",
" print(t1-t0)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[3, 4, 5, 6]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = [5,3,4,6]\n",
"sorted(a)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[6, 5, 4, 3]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted(a,reverse=True)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Assaf', 'Eliran', 'Nir', 'Sagy']"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = ['Assaf','Nir','Sagy','Eliran']\n",
"sorted(a)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Nir', 'Sagy', 'Assaf', 'Eliran']"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sorting by length\n",
"sorted(a,key=len)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Eliran\n",
"f\n"
]
},
{
"data": {
"text/plain": [
"['Assaf', 'Eliran', 'Nir', 'Sagy']"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sort by last character\n",
"def last(s): return s[-1]\n",
"print(last)\n",
"print(last(a))\n",
"print(last(a[0]))\n",
"sorted(a,key=last)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Uri', 'Maya', 'Assaf', 'Gilad', 'Shunit']"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sorting lists by two criteria\n",
"def key2compare(s):\n",
" return(len(s),s)\n",
"\n",
"sorted(['Assaf','Shunit','Gilad','Uri','Maya'],key=key2compare)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}