Revision: 52975
Updated Code
at November 12, 2011 07:29 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#     [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#     [["madonna", ["img3541.jpg", "img1234.jpg"]],
#      ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#     :mtv: and not :madonna:


def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated
    with the keys in description.

    Tokens are the whitespace-separated words of description, lowercased.
    ial is modified in place: a token that is already a key gets image
    appended to its list (unless it is already there); a new token is
    appended to ial as [token, [image]]. Existing entries keep their order.
    Returns None.
    '''

    handled = set()
    for token in description.lower().split():
        # A word repeated inside one description must not add image twice.
        if token in handled:
            continue
        handled.add(token)
        for entry in ial:
            if entry[0] == token:
                if image not in entry[1]:
                    entry[1].append(image)
                break
        else:
            # No entry has this key yet (the loop finished without break).
            ial.append([token, [image]])


def _find_tag_end(text, start):
    '''Return the index just past the '>' that closes the tag starting at
    text[start], ignoring any '>' inside a quoted attribute value.
    Return len(text) if the tag is never closed.
    '''

    quote = None
    index = start
    while index < len(text):
        char = text[index]
        if quote is not None:
            if char == quote:
                quote = None
        elif char == '"' or char == "'":
            quote = char
        elif char == '>':
            return index + 1
        index += 1
    return len(text)


def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.

    Every <img ...> tag that has both a non-empty src and an alt attribute
    contributes: the words of alt become keys and src is the image name
    (kept exactly as written, e.g. "gaga.JPG"). Afterwards every key whose
    image list is longer than list_threshold is dropped. ial is modified in
    place so the caller sees the result. Returns None.
    '''

    # Search on a lowercased copy so <IMG ...> is found too. Only use it
    # when lowercasing kept the length, so indices still line up.
    lowered = webpage.lower()
    if len(lowered) != len(webpage):
        lowered = webpage

    start = lowered.find('<img')
    while start != -1:
        end = _find_tag_end(webpage, start)
        # Reject longer tag names that merely begin with "img".
        following = webpage[start + 4:start + 5]
        if following == '' or following.isspace() or following in '/>':
            tag = webpage[start:end]
            src = find_attribute_value(tag, 'src')
            alt = find_attribute_value(tag, 'alt')
            if src and alt:
                record_associations(alt, src, ial)
        start = lowered.find('<img', end)

    # Slice assignment updates the caller's list instead of rebinding the
    # local name.
    ial[:] = clean_up(ial, list_threshold)


def _attribute_text(raw):
    '''Return the attribute value at the start of str raw, which is the text
    that follows the '=' of an attribute.

    A quoted value ends at the matching quote (empty string if the quote is
    never closed); an unquoted value ends at whitespace or '>'.
    '''

    if raw[:1] in ('"', "'"):
        closing = raw.find(raw[0], 1)
        if closing == -1:
            return ''
        return raw[1:closing]
    end = 0
    while end < len(raw) and not raw[end].isspace() and raw[end] != '>':
        end += 1
    return raw[:end]


def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and must be a whole
    name followed by '=' (so "src" does not match inside "data-src"). A
    trailing '=' on att is tolerated, so 'src' and 'src=' are equivalent.
    '''

    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None

    # Indices found in the lowercased copy are only valid for html_tag when
    # lowercasing did not change the length.
    lowered = html_tag.lower()
    if len(lowered) != len(html_tag):
        lowered = html_tag

    start = lowered.find(name)
    while start != -1:
        before = html_tag[start - 1:start] if start > 0 else ''
        standalone = not (before.isalnum() or before in ('-', '_', ':'))
        rest = html_tag[start + len(name):].lstrip()
        if standalone and rest.startswith('='):
            return _attribute_text(rest[1:].lstrip())
        start = lowered.find(name, start + 1)
    return None


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association
    list. Matching is done in a case insensitive manner.

    Each :term: keeps only the images associated with that key in ial;
    "not :term:" removes them instead. A term that is not a key matches no
    images. The result keeps the order of images; images itself is not
    modified. A term with no closing colon is ignored.
    '''
    # NOTE: the parameter name shadows the builtin filter(); it is kept
    # because it is part of the function's interface.

    lookup = {}
    for entry in ial:
        lookup.setdefault(entry[0].lower(), set()).update(entry[1])

    # Splitting on ':' alternates between text outside the colons (even
    # positions: "and" / "not") and the terms themselves (odd positions).
    pieces = filter.split(':')
    remaining = list(images)
    for position in range(1, len(pieces) - 1, 2):
        term = pieces[position].strip().lower()
        negated = 'not' in pieces[position - 1].lower().split()
        matched = lookup.get(term, set())
        if negated:
            remaining = [img for img in remaining if img not in matched]
        else:
            remaining = [img for img in remaining if img in matched]
    return remaining


def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first encountered in ial.
    '''

    seen = set()
    result = []
    for entry in ial:
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                result.append(image)
    return result


def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the
    outermost quotes don't match, the empty string is returned.
    '''

    opening = first(s.find("'"), s.find('"'))
    closing = max(s.rfind("'"), s.rfind('"'))
    # No quote at all, a single lone quote, or two different quote kinds.
    if opening == -1 or opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]


def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and
    b are >= -1. If exactly one is -1, return the other. If both are -1,
    return -1.
    '''

    if a == -1:
        return b
    if b == -1:
        return a
    return min(a, b)


def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    The copy is shallow: the returned list is new, but the [key, values]
    entries are the same objects as in alist.
    '''

    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52974
Updated Code
at November 11, 2011 02:49 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#     [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#     [["madonna", ["img3541.jpg", "img1234.jpg"]],
#      ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#     :mtv: and not :madonna:

import urllib


def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated
    with the keys in description.

    Tokens are the whitespace-separated words of description, lowercased.
    ial is modified in place: a token that is already a key gets image
    appended to its list (unless it is already there); a new token is
    appended to ial as [token, [image]]. Existing entries keep their order.
    Returns None.
    '''

    handled = set()
    for token in description.lower().split():
        # A word repeated inside one description must not add image twice.
        if token in handled:
            continue
        handled.add(token)
        for entry in ial:
            if entry[0] == token:
                if image not in entry[1]:
                    entry[1].append(image)
                break
        else:
            # No entry has this key yet (the loop finished without break).
            ial.append([token, [image]])


def _find_tag_end(text, start):
    '''Return the index just past the '>' that closes the tag starting at
    text[start], ignoring any '>' inside a quoted attribute value.
    Return len(text) if the tag is never closed.
    '''

    quote = None
    index = start
    while index < len(text):
        char = text[index]
        if quote is not None:
            if char == quote:
                quote = None
        elif char == '"' or char == "'":
            quote = char
        elif char == '>':
            return index + 1
        index += 1
    return len(text)


def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.

    Every <img ...> tag that has both a non-empty src and an alt attribute
    contributes: the words of alt become keys and src is the image name
    (kept exactly as written, e.g. "gaga.JPG"). Afterwards every key whose
    image list is longer than list_threshold is dropped. ial is modified in
    place so the caller sees the result. Returns None.
    '''

    # Search on a lowercased copy so <IMG ...> is found too. Only use it
    # when lowercasing kept the length, so indices still line up.
    lowered = webpage.lower()
    if len(lowered) != len(webpage):
        lowered = webpage

    start = lowered.find('<img')
    while start != -1:
        end = _find_tag_end(webpage, start)
        # Reject longer tag names that merely begin with "img".
        following = webpage[start + 4:start + 5]
        if following == '' or following.isspace() or following in '/>':
            tag = webpage[start:end]
            src = find_attribute_value(tag, 'src')
            alt = find_attribute_value(tag, 'alt')
            if src and alt:
                record_associations(alt, src, ial)
        start = lowered.find('<img', end)

    # Slice assignment updates the caller's list instead of rebinding the
    # local name.
    ial[:] = clean_up(ial, list_threshold)


def _attribute_text(raw):
    '''Return the attribute value at the start of str raw, which is the text
    that follows the '=' of an attribute.

    A quoted value ends at the matching quote (empty string if the quote is
    never closed); an unquoted value ends at whitespace or '>'.
    '''

    if raw[:1] in ('"', "'"):
        closing = raw.find(raw[0], 1)
        if closing == -1:
            return ''
        return raw[1:closing]
    end = 0
    while end < len(raw) and not raw[end].isspace() and raw[end] != '>':
        end += 1
    return raw[:end]


def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and must be a whole
    name followed by '=' (so "src" does not match inside "data-src"). A
    trailing '=' on att is tolerated, so 'src' and 'src=' are equivalent.
    '''

    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None

    # Indices found in the lowercased copy are only valid for html_tag when
    # lowercasing did not change the length.
    lowered = html_tag.lower()
    if len(lowered) != len(html_tag):
        lowered = html_tag

    start = lowered.find(name)
    while start != -1:
        before = html_tag[start - 1:start] if start > 0 else ''
        standalone = not (before.isalnum() or before in ('-', '_', ':'))
        rest = html_tag[start + len(name):].lstrip()
        if standalone and rest.startswith('='):
            return _attribute_text(rest[1:].lstrip())
        start = lowered.find(name, start + 1)
    return None


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association
    list. Matching is done in a case insensitive manner.

    Each :term: keeps only the images associated with that key in ial;
    "not :term:" removes them instead. A term that is not a key matches no
    images. The result keeps the order of images; images itself is not
    modified. A term with no closing colon is ignored.
    '''
    # NOTE: the parameter name shadows the builtin filter(); it is kept
    # because it is part of the function's interface.

    lookup = {}
    for entry in ial:
        lookup.setdefault(entry[0].lower(), set()).update(entry[1])

    # Splitting on ':' alternates between text outside the colons (even
    # positions: "and" / "not") and the terms themselves (odd positions).
    pieces = filter.split(':')
    remaining = list(images)
    for position in range(1, len(pieces) - 1, 2):
        term = pieces[position].strip().lower()
        negated = 'not' in pieces[position - 1].lower().split()
        matched = lookup.get(term, set())
        if negated:
            remaining = [img for img in remaining if img not in matched]
        else:
            remaining = [img for img in remaining if img in matched]
    return remaining


def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first encountered in ial.
    '''

    seen = set()
    result = []
    for entry in ial:
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                result.append(image)
    return result


def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the
    outermost quotes don't match, the empty string is returned.
    '''

    opening = first(s.find("'"), s.find('"'))
    closing = max(s.rfind("'"), s.rfind('"'))
    # No quote at all, a single lone quote, or two different quote kinds.
    if opening == -1 or opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]


def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and
    b are >= -1. If exactly one is -1, return the other. If both are -1,
    return -1.
    '''

    if a == -1:
        return b
    if b == -1:
        return a
    return min(a, b)


def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    The copy is shallow: the returned list is new, but the [key, values]
    entries are the same objects as in alist.
    '''

    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52973
Updated Code
at November 8, 2011 14:17 by Cano0617
Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#     [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#     [["madonna", ["img3541.jpg", "img1234.jpg"]],
#      ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#     :mtv: and not :madonna:


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association
    list. Matching is done in a case insensitive manner.
    '''

    # Not implemented in this revision: the body is a placeholder, so the
    # function currently returns None.
    pass


def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first encountered in ial.
    '''

    seen = set()
    result = []
    for entry in ial:
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                result.append(image)
    return result


def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated
    with the keys in description.
    '''

    # Note: Remember that all tokens in the ial should be lowercase.
    # Not implemented in this revision: the body is a placeholder.
    pass


def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the
    outermost quotes don't match, the empty string is returned.
    '''

    opening = first(s.find("'"), s.find('"'))
    closing = max(s.rfind("'"), s.rfind('"'))
    # No quote at all, a single lone quote, or two different quote kinds.
    if opening == -1 or opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]


def _attribute_text(raw):
    '''Return the attribute value at the start of str raw, which is the text
    that follows the '=' of an attribute.

    A quoted value ends at the matching quote (empty string if the quote is
    never closed); an unquoted value ends at whitespace or '>'.
    '''

    if raw[:1] in ('"', "'"):
        closing = raw.find(raw[0], 1)
        if closing == -1:
            return ''
        return raw[1:closing]
    end = 0
    while end < len(raw) and not raw[end].isspace() and raw[end] != '>':
        end += 1
    return raw[:end]


def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.

    The attribute name is matched case-insensitively and must be a whole
    name followed by '=' (so "src" does not match inside "data-src"). A
    trailing '=' on att is tolerated, so 'src' and 'src=' are equivalent.
    '''

    name = att.strip().rstrip('=').strip().lower()
    if not name:
        return None

    # Indices found in the lowercased copy are only valid for html_tag when
    # lowercasing did not change the length.
    lowered = html_tag.lower()
    if len(lowered) != len(html_tag):
        lowered = html_tag

    start = lowered.find(name)
    while start != -1:
        before = html_tag[start - 1:start] if start > 0 else ''
        standalone = not (before.isalnum() or before in ('-', '_', ':'))
        rest = html_tag[start + len(name):].lstrip()
        if standalone and rest.startswith('='):
            return _attribute_text(rest[1:].lstrip())
        start = lowered.find(name, start + 1)
    return None


def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and
    b are >= -1. If exactly one is -1, return the other. If both are -1,
    return -1.
    '''

    if a == -1:
        return b
    if b == -1:
        return a
    return min(a, b)


def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.
    '''

    # Not implemented in this revision: the body is a placeholder.
    pass


def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.

    The copy is shallow: the returned list is new, but the [key, values]
    entries are the same objects as in alist.
    '''

    return [entry for entry in alist if len(entry[1]) <= list_threshold]
Revision: 52972
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 8, 2011 12:41 by Cano0617
Initial Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#     [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#     [["madonna", ["img3541.jpg", "img1234.jpg"]],
#      ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#     :mtv: and not :madonna:


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass
    the description filter (a str). ial is the related image association
    list. Matching is done in a case insensitive manner.
    '''

    # Not implemented in this revision: the body is a placeholder, so the
    # function currently returns None.
    pass


def all_images(ial):
    '''Return a list of all the images in image association list ial.
    Duplicates are excluded.

    Images appear in the order they are first encountered in ial.
    '''

    seen = set()
    result = []
    for entry in ial:
        for image in entry[1]:
            if image not in seen:
                seen.add(image)
                result.append(image)
    return result


def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str
    description. image (a str) is the name of the image to be associated
    with the keys in description.
    '''

    # Note: Remember that all tokens in the ial should be lowercase.
    # Not implemented in this revision: the body is a placeholder.
    pass


def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of
    matching single or double quotes. If there are no quotes or the
    outermost quotes don't match, the empty string is returned.
    '''

    opening = first(s.find("'"), s.find('"'))
    closing = max(s.rfind("'"), s.rfind('"'))
    # No quote at all, a single lone quote, or two different quote kinds.
    if opening == -1 or opening == closing or s[opening] != s[closing]:
        return ''
    return s[opening + 1:closing]


def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.
    Return None if att doesn't occur in html_tag.
    '''

    # Not implemented in this revision: the body is a placeholder, so the
    # function currently returns None for every input.
    pass


def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and
    b are >= -1. If exactly one is -1, return the other. If both are -1,
    return -1.
    '''

    if a == -1:
        return b
    if b == -1:
        return a
    return min(a, b)


def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the
    text in webpage (a str). int list_threshold is the maximum length of any
    list in ial.
    '''

    # Not implemented in this revision: the body is a placeholder.
    pass


def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any
    key-value list in alist longer than list_threshold is not included.
    '''

    # Not implemented in this revision: the body is a placeholder, so the
    # function currently returns None.
    pass
Initial URL
Initial Description
Initial Title
Com sci Assignment 2
Initial Tags
Initial Language
Python