Return to Snippet

Revision: 52975
at November 12, 2011 07:29 by Cano0617


Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#    [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#    [["madonna", ["img3541.jpg", "img1234.jpg"]], 
#     ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#    :mtv: and not :madonna:


def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str 
    description. image (a str) is the name of the image to be associated with
    the keys in description. 
    '''
#     Note: Remember that all tokens in the ial should be lowercase.
#     Passed as lower case? or made lower case?
    temp= description.lower().split()
    
    for item in temp:
        ial.append([item,[]])
        #appends key with empty list ready to store images
    #code below is for removing duplicates
    current=None
    last=None
    ial.sort()
    #smaller lists such as new created duplicates are move ahead of the original which contains
    for x in range(len(ial)-1):
        current=ial[x][0]
        last=ial[x-1][0]
        if current==last and len(ial)>1 and x!=0:
            ial.pop(x-1)      
        else:
            ial.pop(x)

    for item in temp:
        for descript in ial:
            if (item in descript)==True:
                descript[1].append(image)
          

def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the 
    text in webpage (a str). int list_threshold is the maximum length of any 
    list in ial.
    '''
#uses record associations
# find_attribute_value(webpage,'src')
    'ask for help in creating a list structured like the ial list '
    src_temp=[]
    src= find_attribute_value(webpage, 'src=')#image name
    alt_temp=[]
    alt=find_attribute_value(webpage, 'alt=')#description
    src_loc=0
    alt_loc=0

    while((src!=None and alt!=None)):      
        src_temp.append(src.lower())    
        src_loc = webpage.find(src)#location of image
        src = find_attribute_value(webpage[src_loc:],'src')#searches past last image
        alt_temp.append(alt.lower())
        alt_loc=webpage.find(alt)
        alt= find_attribute_value(webpage[alt_loc:],'alt')
        
    for x in range(len(alt_temp)):
        record_associations(alt_temp[x],src_temp[x],ial)
        
    ial=clean_up(ial, list_threshold)
        
#pass this on to process filter description




def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.  
    Return None if att doesn't occur in html_tag.
    '''
    
    if not(att in html_tag):
        return None
    else:
        start_index = html_tag.find(att)
        end_index= html_tag.find('=',start_index+len(att)+1)
        return find_guts(html_tag[start_index:end_index])
    
#    for char in html_tag:
        
    

def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass 
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.
    '''
    restrict=filter.split()
    temp=[]
    for item in restrict:
        if item.count(":")>1:
            key=item.split(":")
            key=item.strip(':')
            temp.append(key.lower())
        else:
            temp.append(item.lower())
    filter=temp    
    master_set=set(images)
    working_set= master_set
    all_key=[]
    all_set=[]
    intersect_only=None
    # if True then find intersection
    # if False then find intersection of not
    has_set=False    
    for item in ial:
        all_set.append(set(item[1]))
        all_key.append(item[0])
    
    
    for int in range(len(temp)):
        if temp[int]=='and' or temp[int]=='not':
            has_set=False
        elif temp[int-1]=='not':
            intersect_only=False
            has_set=True
        elif all_key.index(temp[int]) :
            list.i
        elif int==0:
            intersect_only=True
            has_set=True
        elif temp[int-1]=='and':
            intersect_only=True
            has_set=True
        else:
            intersect_only=None
            has_set=False
            
        if intersect_only==False and has_set==True:
            # find current key in a list of only keys
            find_key=all_key.index(temp[int])
            # find set that matches the key
            find_set=all_set[find_key]
            working_set=working_set.difference(find_set)
            
        elif intersect_only==True and has_set==True:
            #same idea as above
            find_key=all_key.index(temp[int])
            find_set=all_set[find_key]
            working_set=working_set.intersection(find_set)

        
    return list(working_set)
#
#        index=filter.find(temp[int])
#        remove=filter.find('not',0,index)!=-1
#        if remove==True:
#            pass

   
def all_images(ial):
    '''Return a list of all the images in image association list ial. 
    Duplicates are excluded.
    '''
    x = 0
    temp = []    
    for bob in range(len(ial)):
        for int in range(len(ial[bob][1])):
            temp.append(ial[bob][1][int])               
  
    #removes duplicates
    for item in temp :
        if temp.count(item) > 1 :
            temp.remove(item)
            
    return temp
            
    

    

       
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of 
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.
    '''         
    firstsq = s.find("'")
    #location of first single quote
    lastsq =  s.rfind("'")
    #location of last single quote
    firstdq = s.find('"')
    #location of first double quote
    lastdq = s.rfind('"')
    #location of last double quote
    single = False
    #tracks whether or not there is a complete single quote ('abc')
    double = False
    #tracks whether or not there is a complete double quote ("abc")

    if firstsq == lastsq : 
        #if they're the same then it's either an incomplete quote or no quotes at all
        single = False
    elif firstsq != lastsq :
        single = True
    
    if firstdq == lastdq :
        #same idea as the the single quote check
        double = False
    elif firstdq != lastdq :
        double = True
        
    if double == single:
        #if both single and double quotes are present
        if firstsq < firstdq: 
            #then find which quote occurs first
            #in this case single quotes
            return s[firstsq + 1:lastsq]
        else: 
            #other case is to return by double quote
            return s[firstdq + 1:lastdq]
  
              
    elif single == True:
        #if only single is true then return single
        return s[firstsq + 1:lastsq]
    elif double == True:    
        #if only double is true then return double
        return s[firstdq + 1:lastdq]
        
    
    
    

    
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    
    if a == -1 & b == -1 :
        return -1
    if a == -1:
        return b
    if b == -1 :
        return a
    if a < b :
        return a
    if b < a :
        return b 

    

    
    
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any 
    key-value list in alist longer than list_threshold is not included.
    '''
    temp = []
    for x in range(len(alist)):
        if len(alist[x][1]) <= list_threshold :
            temp.append(alist[x])
    return temp

Revision: 52974
at November 11, 2011 02:49 by Cano0617


Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#    [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#    [["madonna", ["img3541.jpg", "img1234.jpg"]], 
#     ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#    :mtv: and not :madonna:
import urllib

def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str 
    description. image (a str) is the name of the image to be associated with
    the keys in description. 
    '''
#     Note: Remember that all tokens in the ial should be lowercase.
#     Passed as lower case? or made lower case?
    temp= description.lower().split()
    
    for item in temp:
        ial.append([item,[]])
        #appends key with empty list ready to store images
    #code below is for removing duplicates
    current=None
    last=None
    ial.sort()
    #smaller lists such as new created duplicates are move ahead of the original which contains
    for x in range(len(ial)-1):
        current=ial[x][0]
        last=ial[x-1][0]
        if current==last and len(ial)>1:
            ial.pop(x-1)      

    for item in temp:
        for descript in ial:
            if (item in descript)==True:
                descript[1].append(image)
          

def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the 
    text in webpage (a str). int list_threshold is the maximum length of any 
    list in ial.
    '''
#uses record associations
# find_attribute_value(webpage,'src')
    'ask for help in creating a list structured like the ial list '
    src_temp=[]
    src= find_attribute_value(webpage, 'src')#image name
    alt_temp=[]
    alt=find_attribute_value(webpage, 'alt')#description
    src_loc=0
    alt_loc=0

    while((src!=None and alt!=None)):      
        src_temp.append(src.lower())    
        src_loc = webpage.find(src)#location of image
        src = find_attribute_value(webpage[src_loc:],'src')#searches past last image
        alt_temp.append(alt.lower())
        alt_loc=webpage.find(alt)
        alt= find_attribute_value(webpage[alt_loc:],'alt')
        
    for x in range(len(alt_temp)):
        record_associations(alt_temp[x],src_temp[x],ial)
        
    ial=clean_up(ial, list_threshold)
        
#pass this on to process filter description




def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.  
    Return None if att doesn't occur in html_tag.
    '''
    
    if not(att in html_tag):
        return None
    else:
        start_index = html_tag.find(att)
        end_index= html_tag.find('=',start_index+len(att)+1)
        return find_guts(html_tag[start_index:end_index])
    
#    for char in html_tag:
        
    

def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass 
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.
    '''
    test=filter.split()
    
    for item in test:
        if item.count(":")>2:
            key=item.split(":")
            key=item.strip(':')
            
    
    print filter
    print images
    print ial

    pass
   
   
def all_images(ial):
    '''Return a list of all the images in image association list ial. 
    Duplicates are excluded.
    '''
    x = 0
    temp = []    
    for bob in range(len(ial)):
        for int in range(len(ial[x][1])):
            temp.append(ial[bob][1][int])               
  
    #removes duplicates
    for item in temp :
        if temp.count(item) > 1 :
            temp.remove(item)
            
    return temp
            
    

    

       
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of 
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.
    '''         
    firstsq = s.find("'")
    #location of first single quote
    lastsq =  s.rfind("'")
    #location of last single quote
    firstdq = s.find('"')
    #location of first double quote
    lastdq = s.rfind('"')
    #location of last double quote
    single = False
    #tracks whether or not there is a complete single quote ('abc')
    double = False
    #tracks whether or not there is a complete double quote ("abc")

    if firstsq == lastsq : 
        #if they're the same then it's either an incomplete quote or no quotes at all
        single = False
    elif firstsq != lastsq :
        single = True
    
    if firstdq == lastdq :
        #same idea as the the single quote check
        double = False
    elif firstdq != lastdq :
        double = True
        
    if double == single:
        #if both single and double quotes are present
        if firstsq < firstdq: 
            #then find which quote occurs first
            #in this case single quotes
            return s[firstsq + 1:lastsq]
        else: 
            #other case is to return by double quote
            return s[firstdq + 1:lastdq]
  
              
    elif single == True:
        #if only single is true then return single
        return s[firstsq + 1:lastsq]
    elif double == True:    
        #if only double is true then return double
        return s[firstdq + 1:lastdq]
        
    
    
    

    
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    
    if a == -1 & b == -1 :
        return -1
    if a == -1:
        return b
    if b == -1 :
        return a
    if a < b :
        return a
    if b < a :
        return b 

    

    
    
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any 
    key-value list in alist longer than list_threshold is not included.
    '''
    temp = []
    for x in range(len(alist)):
        if len(alist[x][1]) <= list_threshold :
            temp.append(alist[x])
    return temp

Revision: 52973
at November 8, 2011 14:17 by Cano0617


Updated Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#    [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#    [["madonna", ["img3541.jpg", "img1234.jpg"]], 
#     ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#    :mtv: and not :madonna:


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass 
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.
    '''
    

    pass
   
   
def all_images(ial):
    '''Return a list of all the images in image association list ial. 
    Duplicates are excluded.
    '''
    x=0
    temp=[]    
    for bob in range(len(ial)):
        for int in range(len(ial[x][1])):
            temp.append(ial[bob][1][int])   
            
    #removes duplicates
    for item in temp :
        if temp.count(item)>1 :
            temp.remove(item)
            
    return temp
            
    
def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str 
    description. image (a str) is the name of the image to be associated with
    the keys in description. 
    '''

    # Note: Remember that all tokens in the ial should be lowercase.
     
    pass        

       
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of 
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.
    '''         
    firstsq=s.find("'")
    #location of first single quote
    lastsq=s.rfind("'")
    #location of last single quote
    firstdq=s.find('"')
    #location of first double quote
    lastdq=s.rfind('"')
    #location of last double quote
    single=False
    #tracks whether or not there is a complete single quote ('abc')
    double=False
    #tracks whether or not there is a complete double quote ("abc")
    multi_single=False
    multi_double=False
    if s.count("'")>2:
        multi_single=True
    if s.count('"')>2:
        multi_double=True

    if firstsq==lastsq : 
        #if they're the same then it's either an incomplete quote or no quotes at all
        single=False
    elif firstsq!=lastsq :
        single=True
    
    if firstdq==lastdq :
        #same idea as the the single quote check
        double=False
    elif firstdq!=lastdq :
        double=True
        
    if double==single:
        #if both single and double quotes are present
        if firstsq<firstdq:
            #then find which quote occurs first
            #in this case single quotes
            return s[firstsq+1:lastsq]
        else:
            #other case is to return by double quote
            return s[firstdq+1:lastdq]
    elif multi_single==True:
        while(multi_single):
            s[firstsq+1]
    elif single==True:
        #if only single is true then return single
        return s[firstsq+1:lastsq]
    elif double==True:
        #if only double is true then return double
        return s[firstdq+1:lastdq]
        
    
    
    
def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.  
    Return None if att doesn't occur in html_tag.
    '''
    print html_tag
    print att
    if not(att in html_tag):
        return None
    print find_guts(html_tag)
    
    for char in html_tag:
        
    
    pass
    
    
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a==-1 & b==-1 :
        return -1
    if a== -1:
        return b
    if b== -1 :
        return a
    if a<b :
        return a
    if b<a :
        return b 

    
def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the 
    text in webpage (a str). int list_threshold is the maximum length of any 
    list in ial.
    '''
      
    pass
    
    
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any 
    key-value list in alist longer than list_threshold is not included.
    '''
    temp=[]
    for x in range(len(alist)):
        if len(alist[x][1])<=list_threshold :
            temp.append(alist[x])
    return temp

Revision: 52972
at November 8, 2011 12:41 by Cano0617


Initial Code
# No imports are allowed without the permission of your instructor.

# Definition: An "association list" is a list of lists, such as
#    [[3, ["hello", 27.4, True]], ["drama", [13, "comedy", "goodbye", 1]]]
# Each sublist has two elements: the first is called a "key" and is a value of
# any type, and the second is a list of values (of any type) that are
# associated with that key. No key occurs more than once in an association
# list.

# An "image association list" is an association list in which each key is a
# string and its associated list contains the names of image files that
# have that string in their img tag.
# Example:
#    [["madonna", ["img3541.jpg", "img1234.jpg"]], 
#     ["mtv", ["img2999.jpg", "img1234.jpg", "gaga.JPG", "gaga22.JPG"]]]

# Definition: A "filter description" is a string made up of one or more terms
# separated by "and". Each term is a sequence of characters surrounded by
# colons, with optionally the word "not" before it. Example:
#    :mtv: and not :madonna:


def process_filter_description(filter, images, ial):
    '''Return a new list containing only items from list images that pass 
    the description filter (a str). ial is the related image association list.
    Matching is done in a case insensitive manner.
    '''
    

    pass
   
   
def all_images(ial):
    '''Return a list of all the images in image association list ial. 
    Duplicates are excluded.
    '''
    x=0
    temp=[]
    
    for bob in range(len(ial)):
        for int in range(len(ial[x][1])):
            temp.append(ial[bob][1][int])   
            
    #removes duplicates
    for item in temp :
        if temp.count(item)>1 :
            temp.remove(item)
            
    return temp
            
    
def record_associations(description, image, ial):
    '''Update the image association list ial with the tokens from str 
    description. image (a str) is the name of the image to be associated with
    the keys in description. 
    '''

    # Note: Remember that all tokens in the ial should be lowercase.
     
    pass        

       
def find_guts(s):
    '''Return the characters in str s contained within the outermost pair of 
    matching single or double quotes. If there are no quotes or the outermost
    quotes don't match, the empty string is returned.
    '''         
    firstsq=s.find("'")
    #location of first single quote
    lastsq=s.rfind("'")
    #location of last single quote
    firstdq=s.find('"')
    #location of first double quote
    lastdq=s.rfind('"')
    #location of last double quote
    single=False
    #tracks whether or not there is a complete single quote ('abc')
    double=False
    #tracks whether or not there is a complete double quote ("abc")

    if firstsq==lastsq : 
        #if they're the same then it's either an incomplete quote or no quotes at all
        single=False
    elif firstsq!=lastsq :
        single=True
    
    if firstdq==lastdq :
        #same idea as the the single quote check
        double=False
    elif firstdq!=lastdq :
        double=True
        
    if double==single:
        #if both single and double quotes are present
        if firstsq<firstdq:
            #then find which quote occurs first
            #in this case single quotes
            return s[firstsq+1:lastsq]
        else:
            #other case is to return by double quote
            return s[firstdq+1:lastdq]
    elif single==True:
        #if only single is true then return single
        return s[firstsq+1:lastsq]
    elif double==True:
        #if only double is true then return double
        return s[firstdq+1:lastdq]
        

    
    
def find_attribute_value(html_tag, att):
    '''Return the value of attribute att (a str) in the str html_tag.  
    Return None if att doesn't occur in html_tag.
    '''
    
    pass
    
    
def first(a, b):
    '''Return the smaller of the two ints a and b, excluding -1. Both a and b
    are >= -1. If exactly one is -1, return the other. If both are -1, return
    -1.
    '''
    if a==-1 & b==-1 :
        return -1
    if a== -1:
        return b
    if b== -1 :
        return a
    if a<b :
        return a
    if b<a :
        return b 

    
def process_page(webpage, ial, list_threshold):
    '''Update the image association list ial with the images found in the 
    text in webpage (a str). int list_threshold is the maximum length of any 
    list in ial.
    '''
      
    pass
    
    
def clean_up(alist, list_threshold):
    '''Return a new association list that is a copy of alist except that any 
    key-value list in alist longer than list_threshold is not included.
    '''

    pass

Initial URL


Initial Description


Initial Title
Com sci Assignment 2

Initial Tags


Initial Language
Python