Data Programming for Learning Discourse Structure

Elaboration relates two Elementary Discourse Units (EDUs) whenever the second unit provides more information about the eventuality introduced in the first unit.

LL (linguistic source -> linguistic target)

Rule(s):

def LF_Elab_LL_case1(row):
    """Label a candidate pair from its dialogue-act / surface-act combination.

    Returns 1 when source and target have the same emitter and one of the
    following (source -> target) pairings holds, otherwise 0:

    * source surface act "Assertion" -> target dialogue act "Offer"
    * "Offer" -> "Offer"
    * "Counteroffer" / "Accept" / "Other" -> "Counteroffer" / "Refusal"
    * "Offer" -> target surface act "Question"
    * "Refusal" -> "Refusal" / "Other"
    * "Accept" -> "Accept" / "Other"

    ``row`` is any object exposing the attributes read below (the
    application loop passes one row of ``cands``).
    """
    # The rule only applies when one speaker continues their own turn.
    if row.source_emitter != row.target_emitter:
        return 0

    matched = (
        (row.source_surface_act == "Assertion"
         and row.target_dialogue_act == "Offer")
        or (row.source_dialogue_act == "Offer"
            and row.target_dialogue_act == "Offer")
        or (row.source_dialogue_act in ("Counteroffer", "Accept", "Other")
            and row.target_dialogue_act in ("Counteroffer", "Refusal"))
        or (row.source_dialogue_act == "Offer"
            and row.target_surface_act == "Question")
        or (row.source_dialogue_act == "Refusal"
            and row.target_dialogue_act in ("Refusal", "Other"))
        or (row.source_dialogue_act == "Accept"
            and row.target_dialogue_act in ("Accept", "Other"))
    )
    return 1 if matched else 0

def LF_Elab_LL_case2(row):
    """Label a pair whose source contains an interjection-like cue.

    Returns 1 when source and target have the same emitter, the lower-cased
    source text contains at least one of the cue strings below, and the
    target surface act is "Assertion"; otherwise 0.

    Matching is by plain substring, so e.g. "man" also fires on "many".
    """
    cues = (
        "mmm", "arrrrrgh", "sorry", "well", "man", "yup", "ouch", "lol",
        "oh", "ugh", "no worries", "cool", "fingers crossed", "actually no",
        "whoa", "wow", "yeah", "yep",
    )

    if row.source_emitter != row.target_emitter:
        return 0

    source_lower = row.source_text.lower()
    has_cue = any(cue in source_lower for cue in cues)
    if has_cue and row.target_surface_act == "Assertion":
        return 1
    return 0


def LF_Elab_LL_case3(row):
    """Label a question mentioning a ``money`` term that its speaker follows up.

    Returns 1 when all of the following hold, otherwise 0:

    * source and target have the same emitter;
    * the lower-cased source text contains some element of ``money``
      (a collection of strings defined elsewhere in the file);
    * the source surface act is "Question";
    * the lower-cased target text contains "give", "i'll", "i will" or
      "i have", or the target surface act is "Assertion".
    """
    if row.source_emitter != row.target_emitter:
        return 0

    # Lower-casing stays inside the generator so it is only evaluated when
    # ``money`` actually yields a term.
    if not any(term in row.source_text.lower() for term in money):
        return 0
    if row.source_surface_act != "Question":
        return 0

    target_lower = row.target_text.lower()
    follow_up = any(
        cue in target_lower for cue in ("give", "i'll", "i will", "i have")
    )
    return 1 if (follow_up or row.target_surface_act == "Assertion") else 0

Application of rules to candidates:

#1 -- get dialogues
# `finals` collects (row index in `cands`, label) tuples across all dialogues.
finals = []
dialogues = cands.dialogue_num.drop_duplicates()

for d in tqdm(dialogues):

    # Per-dialogue record of segments already used as the source / target of
    # an accepted elaboration: a segment may not be linked more than once.
    source_memo = []
    target_memo = []

    # 2 -- get segment list
    seg_list = get_seg_list(cands[cands.dialogue_num == d])

    # 3 -- create seg pairs list
    # Every segment from the second one on is a target; it is paired with up
    # to 20 immediately preceding segments, nearest first.
    seg_pairs = []
    for tgt_pos in range(1, len(seg_list)):
        first_src = max(tgt_pos - 20, 0)
        seg_pairs.extend(
            (seg_list[src_pos], seg_list[tgt_pos])
            for src_pos in range(tgt_pos - 1, first_src - 1, -1)
        )

    # 4 -- for each pair, pull row and append rules
    for src_id, tgt_id in seg_pairs:
        forward = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if forward.empty:
            # No candidate row for this pair: nothing to label.
            continue
        r_index = forward.index[0]
        row = forward.iloc[0]

        # If the reversed pair is also a candidate, record it with label -1.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if not backwards.empty:
            finals.append((backwards.index[0], -1))

        # Only linguistic -> linguistic pairs ('Segment' on both sides) at
        # distance 1 that fire at least one LL rule can receive label 1.
        link = 0
        both_linguistic = row.source_type == 'Segment' and row.target_type == 'Segment'
        if both_linguistic and row.distance == 1 and (
            LF_Elab_LL_case1(row) or LF_Elab_LL_case2(row) or LF_Elab_LL_case3(row)
        ):
            if row.source_id not in source_memo and row.target_id not in target_memo:
                source_memo.append(row.source_id)
                target_memo.append(row.target_id)
                link = 1

        finals.append((r_index, link))

NLNL (non-linguistic source -> non-linguistic target)

Rule(s):

def LF_Elab_NLNL_case1(row):
    """Return 1 when the source text passes ``madeanoffertotrade`` and the
    target text passes ``fromX``; otherwise 0.

    Both predicates are defined elsewhere in the file; ``fromX`` is only
    consulted when the source check succeeds.
    """
    if not madeanoffertotrade(row.source_text):
        return 0
    return 1 if fromX(row.target_text) else 0

def LF_Elab_NLNL_case2(row):
    """Return 1 when the source passes ``hasNpoint`` and ``is_head`` and the
    target text passes ``hasgift``; otherwise 0.

    ``is_head`` is called with the module-level ``cands`` plus the source
    turn id and span end. All helpers are defined elsewhere in the file and
    are evaluated left to right, stopping at the first failing check.
    """
    source_ok = (
        hasNpoint(row.source_text)
        and is_head(cands, row.source_turn_id, row.source_span_end)
    )
    if source_ok and hasgift(row.target_text):
        return 1
    return 0

def LF_Elab_NLNL_case3(row):
    """Return 1 when the source text passes ``wonthegame`` and the target
    passes ``hasNpoint`` and ``is_head``; otherwise 0.

    ``is_head`` is called with the module-level ``cands``, the target turn
    id and span end, and the literal mode argument "target". All helpers are
    defined elsewhere in the file and are evaluated left to right, stopping
    at the first failing check.
    """
    if not wonthegame(row.source_text):
        return 0
    if not hasNpoint(row.target_text):
        return 0
    if is_head(cands, row.target_turn_id, row.target_span_end, "target"):
        return 1
    return 0

Application of rules to candidates:

#1 -- get dialogues
# `finals` collects (row index in `cands`, label) tuples across all dialogues.
finals = []
dialogues = cands.dialogue_num.drop_duplicates()
# Debug aid: restrict the run to a single dialogue.
#dialogues = [563]

for d in tqdm(dialogues):

    # Per-dialogue record of segments already used as the target of an
    # accepted elaboration. `source_memo` is reset here but never consulted
    # in this loop.
    source_memo = []
    target_memo = []

    # 2 -- get segment list
    seg_list = get_seg_list(cands[cands.dialogue_num == d])

    # 3 -- create seg pairs list
    # Every segment from the second one on is a target; it is paired with up
    # to 20 immediately preceding segments, nearest first.
    seg_pairs = []
    for tgt_pos in range(1, len(seg_list)):
        first_src = max(tgt_pos - 20, 0)
        seg_pairs.extend(
            (seg_list[src_pos], seg_list[tgt_pos])
            for src_pos in range(tgt_pos - 1, first_src - 1, -1)
        )

    # 4 -- for each pair, pull row and append rules
    for src_id, tgt_id in seg_pairs:
        forward = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if forward.empty:
            # No candidate row for this pair: nothing to label.
            continue
        r_index = forward.index[0]
        row = forward.iloc[0]

        # If the reversed pair is also a candidate, record it with label -1.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if not backwards.empty:
            finals.append((backwards.index[0], -1))

        # Only non-linguistic -> non-linguistic pairs ('NonplayerSegment' on
        # both sides) that fire at least one NLNL rule can receive label 1,
        # and each target segment is linked at most once.
        link = 0
        both_nonling = (
            row.source_type == 'NonplayerSegment'
            and row.target_type == 'NonplayerSegment'
        )
        if both_nonling and (
            LF_Elab_NLNL_case1(row) or LF_Elab_NLNL_case2(row) or LF_Elab_NLNL_case3(row)
        ):
            if row.target_id not in target_memo:
                target_memo.append(row.target_id)
                link = 1

        finals.append((r_index, link))

Home page

Elaboration LF

LL (linguistic source -> linguistic target)

Rule(s):

Application of rules to candidates:

NLNL (non-linguistic source -> non-linguistic target)

Rule(s):

Application of rules to candidates: