Data Programming for Learning Discourse Structure

Sequence is used to relate two DUs, one of which happens after the other. Usually at least one of the two is non-linguistic.

NLL (non-linguistic source -> linguistic target)

Rule(s):

def LF_Seq_NL_L_case1(row):
    """Sequence labeling function, NL -> L, case 1.

    Returns 1 only when all three checks pass, evaluated in this order:
    ``has_resources`` on the source text, ``is_head`` on the source turn id
    and span end (looked up against the global ``cands``), and
    ``contains_resource_name`` on the target text. Returns 0 otherwise.

    Author's note: this case might be tagging a lot of results.
    """
    if not has_resources(row.source_text):
        return 0
    if not is_head(cands, row.source_turn_id, row.source_span_end):
        return 0
    if not contains_resource_name(row.target_text):
        return 0
    return 1

def LF_Seq_NL_L_case2(row):
    """Sequence labeling function, NL -> L, case 2.

    Returns 1 when the source text matches at least one of ``XbuiltaA``,
    ``boughtAcard`` or ``willMoveTheRobber`` and the target emitter equals
    the first space-separated token of the source text. Returns 0 otherwise.
    """
    source_matches = (
        XbuiltaA(row.source_text)
        or boughtAcard(row.source_text)
        or willMoveTheRobber(row.source_text)
    )
    if not source_matches:
        return 0

    # The first token of the source text is compared with the target's emitter.
    first_token = row.source_text.split(' ')[0]
    return 1 if row.target_emitter == first_token else 0

def LF_Seq_NL_L_case3(row):
    """Sequence labeling function, NL -> L, case 3.

    Returns 1 when ``traded`` accepts the source text,
    ``stoleAResourceFrom`` rejects it, the first space-separated token of
    the source text equals the target emitter, the target dialogue act is
    ``'Offer'`` and the pair's distance is 1. Returns 0 otherwise.
    """
    if not traded(row.source_text):
        return 0
    if stoleAResourceFrom(row.source_text):
        return 0

    first_token = row.source_text.split(' ')[0]
    if not (first_token == row.target_emitter):
        return 0
    if not (row.target_dialogue_act == 'Offer'):
        return 0
    return 1 if row.distance == 1 else 0

def LF_Seq_NL_L_case4(row):
    """Sequence labeling function, NL -> L, case 4.

    Author's intent: X sat down, then the next move is an EDU by X.

    Returns 1 when ``satDownAtSeat`` accepts the source text, the target
    emitter equals the first space-separated token of the source text, and
    the pair's distance is 1. Returns 0 otherwise.
    """
    if not satDownAtSeat(row.source_text):
        return 0

    first_token = row.source_text.split(' ')[0]
    if not (row.target_emitter == first_token):
        return 0
    return 1 if row.distance == 1 else 0

Application of rules to candidates:

# Apply the NL -> L Sequence rules to every candidate pair, dialogue by dialogue.
# `finals` collects (candidate index, label) tuples: 1 = sequence link,
# 0 = no link from these rules, -1 = reverse-direction candidate of a visited pair.

# 1 -- get dialogues
finals = []
dialogues = cands.dialogue_num.drop_duplicates()

# A target segment is paired with at most this many immediately preceding segments.
MAX_LOOKBACK = 20

# Rules tried, in order, on every NL -> L candidate; the first truthy result wins.
SEQ_NL_L_RULES = (
    LF_Seq_NL_L_case1,
    LF_Seq_NL_L_case2,
    LF_Seq_NL_L_case3,
    LF_Seq_NL_L_case4,
)

for d in tqdm(dialogues):

    # keep track of marked relations -- because we assume that each node can
    # only take part in one sequence relation
    memo = []

    # 2 -- get segment list
    # assumes get_seg_list returns a sequence supporting len() and integer
    # indexing -- TODO confirm
    seg_list = get_seg_list(cands[cands.dialogue_num == d])

    # 3 -- create seg pairs list
    # Every segment after the first is a target, paired with the (up to)
    # MAX_LOOKBACK segments before it, nearest first. Explicit bounds replace
    # the former reliance on a swallowed IndexError at the last segment.
    seg_pairs = [
        (seg_list[src], seg_list[tgt])
        for tgt in range(1, len(seg_list))
        for src in range(tgt - 1, max(tgt - MAX_LOOKBACK, 0) - 1, -1)
    ]

    # 4 -- for each pair, pull row and apply rules
    for src_id, tgt_id in seg_pairs:
        row = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if row.empty:
            continue
        r_index = row.index[0]
        row = row.iloc[0]

        # If the reverse candidate (target -> source) exists, label it -1.
        # NOTE(review): the original comment said backwards links should be 0,
        # but the code has always appended -1; the code's behaviour is kept.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if not backwards.empty:
            b_index = backwards.index[0]
            finals.append((b_index, -1))

        # we only look at non-ling --> ling cases; everything else gets 0
        link = 0
        if row.source_type == 'NonplayerSegment' and row.target_type == 'Segment':
            # try each of the four cases, keeping track of the candidates
            # which have been used in a sequence relation already
            if any(rule(row) for rule in SEQ_NL_L_RULES):
                if row.source_id not in memo and row.target_id not in memo:
                    link = 1
                    memo.extend([row.source_id, row.target_id])

        finals.append((r_index, link))

NLNL (non-linguistic source -> non-linguistic target)

Rule(s):

#NB: the false positives that come from these rules are in part due to the rules not being able to see whether
#a sequence relation has already been added between an EEU node and an intervening EDU node, since such a
#relation would not show up in the NL-NL case

#TODO!! Figure out why we are missing the first eeus in the candidates.

def LF_Seq_NL_NL_case1(row):
    """Sequence labeling function, NL -> NL, case 1.

    Author's intent: link a 'has resources' source to the next EEU.

    Returns 1 when source and target turn ids differ, ``has_resources``
    accepts the source text, and ``is_head`` accepts the source turn id and
    span end (looked up against the global ``cands``). Returns 0 otherwise.
    """
    crosses_turns = row.source_turn_id != row.target_turn_id
    if not crosses_turns:
        return 0
    if not has_resources(row.source_text):
        return 0
    return 1 if is_head(cands, row.source_turn_id, row.source_span_end) else 0

def LF_Seq_NL_NL_case2(row):
    """Sequence labeling function, NL -> NL, case 2.

    Author's intent: target is 'X ended their turn', linked to the most
    recent 'X' EEU.

    Returns 1 when ``endedtheirturn`` accepts the target text, neither
    ``has_resources`` nor ``stoleAResourceFrom`` accepts the source text,
    the first space-separated tokens of target and source text are equal,
    and the pair's distance is at most 3. Returns 0 otherwise.
    """
    if not endedtheirturn(row.target_text):
        return 0
    if has_resources(row.source_text) or stoleAResourceFrom(row.source_text):
        return 0

    target_first = row.target_text.split(' ')[0]
    source_first = row.source_text.split(' ')[0]
    if not (target_first == source_first):
        return 0
    return 1 if row.distance <= 3 else 0

def LF_Seq_NL_NL_case3(row):
    """Sequence labeling function, NL -> NL, case 3.

    Author's intent: cover every mid-game set of moves where each turn is
    one segment and the distance is 1.

    Returns 1 when the pair's distance is 1 and none of ``has_resources``,
    ``youcantmaketrade`` or ``stoleAResourceFrom`` accepts the source text.
    Returns 0 otherwise.
    """
    if not (row.distance == 1):
        return 0

    # Any one of these matches on the source text rules the pair out.
    excluded = (
        has_resources(row.source_text)
        or youcantmaketrade(row.source_text)
        or stoleAResourceFrom(row.source_text)
    )
    return 0 if excluded else 1
   
    
def LF_Seq_NL_NL_case4(row):
    """Sequence labeling function, NL -> NL, case 4.

    Author's intent: beginning-of-game moves, where the target is
    "sat down at" or "game started".

    Returns 1 when the target text is accepted by ``satDownAtSeat``,
    ``joinedTheGame`` or ``gamestarted`` (tried in that order). Returns 0
    otherwise.
    """
    is_opening_move = (
        satDownAtSeat(row.target_text)
        or joinedTheGame(row.target_text)
        or gamestarted(row.target_text)
    )
    return 1 if is_opening_move else 0

Application of rules to candidates:

# Apply the NL -> NL Sequence rules to every candidate pair, dialogue by dialogue.
# `finals` collects (candidate index, label) tuples: 1 = sequence link,
# 0 = no link from these rules, -1 = reverse-direction candidate of a visited pair.

# 1 -- get dialogues
finals = []
dialogues = cands.dialogue_num.drop_duplicates()
# For debugging, restrict the run to a single dialogue, e.g. dialogues = [98].

# A target segment is paired with at most this many immediately preceding segments.
MAX_LOOKBACK = 20

# Rules tried, in order, on every NL -> NL candidate; the first truthy result wins.
# (To debug a single rule, reduce this tuple to that rule alone.)
SEQ_NL_NL_RULES = (
    LF_Seq_NL_NL_case1,
    LF_Seq_NL_NL_case2,
    LF_Seq_NL_NL_case3,
    LF_Seq_NL_NL_case4,
)

for d in tqdm(dialogues):

    # keep track of DUs involved in a seq as source or target -- one DU cannot
    # be source for > 1 seq rel, nor target for > 1 seq rel
    memo_source = []
    memo_target = []

    # 2 -- get segment list
    # assumes get_seg_list returns a sequence supporting len() and integer
    # indexing -- TODO confirm
    seg_list = get_seg_list(cands[cands.dialogue_num == d])

    # 3 -- create seg pairs list
    # Every segment after the first is a target, paired with the (up to)
    # MAX_LOOKBACK segments before it, nearest first. Explicit bounds replace
    # the former reliance on a swallowed IndexError at the last segment.
    seg_pairs = [
        (seg_list[src], seg_list[tgt])
        for tgt in range(1, len(seg_list))
        for src in range(tgt - 1, max(tgt - MAX_LOOKBACK, 0) - 1, -1)
    ]

    # 4 -- for each pair, pull row and apply rules
    for src_id, tgt_id in seg_pairs:
        row = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if row.empty:
            continue
        r_index = row.index[0]
        row = row.iloc[0]

        # If the reverse candidate (target -> source) exists, label it -1.
        # NOTE(review): the original comment said backwards links should be 0,
        # but the code has always appended -1; the code's behaviour is kept.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if not backwards.empty:
            b_index = backwards.index[0]
            finals.append((b_index, -1))

        # we only look at non-ling --> non-ling cases; everything else gets 0
        link = 0
        if row.source_type == 'NonplayerSegment' and row.target_type == 'NonplayerSegment':
            if any(rule(row) for rule in SEQ_NL_NL_RULES):
                # A DU may be the target of one sequence relation and the
                # source of one sequence relation, tracked separately.
                if row.target_id not in memo_target and row.source_id not in memo_source:
                    link = 1
                    memo_target.append(row.target_id)
                    memo_source.append(row.source_id)

        finals.append((r_index, link))

Home page

Sequence LF

NLL (non-linguistic source -> linguistic target)

Rule(s):

Application of rules to candidates:

NLNL (non-linguistic source -> non-linguistic target)

Rule(s):

Application of rules to candidates: