U
    >vf5e                     @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z dd Zdd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejdddgd d! Zd"d# Zd$d% Zd&d' Zd(d) Zejd*d+d,d-gfd.d d-gfgd/d0 Z d1d2 Z!d3d4 Z"ejd5g d6fg d7fg d8fg d9fd:d;d<gd6fd:d;d<gd9fd:d;d<gd8fd:d;d<gd9fgd=d> Z#d?d@ Z$dAdB Z%dCdD Z&dEdF Z'dGdH Z(dS )I    )datetimeN)	DataFrameIndex
MultiIndexSeries_testingc              	   C   sB   t dtjdg| d}tjtdd |jjdd d W 5 Q R X d S )NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)r   npnanpytestraises
ValueErrorstrextract)any_string_dtypevalues r   E/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raises   s    r   c                 C   s   t dtjdg| d}tdtjtjg| d}|jd}t|| |jjddd}t|| tddgtjtjgtjtjgg| d}|jjd	d
d}t|| d S )Nr   r	   r
   BAD__z.*(BAD[_]+).*Tr   BADr   F)r   r   r   r   r   r   tmassert_frame_equal)r   sexpectedresultr   r   r   test_extract_expand_kwarg   s     r$   c               
   C   s   t dtjddt dd ddg	} | jjddd	}tjtjg}td
dg|d
dg||||||g	}t	|| | jjddd	}t d
tjd
tjtjtjtjtjtjg	}t
|| d S )NaBAD_BAD	BAD_b_BADTr	             @r   Fr   BAD_r   z.*(BAD[_]+).*BAD)r   r   r   r   todayr   r   r   r   r    assert_series_equal)serr#   err"   r   r   r   &test_extract_expand_False_mixed_object(   s    ""r.   c               	   C   sD   t dddddg} d}tjt|d | jjdd	d
 W 5 Q R X d S )NA1A2A3ZA4ZB5z,only one regex group is supported with Indexr   ([AB])([123])Fr   )r   r   r   r   r   r   )idxmsgr   r   r    test_extract_expand_index_raises;   s    r5   c              	   C   sn   | dddg|d}d}t jt|d |jjddd	 W 5 Q R X t jt|d |jjd
dd	 W 5 Q R X d S )Nr/   B2C3r
   "pattern contains no capture groupsr   
[ABC][123]Fr   
(?:[AB]).*r   r   r   r   r   index_or_seriesr   s_or_idxr4   r   r   r   ,test_extract_expand_no_capture_groups_raisesE   s    r?   c                 C   sX   | ddg|d}|j jddd}| ddgd|d	}| tkrHt|| nt|| d S )
Nr/   r0   r
   (?P<uno>A)\dFr   Aunonamer   )r   r   r   r   r+   Zassert_index_equalr=   r   r>   r#   r"   r   r   r   (test_extract_expand_single_capture_groupR   s    rF   c                 C   s  t dddg| d}|jjddd}t tjtjtjg| d}t|| |jjddd}ttjtjgtjtjgtjtjgg| d}t|| |jjd	dd}t d
dtjg| d}t|| |jjddd}td
dgddgtjtjgg| d}t|| |jjddd}t d
dtjgd| d}t|| |jjddd}td
dgddgtjtjggddg| d}t|| |jjddd}td
dgddgtjtjggddg| d}t|| |jjddd}t d
dtjg| d}t|| t dddg| d}|jjddd}td
dgddgtjtjgg| d}t|| t dddg| d}|jjddd}td
dgddgtjdggddg| d}t|| t dddg| d}|jjddd}td
dgddgdtjggddg| d}t|| d S ) Nr/   r6   r7   r
   (_)Fr   (_)(_)([AB])[123]rA   Br2   12(?P<letter>[AB])letterrC   !(?P<letter>[AB])(?P<number>[123])numbercolumnsr   ([AB])(?P<number>[123])r   ([AB])(?:[123])A11B22C33([AB])([123])(?:[123])3"(?P<letter>[AB])?(?P<number>[123])C#(?P<letter>[ABC])(?P<number>[123])?)	r   r   r   r   r   r   r+   r   r    r   r!   r#   r"   r   r   r   "test_extract_expand_capture_groups^   s|        r^   c                 C   s   dddg}t | dkr td t | t |k r<| d} q | d t | } t|| |d}|jjdd	d
}tddtjg| |d}t	
|| |jjdd	d
}tddgddgdtjggddg| |d}t	|| d S )Nr/   r6   r[   r   zTest requires len(index) > 0   indexr   (\d)Fr   rK   rL   (?P<letter>\D)(?P<number>\d)?rA   rJ   rN   rP   rR   ra   r   )lenr   skiprepeatr   r   r   r   r   r   r+   r   r    )ra   r   datar,   r#   r"   r   r   r   (test_extract_expand_capture_groups_index   s$    

ri   c                 C   sH   t dddgd| d}|jjddd}t d	d
dgd| d}t|| d S )Na3b3c2ZbobrC   z(?P<sue>[a-z])Fr   abcZsue)r   r   r   r   r+   r]   r   r   r   ,test_extract_single_series_name_is_preserved   s    rp   c                 C   sZ   t dtjdg| d}|jjddd}tddgtjtjgtjtjgg| d}t|| d S )	Nr   r	   r
   r   Tr   r   r   )r   r   r   r   r   r   r   r    r]   r   r   r   test_extract_expand_True   s     rq   c               
   C   sn   t jt jg} tdt jddt dd ddg	}|jjddd}td	d
g| d	d
g| | | | | | g	}t	|| d S )Nr%   r&   Tr	   r'   r(   r   r   r)   r   )
r   r   r   r   r*   r   r   r   r   r    )r-   mixedr#   r"   r   r   r   %test_extract_expand_True_mixed_object   s     "rs   c              	   C   sn   | dddg|d}d}t jt|d |jjddd	 W 5 Q R X t jt|d |jjd
dd	 W 5 Q R X d S )Nr/   r6   r7   r
   r8   r   r9   Tr   r:   r;   r<   r   r   r   4test_extract_expand_True_single_capture_group_raises   s    rt   c                 C   sD   | ddg|d}|j jddd}tdddgi|d}t|| d S )	Nr/   r0   r
   r@   Tr   rB   rA   )r   r   r   r   r    rE   r   r   r   -test_extract_expand_True_single_capture_group  s    ru   rD   series_namec                 C   s  t dddg| |d}|jjddd}ttjtjtjg|d}t|| |jjd	dd}ttjtjgtjtjgtjtjgg|d}t|| |jjd
dd}tddtjg|d}t|| |jjddd}tddgddgtjtjgg|d}t|| |jjddd}tdddtjgi|d}t|| |jjddd}tddgddgtjtjggddg|d}t|| |jjddd}tddgddgtjtjggddg|d}t|| |jjddd}tddtjg|d}t|| d S )Nr/   r6   r7   rC   rG   Tr   r
   rH   rI   rA   rJ   r2   rK   rL   rM   rN   rO   rP   rQ   rS   r   rT   r   r   r   r   r   r   r   r    )rD   r   r!   r#   r"   r   r   r   test_extract_series
  sN       rx   c                 C   s   t dddg| d}|jjddd}tdd	gd
dgtjtjgg| d}t|| t dddg| d}|jjddd}tdd	gd
dgtjdggddg| d}t|| t dddg| d}|jjddd}tdd	gd
dgdtjggddg| d}t|| d S )NrU   rV   rW   r
   rX   Tr   rA   rK   rJ   rL   r/   r6   rY   rZ   rN   rP   rQ   r[   r\   rw   r]   r   r   r   test_extract_optional_groupsD  s.     ry   c                 C   s   dddg}t | t |k r$td | d t | } t|| |d}|jjddd}td	d
tjg| |d}t	
|| |jjddd}tdd	gdd
gdtjggddg| |d}t	
|| d S )Nr/   r6   r[   zIndex too shortr`   rb   Tr   rK   rL   rc   rA   rJ   rN   rP   rd   )re   r   rf   r   r   r   r   r   r   r   r    )ra   r   rh   r!   r#   r"   r   r   r   +test_extract_dataframe_capture_groups_indexb  s     

rz   c                 C   sJ   t dddgd| d}|jjddd}td	d
ddgi| d}t|| d S )Nrj   rk   rl   rv   rC   (?P<letter>[a-z])Tr   rN   rm   rn   ro   r
   )r   r   r   r   r   r    r]   r   r   r   'test_extract_single_group_returns_frame|  s    r|   c           
   	   C   sN  dddddt jdg}ddd	d
ddddg}d}dddg}t|| d}tjddddddddgdd}t|||| d}|jj|tj	d}t
|| tdd d!d"d#d$d%g}	t||	| d&}tjd'd(d)d*d+d,d-d.gd/d}t|||| d}|jj|tj	d}t
|| t||	| d&}d0|j_d1|_t|||| d}|jj|tj	d}t
|| d S )2Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.com )ZdaveZgooglecom)Ztdhock5gmailr~   )Zmaudelaperrierer   r~   )Zrobr   r~   )Zstever   r~   )rm   rn   r~   )ro   dr~   )efr~   zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    userdomaintldr
   r   r   r'   r   r_   r   )   r   )r   r'   )   r   )r   r'   )r   r_   Nr   names)flags)singleDave)r   Toby)r   Maude)multiplerobAndSteve)r   abcdef)nonemissing)r   emptyr`   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r'   )r   r   r   )r   r   r'   )r   r   r_   )NNr   )matchesdescription)r   r   r   )r   r   r   r   from_tuplesr   r   
extractallreVERBOSEr   r    ra   r   )
r   rh   Zexpected_tuplespatZexpected_columnsr!   expected_indexr"   r#   mir   r   r   test_extractall  s    


   
      r   zpat,expected_namesrZ   rN   rP   z([AB])?(?P<number>[123])c                 C   s`   t dddg|d}|j| }tdtjdftjdfgtjdd	d
gdd||d}t	|| d S )Nr}   r/   Z32r
   )rA   rK   rY   rL   r   r   r_   r'   r   r   )ra   rR   r   )
r   r   r   r   r   r   r   r   r   r    )r   Zexpected_namesr   r!   r#   r"   r   r   r   test_extractall_column_names  s    
r   c                 C   s   t dddgd| d}tjdddd	gd
d}|jd}tdddddgi|| d}t|| |jd}tddddg|| d}t|| d S )Nrj   rk   d4c2rv   rC   r   r   r   r   r   r   r{   rN   rm   rn   r   ro   r`   ([a-z]))r   r   r   r   r   r   r   r    )r   r!   r   r#   r"   r   r   r   test_extractall_single_group  s&    
   
  r   c                 C   sX   t dddgd| d}|jd}tddd	d
gtjddddgdd| d}t|| d S )NZab3Zabc3Zd4cd2rv   rC   z([a-z]+)ababcr   Zcdr   r   r   r   r   r   r`   )r   r   r   r   r   r   r   r    r]   r   r   r   ,test_extractall_single_group_with_quantifier  s    

 r   zdata, names)N)i1)Ni2)r   r   rj   rk   r   c           	         sB  t |  t |dkr*tt |d d}n$ fddt D }tj||d}t| d||d}tjg |d	 d}|jd
}tdg||d}t	
|| |jd}tddg||d}t	
|| |jd}tdg||d}t	
|| |jd}tddg||d}t	
|| |jd}tddg||d}t	
|| d S )Nr'   r   rD   c                 3   s    | ]}t |g d   V  qdS )r'   N)tuple).0inr   r   	<genexpr>)  s     z-test_extractall_no_matches.<locals>.<genexpr>r   rv   rD   ra   r   r   z(z)rd   z(z)(z)z(?P<first>z)firstz(?P<first>z)(?P<second>z)secondz(z)(?P<second>z))re   r   ranger   r   r   r   r   r   r   r    )	rh   r   r   ra   Ztuplesr!   r   r#   r"   r   r   r   test_extractall_no_matches  sD          r   c                 C   s
  t dddgd| d}|jd}tddd	dgitjd
ddgd dgd| d}t|| | dkrtdddgtdddgddfD ]}|jd}t|| qt dddgdtdddgdd| d}|jd}tddd	dgitjdddgddgd| d}t|| d S )NZa1a2Zb1Zc1ZxxxrC   z[ab](?P<digit>\d)digitrK   rL   r   )r   r'   r   r   r   r`   objectr   Zs_nameXXyyzzZidx_namer   )r   r   )r   r'   )r   r   )	r   r   r   r   r   r   r   r    r   )r   r!   r#   r"   r3   r   r   r   test_extractall_stringindexN  s<     r   c              	   C   s>   t dddgd| d}tjtdd |jd W 5 Q R X d S )	Nrj   rk   r   rv   rC   zno capture groupsr   z[a-z])r   r   r   r   r   r   )r   r!   r   r   r   (test_extractall_no_capture_groups_raisess  s    r   c                  C   s   t dddgdddgdd} | jjjd	d
d}tdddg}t|| | jjjdd
d}dddg}t|ddgd}t|| d S )Nrj   rk   r   r1   ZB3ZD4rv   )ra   rD   z([A-Z])Tr   rA   rJ   Dz!(?P<letter>[A-Z])(?P<digit>[0-9]))rA   rY   )rJ   rY   )r   4rN   r   )rR   )r   ra   r   r   r   r   r    )r!   rr   Ze_listr   r   r   !test_extract_index_one_two_groups{  s    
r   c                 C   s   t dddgd| d}d}|jj|dd}|j|}|jd	d
d}t|| d}|jj|dd}|j|}|jd	d
d}t|| d}|jj|dd}	|j|}|jd	d
d}t|	| d}
|jj|
dd}|j|
}|jd	d
d}t|| d S )Nrj   rk   rl   rv   rC   ([a-z])([0-9])Tr   r   r   level!(?P<letter>[a-z])(?P<digit>[0-9])(?P<group_name>[a-z])r   )r   r   r   r   xsr   r    )r   r!   pattern_two_nonameextract_two_nonameZhas_multi_indexZno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonamer   r   r   test_extractall_same_as_extract  s*    r   c                 C   s  t jdddgdd}tdddg|d	| d
}d}|jj|dd}|j|}|jddd}t|| d}|jj|dd}|j|}|jddd}t|| d}	|jj|	dd}
|j|	}|jddd}t|
| d}|jj|dd}|j|}|jddd}t|| d S )N)rA   r   )rJ   r   )r[   third)ZcapitalZordinalr   rj   rk   rl   rv   )ra   rD   r   r   Tr   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r    )r   r   r!   r   r   Zhas_match_indexZno_match_indexr   r   r   r   r   r   r   r   r   -test_extractall_same_as_extract_subject_index  s2    r   ))r   r   Znumpyr   r   Zpandasr   r   r   r   r   r   r   r$   r.   r5   r?   rF   r^   ri   rp   rq   rs   rt   ru   markZparametrizerx   ry   rz   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sd   	
S
9
U

	

+%